# Source code for diffsptk.modules.medfilt

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import torch
import torch.nn.functional as F

from ..typing import Precomputed
from ..utils.private import filter_values
from .base import BaseFunctionalModule


class MedianFilter(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/medfilt.html>`_
    for details.

    The filter slides a window of length :math:`L` along the time axis and
    returns the median of each window. Window positions that fall outside the
    sequence are padded with NaN and ignored when the median is taken.

    Parameters
    ----------
    filter_length : int > 0
        The length of the median filter, :math:`L`.

    across_features : bool
        If True, apply the filter across the feature dimension.

    magic_number : float or None
        The magic number representing unvoiced frames.

    """

    def __init__(
        self,
        filter_length: int,
        across_features: bool = False,
        magic_number: float | None = None,
    ) -> None:
        super().__init__()

        # NOTE: locals() is forwarded as-is, so no extra local variable may be
        # introduced before this line. Presumably filter_values() drops the
        # non-argument entries such as `self` -- confirm against its definition.
        self.values = self._precompute(**filter_values(locals()))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply median filtering to the input sequence.

        Parameters
        ----------
        x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            The input sequence.

        Returns
        -------
        out : Tensor [shape=(B, N, D) or (B, N) or (N, D) or (N,)]
            The filtered sequence.

        Examples
        --------
        >>> import torch
        >>> import diffsptk
        >>> medfilt = diffsptk.MedianFilter(3)
        >>> x = torch.tensor([0, 2, -2, 7, 4, 8]).float()
        >>> y = medfilt(x)
        >>> y
        tensor([1., 0., 2., 4., 7., 6.])

        """
        return self._forward(x, *self.values)

    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        # Functional entry point: precompute from the raw arguments, then filter.
        return MedianFilter._forward(x, *MedianFilter._precompute(*args, **kwargs))

    @staticmethod
    def _takes_input_size() -> bool:
        return False

    @staticmethod
    def _check(filter_length: int) -> None:
        # Reject non-positive window lengths.
        if filter_length <= 0:
            raise ValueError("filter_length must be positive.")

    @staticmethod
    def _precompute(
        filter_length: int, across_features: bool, magic_number: float | None
    ) -> Precomputed:
        MedianFilter._check(filter_length)

        # The padded sequence must yield exactly N windows, so L - 1 samples are
        # added in total. An even L puts the extra sample on the left side.
        left = filter_length // 2
        right = (filter_length - 1) // 2

        # F.pad consumes pairs starting from the last dimension: the leading
        # (0, 0) leaves the feature dimension untouched.
        padding = (0, 0, left, right)
        return (filter_length, padding, across_features, magic_number)

    @staticmethod
    def _forward(
        x: torch.Tensor,
        filter_length: int,
        padding: tuple[int, int],
        across_features: bool,
        magic_number: float | None,
    ) -> torch.Tensor:
        # NOTE: `padding` as built by _precompute holds four entries
        # (feature-left, feature-right, time-left, time-right).
        nan = float("nan")

        # Bring the input to the canonical (B, N, D) layout.
        ndim = x.dim()
        if ndim == 1:
            x = x.reshape(1, -1, 1)
        elif ndim == 2:
            x = x.unsqueeze(0)
        elif ndim != 3:
            raise ValueError("Input must be 1D, 2D, or 3D tensor.")

        def sliding_windows(t: torch.Tensor) -> torch.Tensor:
            # NaN-pad along time and expose every length-L window on a new last
            # axis; optionally merge the feature axis into the window axis.
            framed = F.pad(t, padding, value=nan).unfold(1, filter_length, 1)
            return framed.flatten(start_dim=-2) if across_features else framed

        use_magic = magic_number is not None
        if use_magic:
            # Hide magic-number samples from the median by turning them into NaN.
            is_magic = x == magic_number
            x = x.masked_fill(is_magic, nan)

        y = sliding_windows(x).nanquantile(0.5, dim=-1)

        if use_magic:
            # Count magic vs. valid samples per window; padded positions are NaN
            # in both terms and therefore skipped by nansum.
            hits = sliding_windows(is_magic.float())
            n_magic = hits.nansum(dim=-1)
            n_valid = (1 - hits).nansum(dim=-1)
            # Where magic samples outnumber valid ones, output the magic number.
            y = torch.where(n_magic > n_valid, torch.full_like(y, magic_number), y)

        # Undo the canonicalization applied to 1D / 2D inputs.
        if ndim == 1:
            return y.view(-1)
        if ndim == 2:
            return y.squeeze(0)
        return y