# Source code for diffsptk.modules.medfilt
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #
import torch
import torch.nn.functional as F
from ..typing import Precomputed
from ..utils.private import filter_values
from .base import BaseFunctionalModule
# [docs]
class MedianFilter(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/medfilt.html>`_
    for details.

    The filter slides a window of ``filter_length`` frames along the time axis
    and outputs the median of each window. Window positions that fall outside
    the sequence are filled with NaN and ignored by the median, so the output
    has the same number of frames as the input.

    Parameters
    ----------
    filter_length : int > 0
        The length of the median filter, :math:`L`.

    across_features : bool
        If True, apply the filter across the feature dimension.

    magic_number : float or None
        The magic number representing unvoiced frames.

    """

    def __init__(
        self,
        filter_length: int,
        across_features: bool = False,
        magic_number: float | None = None,
    ) -> None:
        super().__init__()

        # NOTE(review): locals() is snapshotted on the next line and passed
        # through filter_values; presumably it must reduce to exactly the
        # keyword arguments of _precompute, so do not introduce additional
        # local variables above this statement.
        self.values = self._precompute(**filter_values(locals()))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply median filtering to the input sequence.

        Parameters
        ----------
        x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            The input sequence.

        Returns
        -------
        out : Tensor [shape=(B, N, D) or (B, N) or (N, D) or (N,)]
            The filtered sequence.

        Examples
        --------
        >>> import torch
        >>> import diffsptk
        >>> medfilt = diffsptk.MedianFilter(3)
        >>> x = torch.tensor([0, 2, -2, 7, 4, 8]).float()
        >>> y = medfilt(x)
        >>> y
        tensor([1., 0., 2., 4., 7., 6.])

        """
        return self._forward(x, *self.values)

    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        """Functional entry point: precompute from the arguments, then filter.

        ``args`` and ``kwargs`` are forwarded unchanged to :meth:`_precompute`.
        """
        values = MedianFilter._precompute(*args, **kwargs)
        return MedianFilter._forward(x, *values)

    @staticmethod
    def _takes_input_size() -> bool:
        """Return False.

        NOTE(review): presumably a hook read by the base class to decide
        whether an input size is passed to the precomputation; confirm against
        ``BaseFunctionalModule``.
        """
        return False

    @staticmethod
    def _check(filter_length: int) -> None:
        """Validate the filter length.

        Raises
        ------
        ValueError
            If ``filter_length`` is not positive.

        """
        if filter_length <= 0:
            raise ValueError("filter_length must be positive.")

    @staticmethod
    def _precompute(
        filter_length: int, across_features: bool, magic_number: float | None
    ) -> Precomputed:
        """Validate the arguments and derive the padding used by `_forward`.

        Returns
        -------
        out : tuple
            ``(filter_length, padding, across_features, magic_number)`` where
            ``padding`` is a 4-tuple in the ``torch.nn.functional.pad`` layout
            (last dimension first).

        """
        MedianFilter._check(filter_length)

        # Left and right padding always sum to L - 1, so sliding a window of
        # length L with step 1 yields exactly one output per input frame.
        # For even L the extra frame goes to the left side.
        if filter_length % 2 == 1:
            padding = ((filter_length - 1) // 2, (filter_length - 1) // 2)
        else:
            padding = (filter_length // 2, (filter_length - 2) // 2)
        padding = (0, 0) + padding  # No padding for feature dimension

        return (filter_length, padding, across_features, magic_number)

    @staticmethod
    def _forward(
        x: torch.Tensor,
        filter_length: int,
        padding: tuple[int, int, int, int],
        across_features: bool,
        magic_number: float | None,
    ) -> torch.Tensor:
        """Median-filter ``x`` along the time axis.

        Parameters
        ----------
        x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            The input sequence.

        filter_length : int > 0
            The window length, :math:`L`.

        padding : tuple[int, int, int, int]
            Padding as produced by :meth:`_precompute`.

        across_features : bool
            If True, each window also spans the whole feature dimension.

        magic_number : float or None
            Value excluded from the median; see the notes in the body.

        Returns
        -------
        out : Tensor [shape=(B, N, D) or (B, N) or (N, D) or (N,)]
            The filtered sequence.

        Raises
        ------
        ValueError
            If ``x`` is not a 1D, 2D, or 3D tensor.

        """
        d = x.dim()
        if d == 1:
            x = x.reshape(1, -1, 1)
        elif d == 2:
            x = x.unsqueeze(0)
        if x.dim() != 3:
            raise ValueError("Input must be 1D, 2D, or 3D tensor.")

        def sliding_windows(t: torch.Tensor) -> torch.Tensor:
            # (B, N, D) -> (B, N, D, L), or (B, N, D * L) if across_features.
            # Out-of-range positions are NaN so that the nan-aware reductions
            # below skip them.
            t = F.pad(t, padding, value=float("nan"))
            t = t.unfold(1, filter_length, 1)
            if across_features:
                t = t.flatten(start_dim=-2)
            return t

        if magic_number is not None:
            # Hide magic entries from the median by turning them into NaN.
            mask = x == magic_number
            x = x.masked_fill(mask, float("nan"))

        y = sliding_windows(x).nanquantile(0.5, dim=-1)

        if magic_number is not None:
            # Count magic and non-magic entries per window (padding stays NaN
            # and is therefore counted as neither). A window emits the magic
            # number only when magic entries strictly outnumber the others;
            # on a tie the median of the non-magic entries is kept.
            m = sliding_windows(mask.float())
            magic_count = m.nansum(dim=-1)
            valid_count = (1 - m).nansum(dim=-1)
            is_magic_dominant = magic_count > valid_count
            y = torch.where(is_magic_dominant, torch.full_like(y, magic_number), y)

        # Restore the rank of the original input.
        if d == 1:
            # reshape (not view) so the result does not depend on the memory
            # layout returned by nanquantile / where.
            y = y.reshape(-1)
        elif d == 2:
            y = y.squeeze(0)
        return y