Source code for diffsptk.modules.pnorm

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import inspect

import torch
from torch import nn

from ..typing import Callable, Precomputed
from ..utils.private import filter_values, get_layer
from .base import BaseFunctionalModule
from .c2acr import CepstrumToAutocorrelation
from .freqt import FrequencyTransform


class MelCepstrumPowerNormalization(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/pnorm.html>`_
    for details.

    Parameters
    ----------
    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    ir_length : int >= 1
        The length of the impulse response.

    device : torch.device or None
        The device of this module.

    dtype : torch.dtype or None
        The data type of this module.

    """

    def __init__(
        self,
        cep_order: int,
        alpha: float = 0,
        ir_length: int = 128,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()

        _, layers, _ = self._precompute(**filter_values(locals()))
        self.layers = nn.ModuleList(layers)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Perform mel-cepstrum power normalization.

        Parameters
        ----------
        x : Tensor [shape=(..., M+1)]
            The input mel-cepstrum.

        Returns
        -------
        out : Tensor [shape=(..., M+2)]
            The log power and power-normalized mel-cepstrum.

        Examples
        --------
        >>> import diffsptk
        >>> pnorm = diffsptk.MelCepstrumPowerNormalization(3, alpha=0.1)
        >>> x = diffsptk.ramp(1, 4)
        >>> y = pnorm(x)
        >>> y
        tensor([16.5884, -7.2942,  2.0000,  3.0000,  4.0000])

        """
        return self._forward(x, *self.layers)
    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        _, layers, _ = MelCepstrumPowerNormalization._precompute(
            x.size(-1) - 1, *args, **kwargs, device=x.device, dtype=x.dtype
        )
        return MelCepstrumPowerNormalization._forward(x, *layers)

    @staticmethod
    def _takes_input_size() -> bool:
        return True

    @staticmethod
    def _check() -> None:
        pass

    @staticmethod
    def _precompute(
        cep_order: int,
        alpha: float,
        ir_length: int,
        device: torch.device | None,
        dtype: torch.dtype | None,
    ) -> Precomputed:
        MelCepstrumPowerNormalization._check()
        # Instantiate stateful layers unless called via the functional interface.
        module = inspect.stack()[1].function != "_func"
        # Unwarp the mel-cepstrum to a plain cepstrum of length ir_length
        # (note the negated warping factor).
        freqt = get_layer(
            module,
            FrequencyTransform,
            dict(
                in_order=cep_order,
                out_order=ir_length - 1,
                alpha=-alpha,
                device=device,
                dtype=dtype,
            ),
        )
        # The zeroth autocorrelation coefficient gives the power of the
        # corresponding impulse response.
        c2acr = get_layer(
            module,
            CepstrumToAutocorrelation,
            dict(
                cep_order=ir_length - 1,
                acr_order=0,
                n_fft=ir_length,
            ),
        )
        return None, (freqt, c2acr), None

    @staticmethod
    def _forward(x: torch.Tensor, freqt: Callable, c2acr: Callable) -> torch.Tensor:
        x0, x1 = torch.split(x, [1, x.size(-1) - 1], dim=-1)
        # Log power: P = log r(0), where r(0) is the zeroth autocorrelation
        # of the impulse response recovered from the mel-cepstrum.
        P = torch.log(c2acr(freqt(x)))
        # Output [P, c(0) - P / 2, c(1), ..., c(M)].
        y = torch.cat((P, x0 - 0.5 * P, x1), dim=-1)
        return y
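# ------------------------------------------------------------------------ #
# A minimal usage sketch of the class above. This is a hedged example, not
# part of the module source; it assumes the diffsptk package re-exports
# MelCepstrumPowerNormalization and the ramp helper at the top level, as in
# the docstring example.
# ------------------------------------------------------------------------ #
#
# import diffsptk
#
# # 3rd-order mel-cepstrum (M = 3), warping factor alpha = 0.1.
# pnorm = diffsptk.MelCepstrumPowerNormalization(3, alpha=0.1)
#
# # Input mel-cepstrum c = [1, 2, 3, 4], shape (M+1,).
# x = diffsptk.ramp(1, 4)
#
# # Output has shape (M+2,): [P, c(0) - P / 2, c(1), ..., c(M)], where P is
# # the log power. Here P = 16.5884, so the second entry is
# # 1 - 16.5884 / 2 = -7.2942; the higher coefficients pass through unchanged.
# y = pnorm(x)
# print(y)  # tensor([16.5884, -7.2942,  2.0000,  3.0000,  4.0000])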