# Source code for diffsptk.modules.mgc2sp

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import inspect
import math

import torch
from torch import nn

from ..typing import Callable, Precomputed
from ..utils.private import check_size, filter_values, get_layer
from .base import BaseFunctionalModule
from .mgc2mgc import MelGeneralizedCepstrumToMelGeneralizedCepstrum


class MelGeneralizedCepstrumToSpectrum(BaseFunctionalModule):
    """Convert mel-generalized cepstrum to spectrum.

    See `this page <https://sp-nitech.github.io/sptk/latest/main/mgc2sp.html>`_
    for details.

    Parameters
    ----------
    cep_order : int >= 0
        The order of the mel-cepstrum, :math:`M`.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    gamma : float in [-1, 1]
        The gamma parameter, :math:`\\gamma`.

    norm : bool
        If True, the input is assumed to be normalized.

    mul : bool
        If True, the input is assumed to be gamma-multiplied.

    n_fft : int >> L
        The number of FFT bins handed to the internal cepstrum conversion.
        Accurate conversion requires a large value.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power', \
                  'cycle', 'radian', 'degree', 'complex']
        The output format. The integers 0 to 6 are accepted as aliases of the
        first seven names, in the order listed.

    device : torch.device or None
        The device of this module.

    dtype : torch.dtype or None
        The data type of this module.

    """

    def __init__(
        self,
        cep_order: int,
        fft_length: int,
        *,
        alpha: float = 0,
        gamma: float = 0,
        norm: bool = False,
        mul: bool = False,
        n_fft: int = 512,
        out_format: str | int = "power",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()

        # Expected size of the last input axis; verified in forward().
        self.in_dim = cep_order + 1

        # locals() snapshots every name bound so far and forwards it (after
        # filter_values, defined elsewhere -- presumably dropping entries such
        # as `self`; confirm) as keyword arguments, so do not bind any extra
        # local above this line. _precompute must also be called directly from
        # here because it inspects the name of its immediate caller.
        self.values, submodules, _ = self._precompute(**filter_values(locals()))
        self.layers = nn.ModuleList(submodules)

    def forward(self, mc: torch.Tensor) -> torch.Tensor:
        """Convert mel-cepstrum to spectrum.

        Parameters
        ----------
        mc : Tensor [shape=(..., M+1)]
            Mel-cepstrum.

        Returns
        -------
        out : Tensor [shape=(..., L/2+1)]
            Spectrum.

        Examples
        --------
        >>> x = diffsptk.ramp(19)
        >>> stft = diffsptk.STFT(frame_length=10, frame_period=10, fft_length=16)
        >>> mcep = diffsptk.MelCepstralAnalysis(3, 16, 0.1, n_iter=1)
        >>> mc = mcep(stft(x))
        >>> mc
        tensor([[-0.8851,  0.7917, -0.1737,  0.0175],
                [-0.3522,  4.4222, -1.0882, -0.0511]])
        >>> mc2sp = diffsptk.MelGeneralizedCepstrumToSpectrum(3, 8, 0.1)
        >>> sp = mc2sp(mc)
        >>> sp
        tensor([[6.0634e-01, 4.6702e-01, 1.7489e-01, 4.4821e-02, 2.3869e-02],
                [3.5677e+02, 1.9435e+02, 6.0078e-01, 2.4278e-04, 8.8537e-06]])

        """
        n_coefficients = mc.size(-1)
        check_size(n_coefficients, self.in_dim, "dimension of cepstrum")
        return self._forward(mc, *self.values, *self.layers)

    @staticmethod
    def _func(mc: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        """Functional entry point: precompute from the input, then convert.

        The cepstral order is derived from the last axis of ``mc`` and the
        dtype/device are taken from ``mc`` itself; the remaining positional and
        keyword arguments are forwarded to ``_precompute`` unchanged.
        """
        cep_order = mc.size(-1) - 1
        # Keep this call directly inside `_func`: _precompute looks at the
        # name of its immediate caller to choose how the layer is built.
        values, layers, _ = MelGeneralizedCepstrumToSpectrum._precompute(
            cep_order, *args, **kwargs, dtype=mc.dtype, device=mc.device
        )
        return MelGeneralizedCepstrumToSpectrum._forward(mc, *values, *layers)

    @staticmethod
    def _takes_input_size() -> bool:
        """Report that the functional form derives a size from its input."""
        return True

    @staticmethod
    def _check() -> None:
        """Validate arguments; this class performs no checks of its own."""

    @staticmethod
    def _precompute(
        cep_order: int,
        fft_length: int,
        alpha: float,
        gamma: float,
        norm: bool,
        mul: bool,
        n_fft: int,
        out_format: str | int,
        device: torch.device | None,
        dtype: torch.dtype | None,
    ) -> Precomputed:
        """Select the output formatter and build the cepstrum converter.

        Returns
        -------
        out : tuple
            ``((formatter,), (mgc2c,), None)`` where ``formatter`` maps the
            complex FFT of the cepstrum to the requested output format and
            ``mgc2c`` converts the input to a plain cepstrum of order
            ``fft_length // 2``.

        Raises
        ------
        ValueError
            If ``out_format`` is not one of the supported names or aliases.

        """
        MelGeneralizedCepstrumToSpectrum._check()

        # Each entry pairs the accepted aliases with the function applied to
        # the complex FFT of the cepstrum. As the 'log-magnitude' and 'radian'
        # entries show, the real part is used as the log-magnitude and the
        # imaginary part as the phase in radians.
        formatter_table = (
            ((0, "db"), lambda x: x.real * (20 / math.log(10))),
            ((1, "log-magnitude"), lambda x: x.real),
            ((2, "magnitude"), lambda x: torch.exp(x.real)),
            ((3, "power"), lambda x: torch.exp(2 * x.real)),
            ((4, "cycle"), lambda x: x.imag / torch.pi),
            ((5, "radian"), lambda x: x.imag),
            ((6, "degree"), lambda x: x.imag * (180 / torch.pi)),
            (("complex",), lambda x: torch.polar(torch.exp(x.real), x.imag)),
        )
        for aliases, candidate in formatter_table:
            if out_format in aliases:
                formatter = candidate
                break
        else:
            raise ValueError(f"out_format {out_format} is not supported.")

        # Must be evaluated in this frame: stack()[1] is the direct caller of
        # _precompute, and only a caller named `_func` yields False.
        called_from_module = inspect.stack()[1].function != "_func"

        # Target representation: unwarped (alpha=0), gamma=0, neither
        # normalized nor gamma-multiplied, of order fft_length // 2.
        conversion_params = {
            "in_order": cep_order,
            "in_alpha": alpha,
            "in_gamma": gamma,
            "in_norm": norm,
            "in_mul": mul,
            "out_order": fft_length // 2,
            "out_alpha": 0,
            "out_gamma": 0,
            "out_norm": False,
            "out_mul": False,
            "n_fft": n_fft,
            "device": device,
            "dtype": dtype,
        }
        # get_layer is defined elsewhere; presumably it returns a module or a
        # functional callable depending on the flag -- confirm.
        mgc2c = get_layer(
            called_from_module,
            MelGeneralizedCepstrumToMelGeneralizedCepstrum,
            conversion_params,
        )

        return (formatter,), (mgc2c,), None

    @staticmethod
    def _forward(
        mc: torch.Tensor,
        formatter: Callable,
        mgc2c: Callable,
    ) -> torch.Tensor:
        """Convert to cepstrum, take its real FFT, and format the result."""
        cepstrum = mgc2c(mc)
        # An rfft of length 2 * (K - 1) over K coefficients yields K bins.
        n_points = (cepstrum.size(-1) - 1) * 2
        log_spectrum = torch.fft.rfft(cepstrum, n=n_points)
        return formatter(log_spectrum)