Source code for diffsptk.modules.mgc2mgc

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import inspect

import torch
import torch.nn.functional as F
from torch import nn

from ..utils.private import cexp
from ..utils.private import check_size
from ..utils.private import clog
from ..utils.private import get_values
from ..utils.private import to
from .base import BaseFunctionalModule
from .freqt import FrequencyTransform
from .gnorm import GeneralizedCepstrumGainNormalization as GainNormalization
from .ignorm import (
    GeneralizedCepstrumInverseGainNormalization as InverseGainNormalization,
)


class MelGeneralizedCepstrumToMelGeneralizedCepstrum(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/mgc2mgc.html>`_
    for details.

    Parameters
    ----------
    in_order : int >= 0
        The order of the input cepstrum, :math:`M_1`.

    out_order : int >= 0
        The order of the output cepstrum, :math:`M_2`.

    in_alpha : float in (-1, 1)
        The input alpha, :math:`\\alpha_1`.

    out_alpha : float in (-1, 1)
        The output alpha, :math:`\\alpha_2`.

    in_gamma : float in [-1, 1]
        The input gamma, :math:`\\gamma_1`.

    out_gamma : float in [-1, 1]
        The output gamma, :math:`\\gamma_2`.

    in_norm : bool
        If True, the input is assumed to be normalized.

    out_norm : bool
        If True, the output is assumed to be normalized.

    in_mul : bool
        If True, the input is assumed to be gamma-multiplied.

    out_mul : bool
        If True, the output is assumed to be gamma-multiplied.

    n_fft : int >> M1, M2
        The number of FFT bins. Accurate conversion requires a large value.

    References
    ----------
    .. [1] K. Tokuda et al., "Mel-generalized cepstral analysis - A unified approach to
           speech spectral estimation", *Proceedings of ICSLP*, pp. 1043-1046, 1996.

    """

    def __init__(
        self,
        in_order,
        out_order,
        in_alpha=0,
        out_alpha=0,
        in_gamma=0,
        out_gamma=0,
        in_norm=False,
        out_norm=False,
        in_mul=False,
        out_mul=False,
        n_fft=512,
    ):
        super().__init__()

        # NOTE(review): get_values(locals()) presumably forwards the constructor
        # arguments in declaration order, so no local variable should be created
        # before this line -- confirm against utils.private.get_values.
        # _precompute must also be called directly from __init__, because it
        # inspects the name of its immediate caller.
        _, layers, _ = self._precompute(*get_values(locals()))
        self.seq = nn.Sequential(*layers[0])

    def forward(self, mc):
        """Convert mel-generalized cepstrum to mel-generalized cepstrum.

        Parameters
        ----------
        mc : Tensor [shape=(..., M1+1)]
            The input mel-cepstrum.

        Returns
        -------
        out : Tensor [shape=(..., M2+1)]
            The output mel-cepstrum.

        Examples
        --------
        >>> c1 = diffsptk.ramp(3)
        >>> mgc2mgc = diffsptk.MelGeneralizedCepstrumToMelGeneralizedCepstrum(3, 4, 0.1)
        >>> c2 = mgc2mgc(c1)
        >>> c2
        tensor([-0.0830,  0.6831,  1.1464,  3.1334,  0.9063])

        """
        return self._forward(mc, self.seq)

    @staticmethod
    def _func(mc, *args, **kwargs):
        """Functional entry point: build the layer chain for ``mc`` and apply it.

        The input order is taken from the last dimension of ``mc``; the remaining
        arguments are passed through to ``_precompute`` unchanged.
        """
        # Called directly (not through a helper) so that _precompute sees
        # "_func" as its immediate caller and returns plain callables.
        _, layers, _ = MelGeneralizedCepstrumToMelGeneralizedCepstrum._precompute(
            mc.size(-1) - 1, *args, **kwargs
        )

        def seq(x):
            for layer in layers[0]:
                x = layer(x)
            return x

        return MelGeneralizedCepstrumToMelGeneralizedCepstrum._forward(mc, seq)

    @staticmethod
    def _takes_input_size():
        """Report that the input order is derived from the input tensor."""
        return True

    @staticmethod
    def _check(
        in_order, out_order, in_alpha, out_alpha, in_gamma, out_gamma, in_mul, n_fft
    ):
        """Validate the conversion parameters.

        Raises
        ------
        ValueError
            If an order is negative, an alpha is outside (-1, 1), a gamma is
            outside [-1, 1], ``n_fft`` does not exceed ``max(order) + 1``, or the
            input is declared gamma-multiplied while ``in_gamma`` is zero.
        """
        if in_order < 0:
            raise ValueError("in_order must be non-negative.")
        if out_order < 0:
            raise ValueError("out_order must be non-negative.")
        if 1 <= abs(in_alpha):
            raise ValueError("in_alpha must be in (-1, 1).")
        if 1 <= abs(out_alpha):
            raise ValueError("out_alpha must be in (-1, 1).")
        if 1 < abs(in_gamma):
            raise ValueError("in_gamma must be in [-1, 1].")
        if 1 < abs(out_gamma):
            raise ValueError("out_gamma must be in [-1, 1].")
        if n_fft <= max(in_order, out_order) + 1:
            raise ValueError("n_fft must be much larger then order of cepstrum.")
        if 0 == in_gamma and in_mul:
            raise ValueError("Invalid combination of in_gamma and in_mul.")

    @staticmethod
    def _precompute(
        in_order,
        out_order,
        in_alpha,
        out_alpha,
        in_gamma,
        out_gamma,
        in_norm,
        out_norm,
        in_mul,
        out_mul,
        n_fft,
    ):
        """Assemble the ordered list of processing steps for the conversion.

        When the immediate caller is a function named ``__init__`` each step is
        an instantiated layer; otherwise each step is a closure that calls the
        layer's ``_func`` with the same parameters.

        Returns
        -------
        out : tuple
            ``(None, (steps,), None)`` where ``steps`` is the list of callables
            to apply in order.
        """
        MelGeneralizedCepstrumToMelGeneralizedCepstrum._check(
            in_order, out_order, in_alpha, out_alpha, in_gamma, out_gamma, in_mul, n_fft
        )

        # Only the direct caller's name is needed. Reading it from the parent
        # frame avoids inspect.stack(), which builds a FrameInfo (including
        # source context) for every frame on the call stack. This must stay in
        # this function's own body so that f_back is the direct caller.
        as_module = inspect.currentframe().f_back.f_code.co_name == "__init__"

        steps = []

        def add(layer_cls, module_params, common_params):
            # module_params are only needed to construct a layer instance;
            # common_params are shared by the layer and its functional form.
            if as_module:
                steps.append(layer_cls(*module_params, *common_params))
            else:
                steps.append(lambda c: layer_cls._func(c, *common_params))

        if not in_norm and in_mul:
            add(ZerothGammaDivision, [in_order], [in_gamma])

        # Warping factor of the frequency transform taking in_alpha to out_alpha.
        alpha = (out_alpha - in_alpha) / (1 - in_alpha * out_alpha)

        if 0 == alpha:
            if in_order == out_order and in_gamma == out_gamma:
                # Only the normalization / multiplication state changes.
                if not in_mul and out_mul:
                    add(GammaMultiplication, [in_order], [in_gamma])
                if not in_norm and out_norm:
                    add(GainNormalization, [in_order], [in_gamma])
                if in_norm and not out_norm:
                    add(InverseGainNormalization, [out_order], [out_gamma])
                if in_mul and not out_mul:
                    add(GammaDivision, [out_order], [out_gamma])
            else:
                # No frequency warping, but the order and/or gamma changes.
                if in_mul:
                    add(GammaDivision, [in_order], [in_gamma])
                if not in_norm:
                    add(GainNormalization, [in_order], [in_gamma])
                add(
                    GeneralizedCepstrumToGeneralizedCepstrum,
                    [in_order],
                    [out_order, in_gamma, out_gamma, n_fft],
                )
                if not out_norm:
                    add(InverseGainNormalization, [out_order], [out_gamma])
                if out_mul:
                    add(GammaMultiplication, [out_order], [out_gamma])
        else:
            # Frequency warping is required; it is applied before any gamma change.
            if in_mul:
                add(GammaDivision, [in_order], [in_gamma])
            if in_norm:
                add(InverseGainNormalization, [in_order], [in_gamma])
            add(FrequencyTransform, [in_order], [out_order, alpha])
            if out_norm or in_gamma != out_gamma:
                add(GainNormalization, [out_order], [in_gamma])
            if in_gamma != out_gamma:
                add(
                    GeneralizedCepstrumToGeneralizedCepstrum,
                    [out_order],
                    [out_order, in_gamma, out_gamma, n_fft],
                )
            if not out_norm and in_gamma != out_gamma:
                add(InverseGainNormalization, [out_order], [out_gamma])
            if out_mul:
                add(GammaMultiplication, [out_order], [out_gamma])

        if not out_norm and out_mul:
            add(ZerothGammaMultiplication, [out_order], [out_gamma])

        return None, (steps,), None

    @staticmethod
    def _forward(mc, seq):
        """Apply the prepared callable ``seq`` to ``mc`` and return the result."""
        return seq(mc)
class GeneralizedCepstrumToGeneralizedCepstrum(nn.Module):
    """Convert a cepstrum from one gamma to another through the frequency domain.

    Parameters
    ----------
    in_order : int
        Order of the input cepstrum; ``forward`` checks the input size against it.

    out_order : int
        Order of the output cepstrum.

    in_gamma : float
        Gamma of the input cepstrum.

    out_gamma : float
        Gamma of the output cepstrum.

    n_fft : int
        Number of FFT bins used for the conversion.

    """

    def __init__(self, in_order, out_order, in_gamma, out_gamma, n_fft=512):
        super().__init__()

        self.in_order, self.out_order = in_order, out_order
        self.in_gamma, self.out_gamma = in_gamma, out_gamma
        self.n_fft = n_fft

    def forward(self, c):
        """Validate the last dimension of ``c`` and run the conversion."""
        check_size(c.size(-1), self.in_order + 1, "dimension of cepstrum")
        return self._forward(
            c,
            self.out_order,
            self.in_gamma,
            self.out_gamma,
            self.n_fft,
        )

    @staticmethod
    def _forward(c1, out_order, in_gamma, out_gamma, n_fft):
        """Stateless conversion; returns a tensor whose last dimension is
        ``out_order + 1`` and whose zeroth coefficient is copied from ``c1``.
        """
        # The zeroth coefficient is replaced by zero before the transform and
        # restored from the input at the very end.
        spectrum = torch.fft.fft(F.pad(c1[..., 1:], (1, 0)), n=n_fft)

        # Undo the input generalized logarithm.
        if in_gamma == 0:
            linear = cexp(spectrum)
        else:
            # In-place on the freshly created FFT output; c1 is not modified.
            spectrum *= in_gamma
            spectrum.real += 1
            linear = torch.polar(
                spectrum.abs() ** (1 / in_gamma),
                spectrum.angle() / in_gamma,
            )

        # Apply the output generalized logarithm.
        if out_gamma == 0:
            warped = clog(linear)
        else:
            magnitude = linear.abs() ** out_gamma
            phase = linear.angle() * out_gamma
            warped = (magnitude * torch.cos(phase) - 1) / out_gamma

        truncated = torch.fft.ifft(warped).real[..., : out_order + 1]
        return torch.cat((c1[..., :1], 2 * truncated[..., 1:]), dim=-1)

    _func = _forward


class GammaDivision(nn.Module):
    """Divide every coefficient except the zeroth by ``gamma``."""

    def __init__(self, cep_order, gamma):
        super().__init__()

        # Per-coefficient scale: 1 for the zeroth entry, 1 / gamma elsewhere.
        scale = torch.full((cep_order + 1,), 1 / gamma)
        scale[0] = 1
        self.register_buffer("g", to(scale))

    def forward(self, c):
        """Scale ``c`` element-wise by the precomputed buffer."""
        return c * self.g

    @staticmethod
    def _func(c, gamma):
        """Functional form: keep the zeroth coefficient, divide the rest."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat((head, tail / gamma), dim=-1)


class GammaMultiplication(nn.Module):
    """Multiply every coefficient except the zeroth by ``gamma``."""

    def __init__(self, cep_order, gamma):
        super().__init__()

        # Per-coefficient scale: 1 for the zeroth entry, gamma elsewhere.
        scale = torch.full((cep_order + 1,), gamma)
        scale[0] = 1
        self.register_buffer("g", to(scale))

    def forward(self, c):
        """Scale ``c`` element-wise by the precomputed buffer."""
        return c * self.g

    @staticmethod
    def _func(c, gamma):
        """Functional form: keep the zeroth coefficient, multiply the rest."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat((head, tail * gamma), dim=-1)


class ZerothGammaDivision(nn.Module):
    """Map the zeroth coefficient ``c0`` to ``(c0 - 1) / gamma``."""

    def __init__(self, cep_order, gamma):
        super().__init__()

        self.cep_order = cep_order
        # Stored as a reciprocal; forward multiplies by it.
        self.g = 1 / gamma

    def forward(self, c):
        """Transform the zeroth coefficient; the split requires ``cep_order + 1``
        entries in the last dimension.
        """
        head, tail = c.split([1, self.cep_order], dim=-1)
        return torch.cat(((head - 1) * self.g, tail), dim=-1)

    @staticmethod
    def _func(c, gamma):
        """Functional form operating on any last-dimension size."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat(((head - 1) / gamma, tail), dim=-1)


class ZerothGammaMultiplication(nn.Module):
    """Map the zeroth coefficient ``c0`` to ``c0 * gamma + 1``."""

    def __init__(self, cep_order, gamma):
        super().__init__()

        self.cep_order = cep_order
        self.g = gamma

    def forward(self, c):
        """Transform the zeroth coefficient; the split requires ``cep_order + 1``
        entries in the last dimension.
        """
        head, tail = c.split([1, self.cep_order], dim=-1)
        return torch.cat((head * self.g + 1, tail), dim=-1)

    @staticmethod
    def _func(c, gamma):
        """Functional form operating on any last-dimension size."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat((head * gamma + 1, tail), dim=-1)