# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #
import torch
from ..typing import Precomputed
from ..utils.private import check_size, get_values, to
from .b2mc import MLSADigitalFilterCoefficientsToMelCepstrum
from .base import BaseFunctionalModule
[docs]
class MelCepstrumToMLSADigitalFilterCoefficients(BaseFunctionalModule):
"""See `this page <https://sp-nitech.github.io/sptk/latest/main/mc2b.html>`_
for details.
Parameters
----------
cep_order : int >= 0
The order of the cepstrum, :math:`M`.
alpha : float in (-1, 1)
The frequency warping factor, :math:`\\alpha`.
References
----------
.. [1] K. Tokuda et al., "Spectral estimation of speech by mel-generalized cepstral
analysis," *Electronics and Communications in Japan, part 3*, vol. 76, no. 2,
pp. 30-43, 1993.
"""
def __init__(self, cep_order: int, alpha: float = 0):
super().__init__()
self.in_dim = cep_order + 1
_, _, tensors = self._precompute(*get_values(locals()))
self.register_buffer("A", tensors[0])
[docs]
def forward(self, mc: torch.Tensor) -> torch.Tensor:
"""Convert mel-cepstrum to MLSA filter coefficients.
Parameters
----------
mc : Tensor [shape=(..., M+1)]
The mel-cepstral coefficients.
Returns
-------
out : Tensor [shape=(..., M+1)]
The MLSA filter coefficients.
Examples
--------
>>> mc = diffsptk.ramp(4)
>>> mc2b = diffsptk.MelCepstrumToMLSADigitalFilterCoefficients(4, 0.3)
>>> b = mc2b(mc)
>>> b
tensor([-0.1686, 0.5620, 1.4600, 1.8000, 4.0000])
"""
check_size(mc.size(-1), self.in_dim, "dimension of cepstrum")
return self._forward(mc, **self._buffers)
@staticmethod
def _func(mc: torch.Tensor, alpha: float) -> torch.Tensor:
M = mc.size(-1) - 1
MelCepstrumToMLSADigitalFilterCoefficients._check(M, alpha)
b = torch.zeros_like(mc)
b[..., M] = mc[..., M]
for m in reversed(range(M)):
b[..., m] = mc[..., m] - alpha * b[..., m + 1]
return b
@staticmethod
def _takes_input_size() -> bool:
return True
@staticmethod
def _check(*args, **kwargs) -> None:
MLSADigitalFilterCoefficientsToMelCepstrum._check(*args, **kwargs)
@staticmethod
def _precompute(
cep_order: int,
alpha: float,
device: torch.device | None = None,
dtype: torch.dtype | None = None,
) -> Precomputed:
MelCepstrumToMLSADigitalFilterCoefficients._check(cep_order, alpha)
a = 1
A = torch.eye(cep_order + 1, device=device, dtype=torch.double)
for m in range(1, len(A)):
a *= -alpha
A[:, m:].fill_diagonal_(a)
return None, None, (to(A.T, dtype=dtype),)
@staticmethod
def _forward(mc: torch.Tensor, A: torch.Tensor) -> torch.Tensor:
return torch.matmul(mc, A)