# Source code for diffsptk.modules.mgc2mgc

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import inspect

import torch
import torch.nn.functional as F
from torch import nn

from ..utils.private import cexp
from ..utils.private import check_size
from ..utils.private import clog
from ..utils.private import get_values
from ..utils.private import to
from .base import BaseFunctionalModule
from .freqt import FrequencyTransform
from .gnorm import GeneralizedCepstrumGainNormalization as GainNormalization
from .ignorm import (
    GeneralizedCepstrumInverseGainNormalization as InverseGainNormalization,
)



# [docs]  -- Sphinx viewcode link marker, not part of the module source
class MelGeneralizedCepstrumToMelGeneralizedCepstrum(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/mgc2mgc.html>`_
    for details.

    Parameters
    ----------
    in_order : int >= 0
        The order of the input cepstrum, :math:`M_1`.

    out_order : int >= 0
        The order of the output cepstrum, :math:`M_2`.

    in_alpha : float in (-1, 1)
        The input alpha, :math:`\\alpha_1`.

    out_alpha : float in (-1, 1)
        The output alpha, :math:`\\alpha_2`.

    in_gamma : float in [-1, 1]
        The input gamma, :math:`\\gamma_1`.

    out_gamma : float in [-1, 1]
        The output gamma, :math:`\\gamma_2`.

    in_norm : bool
        If True, the input is assumed to be normalized.

    out_norm : bool
        If True, the output is assumed to be normalized.

    in_mul : bool
        If True, the input is assumed to be gamma-multiplied.

    out_mul : bool
        If True, the output is assumed to be gamma-multiplied.

    n_fft : int >> M1, M2
        The number of FFT bins. Accurate conversion requires a large value.

    References
    ----------
    .. [1] K. Tokuda et al., "Mel-generalized cepstral analysis - A unified approach to
           speech spectral estimation", *Proceedings of ICSLP*, pp. 1043-1046, 1996.

    """

    def __init__(
        self,
        in_order,
        out_order,
        in_alpha=0,
        out_alpha=0,
        in_gamma=0,
        out_gamma=0,
        in_norm=False,
        out_norm=False,
        in_mul=False,
        out_mul=False,
        n_fft=512,
    ):
        # Builds the conversion pipeline once. The arguments are validated inside
        # `_precompute` (through `_check`), which raises ValueError on bad values.
        super().__init__()

        # `locals()` has to be read before any other local name is bound so that
        # it carries only the constructor arguments; `get_values` presumably drops
        # the non-argument entries such as `self` -- TODO confirm in utils.private.
        # `_precompute` looks at the function name of its direct caller, so this
        # call must stay directly inside `__init__` for the stages to be
        # instantiated as objects instead of being wrapped as plain callables.
        _, layers, _ = self._precompute(*get_values(locals()))
        # The single entry of `layers` is the ordered list of conversion stages.
        self.seq = nn.Sequential(*layers[0])


    # [docs]  -- Sphinx viewcode link marker, not part of the module source
    def forward(self, mc):
        """Convert mel-generalized cepstrum to mel-generalized cepstrum.

        The input is passed through the stage pipeline that was assembled in the
        constructor.

        Parameters
        ----------
        mc : Tensor [shape=(..., M1+1)]
            The input mel-generalized cepstrum.

        Returns
        -------
        out : Tensor [shape=(..., M2+1)]
            The output mel-generalized cepstrum.

        Examples
        --------
        >>> c1 = diffsptk.ramp(3)
        >>> mgc2mgc = diffsptk.MelGeneralizedCepstrumToMelGeneralizedCepstrum(3, 4, 0.1)
        >>> c2 = mgc2mgc(c1)
        >>> c2
        tensor([-0.0830,  0.6831,  1.1464,  3.1334,  0.9063])

        """
        return self._forward(mc, self.seq)


    @staticmethod
    def _func(mc, *args, **kwargs):
        _, layers, _ = MelGeneralizedCepstrumToMelGeneralizedCepstrum._precompute(
            mc.size(-1) - 1, *args, **kwargs
        )

        def seq(x):
            for layer in layers[0]:
                x = layer(x)
            return x

        return MelGeneralizedCepstrumToMelGeneralizedCepstrum._forward(mc, seq)

    @staticmethod
    def _takes_input_size():
        """Return True unconditionally.

        ``_func`` of this class derives ``in_order`` from the last dimension of
        its input instead of receiving it as an argument.
        NOTE(review): presumably the base class reads this flag when it builds the
        functional interface -- confirm in ``.base``.
        """
        return True

    @staticmethod
    def _check(
        in_order, out_order, in_alpha, out_alpha, in_gamma, out_gamma, in_mul, n_fft
    ):
        if in_order < 0:
            raise ValueError("in_order must be non-negative.")
        if out_order < 0:
            raise ValueError("out_order must be non-negative.")
        if 1 <= abs(in_alpha):
            raise ValueError("in_alpha must be in (-1, 1).")
        if 1 <= abs(out_alpha):
            raise ValueError("out_alpha must be in (-1, 1).")
        if 1 < abs(in_gamma):
            raise ValueError("in_gamma must be in [-1, 1].")
        if 1 < abs(out_gamma):
            raise ValueError("out_gamma must be in [-1, 1].")
        if n_fft <= max(in_order, out_order) + 1:
            raise ValueError("n_fft must be much larger then order of cepstrum.")
        if 0 == in_gamma and in_mul:
            raise ValueError("Invalid combination of in_gamma and in_mul.")

    @staticmethod
    def _precompute(
        in_order,
        out_order,
        in_alpha,
        out_alpha,
        in_gamma,
        out_gamma,
        in_norm,
        out_norm,
        in_mul,
        out_mul,
        n_fft,
    ):
        def choice(use_module, module, module_params, common_params):
            if use_module:
                return module(*module_params, *common_params)
            else:
                return lambda c: module._func(c, *common_params)

        MelGeneralizedCepstrumToMelGeneralizedCepstrum._check(
            in_order, out_order, in_alpha, out_alpha, in_gamma, out_gamma, in_mul, n_fft
        )
        module = inspect.stack()[1].function == "__init__"

        seq = []
        if not in_norm and in_mul:
            seq.append(choice(module, ZerothGammaDivision, [in_order], [in_gamma]))

        alpha = (out_alpha - in_alpha) / (1 - in_alpha * out_alpha)
        if 0 == alpha:
            if in_order == out_order and in_gamma == out_gamma:
                if not in_mul and out_mul:
                    seq.append(
                        choice(module, GammaMultiplication, [in_order], [in_gamma])
                    )
                if not in_norm and out_norm:
                    seq.append(
                        choice(module, GainNormalization, [in_order], [in_gamma])
                    )
                if in_norm and not out_norm:
                    seq.append(
                        choice(
                            module, InverseGainNormalization, [out_order], [out_gamma]
                        )
                    )
                if in_mul and not out_mul:
                    seq.append(choice(module, GammaDivision, [out_order], [out_gamma]))
            else:
                if in_mul:
                    seq.append(choice(module, GammaDivision, [in_order], [in_gamma]))
                if not in_norm:
                    seq.append(
                        choice(module, GainNormalization, [in_order], [in_gamma])
                    )
                if True:
                    seq.append(
                        choice(
                            module,
                            GeneralizedCepstrumToGeneralizedCepstrum,
                            [in_order],
                            [out_order, in_gamma, out_gamma, n_fft],
                        )
                    )
                if not out_norm:
                    seq.append(
                        choice(
                            module, InverseGainNormalization, [out_order], [out_gamma]
                        )
                    )
                if out_mul:
                    seq.append(
                        choice(module, GammaMultiplication, [out_order], [out_gamma])
                    )
        else:
            if in_mul:
                seq.append(choice(module, GammaDivision, [in_order], [in_gamma]))
            if in_norm:
                seq.append(
                    choice(module, InverseGainNormalization, [in_order], [in_gamma])
                )
            if True:
                seq.append(
                    choice(module, FrequencyTransform, [in_order], [out_order, alpha])
                )
            if out_norm or in_gamma != out_gamma:
                seq.append(choice(module, GainNormalization, [out_order], [in_gamma]))
            if in_gamma != out_gamma:
                seq.append(
                    choice(
                        module,
                        GeneralizedCepstrumToGeneralizedCepstrum,
                        [out_order],
                        [out_order, in_gamma, out_gamma, n_fft],
                    )
                )
            if not out_norm and in_gamma != out_gamma:
                seq.append(
                    choice(module, InverseGainNormalization, [out_order], [out_gamma])
                )
            if out_mul:
                seq.append(
                    choice(module, GammaMultiplication, [out_order], [out_gamma])
                )

        if not out_norm and out_mul:
            seq.append(
                choice(module, ZerothGammaMultiplication, [out_order], [out_gamma])
            )

        return None, (seq,), None

    @staticmethod
    def _forward(mc, seq):
        """Apply the callable ``seq`` to ``mc`` and return its result."""
        return seq(mc)



class GeneralizedCepstrumToGeneralizedCepstrum(nn.Module):
    """Change the gamma (and order) of a cepstrum through the FFT domain.

    The zeroth coefficient is passed through unchanged; the remaining
    coefficients are recomputed from an ``n_fft``-point spectrum.
    """

    def __init__(self, in_order, out_order, in_gamma, out_gamma, n_fft=512):
        super().__init__()

        self.in_order, self.out_order = in_order, out_order
        self.in_gamma, self.out_gamma = in_gamma, out_gamma
        self.n_fft = n_fft

    def forward(self, c):
        """Convert ``c`` after checking that its last dimension is in_order + 1."""
        expected_size = self.in_order + 1
        check_size(c.size(-1), expected_size, "dimension of cepstrum")
        return self._forward(
            c, self.out_order, self.in_gamma, self.out_gamma, self.n_fft
        )

    @staticmethod
    def _forward(c1, out_order, in_gamma, out_gamma, n_fft):
        """Convert ``c1`` from ``in_gamma`` to ``out_gamma`` with order ``out_order``."""
        gain = c1[..., :1]

        # Spectrum of the cepstrum with its zeroth coefficient replaced by zero.
        spectrum = torch.fft.fft(F.pad(c1[..., 1:], (1, 0)), n=n_fft)

        # Inverse generalized logarithm with respect to in_gamma.
        if in_gamma == 0:
            linear = cexp(spectrum)
        else:
            spectrum *= in_gamma
            spectrum.real += 1
            linear = torch.polar(
                spectrum.abs() ** (1 / in_gamma),
                spectrum.angle() / in_gamma,
            )

        # Generalized logarithm with respect to out_gamma.
        if out_gamma == 0:
            warped = clog(linear)
        else:
            magnitude = linear.abs() ** out_gamma
            phase = linear.angle() * out_gamma
            warped = (magnitude * torch.cos(phase) - 1) / out_gamma

        # Coefficients 1..out_order, doubled; the zeroth one comes from the input.
        tail = torch.fft.ifft(warped).real[..., 1 : out_order + 1]
        return torch.cat((gain, 2 * tail), dim=-1)

    _func = _forward


class GammaDivision(nn.Module):
    """Divide every coefficient except the zeroth one by gamma."""

    def __init__(self, cep_order, gamma):
        super().__init__()
        # Per-coefficient scale: 1 for the zeroth entry, 1 / gamma elsewhere.
        scale = torch.full((cep_order + 1,), 1 / gamma)
        scale[0] = 1
        self.register_buffer("g", to(scale))

    def forward(self, c):
        """Scale ``c`` element-wise by the precomputed buffer."""
        return torch.mul(c, self.g)

    @staticmethod
    def _func(c, gamma):
        """Functional form: keep ``c[..., :1]`` and divide the rest by gamma."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat([head, tail / gamma], dim=-1)


class GammaMultiplication(nn.Module):
    """Multiply every coefficient except the zeroth one by gamma."""

    def __init__(self, cep_order, gamma):
        super().__init__()
        # Per-coefficient scale: 1 for the zeroth entry, gamma elsewhere.
        scale = torch.full((cep_order + 1,), gamma)
        scale[0] = 1
        self.register_buffer("g", to(scale))

    def forward(self, c):
        """Scale ``c`` element-wise by the precomputed buffer."""
        return torch.mul(c, self.g)

    @staticmethod
    def _func(c, gamma):
        """Functional form: keep ``c[..., :1]`` and multiply the rest by gamma."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat([head, tail * gamma], dim=-1)


class ZerothGammaDivision(nn.Module):
    """Map the zeroth coefficient c0 to (c0 - 1) / gamma; leave the rest as is."""

    def __init__(self, cep_order, gamma):
        super().__init__()
        self.cep_order = cep_order
        # Stored as a reciprocal so that forward() multiplies instead of divides.
        self.g = 1 / gamma

    def forward(self, c):
        """Transform ``c``, whose last dimension must equal cep_order + 1."""
        head, tail = c.split([1, self.cep_order], dim=-1)
        return torch.cat([(head - 1) * self.g, tail], dim=-1)

    @staticmethod
    def _func(c, gamma):
        """Functional form; the split sizes are taken from ``c`` itself."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat([(head - 1) / gamma, tail], dim=-1)


class ZerothGammaMultiplication(nn.Module):
    """Map the zeroth coefficient c0 to c0 * gamma + 1; leave the rest as is."""

    def __init__(self, cep_order, gamma):
        super().__init__()
        self.cep_order = cep_order
        self.g = gamma

    def forward(self, c):
        """Transform ``c``, whose last dimension must equal cep_order + 1."""
        head, tail = c.split([1, self.cep_order], dim=-1)
        return torch.cat([head * self.g + 1, tail], dim=-1)

    @staticmethod
    def _func(c, gamma):
        """Functional form; the split sizes are taken from ``c`` itself."""
        head, tail = c.split([1, c.size(-1) - 1], dim=-1)
        return torch.cat([head * gamma + 1, tail], dim=-1)