Source code for diffsptk.modules.pnorm

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import inspect

import torch
from torch import nn

from ..typing import Callable, Precomputed
from ..utils.private import filter_values, get_layer
from .base import BaseFunctionalModule
from .c2acr import CepstrumToAutocorrelation
from .freqt import FrequencyTransform


class MelCepstrumPowerNormalization(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/pnorm.html>`_
    for details.

    Parameters
    ----------
    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    ir_length : int >= 1
        The length of the impulse response.

    device : torch.device or None
        The device of this module.

    dtype : torch.dtype or None
        The data type of this module.

    """

    def __init__(
        self,
        cep_order: int,
        alpha: float = 0,
        ir_length: int = 128,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()

        _, layers, _ = self._precompute(**filter_values(locals()))
        self.layers = nn.ModuleList(layers)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Perform mel-cepstrum power normalization.

        Parameters
        ----------
        x : Tensor [shape=(..., M+1)]
            The input mel-cepstrum.

        Returns
        -------
        out : Tensor [shape=(..., M+2)]
            The log power and power-normalized mel-cepstrum.

        Examples
        --------
        >>> import diffsptk
        >>> pnorm = diffsptk.MelCepstrumPowerNormalization(3, alpha=0.1)
        >>> x = diffsptk.ramp(1, 4)
        >>> y = pnorm(x)
        >>> y
        tensor([16.5884, -7.2942,  2.0000,  3.0000,  4.0000])

        """
        return self._forward(x, *self.layers)
    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        _, layers, _ = MelCepstrumPowerNormalization._precompute(
            x.size(-1) - 1, *args, **kwargs, device=x.device, dtype=x.dtype
        )
        return MelCepstrumPowerNormalization._forward(x, *layers)

    @staticmethod
    def _takes_input_size() -> bool:
        return True

    @staticmethod
    def _check() -> None:
        pass

    @staticmethod
    def _precompute(
        cep_order: int,
        alpha: float,
        ir_length: int,
        device: torch.device | None,
        dtype: torch.dtype | None,
    ) -> Precomputed:
        MelCepstrumPowerNormalization._check()
        # Instantiate stateful layers unless called via the functional interface.
        module = inspect.stack()[1].function != "_func"
        # Unwarp the mel-cepstrum to a plain cepstrum of length ir_length
        # (note the negated warping factor).
        freqt = get_layer(
            module,
            FrequencyTransform,
            dict(
                in_order=cep_order,
                out_order=ir_length - 1,
                alpha=-alpha,
                device=device,
                dtype=dtype,
            ),
        )
        # The zeroth autocorrelation coefficient gives the power of the
        # corresponding impulse response.
        c2acr = get_layer(
            module,
            CepstrumToAutocorrelation,
            dict(
                cep_order=ir_length - 1,
                acr_order=0,
                n_fft=ir_length,
            ),
        )
        return None, (freqt, c2acr), None

    @staticmethod
    def _forward(x: torch.Tensor, freqt: Callable, c2acr: Callable) -> torch.Tensor:
        x0, x1 = torch.split(x, [1, x.size(-1) - 1], dim=-1)
        # Log power: P = log r(0), where r(0) is the zeroth autocorrelation
        # of the impulse response recovered from the mel-cepstrum.
        P = torch.log(c2acr(freqt(x)))
        # Output [P, c(0) - P / 2, c(1), ..., c(M)].
        y = torch.cat((P, x0 - 0.5 * P, x1), dim=-1)
        return y
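# ------------------------------------------------------------------------ #
# A minimal usage sketch of the class above. This is a hedged example, not
# part of the module source; it assumes the diffsptk package re-exports
# MelCepstrumPowerNormalization and the ramp helper at the top level, as in
# the docstring example.
# ------------------------------------------------------------------------ #
#
# import diffsptk
#
# # 3rd-order mel-cepstrum (M = 3), warping factor alpha = 0.1.
# pnorm = diffsptk.MelCepstrumPowerNormalization(3, alpha=0.1)
#
# # Input mel-cepstrum c = [1, 2, 3, 4], shape (M+1,).
# x = diffsptk.ramp(1, 4)
#
# # Output has shape (M+2,): [P, c(0) - P / 2, c(1), ..., c(M)], where P is
# # the log power. Here P = 16.5884, so the second entry is
# # 1 - 16.5884 / 2 = -7.2942; the higher coefficients pass through unchanged.
# y = pnorm(x)
# print(y)  # tensor([16.5884, -7.2942,  2.0000,  3.0000,  4.0000])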