# Source code for diffsptk.modules.lsp2sp

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import numpy as np
import torch
from torch import nn

from ..misc.utils import check_size
from ..misc.utils import to

LOG_ZERO = -1.0e10



class LineSpectralPairsToSpectrum(nn.Module):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/mglsp2sp.html>`_
    for details.

    Parameters
    ----------
    lsp_order : int >= 0
        Order of line spectral pairs, :math:`M`.

    fft_length : int >= 1
        Number of FFT bins, :math:`L`.

    alpha : float in (-1, 1)
        Warping factor, :math:`\\alpha`.

    gamma : float in [-1, 0)
        Gamma, :math:`\\gamma`.

    log_gain : bool
        If True, assume input gain is in log scale.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power']
        Output format.

    References
    ----------
    .. [1] A. V. Oppenheim et al., "Discrete representation of signals," *Proceedings of
           the IEEE*, vol. 60, no. 6, pp. 681-691, 1972.

    .. [2] N. Sugamura et al., "Speech data compression by LSP speech analysis-synthesis
           technique," *IEICE trans*, vol. J64-A, no. 8, pp. 599-606, 1981.

    """

    def __init__(
        self,
        lsp_order,
        fft_length,
        alpha=0,
        gamma=-1,
        log_gain=False,
        out_format="power",
    ):
        super().__init__()

        # Argument validation mirrors the ranges stated in the class docstring.
        assert 0 <= lsp_order
        assert 1 <= fft_length
        assert abs(alpha) < 1
        assert -1 <= gamma < 0

        self.lsp_order = lsp_order
        self.log_gain = log_gain
        self.formatter = self._formatter(out_format)

        # The tables depend only on the constructor arguments, so they are
        # computed once here. Tensors are registered as buffers; the two
        # scalars c1 and c2 are kept as plain attributes.
        cos_omega, p_bias, q_bias, self.c1, self.c2 = self._precompute(
            lsp_order, fft_length, alpha, gamma
        )
        self.register_buffer("cos_omega", cos_omega)
        self.register_buffer("p_bias", p_bias)
        self.register_buffer("q_bias", q_bias)

    def forward(self, w):
        """Convert line spectral pairs to spectrum.

        Parameters
        ----------
        w : Tensor [shape=(..., M+1)]
            Line spectral pairs in radians.

        Returns
        -------
        out : Tensor [shape=(..., L/2+1)]
            Spectrum.

        Examples
        --------
        >>> x = diffsptk.nrand(4)
        >>> x
        tensor([ 2.1110, -1.4767,  1.2490,  2.4201,  1.5429])
        >>> lpc = diffsptk.LPC(3, 5)
        >>> a = lpc(x)
        >>> lpc2lsp = diffsptk.LinearPredictiveCoefficientsToLineSpectralPairs(3)
        >>> w = lpc2lsp(a)
        >>> lsp2sp = diffsptk.LineSpectralPairsToSpectrum(3, 8)
        >>> sp = lsp2sp(w)
        >>> sp
        tensor([31.3541, 13.7932, 14.7454, 16.9510, 10.4759])

        """
        check_size(w.size(-1), self.lsp_order + 1, "dimension of LSP")
        return self._forward(
            w,
            self.log_gain,
            self.formatter,
            self.cos_omega,
            self.p_bias,
            self.q_bias,
            self.c1,
            self.c2,
        )

    @staticmethod
    def _forward(w, log_gain, formatter, cos_omega, p_bias, q_bias, c1, c2):
        """Compute the spectrum from LSPs and the precomputed tables.

        Parameters
        ----------
        w : Tensor [shape=(..., M+1)]
            Input whose first element along the last axis is the gain and whose
            remaining M elements are the line spectral pairs.

        log_gain : bool
            If False, the logarithm of the gain is taken before use.

        formatter : callable
            Function applied to the log-domain result (see ``_formatter``).

        cos_omega : Tensor [shape=(L/2+1, 1)]
            Cosine of the (warped) frequency grid.

        p_bias : Tensor [shape=(L/2+1,)]
            Log-domain offset added to the ``p`` term.

        q_bias : Tensor [shape=(L/2+1,)]
            Log-domain offset added to the ``q`` term.

        c1 : float
            Multiplicative constant, ``0.5 / gamma`` when built by ``_precompute``.

        c2 : float
            Additive constant, a multiple of ``log(2)`` when built by
            ``_precompute``.

        Returns
        -------
        out : Tensor [shape=(..., L/2+1)]
            Spectrum in the format selected by ``formatter``.

        """

        def floor_log(x):
            # log(0) = -inf is raised to LOG_ZERO so that the sums below stay finite.
            return torch.clip(torch.log(x), min=LOG_ZERO)

        # Separate the leading gain from the M line spectral pairs.
        K, w = torch.split(w, [1, w.size(-1) - 1], dim=-1)
        if not log_gain:
            K = floor_log(K)

        # Broadcast every LSP against every frequency bin.
        cos_w = torch.cos(w).unsqueeze(-2)
        pq = floor_log(torch.abs(cos_omega - cos_w))  # [..., L/2+1, M]
        # Columns 1, 3, 5, ... contribute to p and columns 0, 2, 4, ... to q;
        # summing logs corresponds to multiplying the per-LSP factors.
        p = pq[..., 1::2].sum(-1)
        q = pq[..., 0::2].sum(-1)
        # r = log(exp(2 * (p + p_bias)) + exp(2 * (q + q_bias))), evaluated stably.
        r = torch.logsumexp(2 * torch.stack([p + p_bias, q + q_bias], dim=-1), dim=-1)
        sp = K + c1 * (c2 + r)
        sp = formatter(sp)
        return sp

    @staticmethod
    def _func(w, fft_length, alpha, gamma, log_gain, out_format):
        """Functional form of the conversion that needs no module instance.

        The LSP order is taken as ``w.size(-1) - 1`` and the tables are rebuilt on
        every call using the dtype and device of ``w``.
        """
        formatter = LineSpectralPairsToSpectrum._formatter(out_format)
        precomputes = LineSpectralPairsToSpectrum._precompute(
            w.size(-1) - 1, fft_length, alpha, gamma, dtype=w.dtype, device=w.device
        )
        return LineSpectralPairsToSpectrum._forward(
            w, log_gain, formatter, *precomputes
        )

    @staticmethod
    def _precompute(lsp_order, fft_length, alpha, gamma, dtype=None, device=None):
        """Build the input-independent tables used by ``_forward``.

        Parameters
        ----------
        lsp_order : int
            Order of line spectral pairs, :math:`M`.

        fft_length : int
            Number of FFT bins, :math:`L`.

        alpha : float
            Warping factor.

        gamma : float
            Gamma.

        dtype : torch.dtype or None
            Data type passed to the ``to`` helper for the returned tensors.

        device : torch.device or None
            Device on which the frequency grid is created.

        Returns
        -------
        cos_omega : Tensor [shape=(L/2+1, 1)]
            Cosine of the warped frequency grid.

        p_bias : Tensor [shape=(L/2+1,)]
            Log-domain offset of the ``p`` term.

        q_bias : Tensor [shape=(L/2+1,)]
            Log-domain offset of the ``q`` term.

        c1 : float
            ``0.5 / gamma``.

        c2 : float
            ``log(2) * M`` for even ``M`` and ``log(2) * (M - 1)`` for odd ``M``.

        """
        # The grid covers [0, pi] with L/2+1 points and is built in double
        # precision before being handed to `to` with the requested dtype.
        omega = torch.linspace(
            0, torch.pi, fft_length // 2 + 1, dtype=torch.double, device=device
        )
        # With alpha = 0 the arctangent term vanishes and warped_omega == omega.
        warped_omega = omega + 2 * torch.atan(
            alpha * torch.sin(omega) / (1 - alpha * torch.cos(omega))
        )
        cos_omega = torch.cos(warped_omega).view(-1, 1)
        cos_omega = to(cos_omega, dtype=dtype)

        def floor_log(x):
            # Both log(0) = -inf and log(negative) = nan are mapped to LOG_ZERO.
            return torch.nan_to_num(torch.log(x), nan=LOG_ZERO, neginf=LOG_ZERO)

        # The offsets depend on the parity of the order.
        if lsp_order % 2 == 0:
            p = floor_log(torch.sin(0.5 * warped_omega))
            q = floor_log(torch.cos(0.5 * warped_omega))
        else:
            p = floor_log(torch.sin(warped_omega))
            # A zero offset in the log domain means a unit factor.
            q = torch.zeros_like(warped_omega)
        p_bias = to(p, dtype=dtype)
        q_bias = to(q, dtype=dtype)

        c1 = 0.5 / gamma
        c2 = np.log(2) * (lsp_order if lsp_order % 2 == 0 else (lsp_order - 1))
        return cos_omega, p_bias, q_bias, c1, c2

    @staticmethod
    def _formatter(out_format):
        """Return the function that maps a log-magnitude tensor to ``out_format``.

        Parameters
        ----------
        out_format : [0, 1, 2, 3, 'db', 'log-magnitude', 'magnitude', 'power']
            Output format, given either by name or by its integer code.

        Returns
        -------
        out : callable
            Function taking a tensor ``x`` and returning ``x * 20 / log(10)`` for
            'db', ``x`` for 'log-magnitude', ``exp(x)`` for 'magnitude', and
            ``exp(2 * x)`` for 'power'.

        Raises
        ------
        ValueError
            If ``out_format`` is not one of the supported values.

        """
        if out_format in (0, "db"):
            # Natural log to decibels: 20 * log10(e).
            c = 20 / np.log(10)
            return lambda x: x * c
        elif out_format in (1, "log-magnitude"):
            return lambda x: x
        elif out_format in (2, "magnitude"):
            return lambda x: torch.exp(x)
        elif out_format in (3, "power"):
            return lambda x: torch.exp(2 * x)
        raise ValueError(f"out_format {out_format} is not supported.")