Source code for diffsptk.modules.delta

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import torch
import torch.nn.functional as F

from ..typing import ArrayLike, Precomputed
from ..utils.private import filter_values, to
from .base import BaseFunctionalModule


class Delta(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/delta.html>`_
    for details.

    Parameters
    ----------
    seed : list[list[float]] or list[int]
        The delta coefficients or the width(s) of 1st (and 2nd) regression
        coefficients.

    static_out : bool
        If False, outputs only the delta components.

    device : torch.device or None
        The device of this module.

    dtype : torch.dtype or None
        The data type of this module.

    """

    def __init__(
        self,
        seed: ArrayLike[ArrayLike[float]] | ArrayLike[int] = [
            [-0.5, 0, 0.5],
            [1, -2, 1],
        ],
        static_out: bool = True,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()

        _, _, tensors = self._precompute(**filter_values(locals()))
        self.register_buffer("window", tensors[0])
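
    # For reference, the default seed yields the window buffer
    #     [[ 0.0,  1.0,  0.0],   # static passthrough (prepended by static_out)
    #      [-0.5,  0.0,  0.5],   # 1st-order delta: central difference
    #      [ 1.0, -2.0,  1.0]],  # 2nd-order delta: discrete Laplacian
    # where each row is zero-padded to a common odd length in `_precompute`.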

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the delta components.

        Parameters
        ----------
        x : Tensor [shape=(B, T, D) or (T, D)]
            The static components.

        Returns
        -------
        out : Tensor [shape=(B, T, DxH) or (T, DxH)]
            The delta (and static) components.

        Examples
        --------
        >>> x = diffsptk.ramp(1, 8).view(1, -1, 2)
        >>> x
        tensor([[[1., 2.],
                 [3., 4.],
                 [5., 6.],
                 [7., 8.]]])
        >>> delta = diffsptk.Delta([[-0.5, 0], [0, 0, 0.5]])
        >>> y = delta(x)
        >>> y
        tensor([[[ 1.0000,  2.0000, -0.5000, -1.0000,  1.5000,  2.0000],
                 [ 3.0000,  4.0000, -0.5000, -1.0000,  2.5000,  3.0000],
                 [ 5.0000,  6.0000, -1.5000, -2.0000,  3.5000,  4.0000],
                 [ 7.0000,  8.0000, -2.5000, -3.0000,  3.5000,  4.0000]]])

        """
        return self._forward(x, self.window)

    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        _, _, tensors = Delta._precompute(
            *args, **kwargs, device=x.device, dtype=x.dtype
        )
        return Delta._forward(x, *tensors)

    @staticmethod
    def _takes_input_size() -> bool:
        return False

    @staticmethod
    def _check(seed: ArrayLike[ArrayLike[float]] | ArrayLike[int]) -> None:
        if not isinstance(seed, (tuple, list)):
            raise ValueError("seed must be tuple or list.")

    @staticmethod
    def _precompute(
        seed: ArrayLike[ArrayLike[float]] | ArrayLike[int],
        static_out: bool,
        device: torch.device | None,
        dtype: torch.dtype | None,
    ) -> Precomputed:
        Delta._check(seed)

        if isinstance(seed[0], (tuple, list)):
            # Make the window from the given delta coefficients, centering
            # each row in a common odd-length frame.
            if static_out:
                seed = [[1]] + list(seed)
            max_len = max([len(coefficients) for coefficients in seed])
            if max_len % 2 == 0:
                max_len += 1

            window = []
            for coefficients in seed:
                diff = max_len - len(coefficients)
                if diff % 2 == 0:
                    left_pad = diff // 2
                    right_pad = diff // 2
                else:
                    left_pad = (diff - 1) // 2
                    right_pad = (diff + 1) // 2
                w = torch.tensor(
                    [0] * left_pad + list(coefficients) + [0] * right_pad,
                    device=device,
                    dtype=torch.double,
                )
                window.append(w)
        else:
            # Make the window from the width(s) of the regression coefficients.
            if min(seed) <= 0:
                raise ValueError(
                    "The width of regression coefficients must be positive."
                )

            max_len = max(seed) * 2 + 1
            window = []
            if static_out:
                w = torch.zeros(max_len, device=device, dtype=torch.double)
                w[(max_len - 1) // 2] = 1
                window.append(w)

            # Compute 1st order coefficients: j / sum(j^2), the least-squares
            # slope over j = -n, ..., n.
            n = seed[0]
            z = 1 / (n * (n + 1) * (2 * n + 1) / 3)  # 1 / sum_{j=-n}^{n} j^2
            j = torch.arange(-n, n + 1, device=device, dtype=torch.double)
            pad_width = (max_len - (n * 2 + 1)) // 2
            window.append(F.pad(j * z, (pad_width, pad_width)))

            # Compute 2nd order coefficients: (a0 * j^2 - a1) * z, the
            # least-squares quadratic term, where a0 = sum(1), a1 = sum(j^2),
            # and a2 = sum(j^4) over j = -n, ..., n.
            if 2 <= len(seed):
                n = seed[1]
                a0 = 2 * n + 1
                a1 = a0 * n * (n + 1) / 3
                a2 = a1 * (3 * n * n + 3 * n - 1) / 5
                z = 1 / (2 * (a2 * a0 - a1 * a1))
                j = torch.arange(-n, n + 1, device=device, dtype=torch.double)
                pad_width = (max_len - (n * 2 + 1)) // 2
                window.append(F.pad((a0 * j * j - a1) * z, (pad_width, pad_width)))
            if 3 <= len(seed):
                raise ValueError("3rd order regression is not supported.")

        window = torch.stack(window)  # (H, W)
        return None, None, (to(window, dtype=dtype),)

    @staticmethod
    def _forward(x: torch.Tensor, window: torch.Tensor) -> torch.Tensor:
        d = x.dim()
        if d == 2:
            x = x.unsqueeze(0)
        if x.dim() != 3:
            raise ValueError("Input must be 2D or 3D tensor.")

        B, T, _ = x.shape
        W = window.size(-1)
        pad_width = (W - 1) // 2

        x = x.unsqueeze(1)  # (B, 1, T, D)
        x = F.pad(x, (0, 0, pad_width, pad_width), mode="replicate")
        w = window.view(-1, 1, W, 1)  # one filter per window row
        y = F.conv2d(x, w, padding="valid")  # (B, H, T, D)
        y = y.permute(0, 2, 1, 3)
        y = y.reshape(B, T, -1)
        if d == 2:
            y = y.squeeze(0)
        return y
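

# ------------------------------------------------------------------------ #
# A minimal usage sketch, not part of the module above: it assumes the      #
# package exposes `Delta` and `ramp` at the top level, as the docstring     #
# examples do. With the width-based seed [1, 1], `_precompute` stacks a     #
# static passthrough row plus the 1st- and 2nd-order regression windows     #
# (H = 3), so an input of shape (B, T, D) yields a (B, T, 3 * D) output.    #
# ------------------------------------------------------------------------ #
if __name__ == "__main__":
    import diffsptk

    x = diffsptk.ramp(1, 8).view(1, -1, 2)  # shape: (B=1, T=4, D=2)
    delta = diffsptk.Delta(seed=[1, 1])  # widths of 1st and 2nd regression
    y = delta(x)
    print(y.shape)  # torch.Size([1, 4, 6]): static + delta + delta-delta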