# Source code for diffsptk.modules.mdct
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #
import torch
import torch.nn.functional as F
from torch import nn
from ..typing import Callable, Precomputed
from ..utils.private import check_size, get_layer, get_values, to
from .base import BaseFunctionalModule
from .frame import Frame
from .window import Window
# Keys accepted in the ``learnable`` list of ModifiedDiscreteCosineTransform;
# also the full set that is used when ``learnable`` is True.
LEARNABLES = ("basis", "window")
# [docs]
class ModifiedDiscreteCosineTransform(BaseFunctionalModule):
    """Cascade framing, windowing, and the modified DCT into a single module.

    Parameters
    ----------
    frame_length : int >= 2
        The frame length, :math:`L`.

    window : ['sine', 'vorbis', 'kbd', 'rectangular']
        The window type.

    learnable : bool or list[str]
        Selects the learnable parameters. A boolean switches all parameters on
        or off at once. A list names the learnable parameters individually; the
        only accepted keys are "basis" and "window".

    """

    def __init__(
        self,
        frame_length: int,
        window: str = "sine",
        learnable: bool | list[str] = False,
    ) -> None:
        super().__init__()

        # Keep this call free of preceding local assignments: the arguments
        # forwarded to _precompute are collected from locals().
        self.values, sublayers, _ = self._precompute(
            *get_values(locals(), full=True)
        )
        self.layers = nn.ModuleList(sublayers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute modified discrete cosine transform.

        Parameters
        ----------
        x : Tensor [shape=(..., T)]
            The input waveform.

        Returns
        -------
        out : Tensor [shape=(..., 2T/L, L/2)]
            The spectrum.

        Examples
        --------
        >>> x = diffsptk.ramp(3)
        >>> x
        tensor([0., 1., 2., 3.])
        >>> mdct = diffsptk.MDCT(frame_length=4)
        >>> y = mdct(x)
        >>> y
        tensor([[-0.3536, -0.1464],
                [-3.1213, -0.2929],
                [-0.7678,  1.8536]])

        """
        return self._forward(x, *self.values, *self.layers)

    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        # Functional entry point: request the layers with module=False on
        # every call, then run the shared forward routine.
        values, layers, _ = ModifiedDiscreteCosineTransform._precompute(
            *args, **kwargs, module=False
        )
        return ModifiedDiscreteCosineTransform._forward(x, *values, *layers)

    @staticmethod
    def _takes_input_size() -> bool:
        return False

    @staticmethod
    def _check(learnable: bool | list[str]) -> None:
        """Validate ``learnable``.

        Raises
        ------
        ValueError
            If a list/tuple contains a key outside LEARNABLES, or if the value
            is neither a list/tuple nor a boolean.

        """
        if isinstance(learnable, (tuple, list)):
            for key in learnable:
                if key not in LEARNABLES:
                    raise ValueError("An unsupported key is found in learnable.")
            return
        if not isinstance(learnable, bool):
            raise ValueError("learnable must be boolean or list.")

    @staticmethod
    def _precompute(
        frame_length: int,
        window: str,
        learnable: bool | list[str] = False,
        transform: str = "cosine",
        module: bool = True,
    ) -> Precomputed:
        """Build the framing, windowing, and transform layers.

        Returns a triple ``(values, layers, tensors)`` where ``values`` holds
        the frame period, ``layers`` holds the three stages in application
        order, and ``tensors`` is None.
        """
        ModifiedDiscreteCosineTransform._check(learnable)

        # The frame period is half the frame length.
        hop = frame_length // 2

        # Normalize the boolean shorthand into a collection of keys.
        if learnable is True:
            keys = LEARNABLES
        elif learnable is False:
            keys = ()
        else:
            keys = learnable

        framing = get_layer(
            module,
            Frame,
            {
                "frame_length": frame_length,
                "frame_period": hop,
            },
        )
        windowing = get_layer(
            module,
            Window,
            {
                "in_length": frame_length,
                "out_length": None,
                "window": window,
                "norm": "none",
                "learnable": "window" in keys,
            },
        )
        transformer = get_layer(
            module,
            ModifiedDiscreteTransform,
            {
                "length": frame_length,
                "window": window,
                "transform": transform,
                "learnable": "basis" in keys,
            },
        )
        return (hop,), (framing, windowing, transformer), None

    @staticmethod
    def _forward(
        x: torch.Tensor,
        frame_period: int,
        frame: Callable,
        window: Callable,
        mdt: Callable,
    ) -> torch.Tensor:
        # Append frame_period zeros on the right of the last axis; this
        # padding is for perfect reconstruction.
        # NOTE(review): the doctest in forward() yields 3 frames for T=4, L=4,
        # one more than the documented 2T/L -- confirm the shape against Frame.
        padded = F.pad(x, (0, frame_period))
        frames = frame(padded)
        windowed = window(frames)
        return mdt(windowed)
class ModifiedDiscreteTransform(BaseFunctionalModule):
    """Oddly stacked modified discrete cosine/sine transform module.

    Parameters
    ----------
    length : int >= 2
        The input length, :math:`L`.

    window : str
        The window type used to determine whether it is rectangular or not.

    transform : ['cosine', 'sine']
        The transform type.

    learnable : bool
        Whether to make the DCT matrix learnable.

    """

    def __init__(
        self,
        length: int,
        window: str,
        transform: str = "cosine",
        learnable: bool = False,
    ) -> None:
        super().__init__()

        self.in_dim = length

        # Keep this call free of preceding local assignments: the arguments
        # forwarded to _precompute are collected from locals().
        _, _, tensors = self._precompute(*get_values(locals()))

        # Store the basis under the name "W" either way, so forward() can hand
        # it to _forward by keyword.
        if not learnable:
            self.register_buffer("W", tensors[0])
        else:
            self.W = nn.Parameter(tensors[0])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply MDCT/MDST to the input.

        Parameters
        ----------
        x : Tensor [shape=(..., L)]
            The input.

        Returns
        -------
        out : Tensor [shape=(..., L/2)]
            The output.

        """
        check_size(x.size(-1), self.in_dim, "dimension of input")
        return self._forward(x, **self._buffers, **self._parameters)

    @staticmethod
    def _func(x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        # Functional entry point: the basis is rebuilt on every call from the
        # size, device, and dtype of the input.
        _, _, tensors = ModifiedDiscreteTransform._precompute(
            x.size(-1), *args, **kwargs, device=x.device, dtype=x.dtype
        )
        return ModifiedDiscreteTransform._forward(x, *tensors)

    @staticmethod
    def _takes_input_size() -> bool:
        return True

    @staticmethod
    def _check(length: int) -> None:
        """Raise ValueError unless ``length`` is an even integer of at least 2."""
        if length < 2 or length % 2 == 1:
            raise ValueError("length must be at least 2 and even.")

    @staticmethod
    def _precompute(
        length: int,
        window: str,
        transform: str = "cosine",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Precomputed:
        """Build the transform basis of shape (length, length / 2).

        Returns ``(None, None, (W,))``. The basis is computed in double
        precision and then passed through ``to`` with the requested dtype.

        Raises
        ------
        ValueError
            If ``length`` is invalid or ``transform`` is not 'cosine'/'sine'.

        """
        ModifiedDiscreteTransform._check(length)

        half = length // 2

        # Half-sample-shifted time indices: 0.5, 1.5, ..., length - 0.5.
        time_index = torch.arange(length, device=device, dtype=torch.double) + 0.5
        # Frequencies are taken from the first `half` indices *before* the
        # time shift below is applied.
        freq = (torch.pi / half) * time_index[:half]
        time_index += half / 2

        # sqrt(2 / half) for a rectangular window, sqrt(4 / half) otherwise.
        gain = (2 / half * (1 if window == "rectangular" else 2)) ** 0.5

        if transform == "cosine":
            trig = torch.cos
        elif transform == "sine":
            trig = torch.sin
        else:
            raise ValueError("transform must be either 'cosine' or 'sine'.")

        # Rows are indexed by time and columns by frequency.
        basis = gain * trig(freq.unsqueeze(0) * time_index.unsqueeze(1))
        return None, None, (to(basis, dtype=dtype),)

    @staticmethod
    def _forward(x: torch.Tensor, W: torch.Tensor) -> torch.Tensor:
        # Project the last axis of x onto the columns of the basis.
        return torch.matmul(x, W)