# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ------------------------------------------------------------------------ #
import inspect
import torch
from torch import nn
from ..typing import Callable, Precomputed
from ..utils.private import get_layer, get_values, remove_gain
from .base import BaseFunctionalModule
from .fftr import RealValuedFastFourierTransform
class Spectrum(BaseFunctionalModule):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/spec.html>`_
    for details.

    Parameters
    ----------
    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    eps : float >= 0
        A small value added to the power spectrum.

    relative_floor : float < 0 or None
        The relative floor of the power spectrum in dB.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power']
        The output format. The integers 0, 1, 2, and 3 are accepted as
        aliases of the listed names, in that order.

    learnable : bool
        Whether to make the DFT basis learnable.

    """

    def __init__(
        self,
        fft_length: int,
        *,
        eps: float = 0,
        relative_floor: float | None = None,
        out_format: str | int = "power",
        learnable: bool = False,
    ) -> None:
        super().__init__()

        # Keep this call as the first statement after super().__init__():
        # locals() must hold only the constructor arguments at this point, and
        # _precompute() has to be invoked directly from __init__ because it
        # looks at the name of its caller to decide how to build the layer.
        # NOTE(review): get_values() presumably returns the argument values in
        # declaration order -- confirm in utils.private.
        self.values, layers, _ = self._precompute(*get_values(locals()))
        self.layers = nn.ModuleList(layers)

    def forward(
        self, b: torch.Tensor | None = None, a: torch.Tensor | None = None
    ) -> torch.Tensor:
        """Compute spectrum.

        Parameters
        ----------
        b : Tensor [shape=(..., M+1)] or None
            The numerator coefficients.

        a : Tensor [shape=(..., N+1)] or None
            The denominator coefficients.

        Returns
        -------
        out : Tensor [shape=(..., L/2+1)]
            The spectrum.

        Raises
        ------
        ValueError
            If both ``b`` and ``a`` are None.

        Examples
        --------
        >>> x = diffsptk.ramp(1, 3)
        >>> x
        tensor([1., 2., 3.])
        >>> spec = diffsptk.Spectrum(8)
        >>> y = spec(x)
        >>> y
        tensor([36.0000, 25.3137, 8.0000, 2.6863, 4.0000])

        """
        return self._forward(b, a, *self.values, *self.layers)

    @staticmethod
    def _func(
        b: torch.Tensor | None = None, a: torch.Tensor | None = None, *args, **kwargs
    ) -> torch.Tensor:
        """Compute the spectrum without a module instance.

        The remaining positional and keyword arguments are forwarded unchanged
        to :meth:`_precompute`; the values and layers are rebuilt on each call.
        """
        values, layers, _ = Spectrum._precompute(*args, **kwargs)
        return Spectrum._forward(b, a, *values, *layers)

    @staticmethod
    def _takes_input_size() -> bool:
        """Return False."""
        return False

    @staticmethod
    def _check(fft_length: int, eps: float, relative_floor: float | None) -> None:
        """Validate the numeric settings.

        Raises
        ------
        ValueError
            If ``fft_length`` is not greater than 1, ``eps`` is negative, or
            ``relative_floor`` is given but not negative.
        """
        if fft_length <= 1:
            raise ValueError("fft_length must be greater than 1.")
        if eps < 0:
            raise ValueError("eps must be non-negative.")
        if relative_floor is not None and 0 <= relative_floor:
            raise ValueError("relative_floor must be negative.")

    @staticmethod
    def _precompute(
        fft_length: int,
        eps: float,
        relative_floor: float | None,
        out_format: str | int,
        learnable: bool = False,
    ) -> Precomputed:
        """Validate the settings and prepare what :meth:`_forward` consumes.

        Returns
        -------
        out : Precomputed
            A triple ``(values, layers, None)`` where ``values`` is
            ``(eps, relative_floor, formatter)`` with ``relative_floor``
            converted from dB to a linear power ratio (or left as None), and
            ``layers`` is a one-element tuple holding the FFT layer.

        Raises
        ------
        ValueError
            If a numeric setting is invalid or ``out_format`` is unknown.
        """
        Spectrum._check(fft_length, eps, relative_floor)

        # True only when the direct caller is a function named "__init__".
        # context=0 avoids loading source lines for every frame on the stack;
        # only the caller's function name is needed here.
        module = inspect.stack(0)[1].function == "__init__"

        if relative_floor is not None:
            # dB -> linear ratio on the power scale.
            relative_floor = 10 ** (relative_floor / 10)

        # Every formatter receives the (floored) power spectrum.
        formatters = (
            ((0, "db"), lambda x: 10 * torch.log10(x)),
            ((1, "log-magnitude"), lambda x: 0.5 * torch.log(x)),
            ((2, "magnitude"), torch.sqrt),
            ((3, "power"), lambda x: x),
        )
        for aliases, candidate in formatters:
            if out_format in aliases:
                formatter = candidate
                break
        else:
            raise ValueError(f"out_format {out_format} is not supported.")

        fftr = get_layer(
            module,
            RealValuedFastFourierTransform,
            dict(
                fft_length=fft_length,
                out_format="amplitude",
                learnable=learnable,
            ),
        )
        return (eps, relative_floor, formatter), (fftr,), None

    @staticmethod
    def _forward(
        b: torch.Tensor | None,
        a: torch.Tensor | None,
        eps: float,
        relative_floor: float | None,
        formatter: Callable,
        fftr: Callable,
    ) -> torch.Tensor:
        """Compute the formatted spectrum from the prepared values.

        Parameters
        ----------
        b : Tensor or None
            The numerator coefficients.

        a : Tensor or None
            The denominator coefficients.

        eps : float
            The value added to the power spectrum.

        relative_floor : float or None
            The floor as a ratio of the per-frame maximum power, or None to
            skip flooring.

        formatter : Callable
            Maps the power spectrum to the requested output format.

        fftr : Callable
            The transform applied to ``b`` and ``a``; it is created in
            :meth:`_precompute` with ``out_format="amplitude"``.

        Returns
        -------
        out : Tensor
            The formatted spectrum.

        Raises
        ------
        ValueError
            If both ``b`` and ``a`` are None.
        """
        if b is None and a is None:
            raise ValueError("Either b or a must be specified.")

        # The numerator is transformed first, as before, so the order of the
        # underlying calls is unchanged.
        B = None if b is None else fftr(b)

        if a is None:
            X = B
        else:
            # NOTE(review): remove_gain() presumably splits `a` into its gain K
            # and the gain-normalized coefficients -- confirm in utils.private.
            K, a = remove_gain(a, return_gain=True)
            A = fftr(a)
            X = K / A if B is None else K * (B / A)

        s = torch.square(X) + eps
        if relative_floor is not None:
            # Clamp each frame from below relative to its own maximum power.
            m = torch.amax(s, dim=-1, keepdim=True)
            s = torch.maximum(s, m * relative_floor)
        return formatter(s)