Source code for diffsptk.modules.griffin

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import inspect
import logging

import torch
from torch import nn

from ..typing import Callable, Precomputed
from ..utils.private import TAU, filter_values, get_layer, get_logger
from .base import BaseFunctionalModule
from .istft import InverseShortTimeFourierTransform
from .stft import ShortTimeFourierTransform


class GriffinLim(BaseFunctionalModule):
    """Phase reconstruction from a power spectrogram via the Griffin-Lim algorithm.

    The module repeatedly synthesizes a waveform from the current spectrum
    estimate, re-analyzes it, and keeps only the phase of the (accelerated)
    re-analyzed spectrum.

    Parameters
    ----------
    frame_length : int >= 1
        The frame length in samples, :math:`L`.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    fft_length : int >= L
        The number of FFT bins, :math:`N`.

    center : bool
        If True, pad the input on both sides so that the frame is centered.

    mode : str
        The padding mode forwarded to the underlying STFT layer.

    window : ['blackman', 'hamming', 'hanning', 'bartlett', 'trapezoidal', \
              'rectangular', 'nuttall']
        The window type.

    norm : ['none', 'power', 'magnitude']
        The normalization type of the window.

    symmetric : bool
        If True, the window is symmetric, otherwise periodic.

    n_iter : int >= 0
        The number of iterations for phase reconstruction.

    alpha : float >= 0
        The momentum factor, :math:`\\alpha`.

    beta : float >= 0
        The momentum factor, :math:`\\beta`.

    gamma : float >= 0
        The smoothing factor, :math:`\\gamma`.

    init_phase : ['zeros', 'random']
        The initial phase for the reconstruction.

    verbose : bool
        If True, print the SNR at each iteration.

    device : torch.device or None
        The device of this module.

    dtype : torch.dtype or None
        The data type of this module.

    References
    ----------
    .. [1] R. Nenov et al., "Faster than fast: Accelerating the Griffin-Lim
           algorithm," *Proceedings of ICASSP*, 2023.

    """

    def __init__(
        self,
        frame_length: int,
        frame_period: int,
        fft_length: int,
        *,
        center: bool = True,
        mode: str = "constant",
        window: str = "blackman",
        norm: str = "power",
        symmetric: bool = True,
        n_iter: int = 100,
        alpha: float = 0.99,
        beta: float = 0.99,
        gamma: float = 1.1,
        init_phase: str = "random",
        verbose: bool = False,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()

        # NOTE: locals() must be captured before any additional local name is
        # bound, because its contents are forwarded to _precompute as kwargs.
        values, layers, _ = self._precompute(**filter_values(locals()))
        self.values = values
        self.layers = nn.ModuleList(layers)

    def forward(self, y: torch.Tensor, out_length: int | None = None) -> torch.Tensor:
        """Reconstruct a waveform from the spectrum using the Griffin-Lim algorithm.

        Parameters
        ----------
        y : Tensor [shape=(..., T/P, N/2+1)]
            The power spectrum.

        out_length : int > 0 or None
            The length of the output waveform.

        Returns
        -------
        out : Tensor [shape=(..., T)]
            The reconstructed waveform.

        Examples
        --------
        >>> import diffsptk
        >>> stft_params = {"frame_length": 3, "frame_period": 1, "fft_length": 8}
        >>> stft = diffsptk.STFT(**stft_params, out_format="power")
        >>> griffin = diffsptk.GriffinLim(**stft_params, n_iter=10, init_phase="zeros")
        >>> x = diffsptk.ramp(1, 3)
        >>> x
        tensor([1., 2., 3.])
        >>> y = griffin(stft(x), out_length=3).round()
        >>> y
        tensor([-1., -2., -3.])

        """
        return self._forward(y, out_length, *self.values, *self.layers)

    @staticmethod
    def _func(y: torch.Tensor, out_length: int | None, *args, **kwargs) -> torch.Tensor:
        """Functional entry point: precompute on the fly, then run the algorithm.

        The device and dtype handed to ``_precompute`` are taken from ``y``.
        """
        # _precompute inspects the name of its direct caller, so it has to be
        # invoked from right here rather than through a helper.
        precomputed = GriffinLim._precompute(
            *args, **kwargs, device=y.device, dtype=y.dtype
        )
        values, layers, _ = precomputed
        return GriffinLim._forward(y, out_length, *values, *layers)

    @staticmethod
    def _takes_input_size() -> bool:
        """Return False; this module reports that it takes no input size."""
        return False

    @staticmethod
    def _check(
        n_iter: int,
        alpha: float,
        beta: float,
        gamma: float,
    ) -> None:
        """Validate the iteration count and the three acceleration factors.

        Raises
        ------
        ValueError
            If any of the arguments is negative. The arguments are examined in
            the order ``n_iter``, ``alpha``, ``beta``, ``gamma``.

        """
        constraints = (
            (n_iter, "n_iter must be non-negative."),
            (alpha, "alpha must be non-negative."),
            (beta, "beta must be non-negative."),
            (gamma, "gamma must be non-negative."),
        )
        for value, message in constraints:
            if value < 0:
                raise ValueError(message)

    @staticmethod
    def _precompute(
        frame_length: int,
        frame_period: int,
        fft_length: int,
        center: bool,
        mode: str,
        window: str,
        norm: str,
        symmetric: bool,
        n_iter: int,
        alpha: float,
        beta: float,
        gamma: float,
        init_phase: str,
        verbose: bool,
        device: torch.device | None,
        dtype: torch.dtype | None,
    ) -> Precomputed:
        """Validate the settings and build everything ``_forward`` needs.

        Returns
        -------
        out : Precomputed
            A triple ``(values, layers, None)`` where ``values`` is
            ``(n_iter, alpha, beta, gamma, phase_generator, logger)`` and
            ``layers`` is ``(stft, istft)``.

        Raises
        ------
        ValueError
            If a numeric setting is negative or ``init_phase`` is unknown.

        """
        GriffinLim._check(n_iter, alpha, beta, gamma)

        # True unless the immediate caller is the functional path `_func`.
        # The flag is handed to get_layer as its first argument.
        is_module = inspect.stack()[1].function != "_func"

        if init_phase == "zeros":

            def phase_generator(x):
                return torch.zeros_like(x)

        elif init_phase == "random":

            def phase_generator(x):
                return TAU * torch.rand_like(x)

        else:
            raise ValueError(f"init_phase: {init_phase} is not supported.")

        logger = get_logger("griffin") if verbose else None

        # Analysis layer: produces a complex spectrum without flooring.
        stft_params = {
            "frame_length": frame_length,
            "frame_period": frame_period,
            "fft_length": fft_length,
            "center": center,
            "zmean": False,
            "mode": mode,
            "window": window,
            "norm": norm,
            "symmetric": symmetric,
            "eps": 0,
            "relative_floor": None,
            "out_format": "complex",
            "device": device,
            "dtype": dtype,
        }
        stft = get_layer(is_module, ShortTimeFourierTransform, stft_params)

        # Synthesis layer: configured with the matching framing and window.
        istft_params = {
            "frame_length": frame_length,
            "frame_period": frame_period,
            "fft_length": fft_length,
            "center": center,
            "window": window,
            "norm": norm,
            "symmetric": symmetric,
            "device": device,
            "dtype": dtype,
        }
        istft = get_layer(is_module, InverseShortTimeFourierTransform, istft_params)

        values = (n_iter, alpha, beta, gamma, phase_generator, logger)
        layers = (stft, istft)
        return values, layers, None

    @staticmethod
    def _forward(
        y: torch.Tensor,
        out_length: int | None,
        n_iter: int,
        alpha: float,
        beta: float,
        gamma: float,
        phase_generator: Callable,
        logger: logging.Logger | None,
        stft: Callable,
        istft: Callable,
    ) -> torch.Tensor:
        """Run the accelerated Griffin-Lim iterations and synthesize the waveform.

        ``y`` is treated as a power spectrum: its square root (after adding a
        small constant) is used as the target amplitude. When ``logger`` is
        given, the factors and a per-iteration SNR are written to it.
        """
        verbose = logger is not None
        if verbose:
            logger.info(f"alpha: {alpha}, beta: {beta}, gamma: {gamma}")

        tiny = 1e-16
        amplitude = torch.sqrt(y + tiny)
        # Unit-modulus complex factor carrying the current phase estimate.
        phasor = torch.exp(1j * phase_generator(amplitude))

        # Placeholders; they are overwritten at the end of the first iteration
        # before being read.
        prev_smoothed = prev_aux = 0
        n_frames = amplitude.shape[-2]

        for iteration in range(n_iter):
            # Synthesize with the current phase and analyze the result again.
            waveform = istft(amplitude * phasor, out_length=out_length)
            smoothed = stft(waveform)
            # Keep only as many frames as the input spectrum has.
            smoothed = smoothed[..., :n_frames, :]

            if iteration == 0:
                # No history yet: use the plain re-analyzed spectrum.
                accelerated = aux = smoothed
            else:
                smoothed = (1 - gamma) * prev_aux + gamma * smoothed
                delta = smoothed - prev_smoothed
                accelerated = smoothed + alpha * delta
                aux = smoothed + beta * delta

            # Discard the magnitude and keep the phase only.
            phasor = accelerated / (accelerated.abs() + tiny)

            prev_smoothed = smoothed
            prev_aux = aux

            if verbose:
                error_norm = torch.linalg.norm(accelerated.abs() - amplitude)
                target_norm = torch.linalg.norm(amplitude)
                snr = -10 * torch.log10(error_norm / target_norm)
                logger.info(f" iter {iteration + 1:5d}: SNR = {snr:g}")

        return istft(amplitude * phasor, out_length=out_length)