Source code for diffsptk.functional

# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

from torch import Tensor

from . import modules as nn
from .typing import ArrayLike


def acorr(x: Tensor, acr_order: int, out_format: str = "naive") -> Tensor:
    """Compute the autocorrelation of a framed waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The framed waveform.

    acr_order : int >= 0
        The order of the autocorrelation, :math:`M`.

    out_format : ['naive', 'normalized', 'biased']
        The type of the autocorrelation.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The autocorrelation.

    """
    options = {"acr_order": acr_order, "out_format": out_format}
    return nn.Autocorrelation._func(x, **options)
def acr2csm(r: Tensor) -> Tensor:
    """Convert autocorrelation to composite sinusoidal model (CSM) coefficients.

    Parameters
    ----------
    r : Tensor [shape=(..., M+1)]
        The autocorrelation.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The CSM coefficients.

    """
    convert = nn.AutocorrelationToCompositeSinusoidalModelCoefficients._func
    return convert(r)
def alaw(x: Tensor, abs_max: float = 1, a: float = 87.6) -> Tensor:
    """Apply A-law compression to the input waveform.

    Parameters
    ----------
    x : Tensor [shape=(...,)]
        The input waveform.

    abs_max : float > 0
        The absolute maximum value of the input waveform.

    a : float >= 1
        The compression factor, :math:`A`.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The compressed waveform.

    """
    compressed = nn.ALawCompression._func(x, abs_max=abs_max, a=a)
    return compressed
def b2mc(b: Tensor, alpha: float = 0) -> Tensor:
    """Convert MLSA digital filter coefficients to mel-cepstrum.

    Parameters
    ----------
    b : Tensor [shape=(..., M+1)]
        The MLSA filter coefficients.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The mel-cepstral coefficients.

    """
    convert = nn.MLSADigitalFilterCoefficientsToMelCepstrum._func
    return convert(b, alpha=alpha)
def c2acr(c: Tensor, acr_order: int, n_fft: int = 512) -> Tensor:
    """Convert cepstral coefficients to autocorrelation.

    Parameters
    ----------
    c : Tensor [shape=(..., M+1)]
        The cepstral coefficients.

    acr_order : int >= 0
        The order of the autocorrelation, :math:`N`.

    n_fft : int >> N
        The number of FFT bins used for conversion.

    Returns
    -------
    out : Tensor [shape=(..., N+1)]
        The autocorrelation.

    """
    options = {"acr_order": acr_order, "n_fft": n_fft}
    return nn.CepstrumToAutocorrelation._func(c, **options)
def c2mpir(c: Tensor, ir_length: int, n_fft: int = 512) -> Tensor:
    """Convert cepstral coefficients to a minimum phase impulse response.

    Parameters
    ----------
    c : Tensor [shape=(..., M+1)]
        The cepstral coefficients.

    ir_length : int >= 1
        The length of the impulse response, :math:`N`.

    n_fft : int >> N
        The number of FFT bins used for conversion.

    Returns
    -------
    out : Tensor [shape=(..., N)]
        The truncated minimum phase impulse response.

    """
    convert = nn.CepstrumToMinimumPhaseImpulseResponse._func
    return convert(c, ir_length=ir_length, n_fft=n_fft)
def c2ndps(c: Tensor, fft_length: int) -> Tensor:
    """Convert cepstrum to the negative derivative of phase spectrum (NDPS).

    Parameters
    ----------
    c : Tensor [shape=(..., M+1)]
        The cepstrum.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The NDPS.

    """
    convert = nn.CepstrumToNegativeDerivativeOfPhaseSpectrum._func
    return convert(c, fft_length=fft_length)
def cdist(c1: Tensor, c2: Tensor, full: bool = False, reduction: str = "mean") -> Tensor:
    """Compute the cepstral distance between two sets of cepstral coefficients.

    Parameters
    ----------
    c1 : Tensor [shape=(..., M+1)]
        The input cepstral coefficients.

    c2 : Tensor [shape=(..., M+1)]
        The target cepstral coefficients.

    full : bool
        If True, include the constant term in the distance calculation.

    reduction : ['none', 'mean', 'batchmean', 'sum']
        The reduction type.

    Returns
    -------
    out : Tensor [shape=(...,) or scalar]
        The cepstral distance.

    """
    distance = nn.CepstralDistance._func(c1, c2, full=full, reduction=reduction)
    return distance
def chroma(
    x: Tensor,
    n_channel: int,
    sample_rate: int,
    norm: float = float("inf"),
    use_power: bool = True,
) -> Tensor:
    """Pass the STFT through chroma-filter banks.

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    n_channel : int >= 1
        The number of chroma filter banks, :math:`C`.

    sample_rate : int >= 1
        The sample rate in Hz.

    norm : float
        The normalization factor.

    use_power : bool
        If True, use the power spectrum instead of the amplitude spectrum.

    Returns
    -------
    out : Tensor [shape=(..., C)]
        The chroma filter bank output.

    """
    options = {
        "n_channel": n_channel,
        "sample_rate": sample_rate,
        "norm": norm,
        "use_power": use_power,
    }
    return nn.ChromaFilterBankAnalysis._func(x, **options)
def csm2acr(c: Tensor) -> Tensor:
    """Convert composite sinusoidal model (CSM) coefficients to autocorrelation.

    Parameters
    ----------
    c : Tensor [shape=(..., M+1)]
        The CSM coefficients.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The autocorrelation.

    """
    convert = nn.CompositeSinusoidalModelCoefficientsToAutocorrelation._func
    return convert(c)
def dct(x: Tensor, dct_type: int = 2) -> Tensor:
    """Apply the discrete cosine transform (DCT) to the input.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The input.

    dct_type : int in [1, 4]
        The DCT type.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The DCT output.

    """
    transformed = nn.DiscreteCosineTransform._func(x, dct_type=dct_type)
    return transformed
def decimate(
    x: Tensor,
    period: int = 1,
    start: int = 0,
    dim: int = -1,
) -> Tensor:
    """Decimate a signal along the given dimension.

    Parameters
    ----------
    x : Tensor [shape=(..., T, ...)]
        The input signal.

    period : int >= 1
        The decimation period, :math:`P`.

    start : int >= 0
        The start point, :math:`S`.

    dim : int
        The dimension along which to decimate the tensors.

    Returns
    -------
    out : Tensor [shape=(..., T/P-S, ...)]
        The decimated signal.

    """
    options = {"period": period, "start": start, "dim": dim}
    return nn.Decimation._func(x, **options)
def delay(
    x: Tensor,
    start: int = 0,
    keeplen: bool = False,
    dim: int = -1,
) -> Tensor:
    """Delay a signal along the given dimension.

    Parameters
    ----------
    x : Tensor [shape=(..., T, ...)]
        The input signal.

    start : int
        The start point, :math:`S`. If negative, advance the signal.

    keeplen : bool
        If True, the output has the same length of the input.

    dim : int
        The dimension along which to delay the tensors.

    Returns
    -------
    out : Tensor [shape=(..., T-S, ...)] or [shape=(..., T, ...)]
        The delayed signal.

    """
    shift = nn.Delay._func
    return shift(x, start=start, keeplen=keeplen, dim=dim)
def delta(
    x: Tensor,
    seed: ArrayLike[ArrayLike[float]] | ArrayLike[int] = [[-0.5, 0, 0.5]],
    static_out: bool = True,
) -> Tensor:
    """Compute delta components from static components.

    Parameters
    ----------
    x : Tensor [shape=(B, T, D) or (T, D)]
        The static components.

    seed : list[list[float]] or list[int]
        The delta coefficients or the width(s) of 1st (and 2nd) regression
        coefficients.

    static_out : bool
        If False, outputs only the delta components.

    Returns
    -------
    out : Tensor [shape=(B, T, DxH) or (T, DxH)]
        The delta (and static) components.

    """
    # NOTE(review): the default ``seed`` is a list object shared across calls. It is
    # only forwarded here; this presumes the callee never mutates it -- confirm.
    compute = nn.Delta._func
    return compute(x, seed, static_out=static_out)
def dequantize(
    y: Tensor,
    abs_max: float = 1,
    n_bit: int = 8,
    quantizer: str = "mid-rise",
) -> Tensor:
    """Recover a waveform from its quantized representation.

    Parameters
    ----------
    y : Tensor [shape=(...,)]
        The quantized waveform.

    abs_max : float > 0
        The absolute maximum value of the original waveform.

    n_bit : int >= 1
        The number of quantization bits.

    quantizer : ['mid-rise', 'mid-tread']
        The quantizer type.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The dequantized waveform.

    """
    options = {"abs_max": abs_max, "n_bit": n_bit, "quantizer": quantizer}
    return nn.InverseUniformQuantization._func(y, **options)
def dfs(
    x: Tensor,
    b: Tensor | None = None,
    a: Tensor | None = None,
    ir_length: int | None = None,
) -> Tensor:
    """Filter the input waveform with an IIR digital filter.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input waveform.

    b : Tensor [shape=(M+1,)] or None
        The numerator coefficients.

    a : Tensor [shape=(N+1,)] or None
        The denominator coefficients.

    ir_length : int >= 1 or None
        The length of the truncated impulse response. If given, the filter is
        approximated by an FIR filter.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The filtered waveform.

    """
    apply_filter = nn.InfiniteImpulseResponseDigitalFilter._func
    return apply_filter(x, b=b, a=a, ir_length=ir_length)
def dht(x: Tensor, dht_type: int = 2) -> Tensor:
    """Apply the discrete Hartley transform (DHT) to the input.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The input.

    dht_type : int in [1, 4]
        The DHT type.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The DHT output.

    """
    transformed = nn.DiscreteHartleyTransform._func(x, dht_type=dht_type)
    return transformed
def drc(
    x: Tensor,
    threshold: float,
    ratio: float,
    attack_time: float,
    release_time: float,
    sample_rate: int,
    makeup_gain: float = 0,
    abs_max: float = 1,
) -> Tensor:
    """Apply dynamic range compression to the input waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input waveform.

    threshold : float <= 0
        The threshold in dB.

    ratio : float > 1
        The input/output ratio.

    attack_time : float > 0
        The attack time in msec.

    release_time : float > 0
        The release time in msec.

    sample_rate : int >= 1
        The sample rate in Hz.

    makeup_gain : float >= 0
        The make-up gain in dB.

    abs_max : float > 0
        The absolute maximum value of input.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The compressed waveform.

    """
    options = {
        "threshold": threshold,
        "ratio": ratio,
        "attack_time": attack_time,
        "release_time": release_time,
        "sample_rate": sample_rate,
        "makeup_gain": makeup_gain,
        "abs_max": abs_max,
    }
    return nn.DynamicRangeCompression._func(x, **options)
def dst(x: Tensor, dst_type: int = 2) -> Tensor:
    """Apply the discrete sine transform (DST) to the input.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The input.

    dst_type : int in [1, 4]
        The DST type.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The DST output.

    """
    transform = nn.DiscreteSineTransform._func
    return transform(x, dst_type=dst_type)
def entropy(p: Tensor, out_format: str = "nat") -> Tensor:
    """Compute the entropy of a probability distribution.

    Parameters
    ----------
    p : Tensor [shape=(..., N)]
        The probability.

    out_format : ['bit', 'nat', 'dit']
        The output format.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The entropy.

    """
    value = nn.Entropy._func(p, out_format=out_format)
    return value
def excite(
    p: Tensor,
    frame_period: int = 80,
    *,
    voiced_region: str = "pulse",
    unvoiced_region: str = "gauss",
    polarity: str = "auto",
    init_phase: str = "zeros",
) -> Tensor:
    """Generate a simple excitation signal from pitch.

    Parameters
    ----------
    p : Tensor [shape=(..., N)]
        The pitch in seconds.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    voiced_region : ['pulse', 'sinusoidal', 'sawtooth', 'inverted-sawtooth', \
                     'triangle', 'square']
        The type of voiced region.

    unvoiced_region : ['zeros', 'gauss']
        The type of unvoiced region.

    polarity : ['auto', 'unipolar', 'bipolar']
        The polarity.

    init_phase : ['zeros', 'random']
        The initial phase.

    Returns
    -------
    out : Tensor [shape=(..., NxP)]
        The excitation signal.

    """
    options = {
        "frame_period": frame_period,
        "voiced_region": voiced_region,
        "unvoiced_region": unvoiced_region,
        "polarity": polarity,
        "init_phase": init_phase,
    }
    return nn.ExcitationGeneration._func(p, **options)
def fbank(
    x: Tensor,
    n_channel: int,
    sample_rate: int,
    f_min: float = 0,
    f_max: float | None = None,
    floor: float = 1e-5,
    gamma: float = 0,
    use_power: bool = False,
    out_format: str = "y",
) -> tuple[Tensor, Tensor] | Tensor:
    """Pass the STFT through mel-filter banks.

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    n_channel : int >= 1
        The number of mel filter banks, :math:`C`.

    sample_rate : int >= 1
        The sample rate in Hz.

    f_min : float >= 0
        The minimum frequency in Hz.

    f_max : float <= sample_rate // 2 or None
        The maximum frequency in Hz.

    floor : float > 0
        The minimum mel filter bank output in linear scale.

    gamma : float in [-1, 1]
        The parameter of the generalized logarithmic function.

    use_power : bool
        If True, use the power spectrum instead of the amplitude spectrum.

    out_format : ['y', 'yE', 'y,E']
        `y` is the mel filter bank output and `E` is the energy. If this is `yE`,
        the two output tensors are concatenated and a single tensor is returned
        instead of a tuple.

    Returns
    -------
    y : Tensor [shape=(..., C)]
        The mel filter bank output.

    E : Tensor [shape=(..., 1)] (optional)
        The energy.

    """
    options = {
        "n_channel": n_channel,
        "sample_rate": sample_rate,
        "f_min": f_min,
        "f_max": f_max,
        "floor": floor,
        "gamma": gamma,
        "use_power": use_power,
        "out_format": out_format,
    }
    return nn.MelFilterBankAnalysis._func(x, **options)
def fftcep(
    x: Tensor,
    cep_order: int,
    accel: float = 0,
    n_iter: int = 0,
) -> Tensor:
    """Estimate the cepstrum from a power spectrum (cepstral analysis).

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    accel : float >= 0
        The acceleration factor.

    n_iter : int >= 0
        The number of iterations.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The cepstrum.

    """
    analyze = nn.CepstralAnalysis._func
    return analyze(x, cep_order=cep_order, accel=accel, n_iter=n_iter)
def fftr(x: Tensor, fft_length: int | None = None, out_format: str = "complex") -> Tensor:
    """Compute the FFT of a real-valued signal.

    Parameters
    ----------
    x : Tensor [shape=(..., N)]
        The real input signal.

    fft_length : int >= 2 or None
        The FFT length, :math:`L`.

    out_format : ['complex', 'real', 'imaginary', 'amplitude', 'power']
        The output format.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The output spectrum.

    """
    options = {"fft_length": fft_length, "out_format": out_format}
    return nn.RealValuedFastFourierTransform._func(x, **options)
def flux(
    x: Tensor,
    y: Tensor | None = None,
    lag: int = 1,
    norm: int | float = 2,
    reduction: str = "mean",
) -> Tensor:
    """Compute flux.

    Parameters
    ----------
    x : Tensor [shape=(..., N, D)]
        The input.

    y : Tensor [shape=(..., N, D)] or None
        The target (optional).

    lag : int
        The lag of the distance calculation, :math:`L`.

    norm : int or float
        The order of the norm.

    reduction : ['none', 'mean', 'batchmean', 'sum']
        The reduction type.

    Returns
    -------
    out : Tensor [shape=(..., N-\\|L\\|) or scalar]
        The flux.

    """
    compute = nn.Flux._func
    return compute(x, y, lag=lag, norm=norm, reduction=reduction)
def frame(
    x: Tensor,
    frame_length: int = 400,
    frame_period: int = 80,
    center: bool = True,
    zmean: bool = False,
    mode: str = "constant",
) -> Tensor:
    """Split the given waveform into frames.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The waveform.

    frame_length : int >= 1
        The frame length in samples, :math:`L`.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    center : bool
        If True, pad the input on both sides so that the frame is centered.

    zmean : bool
        If True, perform mean subtraction on each frame.

    mode : ['constant', 'reflect', 'replicate', 'circular']
        The padding method.

    Returns
    -------
    out : Tensor [shape=(..., T/P, L)]
        The framed waveform.

    """
    options = {
        "frame_length": frame_length,
        "frame_period": frame_period,
        "center": center,
        "zmean": zmean,
        "mode": mode,
    }
    return nn.Frame._func(x, **options)
def freqt(c: Tensor, out_order: int, alpha: float = 0) -> Tensor:
    """Apply a frequency transform to cepstral coefficients.

    Parameters
    ----------
    c : Tensor [shape=(..., M1+1)]
        The cepstral coefficients.

    out_order : int >= 0
        The order of the output cepstrum, :math:`M_2`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    Returns
    -------
    out : Tensor [shape=(..., M2+1)]
        The warped cepstral coefficients.

    """
    warped = nn.FrequencyTransform._func(c, out_order=out_order, alpha=alpha)
    return warped
def freqt2(
    c: Tensor,
    out_order: int,
    alpha: float = 0,
    theta: float = 0,
    n_fft: int = 512,
) -> Tensor:
    """Apply the second-order all-pass frequency transform.

    Parameters
    ----------
    c : Tensor [shape=(..., M1+1)]
        The cepstral coefficients.

    out_order : int >= 0
        The order of the output sequence, :math:`M_2`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    theta : float in [0, 1]
        The emphasis frequency, :math:`\\theta`.

    n_fft : int >> M2
        The number of FFT bins. An accurate conversion requires a large value.

    Returns
    -------
    out : Tensor [shape=(..., M2+1)]
        The warped cepstral coefficients.

    """
    options = {
        "out_order": out_order,
        "alpha": alpha,
        "theta": theta,
        "n_fft": n_fft,
    }
    return nn.SecondOrderAllPassFrequencyTransform._func(c, **options)
def gnorm(x: Tensor, gamma: float = 0, c: int | None = None) -> Tensor:
    """Apply gain normalization to the generalized cepstrum.

    Parameters
    ----------
    x : Tensor [shape=(..., M+1)]
        The generalized cepstrum.

    gamma : float in [-1, 1]
        The gamma parameter, :math:`\\gamma`.

    c : int >= 1 or None
        The number of filter stages.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The normalized generalized cepstrum.

    """
    normalize = nn.GeneralizedCepstrumGainNormalization._func
    return normalize(x, gamma=gamma, c=c)
def grpdelay(
    b: Tensor | None = None,
    a: Tensor | None = None,
    *,
    fft_length: int = 512,
    alpha: float = 1,
    gamma: float = 1,
    **kwargs,
) -> Tensor:
    """Compute the group delay of a filter.

    Parameters
    ----------
    b : Tensor [shape=(..., M+1)] or None
        The numerator coefficients.

    a : Tensor [shape=(..., N+1)] or None
        The denominator coefficients.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    alpha : float > 0
        The tuning parameter, :math:`\\alpha`.

    gamma : float > 0
        The tuning parameter, :math:`\\gamma`.

    **kwargs
        Additional keyword arguments forwarded unchanged to
        ``nn.GroupDelay._func``.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The group delay or modified group delay function.

    """
    compute = nn.GroupDelay._func
    return compute(b, a, fft_length=fft_length, alpha=alpha, gamma=gamma, **kwargs)
def hilbert(
    x: Tensor,
    fft_length: int | None = None,
    dim: int = -1,
) -> Tensor:
    """Compute the analytic signal via the Hilbert transform.

    Parameters
    ----------
    x : Tensor [shape=(..., T, ...)]
        The input signal.

    fft_length : int >= 1 or None
        The number of FFT bins. If None, set to :math:`T`.

    dim : int
        The dimension along which to take the Hilbert transform.

    Returns
    -------
    out : Tensor [shape=(..., T, ...)]
        The analytic signal.

    """
    analytic = nn.HilbertTransform._func(x, fft_length=fft_length, dim=dim)
    return analytic
def hilbert2(
    x: Tensor,
    fft_length: ArrayLike[int] | int | None = None,
    dim: ArrayLike[int] = (-2, -1),
) -> Tensor:
    """Compute the analytic signal via the two-dimensional Hilbert transform.

    Parameters
    ----------
    x : Tensor [shape=(..., T1, T2, ...)]
        The input signal.

    fft_length : int, list[int], or None
        The number of FFT bins. If None, set to (:math:`T1`, :math:`T2`).

    dim : list[int]
        The dimensions along which to take the Hilbert transform.

    Returns
    -------
    out : Tensor [shape=(..., T1, T2, ...)]
        The analytic signal.

    """
    transform = nn.TwoDimensionalHilbertTransform._func
    return transform(x, fft_length=fft_length, dim=dim)
def histogram(
    x: Tensor,
    n_bin: int = 10,
    lower_bound: float = 0,
    upper_bound: float = 1,
    norm: bool = False,
    softness: float = 1e-3,
) -> Tensor:
    """Compute a histogram of the input data.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input data.

    n_bin : int >= 1
        The number of bins, :math:`K`.

    lower_bound : float < U
        The lower bound of the histogram, :math:`L`.

    upper_bound : float > L
        The upper bound of the histogram, :math:`U`.

    norm : bool
        If True, normalizes the histogram.

    softness : float > 0
        A smoothing parameter. A smaller value makes the output closer to the
        true histogram, but the gradient vanishes.

    Returns
    -------
    out : Tensor [shape=(..., K)]
        The histogram.

    """
    options = {
        "n_bin": n_bin,
        "lower_bound": lower_bound,
        "upper_bound": upper_bound,
        "norm": norm,
        "softness": softness,
    }
    return nn.Histogram._func(x, **options)
def ialaw(y: Tensor, abs_max: float = 1, a: float = 87.6) -> Tensor:
    """Apply A-law expansion to a compressed waveform.

    Parameters
    ----------
    y : Tensor [shape=(...,)]
        The compressed waveform.

    abs_max : float > 0
        The absolute maximum value of the original input waveform.

    a : float >= 1
        The compression factor, :math:`A`.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The expanded waveform.

    """
    expand = nn.ALawExpansion._func
    return expand(y, abs_max=abs_max, a=a)
def idct(y: Tensor, dct_type: int = 2) -> Tensor:
    """Apply the inverse discrete cosine transform (DCT) to the input.

    Parameters
    ----------
    y : Tensor [shape=(..., L)]
        The input.

    dct_type : int in [1, 4]
        The DCT type.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The inverse DCT output.

    """
    restored = nn.InverseDiscreteCosineTransform._func(y, dct_type=dct_type)
    return restored
def idht(y: Tensor, dht_type: int = 2) -> Tensor:
    """Apply the inverse discrete Hartley transform (DHT) to the input.

    Parameters
    ----------
    y : Tensor [shape=(..., L)]
        The input.

    dht_type : int in [1, 4]
        The DHT type.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The inverse DHT output.

    """
    inverse = nn.InverseDiscreteHartleyTransform._func
    return inverse(y, dht_type=dht_type)
def idst(y: Tensor, dst_type: int = 2) -> Tensor:
    """Apply the inverse discrete sine transform (DST) to the input.

    Parameters
    ----------
    y : Tensor [shape=(..., L)]
        The input.

    dst_type : int in [1, 4]
        The DST type.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The inverse DST output.

    """
    restored = nn.InverseDiscreteSineTransform._func(y, dst_type=dst_type)
    return restored
def ifftr(y: Tensor, out_length: int | None = None) -> Tensor:
    """Compute the inverse FFT of a complex spectrum, yielding a real signal.

    Parameters
    ----------
    y : Tensor [shape=(..., L/2+1)]
        The complex input spectrum.

    out_length : int or None
        The output length, :math:`N`.

    Returns
    -------
    out : Tensor [shape=(..., N)]
        The real output signal.

    """
    inverse = nn.RealValuedInverseFastFourierTransform._func
    return inverse(y, out_length=out_length)
def ifreqt2(
    c: Tensor,
    out_order: int,
    alpha: float = 0,
    theta: float = 0,
    n_fft: int = 512,
) -> Tensor:
    """Apply the second-order all-pass inverse frequency transform.

    Parameters
    ----------
    c : Tensor [shape=(..., M1+1)]
        The cepstral coefficients.

    out_order : int >= 0
        The order of the output sequence, :math:`M_2`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    theta : float in [0, 1]
        The emphasis frequency, :math:`\\theta`.

    n_fft : int >> M2
        The number of FFT bins.

    Returns
    -------
    out : Tensor [shape=(..., M2+1)]
        The warped cepstral coefficients.

    """
    options = {
        "out_order": out_order,
        "alpha": alpha,
        "theta": theta,
        "n_fft": n_fft,
    }
    return nn.SecondOrderAllPassInverseFrequencyTransform._func(c, **options)
def ignorm(y: Tensor, gamma: float = 0, c: int | None = None) -> Tensor:
    """Apply inverse gain normalization to the normalized generalized cepstrum.

    Parameters
    ----------
    y : Tensor [shape=(..., M+1)]
        The normalized generalized cepstrum.

    gamma : float in [-1, 1]
        The gamma parameter, :math:`\\gamma`.

    c : int >= 1 or None
        The number of filter stages.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The generalized cepstrum.

    """
    denormalize = nn.GeneralizedCepstrumInverseGainNormalization._func
    return denormalize(y, gamma=gamma, c=c)
def imdct(
    y: Tensor,
    out_length: int | None = None,
    frame_length: int = 400,
    window: str = "sine",
) -> Tensor:
    """Reconstruct a waveform via the inverse modified discrete cosine transform.

    Parameters
    ----------
    y : Tensor [shape=(..., 2T/L, L/2)]
        The spectrum.

    out_length : int or None
        The length of the output waveform.

    frame_length : int >= 2
        The frame length, :math:`L`.

    window : ['sine', 'vorbis', 'kbd', 'rectangular']
        The window type.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The reconstructed waveform.

    """
    options = {
        "out_length": out_length,
        "frame_length": frame_length,
        "window": window,
    }
    return nn.InverseModifiedDiscreteCosineTransform._func(y, **options)
def imdst(
    y: Tensor,
    out_length: int | None = None,
    frame_length: int = 400,
    window: str = "sine",
) -> Tensor:
    """Reconstruct a waveform via the inverse modified discrete sine transform.

    Parameters
    ----------
    y : Tensor [shape=(..., 2T/L, L/2)]
        The spectrum.

    out_length : int or None
        The length of the output waveform.

    frame_length : int >= 2
        The frame length, :math:`L`.

    window : ['sine', 'vorbis', 'kbd', 'rectangular']
        The window type.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The reconstructed waveform.

    """
    inverse = nn.InverseModifiedDiscreteSineTransform._func
    return inverse(
        y, out_length=out_length, frame_length=frame_length, window=window
    )
def interpolate(
    x: Tensor,
    period: int = 1,
    start: int = 0,
    dim: int = -1,
) -> Tensor:
    """Interpolate a signal along the given dimension.

    Parameters
    ----------
    x : Tensor [shape=(..., T, ...)]
        The input signal.

    period : int >= 1
        The interpolation period, :math:`P`.

    start : int >= 0
        The start point, :math:`S`.

    dim : int
        The dimension along which to interpolate the tensors.

    Returns
    -------
    out : Tensor [shape=(..., TxP+S, ...)]
        The interpolated signal.

    """
    options = {"period": period, "start": start, "dim": dim}
    return nn.Interpolation._func(x, **options)
def ipnorm(y: Tensor) -> Tensor:
    """Apply inverse power normalization to the power-normalized cepstrum.

    Parameters
    ----------
    y : Tensor [shape=(..., M+2)]
        The log power and power-normalized cepstrum.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The cepstrum.

    """
    denormalize = nn.MelCepstrumInversePowerNormalization._func
    return denormalize(y)
def is2par(s: Tensor) -> Tensor:
    """Convert inverse sine (IS) coefficients to PARCOR coefficients.

    Parameters
    ----------
    s : Tensor [shape=(..., M+1)]
        The inverse sine coefficients.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The PARCOR coefficients.

    """
    parcor = nn.InverseSineToParcorCoefficients._func(s)
    return parcor
def istft(
    y: Tensor,
    *,
    out_length: int | None = None,
    frame_length: int = 400,
    frame_period: int = 80,
    fft_length: int = 512,
    center: bool = True,
    window: str = "blackman",
    norm: str = "power",
    symmetric: bool = True,
) -> Tensor:
    """Reconstruct a waveform via the inverse short-time Fourier transform.

    Parameters
    ----------
    y : Tensor [shape=(..., T/P, N/2+1)]
        The complex spectrogram.

    out_length : int >= 1 or None
        The length of the output waveform.

    frame_length : int >= 1
        The frame length in samples, :math:`L`.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    fft_length : int >= L
        The number of FFT bins, :math:`N`.

    center : bool
        If True, pad the input on both sides so that the frame is centered.

    window : ['blackman', 'hamming', 'hanning', 'bartlett', 'trapezoidal', \
              'rectangular', 'nuttall']
        The window type.

    norm : ['none', 'power', 'magnitude']
        The normalization type of the window.

    symmetric : bool
        If True, the window is symmetric, otherwise periodic.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The reconstructed waveform.

    """
    options = {
        "out_length": out_length,
        "frame_length": frame_length,
        "frame_period": frame_period,
        "fft_length": fft_length,
        "center": center,
        "window": window,
        "norm": norm,
        "symmetric": symmetric,
    }
    return nn.InverseShortTimeFourierTransform._func(y, **options)
def iulaw(y: Tensor, abs_max: float = 1, mu: int = 255) -> Tensor:
    """Apply :math:`\\mu`-law expansion to a compressed waveform.

    Parameters
    ----------
    y : Tensor [shape=(...,)]
        The compressed waveform.

    abs_max : float > 0
        The absolute maximum value of the original input waveform.

    mu : int >= 1
        The compression factor, :math:`\\mu`.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The expanded waveform.

    """
    expanded = nn.MuLawExpansion._func(y, abs_max=abs_max, mu=mu)
    return expanded
def iwht(y: Tensor, wht_type: str = "natural") -> Tensor:
    """Apply the inverse Walsh-Hadamard transform (WHT) to the input.

    Parameters
    ----------
    y : Tensor [shape=(..., L)]
        The input.

    wht_type : ['sequency', 'natural', 'dyadic']
        The order of the coefficients in the Walsh matrix.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The inverse WHT output.

    """
    inverse = nn.InverseWalshHadamardTransform._func
    return inverse(y, wht_type=wht_type)
def lar2par(g: Tensor) -> Tensor:
    """Convert log area ratio (LAR) to PARCOR coefficients.

    Parameters
    ----------
    g : Tensor [shape=(..., M+1)]
        The log area ratio.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The PARCOR coefficients.

    """
    convert = nn.LogAreaRatioToParcorCoefficients._func
    return convert(g)
def levdur(r: Tensor, eps: float = 0) -> Tensor:
    """Solve a Yule-Walker linear system from the autocorrelation.

    Parameters
    ----------
    r : Tensor [shape=(..., M+1)]
        The autocorrelation.

    eps : float >= 0
        A small value to improve numerical stability.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The gain and LPC coefficients.

    """
    solution = nn.LevinsonDurbin._func(r, eps=eps)
    return solution
def linear_intpl(x: Tensor, upsampling_factor: int = 80) -> Tensor:
    """Upsample filter coefficients by interpolation.

    Parameters
    ----------
    x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
        The filter coefficients.

    upsampling_factor : int >= 1
        The upsampling factor, :math:`P`.

    Returns
    -------
    y : Tensor [shape=(B, NxP, D) or (NxP, D) or (NxP,)]
        The upsampled filter coefficients.

    """
    upsample = nn.LinearInterpolation._func
    return upsample(x, upsampling_factor=upsampling_factor)
def lpc(x: Tensor, lpc_order: int, eps: float = 1e-6) -> Tensor:
    """Run LPC analysis on a framed waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The framed waveform.

    lpc_order : int >= 0
        The order of the LPC coefficients, :math:`M`.

    eps : float >= 0
        A small value to improve numerical stability.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The gain and LPC coefficients.

    """
    options = {"lpc_order": lpc_order, "eps": eps}
    return nn.LinearPredictiveCodingAnalysis._func(x, **options)
def lpc2lsp(
    a: Tensor,
    log_gain: bool = False,
    sample_rate: int | None = None,
    out_format: str = "radian",
) -> Tensor:
    """Convert LPC coefficients to line spectral pairs (LSP).

    Parameters
    ----------
    a : Tensor [shape=(..., M+1)]
        The LPC coefficients.

    log_gain : bool
        If True, output the gain in logarithmic scale.

    sample_rate : int >= 1 or None
        The sample rate in Hz.

    out_format : ['radian', 'cycle', 'khz', 'hz']
        The output format.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The LSP frequencies.

    """
    options = {
        "log_gain": log_gain,
        "sample_rate": sample_rate,
        "out_format": out_format,
    }
    return nn.LinearPredictiveCoefficientsToLineSpectralPairs._func(a, **options)
def lpc2par(a: Tensor, gamma: float = 1, c: int | None = None) -> Tensor:
    """Convert LPC coefficients to PARCOR coefficients.

    Parameters
    ----------
    a : Tensor [shape=(..., M+1)]
        The LPC coefficients.

    gamma : float in [-1, 1]
        The gamma parameter, :math:`\\gamma`.

    c : int >= 1 or None
        The number of filter stages.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The PARCOR coefficients.

    """
    convert = nn.LinearPredictiveCoefficientsToParcorCoefficients._func
    return convert(a, gamma=gamma, c=c)
def lpccheck(
    a: Tensor,
    margin: float = 1e-16,
    warn_type: str = "warn",
) -> Tensor:
    """Check the stability of LPC coefficients.

    Parameters
    ----------
    a : Tensor [shape=(..., M+1)]
        The input LPC coefficients.

    margin : float in (0, 1)
        The margin to guarantee the stability of LPC.

    warn_type : ['ignore', 'warn', 'exit']
        The warning type.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The modified LPC coefficients.

    """
    check = nn.LinearPredictiveCoefficientsStabilityCheck._func
    return check(a, margin=margin, warn_type=warn_type)
def lsp2lpc(
    w: Tensor,
    log_gain: bool = False,
    sample_rate: int | None = None,
    in_format: str = "radian",
) -> Tensor:
    """Convert line spectral pairs (LSP) to LPC coefficients.

    Parameters
    ----------
    w : Tensor [shape=(..., M+1)]
        The LSP frequencies.

    log_gain : bool
        If True, assume the input gain is in logarithmic scale.

    sample_rate : int >= 1 or None
        The sample rate in Hz.

    in_format : ['radian', 'cycle', 'khz', 'hz']
        The input format.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The LPC coefficients.

    """
    options = {
        "log_gain": log_gain,
        "sample_rate": sample_rate,
        "in_format": in_format,
    }
    return nn.LineSpectralPairsToLinearPredictiveCoefficients._func(w, **options)
def lspcheck(
    w: Tensor,
    rate: float = 0,
    n_iter: int = 1,
    warn_type: str = "warn",
) -> Tensor:
    """Check the stability of LSP coefficients.

    Parameters
    ----------
    w : Tensor [shape=(..., M+1)]
        The input LSP coefficients in radians.

    rate : float in [0, 1]
        The rate of distance between two adjacent LSPs.

    n_iter : int >= 0
        The number of iterations for the modification.

    warn_type : ['ignore', 'warn', 'exit']
        The warning type.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The modified LSP frequencies.

    """
    check = nn.LineSpectralPairsStabilityCheck._func
    return check(w, rate=rate, n_iter=n_iter, warn_type=warn_type)
def lsp2sp(
    w: Tensor,
    fft_length: int,
    alpha: float = 0,
    gamma: float = -1,
    log_gain: bool = False,
    out_format: str = "power",
) -> Tensor:
    """Convert line spectral pairs (LSP) to a spectrum.

    Parameters
    ----------
    w : Tensor [shape=(..., M+1)]
        The line spectral pairs in radians.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    alpha : float in (-1, 1)
        The warping factor, :math:`\\alpha`.

    gamma : float in [-1, 0)
        The gamma parameter, :math:`\\gamma`.

    log_gain : bool
        If True, assume the input gain is in logarithmic scale.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power']
        The output format.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The spectrum.

    """
    options = {
        "fft_length": fft_length,
        "alpha": alpha,
        "gamma": gamma,
        "log_gain": log_gain,
        "out_format": out_format,
    }
    return nn.LineSpectralPairsToSpectrum._func(w, **options)
def magic_intpl(
    x: Tensor,
    magic_number: float = 0,
) -> Tensor:
    """Fill in the magic numbers found in the data by interpolation.

    Parameters
    ----------
    x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
        The data that contains the magic number.

    magic_number : float or Tensor
        The magic number to be interpolated.

    Returns
    -------
    out : Tensor [shape=(B, N, D) or (N, D) or (N,)]
        The interpolated data.

    """
    interpolator = nn.MagicNumberInterpolation
    return interpolator._func(x, magic_number=magic_number)
def mc2b(
    mc: Tensor,
    alpha: float = 0,
) -> Tensor:
    """Transform mel-cepstrum into MLSA digital filter coefficients.

    Parameters
    ----------
    mc : Tensor [shape=(..., M+1)]
        The mel-cepstral coefficients.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The MLSA digital filter coefficients.

    """
    converter = nn.MelCepstrumToMLSADigitalFilterCoefficients
    return converter._func(mc, alpha=alpha)
def mcep(
    x: Tensor,
    cep_order: int,
    alpha: float = 0,
    n_iter: int = 0,
) -> Tensor:
    """Run mel-cepstral analysis on the power spectrum.

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    n_iter : int >= 0
        The number of iterations.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The mel-cepstrum.

    """
    analyzer = nn.MelCepstralAnalysis
    return analyzer._func(x, cep_order=cep_order, alpha=alpha, n_iter=n_iter)
def mcpf(
    mc: Tensor,
    alpha: float = 0,
    beta: float = 0,
    onset: int = 2,
    ir_length: int = 128,
) -> Tensor:
    """Apply mel-cepstrum postfiltering.

    Parameters
    ----------
    mc : Tensor [shape=(..., M+1)]
        The mel-cepstral coefficients to be postfiltered.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    beta : float
        The intensity parameter, :math:`\\beta`.

    onset : int >= 0
        The onset index.

    ir_length : int >= 1
        The length of the impulse response.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The postfiltered mel-cepstral coefficients.

    """
    options = {
        "alpha": alpha,
        "beta": beta,
        "onset": onset,
        "ir_length": ir_length,
    }
    return nn.MelCepstrumPostfiltering._func(mc, **options)
def mdct(
    x: Tensor,
    frame_length: int = 400,
    window: str = "sine",
) -> Tensor:
    """Apply the modified discrete cosine transform to a waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input waveform.

    frame_length : int >= 2
        The frame length, :math:`L`.

    window : ['sine', 'vorbis', 'kbd', 'rectangular']
        The window type.

    Returns
    -------
    out : Tensor [shape=(..., 2T/L, L/2)]
        The spectrum.

    """
    transform = nn.ModifiedDiscreteCosineTransform
    return transform._func(x, frame_length=frame_length, window=window)
def mdst(
    x: Tensor,
    frame_length: int = 400,
    window: str = "sine",
) -> Tensor:
    """Apply the modified discrete sine transform to a waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input waveform.

    frame_length : int >= 2
        The frame length, :math:`L`.

    window : ['sine', 'vorbis', 'kbd', 'rectangular']
        The window type.

    Returns
    -------
    out : Tensor [shape=(..., 2T/L, L/2)]
        The spectrum.

    """
    transform = nn.ModifiedDiscreteSineTransform
    return transform._func(x, frame_length=frame_length, window=window)
def mfcc(
    x: Tensor,
    mfcc_order: int,
    n_channel: int,
    sample_rate: int,
    lifter: int = 1,
    f_min: float = 0,
    f_max: float | None = None,
    floor: float = 1e-5,
    gamma: float = 0,
    out_format: str = "y",
) -> Tensor:
    """Extract the MFCC from the power spectrum.

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    mfcc_order : int >= 1
        The order of the MFCC, :math:`M`.

    n_channel : int >= 1
        The number of mel filter banks, :math:`C`.

    sample_rate : int >= 1
        The sample rate in Hz.

    lifter : int >= 1
        The liftering coefficient.

    f_min : float >= 0
        The minimum frequency in Hz.

    f_max : float <= sample_rate // 2
        The maximum frequency in Hz.

    floor : float > 0
        The minimum mel filter bank output in linear scale.

    gamma : float in [-1, 1]
        The parameter of the generalized logarithmic function.

    out_format : ['y', 'yE', 'yc', 'ycE']
        `y` is MFCC, `c` is C0, and `E` is energy.

    Returns
    -------
    y : Tensor [shape=(..., M)]
        The MFCC without C0.

    E : Tensor [shape=(..., 1)] (optional)
        The energy.

    c : Tensor [shape=(..., 1)] (optional)
        The C0.

    """
    options = {
        "mfcc_order": mfcc_order,
        "n_channel": n_channel,
        "sample_rate": sample_rate,
        "lifter": lifter,
        "f_min": f_min,
        "f_max": f_max,
        "floor": floor,
        "gamma": gamma,
        "out_format": out_format,
    }
    return nn.MelFrequencyCepstralCoefficientsAnalysis._func(x, **options)
def mgc2mgc(
    mc: Tensor,
    out_order: int,
    in_alpha: float = 0,
    out_alpha: float = 0,
    in_gamma: float = 0,
    out_gamma: float = 0,
    in_norm: bool = False,
    out_norm: bool = False,
    in_mul: bool = False,
    out_mul: bool = False,
    n_fft: int = 512,
) -> Tensor:
    """Transform one mel-generalized cepstrum into another.

    Parameters
    ----------
    mc : Tensor [shape=(..., M1+1)]
        The input mel-cepstrum.

    out_order : int >= 0
        The order of the output cepstrum, :math:`M_2`.

    in_alpha : float in (-1, 1)
        The input alpha, :math:`\\alpha_1`.

    out_alpha : float in (-1, 1)
        The output alpha, :math:`\\alpha_2`.

    in_gamma : float in [-1, 1]
        The input gamma, :math:`\\gamma_1`.

    out_gamma : float in [-1, 1]
        The output gamma, :math:`\\gamma_2`.

    in_norm : bool
        If True, the input is assumed to be normalized.

    out_norm : bool
        If True, the output is assumed to be normalized.

    in_mul : bool
        If True, the input is assumed to be gamma-multiplied.

    out_mul : bool
        If True, the output is assumed to be gamma-multiplied.

    n_fft : int >> M1, M2
        The number of FFT bins.

    Returns
    -------
    out : Tensor [shape=(..., M2+1)]
        The converted mel-cepstrum.

    """
    options = {
        "out_order": out_order,
        "in_alpha": in_alpha,
        "out_alpha": out_alpha,
        "in_gamma": in_gamma,
        "out_gamma": out_gamma,
        "in_norm": in_norm,
        "out_norm": out_norm,
        "in_mul": in_mul,
        "out_mul": out_mul,
        "n_fft": n_fft,
    }
    return nn.MelGeneralizedCepstrumToMelGeneralizedCepstrum._func(mc, **options)
def mgc2sp(
    mc: Tensor,
    fft_length: int,
    alpha: float = 0,
    gamma: float = 0,
    norm: bool = False,
    mul: bool = False,
    n_fft: int = 512,
    out_format: str = "power",
) -> Tensor:
    """Compute the spectrum from mel-cepstrum.

    Parameters
    ----------
    mc : Tensor [shape=(..., M+1)]
        The mel-cepstrum.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    gamma : float in [-1, 1]
        The gamma parameter, :math:`\\gamma`.

    norm : bool
        If True, the input is assumed to be normalized.

    mul : bool
        If True, the input is assumed to be gamma-multiplied.

    n_fft : int >> L
        The number of FFT bins.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power', \
                  'cycle', 'radian', 'degree', 'complex']
        The format of the output.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The spectrum.

    """
    options = {
        "fft_length": fft_length,
        "alpha": alpha,
        "gamma": gamma,
        "norm": norm,
        "mul": mul,
        "n_fft": n_fft,
        "out_format": out_format,
    }
    return nn.MelGeneralizedCepstrumToSpectrum._func(mc, **options)
def mlpg(
    u: Tensor,
    seed: ArrayLike[ArrayLike[float]] | ArrayLike[int] | None = None,
) -> Tensor:
    """Perform MLPG given the mean vectors with delta components.

    Parameters
    ----------
    u : Tensor [shape=(..., T, DxH)]
        The time-variant mean vectors with delta components.

    seed : list[list[float]] or list[int] or None
        The delta coefficients or the width(s) of 1st (and 2nd) regression
        coefficients. If None, ``[[-0.5, 0, 0.5], [1, -2, 1]]`` is used.

    Returns
    -------
    out : Tensor [shape=(..., T, D)]
        The smoothed static components.

    """
    # A list literal in the signature would be a single object shared by every
    # call; build a fresh default per call instead.
    if seed is None:
        seed = [[-0.5, 0, 0.5], [1, -2, 1]]
    return nn.MaximumLikelihoodParameterGeneration._func(u, seed=seed)
def mlsacheck(
    c: Tensor,
    *,
    alpha: float = 0,
    pade_order: int = 4,
    strict: bool = True,
    threshold: float | None = None,
    fast: bool = True,
    n_fft: int = 256,
    warn_type: str = "warn",
    mod_type: str = "scale",
) -> Tensor:
    """Verify the stability of the MLSA digital filter.

    Parameters
    ----------
    c : Tensor [shape=(..., M+1)]
        The input mel-cepstrum.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    pade_order : int in [4, 7]
        The order of the Pade approximation.

    strict : bool
        If True, prioritizes maintaining the maximum log approximation error over
        MLSA filter stability.

    threshold : float > 0 or None
        The threshold value. If None, it is automatically computed.

    fast : bool
        Enables fast mode (do not use FFT).

    n_fft : int > M
        The number of FFT bins. Used only in non-fast mode.

    warn_type : ['ignore', 'warn', 'exit']
        The type of warning.

    mod_type : ['clip', 'scale']
        The modification method.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The modified mel-cepstrum.

    """
    options = {
        "alpha": alpha,
        "pade_order": pade_order,
        "strict": strict,
        "threshold": threshold,
        "fast": fast,
        "n_fft": n_fft,
        "warn_type": warn_type,
        "mod_type": mod_type,
    }
    return nn.MLSADigitalFilterStabilityCheck._func(c, **options)
def mpir2c(
    h: Tensor,
    cep_order: int,
    n_fft: int = 512,
) -> Tensor:
    """Transform a minimum-phase impulse response into cepstrum.

    Parameters
    ----------
    h : Tensor [shape=(..., N)]
        The truncated minimum-phase impulse response.

    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    n_fft : int >> N
        The number of FFT bins used for conversion. The accurate conversion
        requires the large value.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The cepstral coefficients.

    """
    converter = nn.MinimumPhaseImpulseResponseToCepstrum
    return converter._func(h, cep_order=cep_order, n_fft=n_fft)
def ndps2c(
    n: Tensor,
    cep_order: int,
) -> Tensor:
    """Transform NDPS into cepstrum.

    Parameters
    ----------
    n : Tensor [shape=(..., L/2+1)]
        The NDPS, where :math:`L` is the number of FFT bins.

    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The cepstrum.

    """
    converter = nn.NegativeDerivativeOfPhaseSpectrumToCepstrum
    return converter._func(n, cep_order=cep_order)
def norm0(
    a: Tensor,
) -> Tensor:
    """Convert all-pole filter coefficients to all-zero ones, or vice versa.

    Parameters
    ----------
    a : Tensor [shape=(..., M+1)]
        The all-pole or all-zero filter coefficients.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The all-zero or all-pole filter coefficients.

    """
    converter = nn.AllPoleToAllZeroDigitalFilterCoefficients
    return converter._func(a)
def par2is(
    k: Tensor,
) -> Tensor:
    """Transform PARCOR coefficients into inverse sine coefficients.

    Parameters
    ----------
    k : Tensor [shape=(..., M+1)]
        The PARCOR coefficients.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The inverse sine coefficients.

    """
    converter = nn.ParcorCoefficientsToInverseSine
    return converter._func(k)
def par2lar(
    k: Tensor,
) -> Tensor:
    """Transform PARCOR coefficients into the log area ratio.

    Parameters
    ----------
    k : Tensor [shape=(..., M+1)]
        The PARCOR coefficients.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The log area ratio.

    """
    converter = nn.ParcorCoefficientsToLogAreaRatio
    return converter._func(k)
def par2lpc(
    k: Tensor,
    gamma: float = 1,
    c: int | None = None,
) -> Tensor:
    """Transform PARCOR coefficients into LPC coefficients.

    Parameters
    ----------
    k : Tensor [shape=(..., M+1)]
        The PARCOR coefficients.

    gamma : float in [-1, 1]
        The gamma parameter, :math:`\\gamma`.

    c : int >= 1 or None
        The number of filter stages.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The LPC coefficients.

    """
    converter = nn.ParcorCoefficientsToLinearPredictiveCoefficients
    return converter._func(k, gamma=gamma, c=c)
def phase(
    b: Tensor | None = None,
    a: Tensor | None = None,
    *,
    fft_length: int = 512,
    unwrap: bool = False,
) -> Tensor:
    """Calculate the phase spectrum.

    Parameters
    ----------
    b : Tensor [shape=(..., M+1)] or None
        The numerator coefficients.

    a : Tensor [shape=(..., N+1)] or None
        The denominator coefficients.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    unwrap : bool
        If True, perform the phase unwrapping.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The phase spectrum [:math:`\\pi` rad].

    """
    options = {"fft_length": fft_length, "unwrap": unwrap}
    return nn.Phase._func(b, a, **options)
def plp(
    x: Tensor,
    plp_order: int,
    n_channel: int,
    sample_rate: int,
    compression_factor: float = 0.33,
    lifter: int = 1,
    f_min: float = 0,
    f_max: float | None = None,
    floor: float = 1e-5,
    gamma: float = 0,
    n_fft: int = 512,
    out_format: str = "y",
) -> Tensor:
    """Extract the PLP from the power spectrum.

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    plp_order : int >= 1
        The order of the PLP, :math:`M`.

    n_channel : int >= 1
        The number of mel filter banks, :math:`C`.

    sample_rate : int >= 1
        The sample rate in Hz.

    compression_factor : float > 0
        The amplitude compression factor.

    lifter : int >= 1
        The liftering coefficient.

    f_min : float >= 0
        The minimum frequency in Hz.

    f_max : float <= sample_rate // 2
        The maximum frequency in Hz.

    floor : float > 0
        The minimum mel filter bank output in linear scale.

    gamma : float in [-1, 1]
        The parameter of the generalized logarithmic function.

    n_fft : int >> M
        The number of FFT bins for the conversion from LPC to cepstrum.

    out_format : ['y', 'yE', 'yc', 'ycE']
        `y` is PLP, `c` is C0, and `E` is energy.

    Returns
    -------
    y : Tensor [shape=(..., M)]
        The PLP without C0.

    E : Tensor [shape=(..., 1)] (optional)
        The energy.

    c : Tensor [shape=(..., 1)] (optional)
        The C0.

    """
    options = {
        "plp_order": plp_order,
        "n_channel": n_channel,
        "sample_rate": sample_rate,
        "compression_factor": compression_factor,
        "lifter": lifter,
        "f_min": f_min,
        "f_max": f_max,
        "floor": floor,
        "gamma": gamma,
        "n_fft": n_fft,
        "out_format": out_format,
    }
    return nn.PerceptualLinearPredictiveCoefficientsAnalysis._func(x, **options)
def pnorm(
    x: Tensor,
    alpha: float = 0,
    ir_length: int = 128,
) -> Tensor:
    """Apply power normalization to the cepstrum.

    Parameters
    ----------
    x : Tensor [shape=(..., M+1)]
        The input mel-cepstrum.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    ir_length : int >= 1
        The length of the impulse response.

    Returns
    -------
    out : Tensor [shape=(..., M+2)]
        The power-normalized cepstrum.

    """
    normalizer = nn.MelCepstrumPowerNormalization
    return normalizer._func(x, alpha=alpha, ir_length=ir_length)
def pol_root(
    x: Tensor,
    *,
    eps: float | None = None,
    in_format: str = "rectangular",
) -> Tensor:
    """Build polynomial coefficients from the roots.

    Parameters
    ----------
    x : Tensor [shape=(..., M)]
        The roots, can be complex.

    eps : float >= 0 or None
        If the absolute values of the imaginary parts of the polynomial coefficients
        are all less than this value, they are considered as real numbers.

    in_format : ['rectangular', 'polar']
        The format of the input.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The polynomial coefficients.

    """
    converter = nn.RootsToPolynomial
    return converter._func(x, eps=eps, in_format=in_format)
def poledf(
    x: Tensor,
    a: Tensor,
    frame_period: int = 80,
    ignore_gain: bool = False,
) -> Tensor:
    """Filter the excitation signal with an all-pole digital filter.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The excitation signal.

    a : Tensor [shape=(..., T/P, M+1)]
        The filter coefficients.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    ignore_gain : bool
        If True, perform filtering without the gain.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The output signal.

    """
    options = {"frame_period": frame_period, "ignore_gain": ignore_gain}
    return nn.AllPoleDigitalFilter._func(x, a, **options)
def quantize(
    x: Tensor,
    abs_max: float = 1,
    n_bit: int = 8,
    quantizer: str = "mid-rise",
) -> Tensor:
    """Apply quantization to the input waveform.

    Parameters
    ----------
    x : Tensor [shape=(...,)]
        The input waveform.

    abs_max : float > 0
        The absolute maximum value of the input waveform.

    n_bit : int >= 1
        The number of quantization bits.

    quantizer : ['mid-rise', 'mid-tread']
        The type of the quantizer.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The quantized waveform.

    """
    options = {"abs_max": abs_max, "n_bit": n_bit, "quantizer": quantizer}
    return nn.UniformQuantization._func(x, **options)
def rlevdur(
    a: Tensor,
) -> Tensor:
    """Solve a Yule-Walker linear system from the LPC coefficients.

    Parameters
    ----------
    a : Tensor [shape=(..., M+1)]
        The gain and the LPC coefficients.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The autocorrelation.

    """
    solver = nn.ReverseLevinsonDurbin
    return solver._func(a)
def rmse(
    x: Tensor,
    y: Tensor,
    reduction: str = "mean",
) -> Tensor:
    """Compute the RMSE between the input and the target.

    Parameters
    ----------
    x : Tensor [shape=(..., D)]
        The input.

    y : Tensor [shape=(..., D)]
        The target.

    reduction : ['none', 'mean', 'sum']
        The type of reduction.

    Returns
    -------
    out : Tensor [shape=(...,) or scalar]
        The RMSE.

    """
    metric = nn.RootMeanSquareError
    return metric._func(x, y, reduction=reduction)
def root_pol(
    a: Tensor,
    *,
    eps: float | None = None,
    out_format: str = "rectangular",
) -> Tensor:
    """Find the roots of a polynomial.

    Parameters
    ----------
    a : Tensor [shape=(..., M+1)]
        The polynomial coefficients.

    eps : float >= 0 or None
        If the absolute values of the imaginary parts of the roots are all less than
        this value, they are considered as real roots.

    out_format : ['rectangular', 'polar']
        The format of the output.

    Returns
    -------
    out : Tensor [shape=(..., M)]
        The roots.

    """
    solver = nn.PolynomialToRoots
    return solver._func(a, eps=eps, out_format=out_format)
def smcep(
    x: Tensor,
    cep_order: int,
    alpha: float = 0,
    theta: float = 0,
    n_iter: int = 0,
    accuracy_factor: int = 4,
) -> Tensor:
    """Run second-order all-pass mel-cepstral analysis on the power spectrum.

    Parameters
    ----------
    x : Tensor [shape=(..., L/2+1)]
        The power spectrum.

    cep_order : int >= 0
        The order of the cepstrum, :math:`M`.

    alpha : float in (-1, 1)
        The frequency warping factor, :math:`\\alpha`.

    theta : float in [0, 1]
        The emphasis frequency, :math:`\\theta`.

    n_iter : int >= 0
        The number of iterations.

    accuracy_factor : int >= 1
        The accuracy factor multiplied by the FFT length.

    Returns
    -------
    out : Tensor [shape=(..., M+1)]
        The mel-cepstrum.

    """
    options = {
        "cep_order": cep_order,
        "alpha": alpha,
        "theta": theta,
        "n_iter": n_iter,
        "accuracy_factor": accuracy_factor,
    }
    return nn.SecondOrderAllPassMelCepstralAnalysis._func(x, **options)
def snr(
    s: Tensor,
    sn: Tensor,
    frame_length: int | None = None,
    full: bool = False,
    reduction: str = "mean",
    eps: float = 1e-8,
) -> Tensor:
    """Compute the SNR.

    Parameters
    ----------
    s : Tensor [shape=(..., T)]
        The signal.

    sn : Tensor [shape=(..., T)]
        The signal with noise.

    frame_length : int >= 1 or None
        The frame length in samples, :math:`L`. If given, calculate the segmental
        SNR.

    full : bool
        If True, include the constant term in the SNR calculation.

    reduction : ['none', 'mean', 'sum']
        The type of reduction.

    eps : float >= 0
        A small value to avoid NaN.

    Returns
    -------
    out : Tensor [shape=(...,) or scalar]
        The SNR.

    """
    options = {
        "frame_length": frame_length,
        "full": full,
        "reduction": reduction,
        "eps": eps,
    }
    return nn.SignalToNoiseRatio._func(s, sn, **options)
def spec(
    b: Tensor | None = None,
    a: Tensor | None = None,
    *,
    fft_length: int = 512,
    eps: float = 0,
    relative_floor: float | None = None,
    out_format: str = "power",
) -> Tensor:
    """Calculate the spectrum.

    Parameters
    ----------
    b : Tensor [shape=(..., M+1)] or None
        The numerator coefficients.

    a : Tensor [shape=(..., N+1)] or None
        The denominator coefficients.

    fft_length : int >= 2
        The number of FFT bins, :math:`L`.

    eps : float >= 0
        A small value added to the power spectrum.

    relative_floor : float < 0 or None
        The relative floor of the power spectrum in dB.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power']
        The format of the output.

    Returns
    -------
    out : Tensor [shape=(..., L/2+1)]
        The spectrum.

    """
    options = {
        "fft_length": fft_length,
        "eps": eps,
        "relative_floor": relative_floor,
        "out_format": out_format,
    }
    return nn.Spectrum._func(b, a, **options)
def stft(
    x: Tensor,
    *,
    frame_length: int = 400,
    frame_period: int = 80,
    fft_length: int = 512,
    center: bool = True,
    zmean: bool = False,
    mode: str = "constant",
    window: str = "blackman",
    norm: str = "power",
    symmetric: bool = True,
    eps: float = 1e-9,
    relative_floor: float | None = None,
    out_format: str = "power",
) -> Tensor:
    """Apply the short-time Fourier transform to a waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input waveform.

    frame_length : int >= 1
        The frame length in samples, :math:`L`.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    fft_length : int >= L
        The number of FFT bins, :math:`N`.

    center : bool
        If True, pad the input on both sides so that the frame is centered.

    zmean : bool
        If True, perform mean subtraction on each frame.

    mode : ['constant', 'reflect', 'replicate', 'circular']
        The padding method.

    window : ['blackman', 'hamming', 'hanning', 'bartlett', 'trapezoidal', \
              'rectangular', 'nuttall']
        The window type.

    norm : ['none', 'power', 'magnitude']
        The normalization type of the window.

    symmetric : bool
        If True, the window is symmetric, otherwise periodic.

    eps : float >= 0
        A small value added to the power spectrum.

    relative_floor : float < 0 or None
        The relative floor of the power spectrum in dB.

    out_format : ['db', 'log-magnitude', 'magnitude', 'power', 'complex']
        The format of the output.

    Returns
    -------
    out : Tensor [shape=(..., T/P, N/2+1)]
        The output spectrogram.

    """
    options = {
        "frame_length": frame_length,
        "frame_period": frame_period,
        "fft_length": fft_length,
        "center": center,
        "zmean": zmean,
        "mode": mode,
        "window": window,
        "norm": norm,
        "symmetric": symmetric,
        "eps": eps,
        "relative_floor": relative_floor,
        "out_format": out_format,
    }
    return nn.ShortTimeFourierTransform._func(x, **options)
def ulaw(
    x: Tensor,
    abs_max: float = 1,
    mu: int = 255,
) -> Tensor:
    """Apply :math:`\\mu`-law compression to the input waveform.

    Parameters
    ----------
    x : Tensor [shape=(...,)]
        The input waveform.

    abs_max : float > 0
        The absolute maximum value of the input waveform.

    mu : int >= 1
        The compression factor, :math:`\\mu`.

    Returns
    -------
    out : Tensor [shape=(...,)]
        The compressed waveform.

    """
    compressor = nn.MuLawCompression
    return compressor._func(x, abs_max=abs_max, mu=mu)
def unframe(
    y: Tensor,
    out_length: int | None = None,
    *,
    frame_period: int = 80,
    center: bool = True,
    window: str = "rectangular",
    norm: str = "none",
    symmetric: bool = True,
) -> Tensor:
    """Reconstruct a waveform from its framed version.

    Parameters
    ----------
    y : Tensor [shape=(..., T/P, L)]
        The framed waveform.

    out_length : int >= 1 or None
        The length of the original waveform, :math:`T`.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    center : bool
        If True, pad the input on both sides so that the frame is centered.

    window : ['blackman', 'hamming', 'hanning', 'bartlett', 'trapezoidal', \
              'rectangular', 'nuttall']
        The window type.

    norm : ['none', 'power', 'magnitude']
        The normalization type of the window.

    symmetric : bool
        If True, the window is symmetric, otherwise periodic.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The unframed waveform.

    """
    options = {
        "out_length": out_length,
        "frame_period": frame_period,
        "center": center,
        "window": window,
        "norm": norm,
        "symmetric": symmetric,
    }
    return nn.Unframe._func(y, **options)
def wht(
    x: Tensor,
    wht_type: str = "natural",
) -> Tensor:
    """Perform WHT on the input.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The input.

    wht_type : ['sequency', 'natural', 'dyadic']
        The order of the coefficients in the Walsh matrix.

    Returns
    -------
    out : Tensor [shape=(..., L)]
        The WHT output.

    """
    transform = nn.WalshHadamardTransform
    return transform._func(x, wht_type=wht_type)
def window(
    x: Tensor,
    out_length: int | None = None,
    *,
    window: str = "blackman",
    norm: str = "power",
    symmetric: bool = True,
) -> Tensor:
    """Multiply the given framed waveform by a window function.

    Parameters
    ----------
    x : Tensor [shape=(..., L1)]
        The input framed waveform.

    out_length : int >= L1 or None
        The output length, :math:`L_2`. If :math:`L_2 > L_1`, output is zero-padded.
        If None, :math:`L_2 = L_1`.

    window : ['blackman', 'hamming', 'hanning', 'bartlett', 'trapezoidal', \
              'rectangular', 'nuttall']
        The window type.

    norm : ['none', 'power', 'magnitude']
        The normalization type of the window.

    symmetric : bool
        If True, the window is symmetric, otherwise periodic.

    Returns
    -------
    out : Tensor [shape=(..., L2)]
        The windowed waveform.

    """
    options = {
        "out_length": out_length,
        "window": window,
        "norm": norm,
        "symmetric": symmetric,
    }
    return nn.Window._func(x, **options)
def yingram(
    x: Tensor,
    sample_rate: int = 22050,
    lag_min: int = 22,
    lag_max: int | None = None,
    n_bin: int = 20,
) -> Tensor:
    """Calculate the YIN derivatives from the framed waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., L)]
        The framed waveform.

    sample_rate : int >= 8000
        The sample rate in Hz.

    lag_min : int >= 1
        The minimum lag in points.

    lag_max : int < L
        The maximum lag in points.

    n_bin : int >= 1
        The number of bins to represent a semitone range.

    Returns
    -------
    out : Tensor [shape=(..., M)]
        The Yingram.

    """
    options = {
        "sample_rate": sample_rate,
        "lag_min": lag_min,
        "lag_max": lag_max,
        "n_bin": n_bin,
    }
    return nn.Yingram._func(x, **options)
def zcross(
    x: Tensor,
    frame_length: int,
    norm: bool = False,
    softness: float = 1e-3,
) -> Tensor:
    """Calculate the zero-crossing rate of a waveform.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The input waveform.

    frame_length : int >= 1
        The frame length in samples, :math:`L`.

    norm : bool
        If True, divide the zero-crossing rate by the frame length.

    softness : float > 0
        A smoothing parameter. The smaller value makes the output closer to the true
        zero-crossing rate, but the gradient vanishes.

    Returns
    -------
    out : Tensor [shape=(..., T/L)]
        The zero-crossing rate.

    """
    options = {"frame_length": frame_length, "norm": norm, "softness": softness}
    return nn.ZeroCrossingAnalysis._func(x, **options)
def zerodf(
    x: Tensor,
    b: Tensor,
    frame_period: int = 80,
    ignore_gain: bool = False,
) -> Tensor:
    """Filter the excitation signal with an all-zero digital filter.

    Parameters
    ----------
    x : Tensor [shape=(..., T)]
        The excitation signal.

    b : Tensor [shape=(..., T/P, M+1)]
        The filter coefficients.

    frame_period : int >= 1
        The frame period in samples, :math:`P`.

    ignore_gain : bool
        If True, perform filtering without the gain.

    Returns
    -------
    out : Tensor [shape=(..., T)]
        The output signal.

    """
    options = {"frame_period": frame_period, "ignore_gain": ignore_gain}
    return nn.AllZeroDigitalFilter._func(x, b, **options)