-
Notifications
You must be signed in to change notification settings - Fork 92
/
utils.py
88 lines (63 loc) · 2.37 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import copy
import torch
import numpy as np
from scipy import signal
from librosa.filters import mel
from scipy.signal import get_window
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        Tuple ``(b, a)`` of numerator and denominator filter coefficients.
    """
    # scipy expects the critical frequency as a fraction of Nyquist.
    nyquist = 0.5 * fs
    numerator, denominator = signal.butter(
        order, cutoff / nyquist, btype='high', analog=False
    )
    return numerator, denominator
def pySTFT(x, fft_length=1024, hop_length=256):
    """Compute a magnitude short-time Fourier transform with a Hann window.

    The signal is reflect-padded by ``fft_length // 2`` samples, cut into
    overlapping frames of ``fft_length`` samples spaced ``hop_length``
    apart, windowed, and transformed with a real FFT.

    Args:
        x: Input array; frames are taken along the last axis.
            NOTE(review): ``np.pad`` is called with a scalar width, so every
            axis is padded -- presumably x is always 1-D; confirm with callers.
        fft_length: Frame length and FFT size.
        hop_length: Number of samples between consecutive frame starts.

    Returns:
        Magnitudes as a transposed array; for 1-D input the shape is
        ``(fft_length // 2 + 1, n_frames)``.
    """
    half_window = int(fft_length // 2)
    padded = np.pad(x, half_window, mode='reflect')

    overlap = fft_length - hop_length
    n_frames = (padded.shape[-1] - overlap) // hop_length
    sample_stride = padded.strides[-1]

    # Build the overlapping frames as a strided view instead of copying:
    # stepping one frame advances hop_length samples, stepping inside a
    # frame advances a single sample.
    frames = np.lib.stride_tricks.as_strided(
        padded,
        shape=padded.shape[:-1] + (n_frames, fft_length),
        strides=padded.strides[:-1] + (hop_length * sample_stride, sample_stride),
    )

    window = get_window('hann', fft_length, fftbins=True)
    spectrum = np.fft.rfft(window * frames, n=fft_length).T
    return np.abs(spectrum)
def speaker_normalization(f0, index_nonzero, mean_f0, std_f0):
    """Normalize selected log-F0 values into the range [0, 1].

    The selected entries are standardized with ``mean_f0`` / ``std_f0``,
    scaled by 1/4, clipped to [-1, 1] and then shifted/scaled to [0, 1], so
    values within four standard deviations of the mean map linearly onto
    the output range. Entries not selected are returned unchanged.

    Args:
        f0: Array of log-F0 values; it is not modified.
        index_nonzero: Index (e.g. the boolean mask ``f0 != 0``) selecting
            the entries to normalize.
        mean_f0: Mean used for standardization.
        std_f0: Standard deviation used for standardization.

    Returns:
        A float copy of ``f0`` with the selected entries normalized.
    """
    normalized = f0.astype(float).copy()
    selected = normalized[index_nonzero]
    selected = (selected - mean_f0) / std_f0 / 4.0
    selected = np.clip(selected, -1, 1)
    normalized[index_nonzero] = (selected + 1) / 2.0
    return normalized
def quantize_f0_numpy(x, num_bins=256):
    """Quantize a 1-D F0 contour into one-hot bins.

    Non-positive entries are treated as unvoiced and mapped to bin 0. The
    remaining entries must lie in (0, 1] and are rounded onto bins
    ``1 .. num_bins``.

    Args:
        x: 1-D array of F0 values already scaled to [0, 1]
            (non-positive values mark unvoiced frames); it is not modified.
        num_bins: Number of bins for voiced values.

    Returns:
        Tuple ``(enc, idx)`` where ``enc`` is a float32 one-hot array of
        shape ``(len(x), num_bins + 1)`` and ``idx`` is the int64 array of
        bin indices.
    """
    assert x.ndim==1
    values = x.astype(float)  # astype already returns a fresh copy
    unvoiced = values <= 0
    values[unvoiced] = 0.0
    assert (values >= 0).all() and (values <= 1).all()

    # Voiced values occupy bins 1..num_bins; bin 0 is reserved for unvoiced.
    bins = np.round(values * (num_bins - 1)) + 1
    bins[unvoiced] = 0.0

    n_frames = len(bins)
    one_hot = np.zeros((n_frames, num_bins + 1), dtype=np.float32)
    one_hot[np.arange(n_frames), bins.astype(np.int32)] = 1.0
    return one_hot, bins.astype(np.int64)
def quantize_f0_torch(x, num_bins=256):
    """Quantize a batch of F0 contours into one-hot bins.

    Non-positive entries are treated as unvoiced and mapped to bin 0. The
    remaining entries must lie in (0, 1] and are rounded onto bins
    ``1 .. num_bins``.

    Args:
        x: Tensor whose first dimension is the batch; values are expected
            to be F0 already scaled to [0, 1] (non-positive values mark
            unvoiced frames). It is not modified and may be non-contiguous.
        num_bins: Number of bins for voiced values.

    Returns:
        Tuple ``(enc, idx)`` where ``enc`` is a one-hot tensor of shape
        ``(B, -1, num_bins + 1)`` on ``x``'s device and ``idx`` is the long
        tensor of bin indices with shape ``(B, -1)``.
    """
    B = x.size(0)
    # reshape (not view) so non-contiguous inputs, e.g. transposed tensors,
    # are accepted; clone keeps the caller's tensor untouched.
    flat = x.reshape(-1).clone()
    unvoiced = flat <= 0
    flat[unvoiced] = 0
    assert (flat >= 0).all() and (flat <= 1).all()

    # Voiced values occupy bins 1..num_bins; bin 0 is reserved for unvoiced.
    flat = torch.round(flat * (num_bins - 1)) + 1
    flat[unvoiced] = 0

    n_frames = flat.size(0)
    enc = torch.zeros((n_frames, num_bins + 1), device=flat.device)
    # Build the row index on the same device as the data so the indexing
    # does not mix CPU and accelerator tensors.
    rows = torch.arange(n_frames, device=flat.device)
    enc[rows, flat.long()] = 1
    return enc.view(B, -1, num_bins + 1), flat.view(B, -1).long()
def get_mask_from_lengths(lengths, max_len):
    """Build a boolean mask that is True at positions beyond each length.

    Args:
        lengths: Tensor of sequence lengths (assumed 1-D, one entry per
            sequence -- TODO confirm against callers).
        max_len: Number of positions along the mask's last dimension.

    Returns:
        Bool tensor on ``lengths``' device; for 1-D ``lengths`` it has shape
        ``(len(lengths), max_len)`` and entry ``[i, t]`` is True when
        ``t >= lengths[i]``.
    """
    positions = torch.arange(0, max_len, device=lengths.device)
    # Broadcast positions (max_len,) against lengths as a column vector.
    per_sequence = lengths.unsqueeze(1)
    return torch.ge(positions, per_sequence).bool()
def pad_seq_to_2(x, len_out=128):
    """Zero-pad a 3-D array at the end of axis 1 up to ``len_out``.

    Args:
        x: Array with three dimensions; only axis 1 is padded.
        len_out: Target size of axis 1; must be at least ``x.shape[1]``.

    Returns:
        Tuple ``(padded, len_pad)`` with the padded array and the number of
        positions that were appended.
    """
    len_pad = len_out - x.shape[1]
    assert len_pad >= 0
    pad_widths = ((0, 0), (0, len_pad), (0, 0))
    padded = np.pad(x, pad_widths, 'constant')
    return padded, len_pad