This repository has been archived by the owner on Apr 5, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
utils.py
108 lines (98 loc) · 3.88 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from __future__ import division
import tensorflow as tf
from keras import backend as K
def data_gen(audio_processor, sess,
batch_size=128,
background_frequency=0.3, background_volume_range=0.15,
foreground_frequency=0.3, foreground_volume_range=0.15,
time_shift_frequency=0.3, time_shift_range=[-500, 0],
mode='validation', pseudo_frequency=0.33, flip_frequency=0.0,
silence_volume_range=0.3):
ep_count = 0
offset = 0
if mode != 'training':
background_frequency = 0.0
background_volume_range = 0.0
foreground_frequency = 0.0
foreground_volume_range = 0.0
pseudo_frequency = 0.0
time_shift_frequency = 0.0
time_shift_range = [0, 0]
flip_frequency = 0.0
# silence_volume_range: stays the same for validation
while True:
X, y = audio_processor.get_data(
how_many=batch_size, offset=0 if mode == 'training' else offset,
background_frequency=background_frequency,
background_volume_range=background_volume_range,
foreground_frequency=foreground_frequency,
foreground_volume_range=foreground_volume_range,
time_shift_frequency=time_shift_frequency,
time_shift_range=time_shift_range,
mode=mode, sess=sess,
pseudo_frequency=pseudo_frequency,
flip_frequency=flip_frequency,
silence_volume_range=silence_volume_range)
offset += batch_size
if offset > audio_processor.set_size(mode) - batch_size:
offset = 0
# if mode == 'training':
# if 20 >= ep_count:
# pseudo_frequency = 1.0
# elif 30 >= ep_count > 20:
# pseudo_frequency = 0.7
# elif 40 >= ep_count > 30:
# pseudo_frequency = 0.4
# else:
# pseudo_frequency = 0.2
print("\n[Ep:%03d: %s-mode]: Pseudo: %.3f"
% (ep_count, mode, pseudo_frequency))
ep_count += 1
yield X, y
def tf_roll(a, shift, a_len=16000):
# https://stackoverflow.com/questions/42651714/vector-shift-roll-in-tensorflow
def roll_left(a, shift, a_len):
shift %= a_len
rolled = tf.concat(
[a[a_len - shift:, :], a[:a_len - shift, :]], axis=0)
return rolled
def roll_right(a, shift, a_len):
shift = -shift
shift %= a_len
rolled = tf.concat([a[shift:, :], a[:shift, :]], axis=0)
return rolled
# https://stackoverflow.com/questions/35833011/how-to-add-if-condition-in-a-tensorflow-graph
return tf.cond(
tf.greater_equal(shift, 0),
true_fn=lambda: roll_left(a, shift, a_len),
false_fn=lambda: roll_right(a, shift, a_len))
def center_crop(data, desired_size=16000):
if data.ndim == 1:
left = (len(data) - desired_size) // 2
return data[left: left + desired_size]
elif data.ndim == 2:
left = (data.shape[1] - desired_size) // 2
return data[:, left: left + desired_size]
else:
raise RuntimeError("Invalid tensor shape: %s" % (list(data.shape)))
def smooth_categorical_crossentropy(
target, output, from_logits=False, label_smoothing=0.0):
"""Categorical crossentropy between an output tensor and a target tensor.
# Arguments
target: A tensor of the same shape as `output`.
output: A tensor resulting from a softmax
(unless `from_logits` is True, in which
case `output` is expected to be the logits).
from_logits: Boolean, whether `output` is the
result of a softmax, or is a tensor of logits.
# Returns
Output tensor.
"""
# Note: tf.nn.softmax_cross_entropy_with_logits
# expects logits, Keras expects probabilities.
if not from_logits:
_epsilon = tf.convert_to_tensor(K.epsilon(), output.dtype.base_dtype)
output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
output = tf.log(output)
return tf.losses.softmax_cross_entropy(
target, output, label_smoothing=label_smoothing)