-
Notifications
You must be signed in to change notification settings - Fork 6
/
attack.py
166 lines (138 loc) · 6.21 KB
/
attack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
from stft import STFT, magphase
import torch.nn as nn
import torch
import Levenshtein
import torchaudio
import numpy as np
import matplotlib.pyplot as plt
def target_sentence_to_label(sentence, labels="_'ABCDEFGHIJKLMNOPQRSTUVWXYZ "):
    """Encode a target sentence as a tensor of label indices.

    Each symbol of ``sentence`` is replaced by its position in ``labels``
    (first occurrence wins, as with ``str.index`` / ``list.index``).

    Args:
        sentence: iterable of symbols, typically a string of characters.
        labels: ordered label set; anything with an ``index`` method
            (a string or a list) works.

    Returns:
        A 1-D ``torch.IntTensor`` with one index per symbol of ``sentence``.

    Raises:
        ValueError: if a symbol of ``sentence`` is not present in ``labels``.
            The message names the offending symbol (with the default label
            set this happens for e.g. lower-case letters or digits).
    """
    indices = []
    for symbol in sentence:
        try:
            indices.append(labels.index(symbol))
        except ValueError:
            # Re-raise with a message that says *which* symbol is unsupported;
            # the built-in "substring not found" is useless for debugging.
            raise ValueError(
                f"symbol {symbol!r} is not in the label set {labels!r}"
            ) from None
    return torch.IntTensor(indices)
def torch_spectrogram(sound, torch_stft):
    """Turn a waveform into a normalised log-magnitude spectrogram.

    The STFT callable yields real and imaginary parts, ``magphase`` converts
    them to a magnitude, the magnitude is compressed with ``log1p`` and then
    standardised with the mean and standard deviation taken over the whole
    tensor. Finally the last two axes of the 4-D result are swapped.

    Args:
        sound: waveform tensor accepted by ``torch_stft``.
        torch_stft: callable returning a ``(real, imag)`` pair for ``sound``.

    Returns:
        The standardised log-magnitude tensor with axes ordered (0, 1, 3, 2)
        relative to the magnitude produced by ``magphase``.
    """
    real_part, imag_part = torch_stft(sound)
    # Only the magnitude is needed; the phase terms are discarded.
    magnitude, _cos, _sin = magphase(real_part, imag_part)
    log_magnitude = torch.log1p(magnitude)
    standardised = (log_magnitude - log_magnitude.mean()) / log_magnitude.std()
    return standardised.permute(0, 1, 3, 2)
class Attacker:
    """Targeted adversarial attack (FGSM or PGD) on a speech-recognition model.

    The attack perturbs a waveform so that the CTC loss between the model
    output and a chosen target sentence decreases, i.e. it steers the
    transcription towards ``target``.
    """

    def __init__(self, model, sound, target, decoder, sample_rate=16000, device="cpu", save=None):
        """
        model: deepspeech model; called as ``model(spec, input_sizes)`` and
            expected to return ``(out, output_sizes)``
        sound: raw sound data [-1 to +1] (read from torchaudio.load)
        target: string, the transcription the attack should produce
        decoder: object whose ``decode(out, output_sizes)`` returns
            ``(decoded_output, decoded_offsets)``
        sample_rate: sample rate of ``sound`` in Hz
        device: torch device the model and the attack run on
        save: optional path; when set, the adversarial audio is written there
        """
        self.sound = sound
        self.sample_rate = sample_rate
        self.target_string = target
        self.target = target
        self.__init_target()
        self.model = model
        self.model.to(device)
        # NOTE(review): the model is deliberately left in training mode here;
        # presumably because backward through cuDNN RNN layers requires it.
        # Confirm before switching to eval().
        self.model.train()
        self.decoder = decoder
        self.criterion = nn.CTCLoss()
        self.device = device
        # 20 ms analysis window with a 10 ms hop.
        n_fft = int(self.sample_rate * 0.02)
        hop_length = int(self.sample_rate * 0.01)
        win_length = int(self.sample_rate * 0.02)
        self.torch_stft = STFT(n_fft=n_fft, hop_length=hop_length, win_length=win_length, window='hamming', center=True, pad_mode='reflect', freeze_parameters=True, device=self.device)
        self.save = save
        # Filled in by attack(); None means no attack has been run yet.
        self.perturbed_data = None

    def _show_spec(self, sound, save=None):
        """Plot the spectrogram of ``sound``; save to ``save`` if given, else show it."""
        spec = torch_spectrogram(sound.to(self.device), self.torch_stft)
        # detach() so plotting also works when ``sound`` tracks gradients.
        plt.imshow(spec.detach().cpu().numpy()[0][0])
        if save:
            plt.savefig(save)
            plt.clf()
        else:
            plt.show()

    def get_ori_spec(self, save=None):
        """Plot the spectrogram of the original sound (saved to ``save`` if given)."""
        self._show_spec(self.sound, save)

    def get_adv_spec(self, save=None):
        """Plot the spectrogram of the adversarial sound (saved to ``save`` if given).

        Raises:
            RuntimeError: if attack() has not been run yet.
        """
        perturbed = getattr(self, "perturbed_data", None)
        if perturbed is None:
            raise RuntimeError("No adversarial sound available: call attack() before get_adv_spec().")
        self._show_spec(perturbed, save)

    # prepare
    def __init_target(self):
        """Convert the target string into the label tensor and length used by the CTC loss."""
        self.target = target_sentence_to_label(self.target)
        self.target = self.target.view(1, -1)
        self.target_lengths = torch.IntTensor([self.target.shape[1]]).view(1, -1)

    # FGSM
    def fgsm_attack(self, sound, epsilon, data_grad):
        """Take one signed-gradient step of size ``epsilon``.

        The step is *subtracted* because the attack is targeted: it descends
        the loss towards the target sentence.
        """
        # find direction of gradient
        sign_data_grad = data_grad.sign()
        # add noise "epsilon * direction" to the ori sound
        perturbed_sound = sound - epsilon * sign_data_grad
        return perturbed_sound

    # PGD
    def pgd_attack(self, sound, ori_sound, eps, alpha, data_grad):
        """Take one PGD step of size ``alpha`` and project back into the eps-ball.

        The total perturbation relative to ``ori_sound`` is clamped
        element-wise to ``[-eps, eps]``.
        """
        # Descend (minus sign) because the attack is targeted.
        adv_sound = sound - alpha * data_grad.sign()
        eta = torch.clamp(adv_sound - ori_sound.data, min=-eps, max=eps)
        sound = ori_sound + eta
        return sound

    def _forward(self, data):
        """Run the model on the spectrogram of ``data``; returns ``(out, output_sizes)``."""
        spec = torch_spectrogram(data, self.torch_stft)
        input_sizes = torch.IntTensor([spec.size(3)])
        return self.model(spec, input_sizes)

    def _transcribe(self, data):
        """Return the decoder's top transcription for ``data``."""
        # Prediction only: no autograd graph is needed for this pass.
        with torch.no_grad():
            out, output_sizes = self._forward(data)
        decoded_output, _decoded_offsets = self.decoder.decode(out, output_sizes)
        return decoded_output[0][0]

    def _target_gradient(self, data):
        """Return the gradient of the CTC loss (w.r.t. the target) on ``data``."""
        data.requires_grad = True
        out, output_sizes = self._forward(data)
        out = out.transpose(0, 1)  # TxNxH
        out = out.log_softmax(2)
        loss = self.criterion(out, self.target, output_sizes, self.target_lengths)
        self.model.zero_grad()
        loss.backward()
        return data.grad.data

    @staticmethod
    def _rms_db(signal):
        """Return the RMS level of ``signal`` in decibels (20 * log10 of the RMS)."""
        samples = signal.cpu().numpy()
        return 20 * np.log10(np.sqrt(np.mean(np.absolute(samples) ** 2)))

    def attack(self, epsilon, alpha, attack_type="FGSM", PGD_round=40):
        """Generate an adversarial version of the sound and evaluate it.

        Args:
            epsilon: FGSM step size, or the PGD bound on the total perturbation.
            alpha: PGD step size per round (unused for FGSM).
            attack_type: ``"FGSM"`` or ``"PGD"``.
            PGD_round: number of PGD iterations.

        Returns:
            ``(db_difference, l_distance, target_string, final_output)``: the
            RMS level change in dB, the Levenshtein distance between the
            target and the adversarial transcription, the target string, and
            the adversarial transcription.

        Raises:
            ValueError: if ``attack_type`` is neither ``"FGSM"`` nor ``"PGD"``.

        Side effects: prints progress, stores the result in
        ``self.perturbed_data``, and writes the audio to ``self.save`` if set.
        """
        if attack_type not in ("FGSM", "PGD"):
            raise ValueError(f"Unknown attack_type {attack_type!r}; expected 'FGSM' or 'PGD'.")

        print("Start attack")
        # Work on a private copy: .to() may return self.sound itself, and
        # enabling requires_grad on it would leak gradient state into the
        # caller's tensor (stale .grad accumulation on a second attack,
        # .numpy() failures when plotting the original).
        data = self.sound.to(self.device).detach().clone()
        data_raw = data.clone().detach()

        # initial prediction
        original_output = self._transcribe(data)
        print(f"Original prediction: {original_output}")

        ############ ATTACK GENERATION ##############
        if attack_type == "FGSM":
            data_grad = self._target_gradient(data)
            perturbed_data = self.fgsm_attack(data, epsilon, data_grad)
        else:  # "PGD"
            for i in range(PGD_round):
                print(f"PGD processing ... {i+1} / {PGD_round}", end="\r")
                data_grad = self._target_gradient(data)
                # detach_() turns the step result into a fresh leaf so the
                # next round starts with an empty .grad.
                data = self.pgd_attack(data, data_raw, epsilon, alpha, data_grad).detach_()
            perturbed_data = data
        ############ ATTACK GENERATION ##############

        # prediction of adversarial sound
        final_output = self._transcribe(perturbed_data)
        perturbed_data = perturbed_data.detach()

        # NOTE: despite the "Max" in the printed label, this is the difference
        # of the RMS levels of the two signals.
        abs_ori = self._rms_db(data_raw)
        abs_after = self._rms_db(perturbed_data)
        db_difference = abs_after - abs_ori
        l_distance = Levenshtein.distance(self.target_string, final_output)
        print(f"Max Decibel Difference: {db_difference:.4f}")
        print(f"Adversarial prediction: {final_output}")
        print(f"Levenshtein Distance {l_distance}")
        if self.save:
            torchaudio.save(self.save, src=perturbed_data.cpu(), sample_rate=self.sample_rate)
        self.perturbed_data = perturbed_data
        return db_difference, l_distance, self.target_string, final_output