import os
import subprocess
import argparse
from secrets import randbelow as rand_below
import csv
from datetime import datetime
from mutator import havoc
# 0th queue cycle
#   1. Put all input files into the queue.
#   2. Run lzbench on every file in the queue without mutation and record the
#      performance numbers.
#   3. Keep these original file-perf tuples separately, or flag them as originals.
# 1st ~ nth queue cycle
#   1. Randomly select a file from the queue, mutate it, and run lzbench.
#   2. Add the mutant to the queue if its performance value is unique (keep only
#      one file per 10 MB/s range); otherwise discard it.
# Design decisions:
#   1. How many queue cycles to run?
#      --> Default 2, configurable by the user (--queue-cycles).
#   2. How many "mutate+run" steps to perform per file per cycle?
#      --> One (--n).
#   3. Run the feedback loop for all files in the queue, or only for files newly
#      added to the queue?
#      --> With --newonly, only the most recent mutation that produced a unique
#          performance value for each original file.
# (A sketch of the uniqueness rule follows just below.)
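# A minimal illustrative sketch of the uniqueness rule described above. This
# helper is hypothetical and never called below (the main loop inlines the same
# check); it assumes perf_dict maps 10 MB/s bucket indices to observed throughputs.
def _is_unique_throughput(throughput, perf_dict):
    # e.g. 253.59 MB/s falls into bucket 25 (the 250~259 MB/s range)
    return int(throughput) // 10 not in perf_dict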
parser = argparse.ArgumentParser()
parser.add_argument("--algo", help="zstd or snappy", default="zstd", type=str)
parser.add_argument("--cord", help="compress or decompress", default="decompress", type=str)
parser.add_argument("--queue-cycles", help="Queue cycles for the whole benchmark files", \
default=2, type=int)
parser.add_argument("--n", help="Number of mutations to run per file", \
default=1, type=int)
parser.add_argument("--newonly", help="Whether to only run loop on newly found files or not", \
action='store_true', default=False)
parser.add_argument("--bigfiles", help="If true, run loop on 4KB+ files. If false, run on 1KB files", \
action='store_true', default=False)
parser.add_argument("--no-delete", help="Exclude deletion in havoc", \
action='store_true', default=False)
args = parser.parse_args()
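# Example invocation (illustrative, using the flags defined above):
#   python loop.py --algo snappy --cord compress --queue-cycles 5 --newonly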
basedir = '/home/junsun2/fuzz/'
setting = ''
setting += ('ZSTD' if args.algo=='zstd' else 'Snappy')
setting += '-'
setting += ('COMPRESS' if args.cord=='compress' else 'DECOMPRESS')
setting += '-big' if args.bigfiles else '-1KB'
benchmark_dir = basedir + 'afl_in/' + setting
benchmark_dir_mutate = (basedir + 'afl_in/mutate/newonly/' + setting
                        if args.newonly else
                        basedir + 'afl_in/mutate/' + setting)
lzbench_binary_path = basedir + 'lzbench/lzbench'
lzbench_result_path = basedir + 'lzbench_result/' + args.algo + '_' + args.cord + '_' + \
'cycle' + str(args.queue_cycles) + '_n' + str(args.n)
lzbench_result_path += ('_big_' if args.bigfiles else '_')
lzbench_result_path += 'lzbench_result' + ('newonly.log' if args.newonly else '.log')
fuzz_result_path = basedir + 'result/' + args.algo + '_' + args.cord + '_' + \
'cycle' + str(args.queue_cycles) + '_n' + str(args.n)
if args.bigfiles:
fuzz_result_path += '_big'
fuzz_result_path += '_newonly.csv' if args.newonly else '.csv'
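# Illustrative example: with the defaults above (zstd, decompress, 2 cycles, n=1,
# 1KB files, not newonly), the two result paths resolve to
#   lzbench_result_path = /home/junsun2/fuzz/lzbench_result/zstd_decompress_cycle2_n1_lzbench_result.log
#   fuzz_result_path    = /home/junsun2/fuzz/result/zstd_decompress_cycle2_n1.csv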
file_queue_dict = dict()
perf_dict = dict()
# args.algo must be a lowercase string ('zstd' or 'snappy').
# Returns (throughput, compression ratio, uncompressed size).
def run_lzbench(input_path, compress_or_decompress):
    if args.algo == 'zstd':
        # Filenames look like '009999_cl1_ws10'; 'cl<N>' encodes the compression level
        comp_level = int(input_path.split('/')[-1].split('_')[1][2:])
        subprocess.run(
            f"{lzbench_binary_path} -ezstd,{comp_level} -t1,1 -o4 {input_path} > {lzbench_result_path}",
            shell=True)
    elif args.algo == 'snappy':
        subprocess.run(
            f"{lzbench_binary_path} -esnappy -t1,1 -o4 {input_path} > {lzbench_result_path}",
            shell=True)
    # lzbench_result.log format:
    # Compressor name,Compression speed,Decompression speed,Original size,Compressed size,Ratio,Filename
    # memcpy line
    # zstd 1.5.2 -1,101.11,253.59,1024,655,63.96,afl_in/ZSTD-DECOMPRESS/009999_cl1_ws10
    with open(lzbench_result_path, 'r') as file:
        lines = file.readlines()
    # Line 0 is the header, line 1 is the memcpy baseline, line 2 is the result
    fields = lines[2].split(',')
    if compress_or_decompress == 'compress':
        throughput = float(fields[1])
    else:
        throughput = float(fields[2])
    uncomp_size = int(fields[3])
    comp_ratio = float(fields[5])
    return throughput, comp_ratio, uncomp_size
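# Illustrative example based on the sample result line above: for a 'decompress'
# run on afl_in/ZSTD-DECOMPRESS/009999_cl1_ws10, run_lzbench would return
# (throughput, comp_ratio, uncomp_size) = (253.59, 63.96, 1024).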
def write_result(file_queue_dict):
    # newline='' avoids blank lines in the CSV on platforms that translate '\n'
    with open(fuzz_result_path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['original_file', 'throughput', 'uncomp_size', 'comp_ratio', 'comp_level', 'mutation_cycle'])
        # file_queue_dict[filename] is a list of per-mutation record dicts
        for filename in file_queue_dict:
            for file_perf_dict in file_queue_dict[filename]:
                writer.writerow([file_perf_dict['original_file'], file_perf_dict['throughput'],
                                 file_perf_dict['uncomp_size'], file_perf_dict['comp_ratio'],
                                 file_perf_dict['comp_level'], file_perf_dict['mutation_cycle']])
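# Illustrative CSV output (values taken from the hypothetical run_lzbench sample above):
#   original_file,throughput,uncomp_size,comp_ratio,comp_level,mutation_cycle
#   009999_cl1_ws10,253.59,1024,63.96,1,0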
def main():
    # file_queue_dict (created above as a global) maps each original filename to
    # a list of record dicts with keys:
    #   original_file, throughput, uncomp_size, comp_ratio, comp_level, mutation_cycle
    # perf_dict (created above as a global) holds the performance buckets: only
    # one file is kept per 10 MB/s range (e.g. 100~110 MB/s).
new_file_count = 0
filelist = os.listdir(benchmark_dir)
subprocess.run(f"rm -rf {benchmark_dir_mutate}", shell=True)
subprocess.run(f"mkdir {benchmark_dir_mutate}", shell=True)
# Do for the 0th queue cycle
print('Queue cycle: ', 0, datetime.now().time())
filecount = 0
for filename in filelist:
filecount += 1
if filecount % 1000 == 0:
print('filecount is ', filecount, datetime.now().time())
# filename is like '009488_cl1_ws10'
comp_level = int(filename.split('_')[1][2:])
filepath = benchmark_dir + '/' + filename
        throughput, comp_ratio, uncomp_size = run_lzbench(filepath, args.cord)
        # Bucket throughput into 10 MB/s ranges; one file is kept per bucket
        perf_dict[int(throughput)//10] = throughput
file_queue_dict[filename] = \
[{'original_file': filename,
'throughput': throughput,
'comp_ratio': comp_ratio,
'uncomp_size': uncomp_size,
'comp_level': comp_level,
'mutation_cycle': 0}]
subprocess.run(f"cp {benchmark_dir}/{filename} {benchmark_dir_mutate}/{filename}--0", \
shell=True)
# Backup the performance numbers of the original benchmark files
perf_dict_original = perf_dict.copy()
    # Do for the 1st ~ nth queue cycle
for queue_cycle in range(1, 1+args.queue_cycles):
print('Queue cycle: ', queue_cycle, datetime.now().time())
filecount = 0
        # For every file in the benchmark directory, do the following
        # (ideally we would sample files at random; here we simply iterate over all of them):
for filename in filelist:
filecount += 1
if filecount % 1000 == 0:
print('filecount is ', filecount, datetime.now().time())
# Select a file from the last queue cycle (most recent version)
most_recent_filename = file_queue_dict[filename][-1]['original_file'] + '--' + \
str(file_queue_dict[filename][-1]['mutation_cycle'])
most_recent_filepath = benchmark_dir_mutate + '/' + most_recent_filename
new_filename = file_queue_dict[filename][-1]['original_file'] + '--' + \
str(queue_cycle)
new_filepath = benchmark_dir_mutate + '/' + new_filename
            # With --newonly, only mutate files whose latest queue entry came
            # from the immediately previous cycle
            condition = ((not args.newonly) or
                         file_queue_dict[filename][-1]['mutation_cycle'] + 1 == queue_cycle)
if condition:
# Copy the original file
subprocess.run(f"cp {most_recent_filepath} {new_filepath}", \
shell=True)
                # Mutate the file n times; the bound passed to rand_below selects
                # among havoc's mutation types, and the smaller bound (57 vs 65)
                # leaves out the deletion mutations (see --no-delete)
                for i in range(args.n):
                    if args.no_delete:
                        havoc(rand_below(57), new_filepath)
                    else:
                        havoc(rand_below(65), new_filepath)
# Run lzbench on the new file
comp_level = int(new_filename.split('_')[1][2:])
                # comp_level parsed here should equal the one encoded in filename
                # (and most_recent_filename), i.e. split('_')[1][2:]
throughput, comp_ratio, uncomp_size = run_lzbench(new_filepath, args.cord)
# If the new file is unique, add to the queue
# Otherwise, discard
                if int(throughput)//10 not in perf_dict:
perf_dict[int(throughput)//10] = throughput
original_filename = file_queue_dict[filename][-1]['original_file']
assert filename==original_filename, "filename should match with original_filename"
file_queue_dict[filename].append(
{'original_file': original_filename,
'throughput': throughput,
'comp_ratio': comp_ratio,
'uncomp_size': uncomp_size,
'comp_level': comp_level,
'mutation_cycle': queue_cycle})
new_file_count += 1
else:
subprocess.run(f"rm {new_filepath}", shell=True)
        # Checkpoint the results once per queue cycle
        print('New files found: ', new_file_count)
        write_result(file_queue_dict)
if __name__ == "__main__":
main()