-
Notifications
You must be signed in to change notification settings - Fork 17
/
main_stream_layer_splitting.py
122 lines (107 loc) · 5.06 KB
/
main_stream_layer_splitting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
""" This main file uses a fixed layer-core allocation with the last layer split across multiple cores"""
import logging as _logging
import os
import pickle
import re

from zigzag.stages.main import MainStage
from stream.stages.allocation.genetic_algorithm_allocation import GeneticAlgorithmAllocationStage
from stream.stages.estimation.zigzag_core_mapping_estimation import ZigZagCoreMappingEstimationStage
from stream.stages.generation.hint_loops_partitioned_workload_generation import (
    HintLoopsPartitionedWorkloadGenerationStage,
)
from stream.stages.parsing.accelerator_parser import AcceleratorParserStage as AcceleratorParserStage_
from stream.stages.parsing.onnx_model_parser import ONNXModelParserStage as StreamONNXModelParserStage
from stream.visualization.memory_usage import plot_memory_usage
from stream.visualization.schedule import (
    plot_timeline_brokenaxes,
    visualize_timeline_plotly,
)
# Logging setup: INFO level; every record carries the timestamp, logger name,
# function name, line number, level name and message.
_logging_format = "%(asctime)s - %(name)s.%(funcName)s +%(lineno)s - %(levelname)s - %(message)s"
_logging_level = _logging.INFO
_logging.basicConfig(format=_logging_format, level=_logging_level)
################################INPUTS################################
# Hardware description, ONNX workload and mapping file handed to the stages.
accelerator = "stream/inputs/examples/hardware/tpu_like_quad_core.yaml"
workload_path = "stream/inputs/examples/workload/resnet18.onnx"
mapping_path = "stream/inputs/examples/mapping/tpu_like_quad_core_resnet18_fixed_split.yaml"
CN_define_mode = 4  # automatically split layers if too big to fit
split_W_percentage = 0.5  # max percentage of capacity a single node's weights can be
hint_loops = []  # (dimension, size) pairs; empty here
nb_ga_individuals = 16  # number of individuals in each generation
nb_ga_generations = 16  # number of genetic algorithm generations
######################################################################

################################PARSING###############################
# Hardware name: last path component of the accelerator file, up to its first dot.
hw_name = accelerator.rsplit("/", 1)[-1].split(".", 1)[0]

# Workload name: last "/"- or "."-separated piece of the workload path, skipping
# a trailing "onnx" extension when there is one.
workload_path_parts = re.split(r"/|\.", workload_path)
wl_name = workload_path_parts[-2] if workload_path_parts[-1] == "onnx" else workload_path_parts[-1]

# Flatten the hint loops into [dim, size, dim, size, ...] strings (dims lower-cased).
hint_loops_str_list = [
    token for dim, size in hint_loops for token in (str(dim).lower(), str(size))
]
hint_loops_str = "_".join(hint_loops_str_list)

# Every output file name is derived from this identifier.
experiment_id = f"{hw_name}-{wl_name}-hintloop_{hint_loops_str}-fixed-split"
scme_pkl_name = f"{experiment_id}-scme"
node_hw_cost_pkl_name = f"{experiment_id}-saved_cn_hw_cost"
######################################################################

############PLOTTING#############
plot_file_name = f"-{experiment_id}-"
plot_full_schedule = True
draw_dependencies = True
plot_data_transfer = True
# One-element tuples: start of the plotted section and share of the schedule shown.
section_start_percent = (0,)
percent_shown = (100,)
#################################

################################PATHS################################
# Pickle outputs
node_hw_performances_path = f"outputs/{node_hw_cost_pkl_name}.pickle"
scme_path = f"outputs/{scme_pkl_name}.pickle"
# Figure outputs
timeline_fig_path_plotly = f"outputs/{experiment_id}-schedule.html"
timeline_fig_path_matplotlib = f"outputs/{experiment_id}-schedule.png"
memory_fig_path = f"outputs/{experiment_id}-memory.png"
#####################################################################
# Stage classes handed to the MainStage, which serves as the entry point.
stage_pipeline = [
    AcceleratorParserStage_,  # Parses the accelerator
    StreamONNXModelParserStage,  # Parses the ONNX Model into the workload
    # UserDefinedModelParserStage,  # Parses the user-defined Model into the workload
    HintLoopsPartitionedWorkloadGenerationStage,
    ZigZagCoreMappingEstimationStage,
    GeneticAlgorithmAllocationStage,
]

# Keyword options forwarded to MainStage together with the stage list.
stage_options = {
    "accelerator": accelerator,  # required by AcceleratorParserStage
    "workload_path": workload_path,  # required by ModelParserStage
    "mapping_path": mapping_path,  # required by ModelParserStage
    "loma_lpf_limit": 6,  # required by LomaEngine
    "nb_ga_individuals": nb_ga_individuals,  # number of individuals in each genetic algorithm generation
    "nb_ga_generations": nb_ga_generations,  # number of genetic algorithm generations
    "node_hw_performances_path": node_hw_performances_path,  # saved node_hw_performances to skip re-computation
    "plot_hof": True,  # Save schedule and memory usage plot of each individual in the Genetic Algorithm hall of fame
    "plot_file_name": plot_file_name,
    "plot_full_schedule": plot_full_schedule,
    "plot_data_transfer": plot_data_transfer,
    "cn_define_mode": CN_define_mode,
    "hint_loops": hint_loops,
    "scheduler_candidate_selection": "memory",
    "operands_to_prefetch": [],
    "split_W_percentage": split_W_percentage,
}

mainstage = MainStage(stage_pipeline, **stage_options)
# Launch the MainStage. run() returns a pair; the first element is a sequence
# and only its first entry is kept as the scme.
scme, _ = mainstage.run()
scme = scme[0]

# open(..., "wb") does not create missing parent directories. Create the output
# directory first so a finished run is not lost to a FileNotFoundError.
scme_dir = os.path.dirname(scme_path)
if scme_dir:
    os.makedirs(scme_dir, exist_ok=True)

# Save the scme to a pickle file
with open(scme_path, "wb") as fp:
    pickle.dump(scme, fp)
# Plotly timeline of the schedule, written to timeline_fig_path_plotly
visualize_timeline_plotly(
    scme,
    draw_dependencies=draw_dependencies,
    draw_communication=plot_data_transfer,
    fig_path=timeline_fig_path_plotly,
)

# Both Matplotlib figures receive the same (section start, percent shown) pair,
# passed positionally in that order.
plot_window = (section_start_percent, percent_shown)

# Matplotlib timeline with broken axes
plot_timeline_brokenaxes(
    scme,
    draw_dependencies,
    *plot_window,
    plot_data_transfer,
    fig_path=timeline_fig_path_matplotlib,
)

# Memory usage plot
plot_memory_usage(scme, *plot_window, fig_path=memory_fig_path)