From 6d031e3df5b068b4cd496243e3c26d7b25414c87 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 15 May 2024 15:36:50 +0100 Subject: [PATCH 1/2] [MNV1] Update depth triggers for rtl threshold for ZCU104 --- build/mobilenet-v1/build.py | 7 - .../folding_config/ZCU104_folding_config.json | 189 ++++++------- .../ZCU104_specialize_layers_config.json | 261 ------------------ 3 files changed, 81 insertions(+), 376 deletions(-) delete mode 100644 build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json diff --git a/build/mobilenet-v1/build.py b/build/mobilenet-v1/build.py index 711a603..8feca4e 100644 --- a/build/mobilenet-v1/build.py +++ b/build/mobilenet-v1/build.py @@ -124,12 +124,6 @@ def select_build_steps(platform): # for Zynq, use the board name as the release name # e.g. ZCU104 release_platform_name = platform_name - # for ZCU104 we provide a specialize layer json - specialize_layer_file = ( - "specialize_layers_config/ZCU104_specialize_layers_config.json" - if platform_name == "ZCU104" - else None - ) platform_dir = "release/%s" % release_platform_name os.makedirs(platform_dir, exist_ok=True) @@ -137,7 +131,6 @@ def select_build_steps(platform): steps=select_build_steps(platform_name), output_dir="output_%s_%s" % (model_name, release_platform_name), folding_config_file="folding_config/%s_folding_config.json" % platform_name, - specialize_layers_config_file=specialize_layer_file, synth_clk_period_ns=select_clk_period(platform_name), board=platform_name, shell_flow_type=shell_flow_type, diff --git a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json index e300886..b86daf0 100755 --- a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json +++ b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json @@ -20,11 +20,10 @@ "ram_style": "auto", "depth": 64 }, - "Thresholding_hls_0": { + "Thresholding_rtl_0": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 512, + "depth_trigger_uram": 1024 }, "StreamingFIFO_rtl_6": { "ram_style": "auto", @@ -49,11 +48,10 @@ "ram_style": "auto", "depth": 256 }, - "Thresholding_hls_1": { + "Thresholding_rtl_1": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 512, + "depth_trigger_uram": 1024 }, "MVAU_rtl_1": { "PE": 8, @@ -63,11 +61,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_2": { + "Thresholding_rtl_2": { "PE": 2, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 1024, + "depth_trigger_uram": 2048 }, "StreamingFIFO_rtl_17": { "ram_style": "auto", @@ -88,11 +85,10 @@ "PE": 8, "resType": "lut" }, - "Thresholding_hls_3": { + "Thresholding_rtl_3": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 1024, + "depth_trigger_uram": 2048 }, "StreamingFIFO_rtl_24": { "ram_style": "auto", @@ -106,11 +102,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_4": { + "Thresholding_rtl_4": { "PE": 2, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 2048, + "depth_trigger_uram": 4096 }, "StreamingFIFO_rtl_27": { "ram_style": "auto", @@ -131,11 +126,10 @@ "PE": 16, "resType": "lut" }, - "Thresholding_hls_5": { + "Thresholding_rtl_5": { "PE": 2, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 2048, + "depth_trigger_uram": 4096 }, "StreamingFIFO_rtl_34": { "ram_style": "auto", @@ -149,11 +143,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_6": { + "Thresholding_rtl_6": { "PE": 2, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 2048, + "depth_trigger_uram": 4096 }, "StreamingFIFO_rtl_37": { "ram_style": "auto", @@ -174,11 +167,10 @@ "PE": 4, "resType": "lut" }, - "Thresholding_hls_7": { + "Thresholding_rtl_7": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 2048, + "depth_trigger_uram": 4096 }, "StreamingFIFO_rtl_44": { "ram_style": "auto", @@ -192,11 +184,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_8": { + "Thresholding_rtl_8": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 4096, + "depth_trigger_uram": 8192 }, "StreamingFIFO_rtl_47": { "ram_style": "ultra", @@ -217,11 +208,10 @@ "PE": 8, "resType": "lut" }, - "Thresholding_hls_9": { + "Thresholding_rtl_9": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 4096, + "depth_trigger_uram": 8192 }, "StreamingFIFO_rtl_54": { "ram_style": "auto", @@ -235,11 +225,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_10": { + "Thresholding_rtl_10": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 4096, + "depth_trigger_uram": 8192 }, "StreamingFIFO_rtl_57": { "ram_style": "ultra", @@ -260,11 +249,10 @@ "PE": 2, "resType": "lut" }, - "Thresholding_hls_11": { + "Thresholding_rtl_11": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 4096, + "depth_trigger_uram": 8192 }, "StreamingFIFO_rtl_64": { "ram_style": "auto", @@ -278,11 +266,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_12": { + "Thresholding_rtl_12": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_67": { "ram_style": "ultra", @@ -303,11 +290,10 @@ "PE": 4, "resType": "lut" }, - "Thresholding_hls_13": { + "Thresholding_rtl_13": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_74": { "ram_style": "auto", @@ -321,11 +307,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_14": { + "Thresholding_rtl_14": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_77": { "ram_style": "ultra", @@ -346,11 +331,10 @@ "PE": 4, "resType": "lut" }, - "Thresholding_hls_15": { + "Thresholding_rtl_15": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_84": { "ram_style": "auto", @@ -364,11 +348,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_16": { + "Thresholding_rtl_16": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_87": { "ram_style": "ultra", @@ -389,11 +372,10 @@ "PE": 4, "resType": "lut" }, - "Thresholding_hls_17": { + "Thresholding_rtl_17": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_94": { "ram_style": "auto", @@ -407,11 +389,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_18": { + "Thresholding_rtl_18": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_97": { "ram_style": "ultra", @@ -432,11 +413,10 @@ "PE": 4, "resType": "lut" }, - "Thresholding_hls_19": { + "Thresholding_rtl_19": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_104": { "ram_style": "auto", @@ -450,11 +430,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_20": { + "Thresholding_rtl_20": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_107": { "ram_style": "ultra", @@ -475,11 +454,10 @@ "PE": 4, "resType": "lut" }, - "Thresholding_hls_21": { + "Thresholding_rtl_21": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_114": { "ram_style": "auto", @@ -493,11 +471,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "Thresholding_hls_22": { + "Thresholding_rtl_22": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_117": { "ram_style": "ultra", @@ -518,11 +495,10 @@ "PE": 1, "resType": "lut" }, - "Thresholding_hls_23": { + "Thresholding_rtl_23": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 8192, + "depth_trigger_uram": 16384 }, "StreamingFIFO_rtl_122": { "ram_style": "auto", @@ -536,11 +512,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "Thresholding_hls_24": { + "Thresholding_rtl_24": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 16384, + "depth_trigger_uram": 32768 }, "StreamingFIFO_rtl_125": { "ram_style": "ultra", @@ -561,11 +536,10 @@ "PE": 2, "resType": "lut" }, - "Thresholding_hls_25": { + "Thresholding_rtl_25": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 16384, + "depth_trigger_uram": 32768 }, "StreamingFIFO_rtl_132": { "ram_style": "auto", @@ -579,11 +553,10 @@ "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "Thresholding_hls_26": { + "Thresholding_rtl_26": { "PE": 1, - "ram_style": "distributed", - "mem_mode": "internal_embedded", - "runtime_writeable_weights": 0 + "depth_trigger_bram": 16384, + "depth_trigger_uram": 32768 }, "ConvolutionInputGenerator_rtl_14": { "SIMD": 1, diff --git a/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json b/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json deleted file mode 100644 index f766d4a..0000000 --- a/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "Defaults": {}, - "ConvolutionInputGenerator_0": { - "preferred_impl_style": "rtl" - }, - "MVAU_0": { - "preferred_impl_style": "rtl" - }, - "Thresholding_0": { - "preferred_impl_style": "hls" - }, - "FMPadding_0": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_1": { - "preferred_impl_style": "rtl" - }, - "VVAU_0": { - "preferred_impl_style": "hls" - }, - "Thresholding_1": { - "preferred_impl_style": "hls" - }, - "MVAU_1": { - "preferred_impl_style": "rtl" - }, - "Thresholding_2": { - "preferred_impl_style": "hls" - }, - "FMPadding_1": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_2": { - "preferred_impl_style": "rtl" - }, - "VVAU_1": { - "preferred_impl_style": "hls" - }, - "Thresholding_3": { - "preferred_impl_style": "hls" - }, - "MVAU_2": { - "preferred_impl_style": "rtl" - }, - "Thresholding_4": { - "preferred_impl_style": "hls" - }, - "FMPadding_2": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_3": { - "preferred_impl_style": "rtl" - }, - "VVAU_2": { - "preferred_impl_style": "hls" - }, - "Thresholding_5": { - "preferred_impl_style": "hls" - }, - "MVAU_3": { - "preferred_impl_style": "rtl" - }, - "Thresholding_6": { - "preferred_impl_style": "hls" - }, - "FMPadding_3": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_4": { - "preferred_impl_style": "rtl" - }, - "VVAU_3": { - "preferred_impl_style": "hls" - }, - "Thresholding_7": { - "preferred_impl_style": "hls" - }, - "MVAU_4": { - "preferred_impl_style": "rtl" - }, - "Thresholding_8": { - "preferred_impl_style": "hls" - }, - "FMPadding_4": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_5": { - "preferred_impl_style": "rtl" - }, - "VVAU_4": { - "preferred_impl_style": "hls" - }, - "Thresholding_9": { - "preferred_impl_style": "hls" - }, - "MVAU_5": { - "preferred_impl_style": "rtl" - }, - "Thresholding_10": { - "preferred_impl_style": "hls" - }, - "FMPadding_5": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_6": { - "preferred_impl_style": "rtl" - }, - "VVAU_5": { - "preferred_impl_style": "hls" - }, - "Thresholding_11": { - "preferred_impl_style": "hls" - }, - "MVAU_6": { - "preferred_impl_style": "rtl" - }, - "Thresholding_12": { - "preferred_impl_style": "hls" - }, - "FMPadding_6": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_7": { - "preferred_impl_style": "rtl" - }, - "VVAU_6": { - "preferred_impl_style": "hls" - }, - "Thresholding_13": { - "preferred_impl_style": "hls" - }, - "MVAU_7": { - "preferred_impl_style": "rtl" - }, - "Thresholding_14": { - "preferred_impl_style": "hls" - }, - "FMPadding_7": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_8": { - "preferred_impl_style": "rtl" - }, - "VVAU_7": { - "preferred_impl_style": "hls" - }, - "Thresholding_15": { - "preferred_impl_style": "hls" - }, - "MVAU_8": { - "preferred_impl_style": "rtl" - }, - "Thresholding_16": { - "preferred_impl_style": "hls" - }, - "FMPadding_8": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_9": { - "preferred_impl_style": "rtl" - }, - "VVAU_8": { - "preferred_impl_style": "hls" - }, - "Thresholding_17": { - "preferred_impl_style": "hls" - }, - "MVAU_9": { - "preferred_impl_style": "rtl" - }, - "Thresholding_18": { - "preferred_impl_style": "hls" - }, - "FMPadding_9": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_10": { - "preferred_impl_style": "rtl" - }, - "VVAU_9": { - "preferred_impl_style": "hls" - }, - "Thresholding_19": { - "preferred_impl_style": "hls" - }, - "MVAU_10": { - "preferred_impl_style": "rtl" - }, - "Thresholding_20": { - "preferred_impl_style": "hls" - }, - "FMPadding_10": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_11": { - "preferred_impl_style": "rtl" - }, - "VVAU_10": { - "preferred_impl_style": "hls" - }, - "Thresholding_21": { - "preferred_impl_style": "hls" - }, - "MVAU_11": { - "preferred_impl_style": "rtl" - }, - "Thresholding_22": { - "preferred_impl_style": "hls" - }, - "FMPadding_11": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_12": { - "preferred_impl_style": "rtl" - }, - "VVAU_11": { - "preferred_impl_style": "hls" - }, - "Thresholding_23": { - "preferred_impl_style": "hls" - }, - "MVAU_12": { - "preferred_impl_style": "rtl" - }, - "Thresholding_24": { - "preferred_impl_style": "hls" - }, - "FMPadding_12": { - "preferred_impl_style": "rtl" - }, - "ConvolutionInputGenerator_13": { - "preferred_impl_style": "rtl" - }, - "VVAU_12": { - "preferred_impl_style": "hls" - }, - "Thresholding_25": { - "preferred_impl_style": "hls" - }, - "MVAU_13": { - "preferred_impl_style": "rtl" - }, - "Thresholding_26": { - "preferred_impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { - "preferred_impl_style": "rtl" - }, - "Pool_0": { - "preferred_impl_style": "hls" - }, - "MVAU_14": { - "preferred_impl_style": "rtl" - }, - "ChannelwiseOp_0": { - "preferred_impl_style": "hls" - }, - "LabelSelect_0": { - "preferred_impl_style": "hls" - } - } From 94a4f7414646ebd42d2691920f4d0a48aa811fe5 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 15 May 2024 15:44:35 +0100 Subject: [PATCH 2/2] Run pre-commit --- finn_examples/driver.py | 2 +- finn_examples/qonnx/core/datatype.py | 2 +- finn_examples/qonnx/util/basic.py | 19 +++++++++++++------ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/finn_examples/driver.py b/finn_examples/driver.py index f3a0462..aa54f84 100644 --- a/finn_examples/driver.py +++ b/finn_examples/driver.py @@ -488,4 +488,4 @@ def throughput_test(self): end = time.time() runtime = end - start res["unfold_output[ms]"] = runtime * 1000 - return res \ No newline at end of file + return res diff --git a/finn_examples/qonnx/core/datatype.py b/finn_examples/qonnx/core/datatype.py index 8d66b65..f37d4ee 100644 --- a/finn_examples/qonnx/core/datatype.py +++ b/finn_examples/qonnx/core/datatype.py @@ -437,4 +437,4 @@ def get_smallest_possible(value): dt = DataType[cand] if (dt.min() <= value) and (value <= dt.max()): return dt - raise Exception("Could not find a suitable int datatype for " + str(value)) \ No newline at end of file + raise Exception("Could not find a suitable int datatype for " + str(value)) diff --git a/finn_examples/qonnx/util/basic.py b/finn_examples/qonnx/util/basic.py index 1ddc9b6..82cf3d1 100644 --- a/finn_examples/qonnx/util/basic.py +++ b/finn_examples/qonnx/util/basic.py @@ -31,7 +31,6 @@ import random import string import warnings - from qonnx.core.datatype import DataType # TODO solve by moving onnx-dependent fxns to onnx.py @@ -64,7 +63,11 @@ def qonnx_make_model(graph_proto, **kwargs): def is_finn_op(op_type): "Return whether given op_type string is a QONNX or FINN custom op" - return op_type.startswith("finn") or op_type.startswith("qonnx.custom_op") or op_type.startswith("onnx.brevitas") + return ( + op_type.startswith("finn") + or op_type.startswith("qonnx.custom_op") + or op_type.startswith("onnx.brevitas") + ) def get_num_default_workers(): @@ -209,8 +212,10 @@ def pad_tensor_to_multiple_of(ndarray, pad_to_dims, val=0, distr_pad=False): def calculate_matvec_accumulator_range(matrix: np.ndarray, vec_dt: DataType): - """Calculate the minimum and maximum possible result (accumulator) values for a dot product x * A, - given matrix A of dims (MW, MH), and vector (1, MW) with datatype vec_dt. Returns (acc_min, acc_max).""" + """Calculate the minimum and maximum possible result (accumulator) values for a + dot product x * A given matrix A of dims (MW, MH), and vector (1, MW) with datatype vec_dt. + Returns (acc_min, acc_max). + """ max_vectors = np.where(matrix > 0, vec_dt.max(), vec_dt.min()) min_vectors = np.where(matrix > 0, vec_dt.min(), vec_dt.max()) max_value = (matrix * max_vectors).sum(axis=0).max() @@ -236,7 +241,9 @@ def gen_finn_dt_tensor(finn_dt, tensor_shape): elif finn_dt == DataType["FLOAT32"]: tensor_values = np.random.randn(*tensor_shape) else: - raise ValueError("Datatype {} is not supported, no tensor could be generated".format(finn_dt)) + raise ValueError( + "Datatype {} is not supported, no tensor could be generated".format(finn_dt) + ) # always use float type as container return tensor_values.astype(np.float32) @@ -337,4 +344,4 @@ def auto_pad_to_explicit_padding(autopad_str, idim_h, idim_w, k_h, k_w, stride_h elif autopad_str == "SAME_LOWER": return [pad_half_large_h, pad_half_large_w, pad_half_small_h, pad_half_small_w] else: - raise Exception("Unsupported auto_pad: " + autopad_str) \ No newline at end of file + raise Exception("Unsupported auto_pad: " + autopad_str)