From b7e70eecd1167f41fcde1d45a43edbb63fe77d5d Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Sun, 8 Aug 2021 21:17:43 +0000 Subject: [PATCH] Run AOT tests against reference system This introduces an alternative way of running AOT tests using the reference system added in https://github.com/apache/tvm/pull/8514. This gives us additional assurance that the AOT output runs successfully on embedded platforms in our core test suite. I've also changed calculate_workspace_sizes to debug_workspace_sizes and default to False in most cases as it only needs to be True for a few cases to check theoutput with the debug flag - this was discovered trying to allocate 16MB in an embedded test :scream_cat: Co-authored-by: Grant Watson --- tests/python/relay/aot/aot_test_utils.py | 108 +++++-- tests/python/relay/aot/corstone300.ld | 287 ++++++++++++++++++ tests/python/relay/aot/corstone300.mk | 109 +++++++ .../relay/aot/{aot_test.mk => default.mk} | 8 + tests/python/relay/aot/test_crt_aot.py | 81 ++--- tests/python/relay/test_external_codegen.py | 6 +- 6 files changed, 532 insertions(+), 67 deletions(-) create mode 100644 tests/python/relay/aot/corstone300.ld create mode 100644 tests/python/relay/aot/corstone300.mk rename tests/python/relay/aot/{aot_test.mk => default.mk} (95%) diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py index 36c415ec8c83..ad8dd6276995 100644 --- a/tests/python/relay/aot/aot_test_utils.py +++ b/tests/python/relay/aot/aot_test_utils.py @@ -39,6 +39,9 @@ _LOG = logging.getLogger(__name__) +AOT_SUCCESS_TOKEN = "AOT_TEST_SUCCESS" +AOT_FAILURE_TOKEN = "AOT_TEST_FAILURE" + class AOTTestModel(NamedTuple): """Class to describe a model under test @@ -64,6 +67,38 @@ class AOTTestModel(NamedTuple): params: Optional[Dict[str, np.array]] = None +class AOTTestRunner(NamedTuple): + """Class to describe a test runner for AOT code + + Parameters + ---------- + makefile: str + Premade Makefile to use from the AOT test folder + prologue: str + Code to prepend to the main function + includes: List[str] + Additional includes required to run the AOT test runner + parameters: Map[str, str] + Additional parameters to pass to the make command + """ + + makefile: str = "default" + prologue: str = "" + includes: List[str] = [] + parameters: Dict[str, str] = {} + + +AOT_DEFAULT_RUNNER = AOTTestRunner() +AOT_CORSTONE300_RUNNER = AOTTestRunner( + makefile="corstone300", + prologue=""" + uart_init(); + """, + includes=["uart.h"], + parameters={"NPU_VARIANT": "256"}, +) + + def mangle_name(mod_name, name): mod_name = mangle_module_name(mod_name) return mod_name + "_" + name @@ -114,17 +149,27 @@ def parametrize_aot_options(test): interface_api = ["packed", "c"] use_unpacked_api = [True, False] - use_calculated_workspaces = [True, False] + test_runner = [AOT_DEFAULT_RUNNER, AOT_CORSTONE300_RUNNER] + + all_combinations = itertools.product(interface_api, use_unpacked_api, test_runner) - all_combinations = itertools.product(interface_api, use_unpacked_api, use_calculated_workspaces) # Filter out packed operators with c interface valid_combinations = filter( - lambda parameters: not (parameters[0] == "c" and parameters[1] == False), + lambda parameters: not (parameters[0] == "c" and not parameters[1]), all_combinations, ) + # Only use reference system for C interface and unpacked API calls + valid_combinations = filter( + lambda parameters: not ( + parameters[2] == AOT_CORSTONE300_RUNNER + and (parameters[0] == "packed" or not parameters[1]) + ), + valid_combinations, + ) + return pytest.mark.parametrize( - ["interface_api", "use_unpacked_api", "use_calculated_workspaces"], + ["interface_api", "use_unpacked_api", "test_runner"], valid_combinations, )(test) @@ -160,7 +205,7 @@ def subprocess_log_output(cmd, cwd, logfile): return proc.wait() -def emit_main_prologue(main_file, workspace_bytes): +def emit_main_prologue(main_file, custom_prologue, workspace_bytes): # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment. main_file.write( f"#define WORKSPACE_SIZE ({workspace_bytes} + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n" @@ -185,6 +230,7 @@ def emit_main_prologue(main_file, workspace_bytes): int main(){\n """ ) + main_file.write(custom_prologue) def emit_main_data(main_file, input_map, output_list, mod_name): @@ -297,11 +343,11 @@ def emit_main_compare(main_file, output_list, mod_name): main_file.write(f"for (int i = 0; i<{actual_data_name}{i}_len; i++){{\n") if is_float_dtype: main_file.write( - f'if (fabs({actual_data_name}{i}[i]-{expected_data_name}{i}[i]) > 0.001f){{\n\tprintf("ko\\n");\n\treturn -1;}}\n' + f'if (fabs({actual_data_name}{i}[i]-{expected_data_name}{i}[i]) > 0.001f){{\n\tprintf("{AOT_FAILURE_TOKEN}\\n");\n\treturn -1;}}\n' ) else: main_file.write( - f'if ({actual_data_name}{i}[i]!={expected_data_name}{i}[i]){{\n\tprintf("ko\\n");\n\treturn -1;}}\n' + f'if ({actual_data_name}{i}[i]!={expected_data_name}{i}[i]){{\n\tprintf("{AOT_FAILURE_TOKEN}\\n");\n\treturn -1;}}\n' ) main_file.write("}\n") @@ -312,36 +358,40 @@ def emit_main_init_memory_manager(main_file): def emit_main_epilogue(main_file): - main_file.write('printf("ok\\n");') + main_file.write(f'printf("{AOT_SUCCESS_TOKEN}\\n");') main_file.write("return 0;") main_file.write("}\n") -def emit_main_common_includes(main_file): +def emit_main_common_includes(main_file, custom_includes): main_file.write("#include \n") main_file.write("#include \n") main_file.write('#include "tvm/runtime/c_runtime_api.h"\n') main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n') + for include in custom_includes: + main_file.write(f'#include "{include}"\n') def emit_main_micro_include(main_file, mod_name): main_file.write(f"#include <{mangle_module_name(mod_name)}.h>\n") -def create_main(test_name, models, output_path, interface_api, workspace_bytes): +def create_main( + test_name, models, output_path, custom_includes, custom_prologue, interface_api, workspace_bytes +): file_path = pathlib.Path(f"{output_path}/" + test_name).resolve() # create header file raw_path = file_path.with_suffix(".c").resolve() with open(raw_path, "w") as main_file: - emit_main_common_includes(main_file) + emit_main_common_includes(main_file, custom_includes) if interface_api == "c": for model in models: emit_main_micro_include(main_file, model.name) - - emit_main_prologue(main_file, workspace_bytes) for model in models: emit_main_data(main_file, model.inputs, model.outputs, model.name) + + emit_main_prologue(main_file, custom_prologue, workspace_bytes) emit_main_init_memory_manager(main_file) if interface_api == "c": @@ -396,9 +446,10 @@ def extract_main_workspace_size_bytes(extract_dir): def compile_and_run( models: Union[List[AOTTestModel], AOTTestModel], + runner: AOTTestRunner, interface_api, use_unpacked_api, - use_calculated_workspaces, + debug_calculated_workspaces=False, workspace_byte_alignment=8, enable_op_fusion=True, ): @@ -414,7 +465,7 @@ def compile_and_run( models = [models] # The calculated workspaces will not account for stack allocator tags used for debugging - if not use_calculated_workspaces: + if debug_calculated_workspaces: cflags += "-DTVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK " config = {"tir.disable_vectorize": True} @@ -452,10 +503,7 @@ def compile_and_run( t = tarfile.open(tar_file) t.extractall(base_path) - if use_calculated_workspaces: - workspace_bytes += extract_main_workspace_size_bytes(base_path) - else: - workspace_bytes += 16384 * 1024 + workspace_bytes += extract_main_workspace_size_bytes(base_path) for key in model.inputs: create_header_file( @@ -480,6 +528,8 @@ def compile_and_run( "test.c", models, build_path, + runner.includes, + runner.prologue, interface_api, workspace_bytes, ) @@ -487,24 +537,32 @@ def compile_and_run( # Verify that compiles fine file_dir = os.path.dirname(os.path.abspath(__file__)) codegen_path = os.path.join(base_path, "codegen") - makefile = os.path.join(file_dir, "aot_test.mk") - make_cmd = ( - f"make CFLAGS='{cflags}' -f {makefile} build_dir=" - + build_path + makefile = os.path.join(file_dir, f"{runner.makefile}.mk") + custom_params = " ".join([f" {param}='{value}'" for param, value in runner.parameters.items()]) + make_command = ( + f"make -f {makefile} build_dir={build_path}" + + f" CFLAGS='{cflags}'" + f" TVM_ROOT={file_dir}/../../../.." + + f" AOT_TEST_ROOT={file_dir}" + f" CODEGEN_ROOT={codegen_path}" + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}" + + custom_params ) compile_log_path = os.path.join(build_path, "test_compile.log") - ret = subprocess_log_output(make_cmd, ".", compile_log_path) + compile_command = f"{make_command} aot_test_runner" + ret = subprocess_log_output(compile_command, ".", compile_log_path) assert ret == 0 # Verify that runs fine run_log_path = os.path.join(build_path, "test_run.log") - ret = subprocess_log_output("./aot_test_runner", build_path, run_log_path) + run_command = f"{make_command} run" + ret = subprocess_log_output(run_command, build_path, run_log_path) assert ret == 0 + with open(run_log_path) as run_log: + assert AOT_SUCCESS_TOKEN in run_log.read() + def generate_ref_data(mod, input_data, params=None, target="llvm"): """Generate reference data through executing the relay module""" diff --git a/tests/python/relay/aot/corstone300.ld b/tests/python/relay/aot/corstone300.ld new file mode 100644 index 000000000000..4a6b22480d9f --- /dev/null +++ b/tests/python/relay/aot/corstone300.ld @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*------------------ Reference System Memories ------------- + +===================+============+=======+============+============+ + | Memory | Address | Size | CPU Access | NPU Access | + +===================+============+=======+============+============+ + | ITCM | 0x00000000 | 512KB | Yes (RO) | No | + +-------------------+------------+-------+------------+------------+ + | DTCM | 0x20000000 | 512KB | Yes (R/W) | No | + +-------------------+------------+-------+------------+------------+ + | SSE-300 SRAM | 0x21000000 | 2MB | Yes (R/W) | Yes (R/W) | + +-------------------+------------+-------+------------+------------+ + | Data SRAM | 0x01000000 | 2MB | Yes (R/W) | Yes (R/W) | + +-------------------+------------+-------+------------+------------+ + | DDR | 0x60000000 | 32MB | Yes (R/W) | Yes (R/W) | + +-------------------+------------+-------+------------+------------+ */ + +/*---------------------- ITCM Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00080000; + +/*--------------------- DTCM RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00080000; + +/*----------------------- Data SRAM Configuration ------------------------------ + Data SRAM Configuration + DATA_SRAM Base Address <0x0-0xFFFFFFFF:8> + DATA_SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__DATA_SRAM_BASE = 0x01000000; +__DATA_SRAM_SIZE = 0x00200000; + +/*--------------------- Embedded SRAM Configuration ---------------------------- + SRAM Configuration + SRAM Base Address <0x0-0xFFFFFFFF:8> + SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__SRAM_BASE = 0x21000000; +__SRAM_SIZE = 0x00200000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00008000; +__HEAP_SIZE = 0x00008000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + DDR Configuration + DDR Base Address <0x0-0xFFFFFFFF:8> + DDR Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__DDR_BASE = 0x60000000; +__DDR_SIZE = 0x02000000; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + ITCM (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + DTCM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE + DATA_SRAM (rwx) : ORIGIN = __DATA_SRAM_BASE, LENGTH = __DATA_SRAM_SIZE + SRAM (rwx) : ORIGIN = __SRAM_BASE, LENGTH = __SRAM_SIZE + DDR (rwx) : ORIGIN = __DDR_BASE, LENGTH = __DDR_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions ITCM and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ITCM + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ITCM + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ITCM + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + LONG (__etext) + LONG (__data_start__) + LONG (__data_end__ - __data_start__) + /* Add each additional data section here */ + __copy_table_end__ = .; + } > ITCM + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + __zero_table_end__ = .; + } > ITCM + + /** + * Location counter can end up 2byte aligned with narrow Thumb code but + * __etext is assumed by startup code to be the LMA of a section in DTCM + * which must be 4byte aligned + */ + __etext = ALIGN (4); + + .data : AT (__etext) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > DTCM + + .sram : + { + . = ALIGN(16); + *(.bss.ethosu_fast_memory); + . = ALIGN(16); + } > SRAM AT > SRAM + + .bss.NoInit : + { + . = ALIGN(16); + *(.bss.NoInit) + . = ALIGN(16); + } > DDR AT > DDR + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > DTCM AT > DTCM + + .data_sram : + { + . = ALIGN(16); + } > DATA_SRAM + + .heap (COPY) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > DTCM + + .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > DTCM + PROVIDE(__stack = __StackTop); + + /* Check if data + stack exceeds DTCM limit */ + ASSERT(__StackLimit >= __bss_end__, "region DTCM overflowed with stack") +} diff --git a/tests/python/relay/aot/corstone300.mk b/tests/python/relay/aot/corstone300.mk new file mode 100644 index 000000000000..bca5dd266491 --- /dev/null +++ b/tests/python/relay/aot/corstone300.mk @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Makefile to build and run AOT tests against the reference system + +# Setup build environment +build_dir := build +TVM_ROOT=$(shell cd ../../../../..; pwd) +CRT_ROOT ?= ${TVM_ROOT}/build/standalone_crt +ifeq ($(shell ls -lhd $(CRT_ROOT)),) +$(error "CRT not found. Ensure you have built the standalone_crt target and try again") +endif + +ARM_CPU=ARMCM55 +DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core +ETHOSU_PATH=/opt/arm/ethosu +CMSIS_PATH=${ETHOSU_PATH}/cmsis +PLATFORM_PATH=${ETHOSU_PATH}/core_platform/targets/corstone-300 +PKG_COMPILE_OPTS = -g -Wall -O2 -Wno-incompatible-pointer-types -Wno-format -mcpu=cortex-m55 -mthumb -mfloat-abi=hard -std=gnu99 +CC = arm-none-eabi-gcc +AR = arm-none-eabi-ar +RANLIB = arm-none-eabi-ranlib +CC_OPTS = CC=$(CC) AR=$(AR) RANLIB=$(RANLIB) +PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ + ${CFLAGS} \ + -I$(build_dir)/../include \ + -I$(CODEGEN_ROOT)/host/include \ + -I${PLATFORM_PATH} \ + -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \ + -I${CMSIS_PATH}/CMSIS/Core/Include \ + -isystem$(STANDALONE_CRT_DIR)/include \ + +PKG_LDFLAGS = -lm -specs=nosys.specs -static -T ${AOT_TEST_ROOT}/corstone300.ld + +$(ifeq VERBOSE,1) +QUIET ?= +$(else) +QUIET ?= @ +$(endif) + +CRT_SRCS = $(shell find $(CRT_ROOT)) +CODEGEN_SRCS = $(shell find $(abspath $(CODEGEN_ROOT)/host/src/*.c)) +CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS)) +CMSIS_STARTUP_SRCS = $(shell find ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c) +UART_SRCS = $(shell find ${PLATFORM_PATH}/*.c) + +aot_test_runner: $(build_dir)/aot_test_runner + +$(build_dir)/stack_allocator.o: $(TVM_ROOT)/src/runtime/crt/memory/stack_allocator.c + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ + +$(build_dir)/crt_backend_api.o: $(TVM_ROOT)/src/runtime/crt/common/crt_backend_api.c + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ + +$(build_dir)/libcodegen.a: $(CODEGEN_SRCS) + $(QUIET)cd $(abspath $(CODEGEN_ROOT)/host/src) && $(CC) -c $(PKG_CFLAGS) $(CODEGEN_SRCS) + $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcodegen.a) $(CODEGEN_OBJS) + $(QUIET)$(RANLIB) $(abspath $(build_dir)/libcodegen.a) + +${build_dir}/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS) + $(QUIET)mkdir -p $(abspath $(build_dir)/libcmsis_startup) + $(QUIET)cd $(abspath $(build_dir)/libcmsis_startup) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ + $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcmsis_startup.a) $(abspath $(build_dir))/libcmsis_startup/*.o + $(QUIET)$(RANLIB) $(abspath $(build_dir)/libcmsis_startup.a) + +${build_dir}/libuart.a: $(UART_SRCS) + $(QUIET)mkdir -p $(abspath $(build_dir)/libuart) + $(QUIET)cd $(abspath $(build_dir)/libuart) && $(CC) -c $(PKG_CFLAGS) $^ + $(QUIET)$(AR) -cr $(abspath $(build_dir)/libuart.a) $(abspath $(build_dir))/libuart/*.o + $(QUIET)$(RANLIB) $(abspath $(build_dir)/libuart.a) + +$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o ${build_dir}/libcmsis_startup.a ${build_dir}/libuart.a $(build_dir)/libcodegen.a + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) $(PKG_CFLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS) + +clean: + $(QUIET)rm -rf $(build_dir)/crt + +cleanall: + $(QUIET)rm -rf $(build_dir) + +run: $(build_dir)/aot_test_runner + /opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4/FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \ + -C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \ + -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \ + -C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \ + -C ethosu.num_macs=$(NPU_VARIANT) $(build_dir)/aot_test_runner + +.SUFFIXES: + +.DEFAULT: aot_test_runner + +.PHONY: run \ No newline at end of file diff --git a/tests/python/relay/aot/aot_test.mk b/tests/python/relay/aot/default.mk similarity index 95% rename from tests/python/relay/aot/aot_test.mk rename to tests/python/relay/aot/default.mk index 04a77f0e59d0..f5edcb3d6422 100644 --- a/tests/python/relay/aot/aot_test.mk +++ b/tests/python/relay/aot/default.mk @@ -62,5 +62,13 @@ clean: $(QUIET)rm -rf $(build_dir)/crt cleanall: $(QUIET)rm -rf $(build_dir) + +run: $(build_dir)/aot_test_runner + $(build_dir)/aot_test_runner + # Don't define implicit rules; they tend to match on logical target names that aren't targets (i.e. bundle_static) .SUFFIXES: + +.DEFAULT: aot_test_runner + +.PHONY: run diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index abbf350bff77..36cffefcd0bb 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -28,6 +28,7 @@ from tvm.relay.testing import byoc from aot_test_utils import ( AOTTestModel, + AOT_DEFAULT_RUNNER, generate_ref_data, convert_to_relay, compile_and_run, @@ -38,7 +39,7 @@ def test_error_c_interface_with_packed_api(): interface_api = "c" use_unpacked_api = False - use_calculated_workspaces = True + test_runner = AOT_DEFAULT_RUNNER two = relay.add(relay.const(1), relay.const(1)) func = relay.Function([], two) @@ -48,14 +49,14 @@ def test_error_c_interface_with_packed_api(): AOTTestModel( module=IRModule.from_expr(func), inputs={}, outputs=generate_ref_data(func, {}) ), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_conv_with_params(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_conv_with_params(interface_api, use_unpacked_api, test_runner): RELAY_MODEL = """ #[version = "0.0.5"] def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) { @@ -85,14 +86,14 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), compile_and_run( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_add_with_params(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_add_with_params(interface_api, use_unpacked_api, test_runner): x = relay.var("x", shape=(1, 10)) y = relay.var("y", shape=(1, 10)) z = relay.add(x, y) @@ -109,15 +110,15 @@ def test_add_with_params(interface_api, use_unpacked_api, use_calculated_workspa AOTTestModel( module=IRModule.from_expr(func), inputs=inputs, outputs=output_list, params=params ), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options @pytest.mark.parametrize("groups,weight_shape", [(1, 32), (32, 1)]) -def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api, groups, weight_shape): +def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape): """Test a subgraph with a single conv2d operator.""" dtype = "float32" ishape = (1, 32, 14, 14) @@ -139,14 +140,14 @@ def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api, grou output_list = generate_ref_data(mod, inputs) compile_and_run( AOTTestModel(module=mod, inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_concatenate(interface_api, use_unpacked_api, test_runner): dtype = "float32" x = relay.var("x", shape=(10, 5), dtype=dtype) y = relay.var("y", shape=(10, 5), dtype=dtype) @@ -163,14 +164,14 @@ def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces) output_list = generate_ref_data(func, inputs) compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_nested_tuples(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_nested_tuples(interface_api, use_unpacked_api, test_runner): x = relay.var("x", shape=(10,)) x1 = x + relay.const(1.0) x2 = x1 + relay.const(1.0) @@ -185,27 +186,27 @@ def test_nested_tuples(interface_api, use_unpacked_api, use_calculated_workspace compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_tuple_getitem(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_tuple_getitem(interface_api, use_unpacked_api, test_runner): func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0)) output_list = generate_ref_data(func, {}) compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_id(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_id(interface_api, use_unpacked_api, test_runner): x = relay.var("x", "float32") ident = relay.Function([x], x) one = np.array(1.0, "float32") @@ -214,28 +215,28 @@ def test_id(interface_api, use_unpacked_api, use_calculated_workspaces): compile_and_run( AOTTestModel(module=IRModule.from_expr(ident), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_add_const(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_add_const(interface_api, use_unpacked_api, test_runner): two = relay.add(relay.const(1), relay.const(1)) func = relay.Function([], two) output_list = generate_ref_data(func, {}) compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_mul_param(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_mul_param(interface_api, use_unpacked_api, test_runner): x = relay.var("x", shape=(10, 10)) y = relay.var("y", shape=(1, 10)) func = relay.Function([x, y], relay.multiply(x, y)) @@ -247,14 +248,14 @@ def test_mul_param(interface_api, use_unpacked_api, use_calculated_workspaces): compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_subtract(interface_api, use_unpacked_api, test_runner): i = relay.var("i", shape=[], dtype="int32") sub = relay.subtract(i, relay.const(1, dtype="int32")) func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32")) @@ -263,14 +264,14 @@ def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces): output_list = generate_ref_data(func, inputs) compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_tuple_output(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_tuple_output(interface_api, use_unpacked_api, test_runner): x = relay.var("x", shape=(6, 9)) y = relay.split(x, 3).astuple() a = relay.TupleGetItem(y, 0) @@ -282,18 +283,19 @@ def test_tuple_output(interface_api, use_unpacked_api, use_calculated_workspaces output_list = generate_ref_data(func, inputs) compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @pytest.mark.parametrize( - ["use_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)] + ["debug_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)] ) -def test_mobilenet(use_calculated_workspaces, workspace_byte_alignment): +def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment): use_unpacked_api = True interface_api = "c" + test_runner = AOT_DEFAULT_RUNNER mod, params = testing.mobilenet.get_workload(batch_size=1) data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape] @@ -302,18 +304,19 @@ def test_mobilenet(use_calculated_workspaces, workspace_byte_alignment): output_list = generate_ref_data(mod, inputs, params) compile_and_run( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, - workspace_byte_alignment, + workspace_byte_alignment=workspace_byte_alignment, + debug_calculated_workspaces=debug_calculated_workspaces, ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_byoc_microtvm(use_calculated_workspaces): +def test_byoc_microtvm(): """This is a simple test case to check BYOC capabilities of AOT""" use_unpacked_api = False interface_api = "packed" + test_runner = AOT_DEFAULT_RUNNER x = relay.var("x", shape=(10, 10)) w0 = relay.var("w0", shape=(10, 10)) @@ -358,14 +361,14 @@ def test_byoc_microtvm(use_calculated_workspaces): input_list.extend([map_inputs["w{}".format(i)] for i in range(8)]) compile_and_run( AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_add_name_mangling_with_params(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner): x = relay.var("x", shape=(1, 10)) y = relay.var("y", shape=(1, 10)) z = relay.add(x, y) @@ -380,14 +383,14 @@ def test_add_name_mangling_with_params(interface_api, use_unpacked_api, use_calc compile_and_run( AOTTestModel(name="my_mod", module=func, inputs=inputs, outputs=output_list, params=params), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_multiple_models(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_multiple_models(interface_api, use_unpacked_api, test_runner): # Identity model without params x = relay.var("x", "float32") mod1 = relay.Function([x], x) @@ -433,9 +436,9 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2 ), ], + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @@ -449,7 +452,7 @@ def test_quant_mobilenet_tfl(): interface_api = "packed" use_unpacked_api = False - use_calculated_workspaces = True + test_runner = AOT_DEFAULT_RUNNER tflite_model_file = tf_testing.get_workload_official( "https://storage.googleapis.com/download.tensorflow.org/" @@ -466,14 +469,14 @@ def test_quant_mobilenet_tfl(): output_list = generate_ref_data(mod, inputs, params) compile_and_run( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, ) @parametrize_aot_options -def test_transpose(interface_api, use_unpacked_api, use_calculated_workspaces): +def test_transpose(interface_api, use_unpacked_api, test_runner): """Test that non-inpleaceable operations (e.g., transpose) do not happen in-place.""" dtype = "float32" @@ -493,9 +496,9 @@ def test_transpose(interface_api, use_unpacked_api, use_calculated_workspaces): output_list = generate_ref_data(func, inputs) compile_and_run( AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, enable_op_fusion=False, ) diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index 36453cd41d5d..30db5facc208 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -80,16 +80,16 @@ def check_aot_executor_result( pytest.skip("MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.") # Late import to avoid breaking test with USE_MICRO=OFF. - from aot.aot_test_utils import AOTTestModel, compile_and_run + from aot.aot_test_utils import AOTTestModel, AOT_DEFAULT_RUNNER, compile_and_run interface_api = "packed" use_unpacked_api = False - use_calculated_workspaces = True + test_runner = AOT_DEFAULT_RUNNER compile_and_run( AOTTestModel(module=mod, inputs=map_inputs, outputs=[result]), + test_runner, interface_api, use_unpacked_api, - use_calculated_workspaces, )