From f5805e5dad068da4fc443972b6f813521d48f98e Mon Sep 17 00:00:00 2001 From: anilmartha Date: Fri, 6 Nov 2020 22:43:15 +0530 Subject: [PATCH] [BYOC][CONTRIB] Vitis-AI codegen integration (#6343) * [BYOC][CONTRIB] VITIS-AI integration * Remove environment related files * Update vitis_ai.rst * Add review changes * Remove new lines and note frame in vitis_ai.rst * use sys.exit * Add condition for vitis_ai runtime exec function * remove unused graph_json * correct indentation * use code python instead of bash * Rename VITISAI.cmake to VitisAI.cmake * use relay.ext.vitis_ai.options.build_dir in comparison * Re-add deleted docker related files * Make use of PyXIR XGraph and RuntimeModule serialization & refactor flow * Fix linter errors * Fix linter errors * Address sphinx warnings * Add infertype to fix Vitis-AI annotation test * Renaming util to utils * Add Vitis-AI flag to config.cmake file * Move vitis-ai config options to compiler sources instead of runtime sources * Fix clang-format errors Co-authored-by: Anil Martha Co-authored-by: anilm (generated by with_the_same_user script) Co-authored-by: Jorn Tuyls --- CMakeLists.txt | 2 + cmake/config.cmake | 3 + cmake/modules/contrib/VitisAI.cmake | 47 ++ docs/deploy/index.rst | 1 + docs/deploy/vitis_ai.rst | 652 ++++++++++++++++++ python/tvm/contrib/target/vitis_ai.py | 156 +++++ python/tvm/relay/op/contrib/vitis_ai.py | 100 +++ .../contrib/vitis_ai/config_vitis_ai.cc | 46 ++ .../contrib/vitis_ai/vitis_ai_runtime.cc | 194 ++++++ .../contrib/vitis_ai/vitis_ai_runtime.h | 115 +++ .../python/contrib/test_vitis_ai/__init__.py | 18 + .../contrib/test_vitis_ai/infrastructure.py | 171 +++++ .../test_vitis_ai/test_vitis_ai_codegen.py | 336 +++++++++ .../test_vitis_ai_runtime_cpu_part.py | 82 +++ tests/scripts/task_config_build_cpu.sh | 1 + 15 files changed, 1924 insertions(+) create mode 100644 cmake/modules/contrib/VitisAI.cmake create mode 100755 docs/deploy/vitis_ai.rst create mode 100644 python/tvm/contrib/target/vitis_ai.py create mode 100644 python/tvm/relay/op/contrib/vitis_ai.py create mode 100644 src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc create mode 100755 src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc create mode 100755 src/runtime/contrib/vitis_ai/vitis_ai_runtime.h create mode 100644 tests/python/contrib/test_vitis_ai/__init__.py create mode 100644 tests/python/contrib/test_vitis_ai/infrastructure.py create mode 100644 tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py create mode 100644 tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ecf4635fbe..3c1ff7035d62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,7 @@ tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library gra tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF) tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF) tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF) +tvm_option(USE_VITIS_AI "Build with VITIS-AI Codegen support" OFF) # include directories include_directories(${CMAKE_INCLUDE_PATH}) @@ -367,6 +368,7 @@ include(cmake/modules/contrib/CoreML.cmake) include(cmake/modules/contrib/ONNX.cmake) include(cmake/modules/contrib/ArmComputeLib.cmake) include(cmake/modules/contrib/TensorRT.cmake) +include(cmake/modules/contrib/VitisAI.cmake) include(cmake/modules/Git.cmake) include(cmake/modules/LibInfo.cmake) include(cmake/modules/RustExt.cmake) diff --git 
a/cmake/config.cmake b/cmake/config.cmake index 36eeac729969..6a3ace2c9283 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -232,6 +232,9 @@ set(USE_ETHOSN_HW OFF) set(USE_TENSORRT_CODEGEN OFF) set(USE_TENSORRT_RUNTIME OFF) +# Whether use VITIS-AI codegen +set(USE_VITIS_AI OFF) + # Build ANTLR parser for Relay text format # Possible values: # - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) diff --git a/cmake/modules/contrib/VitisAI.cmake b/cmake/modules/contrib/VitisAI.cmake new file mode 100644 index 000000000000..083bd6d7adc8 --- /dev/null +++ b/cmake/modules/contrib/VitisAI.cmake @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(USE_VITIS_AI) + set(PYXIR_SHARED_LIB libpyxir.so) + find_package(PythonInterp 3.6 REQUIRED) + if(NOT PYTHON) + find_program(PYTHON NAMES python3 python3.6) + endif() + execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" + "import pyxir as px; print(px.get_include_dir()); print(px.get_lib_dir());" + RESULT_VARIABLE __result + OUTPUT_VARIABLE __output + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(__result MATCHES 0) + string(REGEX REPLACE ";" "\\\\;" __values ${__output}) + string(REGEX REPLACE "\r?\n" ";" __values ${__values}) + list(GET __values 0 PYXIR_INCLUDE_DIR) + list(GET __values 1 PYXIR_LIB_DIR) + else() + message(FATAL_ERROR "Can't build TVM with Vitis-AI because PyXIR can't be found") + endif() + message(STATUS "Build with contrib.vitisai") + include_directories(${PYXIR_INCLUDE_DIR}) + file(GLOB VAI_CONTRIB_SRC src/runtime/contrib/vitis_ai/*.cc) + file(GLOB COMPILER_VITIS_AI_SRCS + CONFIGURE_DEPENDS src/relay/backend/contrib/vitis_ai/*) + list(APPEND COMPILER_SRCS ${COMPILER_VITIS_AI_SRCS}) + link_directories(${PYXIR_LIB_DIR}) + list(APPEND TVM_RUNTIME_LINKER_LIBS "pyxir") + list(APPEND RUNTIME_SRCS ${VAI_CONTRIB_SRC}) +endif(USE_VITIS_AI) diff --git a/docs/deploy/index.rst b/docs/deploy/index.rst index 68843ba18248..e47b0a3c72fe 100644 --- a/docs/deploy/index.rst +++ b/docs/deploy/index.rst @@ -70,3 +70,4 @@ target device without relying on RPC. see the following resources on how to do s hls arm_compute_lib tensorrt + vitis_ai diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst new file mode 100755 index 000000000000..f0bd3edcd6e2 --- /dev/null +++ b/docs/deploy/vitis_ai.rst @@ -0,0 +1,652 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Vitis-AI Integration +==================== + +`Vitis-AI `__ is Xilinx's +development stack for hardware-accelerated AI inference on Xilinx +platforms, including both edge devices and Alveo cards. It consists of +optimized IP, tools, libraries, models, and example designs. It is +designed with high efficiency and ease of use in mind, unleashing the +full potential of AI acceleration on Xilinx FPGA and ACAP. + +The current Vitis-AI BYOC flow inside TVM enables acceleration of neural +network model inference on edge and cloud. The identifiers for the +supported edge and cloud Deep Learning Processor Units (DPUs) are +DPUCZDX8G and DPUCADX8G, respectively. DPUCZDX8G and DPUCADX8G are hardware +accelerators for convolutional neural networks (CNNs) on the +Xilinx `Zynq Ultrascale+ +MPSoc `__ +and +`Alveo `__ +(U200/U250) platforms, respectively. For more information about the DPU identifiers +see the section on `DPU naming information <#dpu-naming-information>`__. + +On this page you will find information on how to +`build <#build-instructions>`__ TVM with Vitis-AI and on how to `get +started <#getting-started>`__ with an example. + +DPU naming information +---------------------- + ++---------------------------------+-----------------+-------------------------------------------------------------------------+------------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------+ +| DPU | Application | HW Platform | Quantization Method | Quantization Bitwidth | Design Target | ++=================================+=================+=========================================================================+============================================================+===================================================+==========================================================================+ +| Deep Learning Processing Unit | C: CNN R: RNN | AD: Alveo DDR AH: Alveo HBM VD: Versal DDR with AIE & PL ZD: Zynq DDR | X: DECENT I: Integer threshold F: Float threshold R: RNN | 4: 4-bit 8: 8-bit 16: 16-bit M: Mixed Precision | G: General purpose H: High throughput L: Low latency C: Cost optimized | ++---------------------------------+-----------------+-------------------------------------------------------------------------+------------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------+ + +Build instructions +------------------ + +This section lists the instructions for building TVM with Vitis-AI for +both `cloud <#cloud-dpucadx8g>`__ and `edge <#edge-dpuczdx8g>`__. + +Cloud (DPUCADX8G) +~~~~~~~~~~~~~~~~~ + +For Vitis-AI acceleration in the cloud, TVM has to be built on top of the +Xilinx Alveo platform. + +System requirements +^^^^^^^^^^^^^^^^^^^ + +The following table lists system requirements for running Docker +containers as well as Alveo cards. 
+ ++-----------------------------------------------------+----------------------------------------------------------+ +| **Component** | **Requirement** | ++=====================================================+==========================================================+ +| Motherboard | PCI Express 3.0-compliant with one dual-width x16 slot | ++-----------------------------------------------------+----------------------------------------------------------+ +| System Power Supply | 225W | ++-----------------------------------------------------+----------------------------------------------------------+ +| Operating System | Ubuntu 16.04, 18.04 | ++-----------------------------------------------------+----------------------------------------------------------+ +| | CentOS 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------------------+ +| | RHEL 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------------------+ +| CPU | Intel i3/i5/i7/i9/Xeon 64-bit CPU | ++-----------------------------------------------------+----------------------------------------------------------+ +| GPU (Optional to accelerate quantization) | NVIDIA GPU with a compute capability > 3.0 | ++-----------------------------------------------------+----------------------------------------------------------+ +| CUDA Driver (Optional to accelerate quantization) | nvidia-410 | ++-----------------------------------------------------+----------------------------------------------------------+ +| FPGA | Xilinx Alveo U200 or U250 | ++-----------------------------------------------------+----------------------------------------------------------+ +| Docker Version | 19.03.1 | ++-----------------------------------------------------+----------------------------------------------------------+ + +Hardware setup and docker build +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Clone the Vitis AI repository: + + .. code:: bash + + git clone --recurse-submodules https://github.com/Xilinx/Vitis-AI + +2. Install Docker, and add the user to the docker group. Link the user + to docker installation instructions from the following docker's + website: + + + - https://docs.docker.com/install/linux/docker-ce/ubuntu/ + - https://docs.docker.com/install/linux/docker-ce/centos/ + - https://docs.docker.com/install/linux/linux-postinstall/ + +3. Download the latest Vitis AI Docker with the following command. This container runs on CPU. + + .. code:: bash + + docker pull xilinx/vitis-ai:latest + + To accelerate the quantization, you can optionally use the Vitis-AI GPU docker image. Use the below commands to build the Vitis-AI GPU docker container: + + .. code:: bash + + cd Vitis-AI/docker + ./docker_build_gpu.sh + +4. Set up Vitis AI to target Alveo cards. To target Alveo cards with + Vitis AI for machine learning workloads, you must install the + following software components: + + - Xilinx Runtime (XRT) + - Alveo Deployment Shells (DSAs) + - Xilinx Resource Manager (XRM) (xbutler) + - Xilinx Overlaybins (Accelerators to Dynamically Load - binary + programming files) + + While it is possible to install all of these software components + individually, a script has been provided to automatically install + them at once. To do so: + + - Run the following commands: + + .. code:: bash + + cd Vitis-AI/alveo/packages + sudo su + ./install.sh + + - Power cycle the system. + +5. Clone tvm repo and pyxir repo + + .. 
code:: bash + + git clone --recursive https://github.com/apache/incubator-tvm.git + git clone --recursive https://github.com/Xilinx/pyxir.git + +6. Build and start the tvm runtime Vitis-AI Docker Container. + + .. code:: bash + + ./incubator-tvm/docker/build.sh demo_vitis_ai bash + ./incubator-tvm/docker/bash.sh tvm.demo_vitis_ai + + #Setup inside container + source /opt/xilinx/xrt/setup.sh + . $VAI_ROOT/conda/etc/profile.d/conda.sh + conda activate vitis-ai-tensorflow + +7. Install PyXIR + + .. code:: bash + + cd pyxir + python3 setup.py install --use_vai_rt_dpucadx8g --user + + +8. Build TVM inside the container with Vitis-AI + + .. code:: bash + + cd incubator-tvm + mkdir build + cp cmake/config.cmake build + cd build + echo set\(USE_LLVM ON\) >> config.cmake + echo set\(USE_VITIS_AI ON\) >> config.cmake + cmake .. + make -j$(nproc) + +9. Install TVM + + .. code:: bash + + cd incubator-tvm/python + pip3 install -e . --user + +Edge (DPUCZDX8G) +~~~~~~~~~~~~~~~~ + + +For edge deployment we make use of two systems referred to as host and +edge. The `host <#host-requirements>`__ system is responsible for +quantization and compilation of the neural network model in a first +offline step. Afterwards, the model will be deployed on the +`edge <#edge-requirements>`__ system. + +Host requirements +^^^^^^^^^^^^^^^^^ + +The following table lists system requirements for running the TVM - +Vitis-AI Docker container. + ++-----------------------------------------------------+----------------------------------------------+ +| **Component** | **Requirement** | ++=====================================================+==============================================+ +| Operating System | Ubuntu 16.04, 18.04 | ++-----------------------------------------------------+----------------------------------------------+ +| | CentOS 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------+ +| | RHEL 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------+ +| CPU | Intel i3/i5/i7/i9/Xeon 64-bit CPU | ++-----------------------------------------------------+----------------------------------------------+ +| GPU (Optional to accelerate quantization) | NVIDIA GPU with a compute capability > 3.0 | ++-----------------------------------------------------+----------------------------------------------+ +| CUDA Driver (Optional to accelerate quantization) | nvidia-410 | ++-----------------------------------------------------+----------------------------------------------+ +| FPGA | Not necessary on host | ++-----------------------------------------------------+----------------------------------------------+ +| Docker Version | 19.03.1 | ++-----------------------------------------------------+----------------------------------------------+ + +Host setup and docker build +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Clone tvm repo + + .. code:: bash + + git clone --recursive https://github.com/apache/incubator-tvm.git +2. Build and start the tvm runtime Vitis-AI Docker Container. + + .. code:: bash + + cd incubator-tvm + ./docker/build.sh demo_vitis_ai bash + ./docker/bash.sh tvm.demo_vitis_ai + + #Setup inside container + . $VAI_ROOT/conda/etc/profile.d/conda.sh + conda activate vitis-ai-tensorflow + +3. Install PyXIR + + .. code:: bash + + git clone --recursive https://github.com/Xilinx/pyxir.git + cd pyxir + python3 setup.py install --user + + +4. Build TVM inside the container with Vitis-AI. 
+ + .. code:: bash + + cd incubator-tvm + mkdir build + cp cmake/config.cmake build + cd build + echo set\(USE_LLVM ON\) >> config.cmake + echo set\(USE_VITIS_AI ON\) >> config.cmake + cmake .. + make -j$(nproc) + +5. Install TVM + + .. code:: bash + + cd incubator-tvm/python + pip3 install -e . --user + +Edge requirements +^^^^^^^^^^^^^^^^^ + +The DPUCZDX8G can be deployed on the `Zynq Ultrascale+ +MPSoc `__ +platform. The following development boards can be used out-of-the-box: + ++--------------------+----------------------+-----------------------------------------------------------------------+ +| **Target board** | **TVM identifier** | **Info** | ++====================+======================+=======================================================================+ +| Ultra96 | DPUCZDX8G-ultra96 | https://www.xilinx.com/products/boards-and-kits/1-vad4rl.html | ++--------------------+----------------------+-----------------------------------------------------------------------+ +| ZCU104 | DPUCZDX8G-zcu104 | https://www.xilinx.com/products/boards-and-kits/zcu104.html | ++--------------------+----------------------+-----------------------------------------------------------------------+ +| ZCU102 | DPUCZDX8G-zcu102 | https://www.xilinx.com/products/boards-and-kits/ek-u1-zcu102-g.html | ++--------------------+----------------------+-----------------------------------------------------------------------+ + +Edge hardware setup +^^^^^^^^^^^^^^^^^^^ +.. note:: + + This section provides instructions for setting up with the `Pynq `__ platform but + Petalinux based flows are also supported. + +1. Download the Pynq v2.5 image for your target (use Z1 or Z2 for + Ultra96 target depending on board version) Link to image: + https://github.com/Xilinx/PYNQ/releases/tag/v2.5 +2. Follow Pynq instructions for setting up the board: `pynq + setup `__ +3. After connecting to the board, make sure to run as root. Execute + ``su`` +4. Set up DPU on Pynq by following the steps here: `DPU Pynq + setup `__ +5. Run the following command to download the DPU bitstream: + + .. code:: bash + + python3 -c 'from pynq_dpu import DpuOverlay ; overlay = DpuOverlay("dpu.bit")' + +6. Check whether the DPU kernel is alive: + + .. code:: bash + + dexplorer -w + +Edge TVM setup +^^^^^^^^^^^^^^ + +.. note:: + + When working on Petalinux instead of Pynq, the following steps might take more manual work (e.g building + hdf5 from source). Also, TVM has a scipy dependency which you then might have to build from source or + circumvent. We don't depend on scipy in our flow. + +Building TVM depends on the Xilinx +`PyXIR `__ package. PyXIR acts as an +interface between TVM and Vitis-AI tools. + +1. First install the PyXIR h5py and pydot dependencies: + + .. code:: bash + + apt-get install libhdf5-dev + pip3 install pydot h5py + +2. Install PyXIR + + .. code:: bash + + git clone --recursive https://github.com/Xilinx/pyxir.git + cd pyxir + sudo python3 setup.py install --use_vai_rt_dpuczdx8g + +3. Build TVM with Vitis-AI + + .. code:: bash + + git clone --recursive https://github.com/apache/incubator-tvm + cd incubator-tvm + mkdir build + cp cmake/config.cmake build + cd build + echo set\(USE_VITIS_AI ON\) >> config.cmake + cmake .. + make + +4. Install TVM + + .. code:: bash + + cd incubator-tvm/python + pip3 install -e . --user + +5. Check whether the setup was successful in the Python shell: + + .. 
code:: bash + + python3 -c 'import pyxir; import tvm' + + +Getting started +--------------- + +This section shows how to use TVM with Vitis-AI. For this, it's important +to understand that neural network models are quantized for Vitis-AI +execution in fixed point arithmetic. The approach we take here is to +quantize on-the-fly using the first N inputs as explained in the next +section. + +On-the-fly quantization +~~~~~~~~~~~~~~~~~~~~~~~ + +Usually, to be able to accelerate inference of neural network models +with Vitis-AI DPU accelerators, those models need to be quantized upfront. +In the TVM - Vitis-AI flow, we make use of on-the-fly quantization to remove +this additional preprocessing step. In this flow, one doesn't need to +quantize their model upfront but can make use of the typical inference +execution calls (module.run) to quantize the model on-the-fly using the +first N inputs that are provided (see more information below). This will +set up and calibrate the Vitis-AI DPU and from that point onwards +inference will be accelerated for all subsequent inputs. Note that the edge +flow deviates slightly from the explained flow in that inference won't +be accelerated after the first N inputs but the model will have been +quantized and compiled and can be moved to the edge device for +deployment. Please check out the `edge <#Edge%20usage>`__ usage +instructions below for more information. + +Config/Settings +~~~~~~~~~~~~~~~ + +A couple of environment variables can be used to customize the Vitis-AI +BYOC flow. + ++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **Environment Variable** | **Default if unset** | **Explanation** | ++============================+========================================+============================================================================================================================================================================================================================================================================================================================================+ +| PX\_QUANT\_SIZE | 128 | The number of inputs that will be used for quantization (necessary for Vitis-AI acceleration) | ++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| PX\_BUILD\_DIR | Use the on-the-fly quantization flow | Loads the quantization and compilation information from the provided build directory and immediately starts Vitis-AI hardware acceleration. This configuration can be used if the model has been executed before using on-the-fly quantization during which the quantization and compilation information was cached in a build directory. 
| ++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Cloud usage +~~~~~~~~~~~ + +This section shows how to accelerate a convolutional neural network +model in TVM with Vitis-AI on the cloud. + +To be able to target the Vitis-AI cloud DPUCADX8G target, we first have +to import the target in PyXIR. This PyXIR package is the interface being +used by TVM to integrate with the Vitis-AI stack. Additionally, import +the typical TVM and Relay modules and the Vitis-AI contrib module inside +TVM. + +.. code:: python + + import pyxir + import pyxir.contrib.target.DPUCADX8G + + import tvm + import tvm.relay as relay + from tvm.contrib.target import vitis_ai + from tvm.contrib import util, graph_runtime + from tvm.relay.build_module import bind_params_by_name + from tvm.relay.op.contrib.vitis_ai import annotation + +After importing a convolutional neural network model using the usual +Relay APIs, annotate the Relay expression for the given Vitis-AI DPU +target and partition the graph. + +.. code:: python + + mod["main"] = bind_params_by_name(mod["main"], params) + mod = annotation(mod, params, target) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + +Now, we can build the TVM runtime library for executing the model. The +TVM target is 'llvm' as the operations that can't be handled by the DPU +are executed on the CPU. The Vitis-AI target is DPUCADX8G as we are +targeting the cloud DPU and this target is passed as a config to the TVM +build call. + +.. code:: python + + tvm_target = 'llvm' + target='DPUCADX8G' + + with tvm.transform.PassContext(opt_level=3, config={'relay.ext.vitis_ai.options.target': target}): + lib = relay.build(mod, tvm_target, params=params) + +As one more step before we can accelerate a model with Vitis-AI in TVM +we have to quantize and compile the model for execution on the DPU. We +make use of on-the-fly quantization for this. Using this method one +doesn't need to quantize their model upfront and can make use of the +typical inference execution calls (module.run) to calibrate the model +on-the-fly using the first N inputs that are provided. After the first N +iterations, computations will be accelerated on the DPU. So now we will +feed N inputs to the TVM runtime module. Note that these first N inputs +will take a substantial amount of time. + +.. code:: python + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + + # First N (default = 128) inputs are used for quantization calibration and will + # be executed on the CPU + # This config can be changed by setting the 'PX_QUANT_SIZE' (e.g. export PX_QUANT_SIZE=64) + for i in range(128): + module.set_input(input_name, inputs[i]) + module.run() + +Afterwards, inference will be accelerated on the DPU. + +.. code:: python + + module.set_input(name, data) + module.run() + +To save and load the built module, one can use the typical TVM APIs: + +.. code:: python + + lib_path = "deploy_lib.so" + lib.export_library(lib_path) + +Load the module from compiled files and run inference. + +.. 
code:: python + + # load the module into memory + loaded_lib = tvm.runtime.load_module(lib_path) + + module = graph_runtime.GraphModule(loaded_lib["default"](tvm.cpu())) + module.set_input(name, data) + module.run() + +Edge usage +~~~~~~~~~~ + +This section shows how to accelerate a convolutional neural network +model in TVM with Vitis-AI at the edge. The first couple of steps will +have to be run on the host machine and take care of quantization and +compilation for deployment at the edge. + +Host steps +^^^^^^^^^^ + +To be able to target the Vitis-AI edge DPUCZDX8G target, we first have +to import the target in PyXIR. This PyXIR package is the interface being +used by TVM to integrate with the Vitis-AI stack. Additionally, import +the typical TVM and Relay modules and the Vitis-AI contrib module inside +TVM. + +.. code:: python + + import pyxir + import pyxir.contrib.target.DPUCZDX8G + + import tvm + import tvm.relay as relay + from tvm.contrib.target import vitis_ai + from tvm.contrib import util, graph_runtime + from tvm.relay.build_module import bind_params_by_name + from tvm.relay.op.contrib.vitis_ai import annotation + +After importing a convolutional neural network model using the usual +Relay APIs, annotate the Relay expression for the given Vitis-AI DPU +target and partition the graph. + +.. code:: python + + mod["main"] = bind_params_by_name(mod["main"], params) + mod = annotation(mod, params, target) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + +Now, we can build the TVM runtime library for executing the model. The +TVM target is 'llvm' as the operations that can't be handled by the DPU +are executed on the CPU. At this point that means the CPU on the host machine. +The Vitis-AI target is DPUCZDX8G-zcu104 as we are targeting the edge DPU +on the ZCU104 board and this target is passed as a config to the TVM +build call. Note that different identifiers can be passed for different +targets, see `edge targets info <#edge-requirements>`__. Additionally, we +provide the 'export_runtime_module' config that points to a file to which we +can export the Vitis-AI runtime module. We have to do this because we will +first be compiling and quantizing the model on the host machine before building +the model for edge deployment. As you will see later on, the exported runtime +module will be passed to the edge build so that the Vitis-AI runtime module +can be included. + +.. code:: python + + from tvm.contrib import util + + temp = util.tempdir() + + tvm_target = 'llvm' + target='DPUCZDX8G-zcu104' + export_rt_mod_file = temp.relpath("vitis_ai.rtmod") + + with tvm.transform.PassContext(opt_level=3, config={'relay.ext.vitis_ai.options.target': target, + 'relay.ext.vitis_ai.options.export_runtime_module': export_rt_mod_file}): + lib = relay.build(mod, tvm_target, params=params) + +We will quantize and compile the model for execution on the DPU using on-the-fly +quantization on the host machine. This makes use of TVM inference calls +(module.run) to quantize the model on the host with the first N inputs. + +.. code:: python + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + + # First N (default = 128) inputs are used for quantization calibration and will + # be executed on the CPU + # This config can be changed by setting the 'PX_QUANT_SIZE' (e.g. 
export PX_QUANT_SIZE=64) + for i in range(128): + module.set_input(input_name, inputs[i]) + module.run() + +Save the TVM lib module so that the Vitis-AI runtime module will also be exported +(to the 'export_runtime_module' path we previously passed as a config). + +.. code:: python + + from tvm.contrib import util + + temp = util.tempdir() + lib.export_library(temp.relpath("tvm_lib.so")) + +After quantizing and compiling the model for Vitis-AI acceleration using the +first N inputs, we can build the model for execution on the ARM edge device. +Here we pass the previously exported Vitis-AI runtime module so it can be included +in the TVM build. + +.. code:: python + + # Export lib for aarch64 target + tvm_target = tvm.target.arm_cpu('ultra96') + lib_kwargs = { + 'fcompile': contrib.cc.create_shared, + 'cc': "/usr/aarch64-linux-gnu/bin/ld" + } + + with tvm.transform.PassContext(opt_level=3, + config={'relay.ext.vitis_ai.options.load_runtime_module': export_rt_mod_file}): + lib_arm = relay.build(mod, tvm_target, params=params) + + lib_arm.export_library('tvm_dpu_arm.so', **lib_kwargs) + +Now, move the TVM build file (tvm\_dpu\_arm.so) to the edge device. For information on setting +up the edge device, check out the `edge setup <#edge-dpuczdx8g>`__ +section. + +Edge steps +^^^^^^^^^^ + +After setting up TVM with Vitis-AI on the edge device, you can now load +the TVM runtime module into memory and feed inputs for inference. + +.. code:: python + + ctx = tvm.cpu() + + # load the module into memory + lib = tvm.runtime.load_module("tvm_dpu_arm.so") + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module.set_input(name, data) + module.run() diff --git a/python/tvm/contrib/target/vitis_ai.py b/python/tvm/contrib/target/vitis_ai.py new file mode 100644 index 000000000000..d4931d9e3f48 --- /dev/null +++ b/python/tvm/contrib/target/vitis_ai.py @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name, unused-argument, import-outside-toplevel + +"""Utility to offload (sub-)models to Vitis-AI""" + +import warnings + +import pyxir +import pyxir.frontend.tvm + +from tvm.relay.expr import Tuple, Call, TupleGetItem +import tvm._ffi + + +class CodegenVitisAI: + + """Traverse Relay expression and convert into PyXIR XGraph format""" + + def __init__(self, model_name, function): + self.model_name = model_name + self.function = function + self.params = {} + + def convert_pyxir(self, target): + """Convert Relay expression to PyXIR XGraph""" + xgraph = pyxir.frontend.tvm.from_relay( + self.function, params=self.params, postprocessing=None + ) + xgraph = pyxir.partition(xgraph, targets=[target]) + return xgraph + + def get_output_names(self): + """Get output names from Relay expression""" + func = self.function + output_relay_ids = [] + expr = func.body + if isinstance(expr, Tuple): + for field in expr.fields: + output_relay_ids.append(hash(field)) + elif isinstance(expr, Call): + output_relay_ids.append(hash(expr)) + elif isinstance(expr, TupleGetItem): + output_relay_ids.append(hash(expr.tuple_value)) + else: + raise ValueError("Vitis-AI codegen does not support {} as output".format(type(expr))) + return output_relay_ids + + +@tvm._ffi.register_func("relay.ext.vitis_ai") +def vitis_ai_compiler(ref): + """Create a Vitis-AI runtime from the provided Relay expression""" + assert isinstance(ref, tvm.relay.function.Function) + + out_tensor_names = [] + name = str(ref.attrs.global_symbol) + + pass_context = tvm.get_global_func("transform.GetCurrentPassContext")() + + # The target Vitis-AI accelerator device + target = ( + str(pass_context.config["relay.ext.vitis_ai.options.target"]) + if "relay.ext.vitis_ai.options.target" in pass_context.config + else None + ) + + # (Optional configs) The build and work directories to be used by Vitis-AI + vai_build_dir = ( + str(pass_context.config["relay.ext.vitis_ai.options.build_dir"]) + if "relay.ext.vitis_ai.options.build_dir" in pass_context.config + else tvm.contrib.utils.tempdir().relpath("") + ) + vai_work_dir = ( + str(pass_context.config["relay.ext.vitis_ai.options.work_dir"]) + if "relay.ext.vitis_ai.options.work_dir" in pass_context.config + else tvm.contrib.utils.tempdir().relpath("") + ) + + # (Optional configs) Export and load PyXIR runtime module to file if provided. This is used to + # compile and quantize a model on the host and deploy it at the edge + export_runtime_module = ( + str(pass_context.config["relay.ext.vitis_ai.options.export_runtime_module"]) + if "relay.ext.vitis_ai.options.export_runtime_module" in pass_context.config + else "" + ) + load_runtime_module = ( + str(pass_context.config["relay.ext.vitis_ai.options.load_runtime_module"]) + if "relay.ext.vitis_ai.options.load_runtime_module" in pass_context.config + else "" + ) + + # Config checks + if load_runtime_module and target is not None: + warnings.warn( + "Both `load_runtime_module` and `target` configs were specified." + " The `load_runtime_module` points to a prebuilt runtime module with" + " an internal target so the `target` config will be ignored" + ) + if load_runtime_module and "relay.ext.vitis_ai.options.build_dir" in pass_context.config: + warnings.warn( + "Both `load_runtime_module` and `build_dir` configs were specified." 
+ " The `load_runtime_module` points to a prebuilt runtime module with" + " an internal build directory so the `build_dir` config will be ignored" + ) + if load_runtime_module and "relay.ext.vitis_ai.options.work_dir" in pass_context.config: + warnings.warn( + "Both `load_runtime_module` and `work_dir` configs were specified." + " The `load_runtime_module` points to a prebuilt runtime module with" + " an internal work directory so the `work_dir` config will be ignored" + ) + + # If load_runtime_module is not set, we will build the PyXIR runtime module from scratch + if load_runtime_module == "": + # Convert Relay expression into XGraph and do partitioning inside PyXIR + builder = CodegenVitisAI(name, ref) + xgraph = builder.convert_pyxir(target) + output_relay_ids = builder.get_output_names() + layers = xgraph.get_layers() + + # Get the output tensor names using XGraph and output Relay ids + out_tensor_names = [] + for layer in layers: + if not layer.internal: + for relay_id in layer.attrs["relay_id"]: + if relay_id in output_relay_ids: + out_tensor_names.append(layer.name) + break + if not out_tensor_names: + raise ValueError( + "During codegeneration the loading of subexpression \ + failed due to output tensor name mismatch in Relay PyXIR interface." + ) + xgraph.meta_attrs["tvm_out_tensors"] = out_tensor_names + xgraph_str = pyxir.get_xgraph_str(xgraph) + + runtime_func = "tvm.vitis_ai_runtime.from_xgraph" + fcreate = tvm._ffi.get_global_func(runtime_func) + return fcreate(name, xgraph_str, target, vai_build_dir, vai_work_dir, export_runtime_module) + + runtime_func = "tvm.vitis_ai_runtime.from_rt_mod" + fcreate = tvm._ffi.get_global_func(runtime_func) + return fcreate(name, load_runtime_module, export_runtime_module) diff --git a/python/tvm/relay/op/contrib/vitis_ai.py b/python/tvm/relay/op/contrib/vitis_ai.py new file mode 100644 index 000000000000..fa17c63fc00a --- /dev/null +++ b/python/tvm/relay/op/contrib/vitis_ai.py @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name, unused-argument, no-else-return, E1102 +"""Vitis-AI codegen annotation of supported operators""" + +import numpy as np + +import pyxir +import pyxir.frontend.tvm + +from tvm import relay +import tvm._ffi +from tvm.relay.expr import Tuple, TupleGetItem +from tvm.relay import transform +from tvm.relay.op.annotation import compiler_begin, compiler_end + + +@transform.function_pass(opt_level=0) +class VitisAIAnnotationPass: + """Responsible for annotating Relay expressions for Vitis-AI DPU accelerators""" + + def __init__(self, compiler, relay_ids): + self.compiler = compiler + self.relay_ids = relay_ids + + def transform_function(self, func, mod, ctx): + """Transform function for annotating Relay module""" + annotator = self + + class Annotator(tvm.relay.ExprMutator): + """Annotator for Vitis-AI DPU accelerators""" + + def visit_tuple(self, tup): + """Add compiler_begin and compiler_end annotations to Tuple""" + field_list = [] + cond = int(hash(tup)) + for field in tup.fields: + if cond in annotator.relay_ids: + field_list.append(compiler_begin(super().visit(field), annotator.compiler)) + else: + field_list.append(super().visit(field)) + if cond in annotator.relay_ids: + return compiler_end(Tuple(field_list), annotator.compiler) + else: + return Tuple(field_list) + + def visit_tuple_getitem(self, op): + """Add compiler_begin and compiler_end annotations to TupleGetItem""" + if int(hash(op.tuple_value)) in annotator.relay_ids: + tuple_value = compiler_begin(super().visit(op.tuple_value), annotator.compiler) + return compiler_end(TupleGetItem(tuple_value, op.index), annotator.compiler) + else: + tuple_value = super().visit(op.tuple_value) + return TupleGetItem(tuple_value, op.index) + + def visit_call(self, call): + """Add compiler_begin and compiler_end annotations to the Call expr""" + if int(hash(call)) in annotator.relay_ids: + new_args = [] + for arg in call.args: + ann = compiler_begin(super().visit(arg), annotator.compiler) + new_args.append(ann) + new_call = relay.Call(call.op, new_args, call.attrs, call.type_args) + return compiler_end(new_call, annotator.compiler) + + else: + return super().visit_call(call) + + return Annotator().visit(func) + + +def annotation(mod, params, target): + """Annotate Relay expression for Vitis-AI DPU accelerators""" + xgraph = pyxir.frontend.tvm.from_relay(mod, params, postprocessing=None) + xgraph = pyxir.partition(xgraph, targets=[target]) + + layers = xgraph.get_layers() + relay_ids = [ + list(np.array(layer.attrs["relay_id"]).flatten()) + for layer in layers + if layer.target == target + ] + relay_ids_flatten = [item for sublist in relay_ids for item in sublist] + mod = VitisAIAnnotationPass("vitis_ai", relay_ids_flatten)(mod) + + return mod diff --git a/src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc b/src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc new file mode 100644 index 000000000000..f74b5306c5f4 --- /dev/null +++ b/src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc + * \brief Register Vitis-AI codegen options. Main codegen is implemented in python. + */ + +#include + +namespace tvm { +namespace relay { +namespace contrib { +namespace vitis_ai { + +/*! \brief The target Vitis-AI accelerator device */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.target", String); +/*! \brief (Optional config) The build directory to be used by Vitis-AI */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.build_dir", String); +/*! \brief (Optional config) The work directory to be used by Vitis-AI */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.work_dir", String); +/*! \brief (Optional config) Export PyXIR runtime module to disk during serialization if provided */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.export_runtime_module", String); +/*! \brief (Optional config) Load PyXIR runtime module from disk */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.load_runtime_module", String); + +} // namespace vitis_ai +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc new file mode 100755 index 000000000000..37dc767d31af --- /dev/null +++ b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file vitis_ai_runtime.cc + */ + +#include "vitis_ai_runtime.h" + +#include + +#include +#include +#include +#include + +using namespace pyxir::runtime; + +namespace tvm { +namespace runtime { + +VitisAIRuntime::VitisAIRuntime(const std::string& symbol_name, const Array const_names, + const std::string& serialized_rt_mod, + const std::string& export_rt_mod_path) + : symbol_name_(symbol_name), + const_names_(const_names), + export_rt_mod_path_(export_rt_mod_path) { + std::istringstream sstream(serialized_rt_mod); + rt_mod_.reset(new RuntimeModule()); + rt_mod_->deserialize(sstream); + in_tensor_names_ = rt_mod_->get_in_tensor_names(); + out_tensor_names_ = rt_mod_->get_out_tensor_names(); +} + +VitisAIRuntime::VitisAIRuntime(const std::string& symbol_name, const std::string& xgraph_str, + const Array const_names, const std::string& target, + const std::string& build_dir, const std::string& work_dir, + const std::string& export_rt_mod_path) + : symbol_name_(symbol_name), + const_names_(const_names), + export_rt_mod_path_(export_rt_mod_path) { + std::istringstream xgraph_sstream(xgraph_str); + pyxir::XGraphHolder xgraph = std::make_shared(""); + pyxir::read(xgraph, xgraph_sstream); + in_tensor_names_ = xgraph->get_input_names(); + out_tensor_names_ = xgraph->get_meta_attr("tvm_out_tensors").get_strings(); + + pyxir::partition(xgraph, std::vector{target}, ""); + + pyxir::RunOptionsHolder run_options(new pyxir::runtime::RunOptions()); + run_options->on_the_fly_quantization = true; + run_options->build_dir = build_dir; + if (!work_dir.empty()) run_options->work_dir = work_dir; + rt_mod_ = + pyxir::build_rt(xgraph, target, in_tensor_names_, out_tensor_names_, "vai", run_options); +} + +Module VitisAIRuntimeCreate(const std::string& name, const std::string& xgraph_str, + const std::string& target, const std::string& build_dir, + const std::string& work_dir, const std::string& export_rt_mod_path) { + Array const_vars; + auto exec = make_object(name, xgraph_str, const_vars, target, build_dir, work_dir, + export_rt_mod_path); + return Module(exec); +} + +TVM_REGISTER_GLOBAL("tvm.vitis_ai_runtime.from_xgraph").set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = VitisAIRuntimeCreate(args[0], args[1], args[2], args[3], args[4], args[5]); +}); + +Module VitisAIRuntimeCreate(const std::string& name, const std::string& serialized_rt_mod, + const std::string& export_rt_mod_path) { + Array const_vars; + auto exec = make_object(name, const_vars, serialized_rt_mod, export_rt_mod_path); + return Module(exec); +} + +TVM_REGISTER_GLOBAL("tvm.vitis_ai_runtime.from_rt_mod").set_body([](TVMArgs args, TVMRetValue* rv) { + std::string load_rt_mod_path = args[1]; + assert(!load_rt_mod_path.empty()); + std::ifstream in_file(load_rt_mod_path); + std::stringstream buffer; + buffer << in_file.rdbuf(); + std::string serialized_rt_mod = buffer.str(); + in_file.close(); + *rv = VitisAIRuntimeCreate(args[0], serialized_rt_mod, args[2]); +}); + +Module VitisAIRuntimeLoadFromBinary(void* strm) { + dmlc::Stream* stream = static_cast(strm); + std::string symbol_name; + std::vector const_vars; + std::string serialized_rt_mod; + std::string export_rt_mod_path; + stream->Read(&serialized_rt_mod); + stream->Read(&export_rt_mod_path); + stream->Read(&symbol_name); + stream->Read(&const_vars); + Array const_names; + for (const auto& it : const_vars) { + const_names.push_back(it); + } + auto exec = + make_object(symbol_name, const_names, serialized_rt_mod, export_rt_mod_path); + return Module(exec); +} + 
+TVM_REGISTER_GLOBAL("runtime.module.loadbinary_VitisAIRuntime") + .set_body_typed(VitisAIRuntimeLoadFromBinary); + +void VitisAIRuntime::SaveToBinary(dmlc::Stream* stream) { + std::ostringstream sstream; + rt_mod_->serialize(sstream); + stream->Write(sstream.str()); + stream->Write(export_rt_mod_path_); + stream->Write(symbol_name_); + std::vector consts; + for (const auto& it : const_names_) { + consts.push_back(it); + } + stream->Write(consts); + + // If export_rt_mod_path_ member variable is set, we will additionally export the PyXIR + // runtime_module to the specified file + if (!export_rt_mod_path_.empty()) { + std::ofstream out_file(export_rt_mod_path_); + out_file << sstream.str(); + out_file.close(); + } +} + +PackedFunc VitisAIRuntime::GetFunction(const std::string& name, + const ObjectPtr& sptr_to_self) { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if ("__init_" + this->symbol_name_ == name) { + // The function to initialize constant tensors. + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size(), 1U); + this->initialized_ = true; + *rv = 0; + }); + } else if (this->symbol_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + // Initialize input tensors + DLTensor* inputs = args[0]; + std::vector in_tensors; + std::vector in_shape; + for (int i = 0; i < inputs->ndim; ++i) in_shape.push_back(inputs->shape[i]); + in_tensors.push_back(std::shared_ptr( + new pyxir::XBuffer(reinterpret_cast(static_cast(inputs->data)), 4, "f", + in_shape.size(), in_shape, false, false))); + + // Initialize output tensors + std::vector out_tensors; + for (unsigned i = 0; i < out_tensor_names_.size(); ++i) { + DLTensor* output_tensor = args[args.size() - out_tensor_names_.size() + i]; + std::vector out_shape; + for (int i = 0; i < output_tensor->ndim; ++i) out_shape.push_back(output_tensor->shape[i]); + void* output_data = reinterpret_cast(static_cast(output_tensor->data)); + out_tensors.push_back(std::shared_ptr( + new pyxir::XBuffer(output_data, 4, "f", out_shape.size(), out_shape, false, false))); + } + + // Execute the subgraph. + rt_mod_->execute(in_tensors, out_tensors); + }); + } else { + return PackedFunc(); + } +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/vitis_ai/vitis_ai_runtime.h b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.h new file mode 100755 index 000000000000..1092bc0ba27b --- /dev/null +++ b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.h @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \brief Vitis-AI runtime that can run model + * containing only tvm PackedFunc. + * \file vitis_ai_runtime.h + */ +#ifndef TVM_RUNTIME_CONTRIB_VITIS_AI_VITIS_AI_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_VITIS_AI_VITIS_AI_RUNTIME_H_ +#include +#include +#include +// clang-format off +#include +#include +#include +// clang-format on +#include +#include + +namespace tvm { +namespace runtime { + +/*! + * \brief VAI runtime. + * + * This runtime can be accessed in various language via + * TVM runtime PackedFunc API. + */ +class VitisAIRuntime : public ModuleNode { + public: + /*! + * \brief Create VitisAI runtime from serialized XGraph + * \param symbol_name The name of the function. + * \param const_names The names of each constant in the sub-graph. + * \param serialized_rt_mod The serialized runtime module. + * \param export_rt_mod_path The path to the file to be used for exporting the + * PyXIR runtime module. + */ + VitisAIRuntime(const std::string& symbol_name, const Array const_names, + const std::string& serialized_rt_mod, const std::string& export_rt_mod); + + /*! + * \brief Create VitisAI runtime from serialized XGraph + * \param symbol_name The name of the function. + * \param xgraph_str serialized XGraph representation + * \param const_names The names of each constant in the sub-graph. + * \param target The Vitis-AI device target (e.g. DPUCADX8G, DPUCZDX8G). + * \param build_dir The directory to be used for Vitis-AI build files. + * \param work_dir The directory to be used for Vitis-AI work files. + * \param export_rt_mod_path The path to the file to be used for exporting the + * PyXIR runtime module. + */ + VitisAIRuntime(const std::string& symbol_name, const std::string& xgraph_str, + const Array const_names, const std::string& target, + const std::string& build_dir, const std::string& work_dir, + const std::string& export_runtime_module_path); + + /*! + * \brief Get member function to front-end. + * \param name The name of the function. + * \param sptr_to_self The pointer to the module node. + * \return The corresponding member function. + */ + virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self); + + /*! + * \return The type key of the executor. + */ + const char* type_key() const { return "VitisAIRuntime"; } + + /*! + * \brief Serialize the content of the pyxir directory and save it to + * binary stream. + * \param stream The binary stream to save to. + */ + void SaveToBinary(dmlc::Stream* stream) final; + + private: + /*! \brief The only subgraph name for this module */ + std::string symbol_name_; + /*! \brief The required constant names */ + Array const_names_; + /*! \brief The runtime module */ + pyxir::RtModHolder rt_mod_; + /*! \brief The XGraph input tensor names in the order as provided by TVM */ + std::vector in_tensor_names_; + /*! \brief The XGraph output tensor names in the order as provided by TVM */ + std::vector out_tensor_names_; + /*! \brief The file path for exporting the runtime module if set */ + std::string export_rt_mod_path_; + /*! 
\brief Whether constant tensors have been initialized */ + bool initialized_{false}; +}; + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_CONTRIB_VITIS_AI_VITIS_AI_RUNTIME_H_ diff --git a/tests/python/contrib/test_vitis_ai/__init__.py b/tests/python/contrib/test_vitis_ai/__init__.py new file mode 100644 index 000000000000..c5fe1539b059 --- /dev/null +++ b/tests/python/contrib/test_vitis_ai/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Infrastructure and tests for Vitis-AI codegen """ diff --git a/tests/python/contrib/test_vitis_ai/infrastructure.py b/tests/python/contrib/test_vitis_ai/infrastructure.py new file mode 100644 index 000000000000..df7836a37647 --- /dev/null +++ b/tests/python/contrib/test_vitis_ai/infrastructure.py @@ -0,0 +1,171 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, W0611, C0413 + +"""Expose Vitis-AI test functions to the Python frontend""" + +import sys +import numpy as np + +import pytest + +pytest.importorskip("pyxir") +import pyxir.contrib.target.DPUCADX8G +import pyxir.contrib.target.DPUCZDX8G + +import tvm +from tvm import relay +from tvm import runtime +from tvm.relay import transform +from tvm.relay.op.contrib.vitis_ai import annotation +from tvm.relay.build_module import bind_params_by_name +from tvm.contrib.target import vitis_ai +from tvm.contrib import graph_runtime +from tvm.contrib import utils + + +def get_cpu_op_count(mod): + """Traverse graph counting ops offloaded to TVM.""" + + class Counter(tvm.relay.ExprVisitor): + def __init__(self): + super().__init__() + self.count = 0 + + def visit_call(self, call): + if isinstance(call.op, tvm.ir.Op): + self.count += 1 + + super().visit_call(call) + + c = Counter() + c.visit(mod["main"]) + return c.count + + +def skip_test(): + """Skip test if it requires the Vitis-AI codegen and it's not present.""" + if not tvm.get_global_func("relay.ext.vitis_ai", True): + print("Skip test because Vitis-AI codegen is not available.") + return True + return False + + +def build_module( + mod, + target, + dpu_target="DPUCADX8G", + params=None, + enable_vitis_ai=True, + tvm_ops=0, + vitis_ai_partitions=1, +): + """Build module for Vitis-AI codegen.""" + if isinstance(mod, tvm.relay.expr.Call): + mod = tvm.IRModule.from_expr(mod) + if params is None: + params = {} + + with tvm.transform.PassContext( + opt_level=3, config={"relay.ext.vitis_ai.options.target": dpu_target} + ): + if enable_vitis_ai: + mod["main"] = bind_params_by_name(mod["main"], params) + mod = annotation(mod, params, dpu_target) + mod = transform.MergeCompilerRegions()(mod) + mod = transform.PartitionGraph()(mod) + tvm_op_count = get_cpu_op_count(mod) + assert tvm_op_count == tvm_ops, "Got {} TVM operators, expected {}".format( + tvm_op_count, tvm_ops + ) + partition_count = 0 + for global_var in mod.get_global_vars(): + if "vitis_ai" in global_var.name_hint: + partition_count += 1 + + assert ( + vitis_ai_partitions == partition_count + ), "Got {} Vitis-AI partitions, expected {}".format( + partition_count, vitis_ai_partitions + ) + relay.backend.compile_engine.get().clear() + return relay.build(mod, target, params=params) + + +def update_lib(lib, cross_compile=None): + tmp_path = utils.tempdir() + lib_name = "lib.so" + lib_path = tmp_path.relpath(lib_name) + if cross_compile: + lib.export_library(lib_path, cc=cross_compile) + else: + lib.export_library(lib_path) + lib = runtime.load_module(lib_path) + return lib + + +def extract_vitis_ai_modules(module): + """Get the Vits-AI runtime module from llvm module.""" + return list( + filter(lambda mod: mod.type_key == "VitisAIRuntime", module.get_lib().imported_modules) + ) + + +def verify_codegen( + module, num_vitis_ai_modules=1, params=None, target="llvm", dpu_target="DPUCADX8G" +): + """Check Vitis-AI codegen against a known good output.""" + module = build_module(module, target, params=params, dpu_target=dpu_target) + vitis_ai_modules = extract_vitis_ai_modules(module) + + assert len(vitis_ai_modules) == num_vitis_ai_modules, ( + f"The number of Vitis-AI modules produced ({len(vitis_ai_modules)}) does not " + f"match the expected value ({num_vitis_ai_modules})." 
+ ) + + +def verify_result( + mod, + map_inputs, + out_shape, + result, + tol=1e-5, + target="llvm", + ctx=tvm.cpu(), + params=None, + dpu_target="DPUCADX8G", + tvm_ops=0, +): + """To check the result between reference and byoc vitis-ai flow""" + + lib = build_module(mod, target, params=params, dpu_target=dpu_target, tvm_ops=tvm_ops) + lib = update_lib(lib) + ctx = tvm.cpu() + rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + + for name, data in map_inputs.items(): + rt_mod.set_input(name, data) + rt_mod.set_input(**params) + rt_mod.run() + + out_shapes = out_shape if isinstance(out_shape, list) else [out_shape] + results = result if isinstance(result, list) else [result] + + for idx, shape in enumerate(out_shapes): + out = tvm.nd.empty(shape, ctx=ctx) + out = rt_mod.get_output(idx, out) + tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol) diff --git a/tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py b/tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py new file mode 100644 index 000000000000..4d5d5dc92c41 --- /dev/null +++ b/tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py @@ -0,0 +1,336 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, W0611, C0413 + +"""Vitis-AI codegen tests""" + +import sys +import numpy as np + +import pytest + +pytest.importorskip("pyxir") +import pyxir.contrib.target.DPUCADX8G +import pyxir.contrib.target.DPUCZDX8G + +import tvm +from tvm import relay +from tvm.relay import transform +from tvm.relay.op.contrib.vitis_ai import annotation +from tvm.relay.build_module import bind_params_by_name +from tvm.contrib.target import vitis_ai + +from .infrastructure import skip_test, verify_codegen + + +def set_func_attr(func, compile_name, symbol_name): + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", compile_name) + func = func.with_attr("global_symbol", symbol_name) + return func + + +def test_conv2d(): + """Test conv2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + x = relay.var("x", shape=(1, 3, 224, 224)) + w = relay.const(np.zeros((16, 3, 3, 3), dtype="float32")) + y = relay.nn.conv2d(x, w, strides=[2, 2], padding=[1, 1, 1, 1], kernel_size=[3, 3]) + func = relay.Function([x], y) + params = {} + params["x"] = np.zeros((1, 3, 224, 224), dtype="float32") + params["w"] = np.random.rand(16, 3, 3, 3).astype("float32") + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_depthwise_conv(): + """Test depthwise_conv operator for Vitis-AI DPUCZDX8G-zcu104 target""" + + dtype = "float32" + ishape = (1, 32, 14, 14) + wshape = (32, 1, 3, 3) + data = relay.var("data", shape=(ishape), dtype=dtype) + weights = relay.var("weights", shape=(wshape), dtype=dtype) + depthwise_conv2d = relay.nn.conv2d(data, weights, kernel_size=(3, 3), padding=(1, 1), groups=32) + func = relay.Function([data, weights], depthwise_conv2d) + params = {} + params["weights"] = np.random.randn(32, 1, 3, 3).astype(dtype) + params["data"] = np.random.randn(1, 32, 14, 14).astype(dtype) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_bias_add(): + """Test bias_add operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + dtype = "float32" + ishape = (1, 32, 14, 14) + data = relay.var("data", shape=(ishape), dtype=dtype) + bias = relay.var("bias", relay.TensorType((32,), dtype)) + out = relay.nn.bias_add(data, bias) + func = relay.Function([data, bias], out) + params = {} + params["bias"] = np.random.randn(32).astype(dtype) + params["data"] = np.random.randn(1, 32, 14, 14).astype(dtype) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_relu(): + """Test relu operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (10, 10) + x = relay.var("x", shape=shape) + y = relay.nn.relu(x) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_batchnorm(): + """Test batchnorm operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + data = relay.var("data", shape=(1, 16, 112, 112)) + bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32")) + bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32")) + bn_mmean = 
relay.var("bn_mean", relay.TensorType((16,), "float32")) + bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32")) + bn_output = relay.nn.batch_norm(data, bn_gamma, bn_beta, bn_mmean, bn_mvar) + func = relay.Function([data, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output[0]) + params = {} + params["data"] = np.zeros((1, 16, 112, 112), dtype="float32") + params["bn_gamma"] = np.random.rand(16).astype("float32") + params["bn_beta"] = np.random.rand(16).astype("float32") + params["bn_mean"] = np.random.rand(16).astype("float32") + params["bn_var"] = np.random.rand(16).astype("float32") + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_add(): + """Test add operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (10, 10) + x = relay.var("x", shape=shape) + y = x + x + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_global_avg_pool2d(): + """Test global_avg_pool2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (10, 10, 7, 7) + x = relay.var("x", shape=shape) + y = relay.nn.global_avg_pool2d(x) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_avg_pool2d(): + """Test avg_pool2d for operator Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (10, 10, 10, 10) + x = relay.var("x", shape=shape) + y = relay.nn.avg_pool2d(x, pool_size=(3, 3)) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_max_pool2d(): + """Test max_pool2d for operator Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (64, 512, 10, 10) + x = relay.var("x", shape=shape) + y = relay.nn.max_pool2d(x, pool_size=(3, 3)) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_global_max_pool2d(): + """Test global_maxpool2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (1, 512, 7, 7) + x = relay.var("x", shape=shape) + y = relay.nn.global_max_pool2d(x) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_upsampling(): + """Test upsampling operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (64, 512, 10, 10) + x = relay.var("x", shape=shape) + y = relay.nn.upsampling(x, scale_h=2, scale_w=2) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_conv2d_transpose(): + """Test conv2d_transpose operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + dshape = (1, 3, 18, 18) + kshape = (3, 10, 3, 3) + x = relay.var("x", shape=dshape) + w = relay.const(np.zeros(kshape, dtype="float32")) + y = relay.nn.conv2d_transpose( + x, w, channels=10, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1) + ) + func = relay.Function([x], y) + params = {} + dtype = "float32" + 
params["x"] = np.random.uniform(size=dshape).astype(dtype) + params["w"] = np.random.uniform(size=kshape).astype(dtype) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_annotate(): + """Test annotation operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + def partition(dpu_target): + data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) + weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32")) + bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32")) + bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32")) + bn_mmean = relay.var("bn_mean", relay.TensorType((16,), "float32")) + bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32")) + + conv = relay.nn.conv2d( + data=data, weight=weight, kernel_size=(3, 3), channels=16, padding=(1, 1) + ) + bn_output = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean, bn_mvar) + + func = relay.Function( + [data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output.astuple() + ) + mod = tvm.IRModule() + mod["main"] = func + params = {} + params["weight"] = np.random.rand(16, 3, 3, 3).astype("float32") + params["bn_gamma"] = np.random.rand(16).astype("float32") + params["bn_beta"] = np.random.rand(16).astype("float32") + params["bn_mean"] = np.random.rand(16).astype("float32") + params["bn_var"] = np.random.rand(16).astype("float32") + mod = annotation(mod, params, dpu_target) + + opt_pass = tvm.transform.Sequential( + [ + transform.MergeCompilerRegions(), + transform.PartitionGraph(), + ] + ) + + with tvm.transform.PassContext(opt_level=3): + mod = opt_pass(mod) + + return mod + + def expected(): + # function variables for conv2d + data0 = relay.var("data0", relay.TensorType((1, 3, 224, 224), "float32")) + weight0 = relay.var("weight0", relay.TensorType((16, 3, 3, 3), "float32")) + conv = relay.nn.conv2d( + data=data0, weight=weight0, kernel_size=(3, 3), channels=16, padding=(1, 1) + ) + + # function variables for batch_norm + bn_gamma0 = relay.var("bn_gamma0", relay.TensorType((16,), "float32")) + bn_beta0 = relay.var("bn_beta0", relay.TensorType((16,), "float32")) + bn_mmean0 = relay.var("bn_mean0", relay.TensorType((16,), "float32")) + bn_mvar0 = relay.var("bn_var0", relay.TensorType((16,), "float32")) + bn = relay.nn.batch_norm(conv, bn_gamma0, bn_beta0, bn_mmean0, bn_mvar0) + func0 = relay.Function( + [data0, weight0, bn_gamma0, bn_beta0, bn_mmean0, bn_mvar0], bn.astuple() + ) + func0 = set_func_attr(func0, "vitis_ai", "vitis_ai_0") + gv0 = relay.GlobalVar("vitis_ai_0") + mod = tvm.IRModule() + mod[gv0] = func0 + mod = relay.transform.InferType()(mod) + + # main function + data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) + weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32")) + bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32")) + bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32")) + bn_mmean = relay.var("bn_mean", relay.TensorType((16,), "float32")) + bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32")) + call0 = gv0(data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar) + mod["main"] = relay.Function([data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar], call0) + mod = relay.transform.InferType()(mod) + return mod + + partitioned_dpuczdx8g_zcu104 = partition("DPUCZDX8G-zcu104") + partitioned_dpucadx8g = partition("DPUCADX8G") + + ref_mod = 
expected()
+
+    assert tvm.ir.structural_equal(partitioned_dpuczdx8g_zcu104, ref_mod, map_free_vars=True)
+    assert tvm.ir.structural_equal(partitioned_dpucadx8g, ref_mod, map_free_vars=True)
+
+
+if __name__ == "__main__":
+    if sys.platform == "win32":
+        print("Skip test on Windows for now")
+        sys.exit(0)
+
+    test_conv2d()
+    test_depthwise_conv()
+    test_bias_add()
+    test_relu()
+    test_add()
+    test_max_pool2d()
+    test_global_max_pool2d()
+    test_batchnorm()
+    test_global_avg_pool2d()
+    test_avg_pool2d()
+    test_upsampling()
+    test_conv2d_transpose()
+    test_annotate()
diff --git a/tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py b/tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py
new file mode 100644
index 000000000000..030dda372cfe
--- /dev/null
+++ b/tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, W0611, C0413
+
+"""Vitis-AI runtime test for the CPU-only part
+
+This test verifies as much as possible whether a model can be correctly offloaded
+and executed for Vitis-AI acceleration. This entails:
+ - Annotating and partitioning the model for Vitis-AI acceleration
+ - Building a Vitis-AI PyXIR runtime module with on-the-fly quantization enabled
+ - Running the first iteration of the on-the-fly quantization flow. This will always be run
+   on CPU, as the first N inputs (N is a parameter) will be used for collecting calibration
+   data for quantization.
+
+NOTE This is not a full end-to-end test as we need the full Vitis-AI docker environment
+and access to an FPGA instance for that. This test verifies the Vitis-AI flow as much as
+possible without requiring access to a dedicated docker environment and/or hardware setup.
+NOTE Quantization is not being tested (we need to be inside the Vitis-AI docker environment
+for that) but the internal representation used for quantization is being generated and
+functionally tested (CPU).
+""" + +import sys +import numpy as np + +import pytest + +pytest.importorskip("pyxir") +import pyxir.contrib.target.DPUCADX8G + +import tvm +import tvm.relay.testing +from tvm import relay + +from .infrastructure import skip_test, verify_result + + +def test_extern_vitis_ai_resnet18(): + """Test first part of Vitis-AI on-the-fly quantization runtime with ResNet 18 model""" + if skip_test(): + return + + dtype = "float32" + ishape = (1, 3, 224, 224) + mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=1) + ref_mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=1) + + ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + + ref_res = ref_ex.evaluate()(i_data, **params) + verify_result( + mod, + {"data": i_data}, + (1, 1000), + ref_res.asnumpy(), + tol=1e-5, + params=params, + dpu_target="DPUCADX8G", + tvm_ops=4, + ) + + +if __name__ == "__main__": + if sys.platform == "win32": + print("Skip test on Windows for now") + sys.exit(0) + test_extern_vitis_ai_resnet18() diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index 6fc64966c0ab..9a009b6a4a78 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -44,3 +44,4 @@ echo set\(USE_TENSORFLOW_PATH \"/tensorflow\"\) >> config.cmake echo set\(USE_FLATBUFFERS_PATH \"/flatbuffers\"\) >> config.cmake echo set\(USE_ETHOSN /opt/arm/ethosn-driver\) >> config.cmake echo set\(USE_ETHOSN_HW OFF\) >> config.cmake +echo set\(USE_VITIS_AI ON\) >> config.cmake