diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ecf4635fbe..3c1ff7035d62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,7 @@ tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library gra tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF) tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF) tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF) +tvm_option(USE_VITIS_AI "Build with VITIS-AI Codegen support" OFF) # include directories include_directories(${CMAKE_INCLUDE_PATH}) @@ -367,6 +368,7 @@ include(cmake/modules/contrib/CoreML.cmake) include(cmake/modules/contrib/ONNX.cmake) include(cmake/modules/contrib/ArmComputeLib.cmake) include(cmake/modules/contrib/TensorRT.cmake) +include(cmake/modules/contrib/VitisAI.cmake) include(cmake/modules/Git.cmake) include(cmake/modules/LibInfo.cmake) include(cmake/modules/RustExt.cmake) diff --git a/cmake/config.cmake b/cmake/config.cmake index 36eeac729969..6a3ace2c9283 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -232,6 +232,9 @@ set(USE_ETHOSN_HW OFF) set(USE_TENSORRT_CODEGEN OFF) set(USE_TENSORRT_RUNTIME OFF) +# Whether use VITIS-AI codegen +set(USE_VITIS_AI OFF) + # Build ANTLR parser for Relay text format # Possible values: # - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) diff --git a/cmake/modules/contrib/VitisAI.cmake b/cmake/modules/contrib/VitisAI.cmake new file mode 100644 index 000000000000..083bd6d7adc8 --- /dev/null +++ b/cmake/modules/contrib/VitisAI.cmake @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
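+
+# This module wires the Vitis-AI BYOC support into the build: it locates the
+# PyXIR package through the Python interpreter, adds its headers to the
+# include path, compiles the Vitis-AI codegen and runtime sources, and links
+# the TVM runtime against libpyxir.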
+ +if(USE_VITIS_AI) + set(PYXIR_SHARED_LIB libpyxir.so) + find_package(PythonInterp 3.6 REQUIRED) + if(NOT PYTHON) + find_program(PYTHON NAMES python3 python3.6) + endif() + execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" + "import pyxir as px; print(px.get_include_dir()); print(px.get_lib_dir());" + RESULT_VARIABLE __result + OUTPUT_VARIABLE __output + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(__result MATCHES 0) + string(REGEX REPLACE ";" "\\\\;" __values ${__output}) + string(REGEX REPLACE "\r?\n" ";" __values ${__values}) + list(GET __values 0 PYXIR_INCLUDE_DIR) + list(GET __values 1 PYXIR_LIB_DIR) + else() + message(FATAL_ERROR "Can't build TVM with Vitis-AI because PyXIR can't be found") + endif() + message(STATUS "Build with contrib.vitisai") + include_directories(${PYXIR_INCLUDE_DIR}) + file(GLOB VAI_CONTRIB_SRC src/runtime/contrib/vitis_ai/*.cc) + file(GLOB COMPILER_VITIS_AI_SRCS + CONFIGURE_DEPENDS src/relay/backend/contrib/vitis_ai/*) + list(APPEND COMPILER_SRCS ${COMPILER_VITIS_AI_SRCS}) + link_directories(${PYXIR_LIB_DIR}) + list(APPEND TVM_RUNTIME_LINKER_LIBS "pyxir") + list(APPEND RUNTIME_SRCS ${VAI_CONTRIB_SRC}) +endif(USE_VITIS_AI) diff --git a/docs/deploy/index.rst b/docs/deploy/index.rst index 68843ba18248..e47b0a3c72fe 100644 --- a/docs/deploy/index.rst +++ b/docs/deploy/index.rst @@ -70,3 +70,4 @@ target device without relying on RPC. see the following resources on how to do s hls arm_compute_lib tensorrt + vitis_ai diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst new file mode 100755 index 000000000000..f0bd3edcd6e2 --- /dev/null +++ b/docs/deploy/vitis_ai.rst @@ -0,0 +1,652 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Vitis-AI Integration +==================== + +`Vitis-AI `__ is Xilinx's +development stack for hardware-accelerated AI inference on Xilinx +platforms, including both edge devices and Alveo cards. It consists of +optimized IP, tools, libraries, models, and example designs. It is +designed with high efficiency and ease of use in mind, unleashing the +full potential of AI acceleration on Xilinx FPGA and ACAP. + +The current Vitis-AI Byoc flow inside TVM enables acceleration of Neural +Network model inference on edge and cloud. The identifiers for the +supported edge and cloud Deep Learning Processor Units (DPU's) are +DPUCZDX8G respectively DPUCADX8G. DPUCZDX8G and DPUCADX8G are hardware +accelerators for convolutional neural networks (CNN's) on top of the +Xilinx `Zynq Ultrascale+ +MPSoc `__ +respectively +`Alveo `__ +(U200/U250) platforms. For more information about the DPU identifiers +see the section on `DPU naming information <#dpu-naming-information>`__. 
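+
+At a high level, a model is offloaded by annotating and partitioning its
+Relay representation for a given DPU identifier and by passing that
+identifier to the TVM build as a config option. The snippet below is a
+minimal sketch of the flow that is walked through in detail later on this
+page; it assumes a Relay module ``mod`` with parameters ``params`` has
+already been imported.
+
+.. code:: python
+
+    import pyxir
+    import pyxir.contrib.target.DPUCADX8G
+
+    import tvm
+    import tvm.relay as relay
+    from tvm.contrib.target import vitis_ai
+    from tvm.relay.build_module import bind_params_by_name
+    from tvm.relay.op.contrib.vitis_ai import annotation
+
+    # Annotate and partition the graph for the cloud DPU target
+    mod["main"] = bind_params_by_name(mod["main"], params)
+    mod = annotation(mod, params, "DPUCADX8G")
+    mod = relay.transform.MergeCompilerRegions()(mod)
+    mod = relay.transform.PartitionGraph()(mod)
+
+    # Build with the DPU identifier passed as a config option
+    with tvm.transform.PassContext(
+        opt_level=3, config={"relay.ext.vitis_ai.options.target": "DPUCADX8G"}
+    ):
+        lib = relay.build(mod, "llvm", params=params)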
+ +On this page you will find information on how to +`build <#build-instructions>`__ TVM with Vitis-AI and on how to `get +started <#getting-started>`__ with an example. + +DPU naming information +---------------------- + ++---------------------------------+-----------------+-------------------------------------------------------------------------+------------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------+ +| DPU | Application | HW Platform | Quantization Method | Quantization Bitwidth | Design Target | ++=================================+=================+=========================================================================+============================================================+===================================================+==========================================================================+ +| Deep Learning Processing Unit | C: CNN R: RNN | AD: Alveo DDR AH: Alveo HBM VD: Versal DDR with AIE & PL ZD: Zynq DDR | X: DECENT I: Integer threshold F: Float threshold R: RNN | 4: 4-bit 8: 8-bit 16: 16-bit M: Mixed Precision | G: General purpose H: High throughput L: Low latency C: Cost optimized | ++---------------------------------+-----------------+-------------------------------------------------------------------------+------------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------+ + +Build instructions +------------------ + +This section lists the instructions for building TVM with Vitis-AI for +both `cloud <#cloud-dpucadx8g>`__ and `edge <#edge-dpuczdx8g>`__. + +Cloud (DPUCADX8G) +~~~~~~~~~~~~~~~~~ + +For Vitis-AI acceleration in the cloud TVM has to be built on top of the +Xilinx Alveo platform. + +System requirements +^^^^^^^^^^^^^^^^^^^ + +The following table lists system requirements for running docker +containers as well as Alveo cards. 
+ ++-----------------------------------------------------+----------------------------------------------------------+ +| **Component** | **Requirement** | ++=====================================================+==========================================================+ +| Motherboard | PCI Express 3.0-compliant with one dual-width x16 slot | ++-----------------------------------------------------+----------------------------------------------------------+ +| System Power Supply | 225W | ++-----------------------------------------------------+----------------------------------------------------------+ +| Operating System | Ubuntu 16.04, 18.04 | ++-----------------------------------------------------+----------------------------------------------------------+ +| | CentOS 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------------------+ +| | RHEL 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------------------+ +| CPU | Intel i3/i5/i7/i9/Xeon 64-bit CPU | ++-----------------------------------------------------+----------------------------------------------------------+ +| GPU (Optional to accelerate quantization) | NVIDIA GPU with a compute capability > 3.0 | ++-----------------------------------------------------+----------------------------------------------------------+ +| CUDA Driver (Optional to accelerate quantization) | nvidia-410 | ++-----------------------------------------------------+----------------------------------------------------------+ +| FPGA | Xilinx Alveo U200 or U250 | ++-----------------------------------------------------+----------------------------------------------------------+ +| Docker Version | 19.03.1 | ++-----------------------------------------------------+----------------------------------------------------------+ + +Hardware setup and docker build +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Clone the Vitis AI repository: + + .. code:: bash + + git clone --recurse-submodules https://github.com/Xilinx/Vitis-AI + +2. Install Docker, and add the user to the docker group. Link the user + to docker installation instructions from the following docker's + website: + + + - https://docs.docker.com/install/linux/docker-ce/ubuntu/ + - https://docs.docker.com/install/linux/docker-ce/centos/ + - https://docs.docker.com/install/linux/linux-postinstall/ + +3. Download the latest Vitis AI Docker with the following command. This container runs on CPU. + + .. code:: bash + + docker pull xilinx/vitis-ai:latest + + To accelerate the quantization, you can optionally use the Vitis-AI GPU docker image. Use the below commands to build the Vitis-AI GPU docker container: + + .. code:: bash + + cd Vitis-AI/docker + ./docker_build_gpu.sh + +4. Set up Vitis AI to target Alveo cards. To target Alveo cards with + Vitis AI for machine learning workloads, you must install the + following software components: + + - Xilinx Runtime (XRT) + - Alveo Deployment Shells (DSAs) + - Xilinx Resource Manager (XRM) (xbutler) + - Xilinx Overlaybins (Accelerators to Dynamically Load - binary + programming files) + + While it is possible to install all of these software components + individually, a script has been provided to automatically install + them at once. To do so: + + - Run the following commands: + + .. code:: bash + + cd Vitis-AI/alveo/packages + sudo su + ./install.sh + + - Power cycle the system. + +5. Clone tvm repo and pyxir repo + + .. 
code:: bash + + git clone --recursive https://github.com/apache/incubator-tvm.git + git clone --recursive https://github.com/Xilinx/pyxir.git + +6. Build and start the tvm runtime Vitis-AI Docker Container. + + .. code:: bash + + ./incubator-tvm/docker/build.sh demo_vitis_ai bash + ./incubator-tvm/docker/bash.sh tvm.demo_vitis_ai + + #Setup inside container + source /opt/xilinx/xrt/setup.sh + . $VAI_ROOT/conda/etc/profile.d/conda.sh + conda activate vitis-ai-tensorflow + +7. Install PyXIR + + .. code:: bash + + cd pyxir + python3 setup.py install --use_vai_rt_dpucadx8g --user + + +8. Build TVM inside the container with Vitis-AI + + .. code:: bash + + cd incubator-tvm + mkdir build + cp cmake/config.cmake build + cd build + echo set\(USE_LLVM ON\) >> config.cmake + echo set\(USE_VITIS_AI ON\) >> config.cmake + cmake .. + make -j$(nproc) + +9. Install TVM + + .. code:: bash + + cd incubator-tvm/python + pip3 install -e . --user + +Edge (DPUCZDX8G) +^^^^^^^^^^^^^^^^ + + +For edge deployment we make use of two systems referred to as host and +edge. The `host <#host-requirements>`__ system is responsible for +quantization and compilation of the neural network model in a first +offline step. Afterwards, the model will de deployed on the +`edge <#edge-requirements>`__ system. + +Host requirements +^^^^^^^^^^^^^^^^^ + +The following table lists system requirements for running the TVM - +Vitis-AI docker container. + ++-----------------------------------------------------+----------------------------------------------+ +| **Component** | **Requirement** | ++=====================================================+==============================================+ +| Operating System | Ubuntu 16.04, 18.04 | ++-----------------------------------------------------+----------------------------------------------+ +| | CentOS 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------+ +| | RHEL 7.4, 7.5 | ++-----------------------------------------------------+----------------------------------------------+ +| CPU | Intel i3/i5/i7/i9/Xeon 64-bit CPU | ++-----------------------------------------------------+----------------------------------------------+ +| GPU (Optional to accelerate quantization) | NVIDIA GPU with a compute capability > 3.0 | ++-----------------------------------------------------+----------------------------------------------+ +| CUDA Driver (Optional to accelerate quantization) | nvidia-410 | ++-----------------------------------------------------+----------------------------------------------+ +| FPGA | Not necessary on host | ++-----------------------------------------------------+----------------------------------------------+ +| Docker Version | 19.03.1 | ++-----------------------------------------------------+----------------------------------------------+ + +Host setup and docker build +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Clone tvm repo + + .. code:: bash + + git clone --recursive https://github.com/apache/incubator-tvm.git +2. Build and start the tvm runtime Vitis-AI Docker Container. + + .. code:: bash + + cd incubator-tvm + ./incubator-tvm/docker/build.sh demo_vitis_ai bash + ./incubator-tvm/docker/bash.sh tvm.demo_vitis_ai + + #Setup inside container + . $VAI_ROOT/conda/etc/profile.d/conda.sh + conda activate vitis-ai-tensorflow + +3. Install PyXIR + + .. code:: bash + + git clone --recursive https://github.com/Xilinx/pyxir.git + cd pyxir + python3 setup.py install --user + + +4. Build TVM inside the container with Vitis-AI. 
+ + .. code:: bash + + cd incubator-tvm + mkdir build + cp cmake/config.cmake build + cd build + echo set\(USE_LLVM ON\) >> config.cmake + echo set\(USE_VITIS_AI ON\) >> config.cmake + cmake .. + make -j$(nproc) + +5. Install TVM + + .. code:: bash + + cd incubator-tvm/python + pip3 install -e . --user + +Edge requirements +^^^^^^^^^^^^^^^^^ + +The DPUCZDX8G can be deployed on the `Zynq Ultrascale+ +MPSoc `__ +platform. The following development boards can be used out-of-the-box: + ++--------------------+----------------------+-----------------------------------------------------------------------+ +| **Target board** | **TVM identifier** | **Info** | ++====================+======================+=======================================================================+ +| Ultra96 | DPUCZDX8G-ultra96 | https://www.xilinx.com/products/boards-and-kits/1-vad4rl.html | ++--------------------+----------------------+-----------------------------------------------------------------------+ +| ZCU104 | DPUCZDX8G-zcu104 | https://www.xilinx.com/products/boards-and-kits/zcu104.html | ++--------------------+----------------------+-----------------------------------------------------------------------+ +| ZCU102 | DPUCZDX8G-zcu102 | https://www.xilinx.com/products/boards-and-kits/ek-u1-zcu102-g.html | ++--------------------+----------------------+-----------------------------------------------------------------------+ + +Edge hardware setup +^^^^^^^^^^^^^^^^^^^ +.. note:: + + This section provides instructions for setting up with the `Pynq `__ platform but + Petalinux based flows are also supported. + +1. Download the Pynq v2.5 image for your target (use Z1 or Z2 for + Ultra96 target depending on board version) Link to image: + https://github.com/Xilinx/PYNQ/releases/tag/v2.5 +2. Follow Pynq instructions for setting up the board: `pynq + setup `__ +3. After connecting to the board, make sure to run as root. Execute + ``su`` +4. Set up DPU on Pynq by following the steps here: `DPU Pynq + setup `__ +5. Run the following command to download the DPU bitstream: + + .. code:: bash + + python3 -c 'from pynq_dpu import DpuOverlay ; overlay = DpuOverlay("dpu.bit")' + +6. Check whether the DPU kernel is alive: + + .. code:: bash + + dexplorer -w + +Edge TVM setup +^^^^^^^^^^^^^^ + +.. note:: + + When working on Petalinux instead of Pynq, the following steps might take more manual work (e.g building + hdf5 from source). Also, TVM has a scipy dependency which you then might have to build from source or + circumvent. We don't depend on scipy in our flow. + +Building TVM depends on the Xilinx +`PyXIR `__ package. PyXIR acts as an +interface between TVM and Vitis-AI tools. + +1. First install the PyXIR h5py and pydot dependencies: + + .. code:: bash + + apt-get install libhdf5-dev + pip3 install pydot h5py + +2. Install PyXIR + + .. code:: bash + + git clone --recursive https://github.com/Xilinx/pyxir.git + cd pyxir + sudo python3 setup.py install --use_vai_rt_dpuczdx8g + +3. Build TVM with Vitis-AI + + .. code:: bash + + git clone --recursive https://github.com/apache/incubator-tvm + cd incubator-tvm + mkdir build + cp cmake/config.cmake build + cd build + echo set\(USE_VITIS_AI ON\) >> config.cmake + cmake .. + make + +4. Install TVM + + .. code:: bash + + cd incubator-tvm/python + pip3 install -e . --user + +5. Check whether the setup was successful in the Python shell: + + .. 
code:: bash + + python3 -c 'import pyxir; import tvm' + + +Getting started +--------------- + +This section shows how to use TVM with Vitis-AI. For this it's important +to understand that neural network models are quantized for Vitis-AI +execution in fixed point arithmetic. The approach we take here is to +quantize on-the-fly using the first N inputs as explained in the next +section. + +On-the-fly quantization +~~~~~~~~~~~~~~~~~~~~~~~ + +Usually, to be able to accelerate inference of Neural Network models +with Vitis-AI DPU accelerators, those models need to quantized upfront. +In TVM - Vitis-AI flow, we make use of on-the-fly quantization to remove +this additional preprocessing step. In this flow, one doesn't need to +quantize his/her model upfront but can make use of the typical inference +execution calls (module.run) to quantize the model on-the-fly using the +first N inputs that are provided (see more information below). This will +set up and calibrate the Vitis-AI DPU and from that point onwards +inference will be accelerated for all next inputs. Note that the edge +flow deviates slightly from the explained flow in that inference won't +be accelerated after the first N inputs but the model will have been +quantized and compiled and can be moved to the edge device for +deployment. Please check out the `edge <#Edge%20usage>`__ usage +instructions below for more information. + +Config/Settings +~~~~~~~~~~~~~~~ + +A couple of environment variables can be used to customize the Vitis-AI +Byoc flow. + ++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **Environment Variable** | **Default if unset** | **Explanation** | ++============================+========================================+============================================================================================================================================================================================================================================================================================================================================+ +| PX\_QUANT\_SIZE | 128 | The number of inputs that will be used for quantization (necessary for Vitis-AI acceleration) | ++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| PX\_BUILD\_DIR | Use the on-the-fly quantization flow | Loads the quantization and compilation information from the provided build directory and immediately starts Vitis-AI hardware acceleration. This configuration can be used if the model has been executed before using on-the-fly quantization during which the quantization and comilation information was cached in a build directory. 
| ++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Cloud usage +~~~~~~~~~~~ + +This section shows how to accelerate a convolutional neural network +model in TVM with Vitis-AI on the cloud. + +To be able to target the Vitis-AI cloud DPUCADX8G target we first have +to import the target in PyXIR. This PyXIR package is the interface being +used by TVM to integrate with the Vitis-AI stack. Additionaly, import +the typical TVM and Relay modules and the Vitis-AI contrib module inside +TVM. + +.. code:: python + + import pyxir + import pyxir.contrib.target.DPUCADX8G + + import tvm + import tvm.relay as relay + from tvm.contrib.target import vitis_ai + from tvm.contrib import util, graph_runtime + from tvm.relay.build_module import bind_params_by_name + from tvm.relay.op.contrib.vitis_ai import annotation + +After importing a convolutional neural network model using the usual +Relay API's, annotate the Relay expression for the given Vitis-AI DPU +target and partition the graph. + +.. code:: python + + mod["main"] = bind_params_by_name(mod["main"], params) + mod = annotation(mod, params, target) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + +Now, we can build the TVM runtime library for executing the model. The +TVM target is 'llvm' as the operations that can't be handled by the DPU +are executed on the CPU. The Vitis-AI target is DPUCADX8G as we are +targeting the cloud DPU and this target is passed as a config to the TVM +build call. + +.. code:: python + + tvm_target = 'llvm' + target='DPUCADX8G' + + with tvm.transform.PassContext(opt_level=3, config= {'relay.ext.vitis_ai.options.target': target}): + lib = relay.build(mod, tvm_target, params=params) + +As one more step before we can accelerate a model with Vitis-AI in TVM +we have to quantize and compile the model for execution on the DPU. We +make use of on-the-fly quantization for this. Using this method one +doesn’t need to quantize their model upfront and can make use of the +typical inference execution calls (module.run) to calibrate the model +on-the-fly using the first N inputs that are provided. After the first N +iterations, computations will be accelerated on the DPU. So now we will +feed N inputs to the TVM runtime module. Note that these first N inputs +will take a substantial amount of time. + +.. code:: python + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + + # First N (default = 128) inputs are used for quantization calibration and will + # be executed on the CPU + # This config can be changed by setting the 'PX_QUANT_SIZE' (e.g. export PX_QUANT_SIZE=64) + for i in range(128): + module.set_input(input_name, inputs[i]) + module.run() + +Afterwards, inference will be accelerated on the DPU. + +.. code:: python + + module.set_input(name, data) + module.run() + +To save and load the built module, one can use the typical TVM API's: + +.. code:: python + + lib_path = "deploy_lib.so" + lib.export_library(lib_path) + +Load the module from compiled files and run inference + +.. 
code:: python + + # load the module into memory + loaded_lib = tvm.runtime.load_module(lib_path) + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module.set_input(name, data) + module.run() + +Edge usage +~~~~~~~~~~ + +This section shows how to accelerate a convolutional neural network +model in TVM with Vitis-AI at the edge. The first couple of steps will +have to be run on the host machine and take care of quantization and +compilation for deployment at the edge. + +Host steps +^^^^^^^^^^ + +To be able to target the Vitis-AI cloud DPUCZDX8G target we first have +to import the target in PyXIR. This PyXIR package is the interface being +used by TVM to integrate with the Vitis-AI stack. Additionaly, import +the typical TVM and Relay modules and the Vitis-AI contrib module inside +TVM. + +.. code:: python + + import pyxir + import pyxir.contrib.target.DPUCZDX8G + + import tvm + import tvm.relay as relay + from tvm.contrib.target import vitis_ai + from tvm.contrib import util, graph_runtime + from tvm.relay.build_module import bind_params_by_name + from tvm.relay.op.contrib.vitis_ai import annotation + +After importing a convolutional neural network model using the usual +Relay API's, annotate the Relay expression for the given Vitis-AI DPU +target and partition the graph. + +.. code:: python + + mod["main"] = bind_params_by_name(mod["main"], params) + mod = annotation(mod, params, target) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + +Now, we can build the TVM runtime library for executing the model. The +TVM target is 'llvm' as the operations that can't be handled by the DPU +are executed on the CPU. At this point that means the CPU on the host machine. +The Vitis-AI target is DPUCZDX8G-zcu104 as we are targeting the edge DPU +on the ZCU104 board and this target is passed as a config to the TVM +build call. Note that different identifiers can be passed for different +targets, see `edge targets info <#edge-requirements>`__. Additionally, we +provide the 'export_runtime_module' config that points to a file to which we +can export the Vitis-AI runtime module. We have to do this because we will +first be compiling and quantizing the model on the host machine before building +the model for edge deployment. As you will see later on, the exported runtime +module will be passed to the edge build so that the Vitis-AI runtime module +can be included. + +.. code:: python + + from tvm.contrib import util + + temp = util.tempdir() + + tvm_target = 'llvm' + target='DPUCZDX8G-zcu104' + export_rt_mod_file = temp.relpath("vitis_ai.rtmod") + + with tvm.transform.PassContext(opt_level=3, config= {'relay.ext.vitis_ai.options.target': target, + 'relay.ext.vitis_ai.options.export_runtime_module': export_rt_mod_file}): + lib = relay.build(mod, tvm_target, params=params) + +We will quantize and compile the model for execution on the DPU using on-the-fly +quantization on the host machine. This makes use of TVM inference calls +(module.run) to quantize the model on the host with the first N inputs. + +.. code:: python + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + + # First N (default = 128) inputs are used for quantization calibration and will + # be executed on the CPU + # This config can be changed by setting the 'PX_QUANT_SIZE' (e.g. 
export PX_QUANT_SIZE=64) + for i in range(128): + module.set_input(input_name, inputs[i]) + module.run() + +Save the TVM lib module so that the Vitis-AI runtime module will also be exported +(to the 'export_runtime_module' path we previously passed as a config). + +.. code:: python + + from tvm.contrib import util + + temp = util.tempdir() + lib.export_library(temp.relpath("tvm_lib.so")) + +After quantizing and compiling the model for Vitis-AI acceleration using the +first N inputs we can build the model for execution on the ARM edge device. +Here we pass the previously exported Vitis-AI runtime module so it can be included +in the TVM build. + +.. code:: python + + # Export lib for aarch64 target + tvm_target = tvm.target.arm_cpu('ultra96') + lib_kwargs = { + 'fcompile': contrib.cc.create_shared, + 'cc': "/usr/aarch64-linux-gnu/bin/ld" + } + + with tvm.transform.PassContext(opt_level=3, + config={'relay.ext.vitis_ai.options.load_runtime_module': export_rt_mod_file}): + lib_arm = relay.build(mod, tvm_target, params=params) + + lib_dpuv2.export_library('tvm_dpu_arm.so', **lib_kwargs) + +Now, move the TVM build files (tvm\_dpu\_arm.json, tvm\_dpu\_arm.so, +tvm\_dpu\_arm.params) to the edge device. For information on setting +up the edge device check out the `edge setup <#edge-dpuczdx8g>`__ +section. + +Edge steps +^^^^^^^^^^ + +After setting up TVM with Vitis-AI on the edge device, you can now load +the TVM runtime module into memory and feed inputs for inference. + +.. code:: python + + ctx = tvm.cpu() + + # load the module into memory + lib = tvm.runtime.load_module("tvm_dpu_arm.so") + + module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module.set_input(name, data) + module.run() diff --git a/python/tvm/contrib/target/vitis_ai.py b/python/tvm/contrib/target/vitis_ai.py new file mode 100644 index 000000000000..d4931d9e3f48 --- /dev/null +++ b/python/tvm/contrib/target/vitis_ai.py @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
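+
+# The `relay.ext.vitis_ai` codegen registered below is invoked by relay.build
+# for every partitioned function carrying the "vitis_ai" Compiler attribute.
+# It converts the Relay function into a PyXIR XGraph and wraps it in a
+# Vitis-AI runtime module; all options are read from the current PassContext
+# (the config keys are registered in config_vitis_ai.cc).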
+# pylint: disable=invalid-name, unused-argument, import-outside-toplevel + +"""Utility to offload (sub-)models to Vitis-AI""" + +import warnings + +import pyxir +import pyxir.frontend.tvm + +from tvm.relay.expr import Tuple, Call, TupleGetItem +import tvm._ffi + + +class CodegenVitisAI: + + """Traverse Relay expression and convert into PyXIR XGraph format""" + + def __init__(self, model_name, function): + self.model_name = model_name + self.function = function + self.params = {} + + def convert_pyxir(self, target): + """Convert Relay expression to PyXIR XGraph""" + xgraph = pyxir.frontend.tvm.from_relay( + self.function, params=self.params, postprocessing=None + ) + xgraph = pyxir.partition(xgraph, targets=[target]) + return xgraph + + def get_output_names(self): + """Get output names from Relay expression""" + func = self.function + output_relay_ids = [] + expr = func.body + if isinstance(expr, Tuple): + for field in expr.fields: + output_relay_ids.append(hash(field)) + elif isinstance(expr, Call): + output_relay_ids.append(hash(expr)) + elif isinstance(expr, TupleGetItem): + output_relay_ids.append(hash(expr.tuple_value)) + else: + raise ValueError("Vitis-AI codegen does not support {} as output".format(type(expr))) + return output_relay_ids + + +@tvm._ffi.register_func("relay.ext.vitis_ai") +def vitis_ai_compiler(ref): + """Create a Vitis-AI runtime from the provided Relay expression""" + assert isinstance(ref, tvm.relay.function.Function) + + out_tensor_names = [] + name = str(ref.attrs.global_symbol) + + pass_context = tvm.get_global_func("transform.GetCurrentPassContext")() + + # The target Vitis-AI accelerator device + target = ( + str(pass_context.config["relay.ext.vitis_ai.options.target"]) + if "relay.ext.vitis_ai.options.target" in pass_context.config + else None + ) + + # (Optional configs) The build and work directories to be used by Vitis-AI + vai_build_dir = ( + str(pass_context.config["relay.ext.vitis_ai.options.build_dir"]) + if "relay.ext.vitis_ai.options.build_dir" in pass_context.config + else tvm.contrib.utils.tempdir().relpath("") + ) + vai_work_dir = ( + str(pass_context.config["relay.ext.vitis_ai.options.work_dir"]) + if "relay.ext.vitis_ai.options.work_dir" in pass_context.config + else tvm.contrib.utils.tempdir().relpath("") + ) + + # (Optional configs) Export and load PyXIR runtime module to file if provided. This is used to + # compile and quantize a model on the host and deploy it at the edge + export_runtime_module = ( + str(pass_context.config["relay.ext.vitis_ai.options.export_runtime_module"]) + if "relay.ext.vitis_ai.options.export_runtime_module" in pass_context.config + else "" + ) + load_runtime_module = ( + str(pass_context.config["relay.ext.vitis_ai.options.load_runtime_module"]) + if "relay.ext.vitis_ai.options.load_runtime_module" in pass_context.config + else "" + ) + + # Config checks + if load_runtime_module and target is not None: + warnings.warn( + "Both `load_runtime_module` and `target` configs were specified." + " The `load_runtime_module` points to a prebuilt runtime module with" + " an internal target so the `target` config will be ignored" + ) + if load_runtime_module and "relay.ext.vitis_ai.options.build_dir" in pass_context.config: + warnings.warn( + "Both `load_runtime_module` and `build_dir` configs were specified." 
+ " The `load_runtime_module` points to a prebuilt runtime module with" + " an internal build directory so the `build_dir` config will be ignored" + ) + if load_runtime_module and "relay.ext.vitis_ai.options.work_dir" in pass_context.config: + warnings.warn( + "Both `load_runtime_module` and `work_dir` configs were specified." + " The `load_runtime_module` points to a prebuilt runtime module with" + " an internal work directory so the `work_dir` config will be ignored" + ) + + # If load_runtime_module is not set, we will build the PyXIR runtime module from scratch + if load_runtime_module == "": + # Convert Relay expression into XGraph and do partitioning inside PyXIR + builder = CodegenVitisAI(name, ref) + xgraph = builder.convert_pyxir(target) + output_relay_ids = builder.get_output_names() + layers = xgraph.get_layers() + + # Get the output tensor names using XGraph and output Relay ids + out_tensor_names = [] + for layer in layers: + if not layer.internal: + for relay_id in layer.attrs["relay_id"]: + if relay_id in output_relay_ids: + out_tensor_names.append(layer.name) + break + if not out_tensor_names: + raise ValueError( + "During codegeneration the loading of subexpression \ + failed due to output tensor name mismatch in Relay PyXIR interface." + ) + xgraph.meta_attrs["tvm_out_tensors"] = out_tensor_names + xgraph_str = pyxir.get_xgraph_str(xgraph) + + runtime_func = "tvm.vitis_ai_runtime.from_xgraph" + fcreate = tvm._ffi.get_global_func(runtime_func) + return fcreate(name, xgraph_str, target, vai_build_dir, vai_work_dir, export_runtime_module) + + runtime_func = "tvm.vitis_ai_runtime.from_rt_mod" + fcreate = tvm._ffi.get_global_func(runtime_func) + return fcreate(name, load_runtime_module, export_runtime_module) diff --git a/python/tvm/relay/op/contrib/vitis_ai.py b/python/tvm/relay/op/contrib/vitis_ai.py new file mode 100644 index 000000000000..fa17c63fc00a --- /dev/null +++ b/python/tvm/relay/op/contrib/vitis_ai.py @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name, unused-argument, no-else-return, E1102 +"""Vitis-AI codegen annotation of supported operators""" + +import numpy as np + +import pyxir +import pyxir.frontend.tvm + +from tvm import relay +import tvm._ffi +from tvm.relay.expr import Tuple, TupleGetItem +from tvm.relay import transform +from tvm.relay.op.annotation import compiler_begin, compiler_end + + +@transform.function_pass(opt_level=0) +class VitisAIAnnotationPass: + """Responsible for annotating Relay expressions for Vitis-AI DPU accelerators""" + + def __init__(self, compiler, relay_ids): + self.compiler = compiler + self.relay_ids = relay_ids + + def transform_function(self, func, mod, ctx): + """Transform function for annotating Relay module""" + annotator = self + + class Annotator(tvm.relay.ExprMutator): + """Annotator for Vitis-AI DPU accelerators""" + + def visit_tuple(self, tup): + """Add compiler_begin and compiler_end annotations to Tuple""" + field_list = [] + cond = int(hash(tup)) + for field in tup.fields: + if cond in annotator.relay_ids: + field_list.append(compiler_begin(super().visit(field), annotator.compiler)) + else: + field_list.append(super().visit(field)) + if cond in annotator.relay_ids: + return compiler_end(Tuple(field_list), annotator.compiler) + else: + return Tuple(field_list) + + def visit_tuple_getitem(self, op): + """Add compiler_begin and compiler_end annotations to TupleGetItem""" + if int(hash(op.tuple_value)) in annotator.relay_ids: + tuple_value = compiler_begin(super().visit(op.tuple_value), annotator.compiler) + return compiler_end(TupleGetItem(tuple_value, op.index), annotator.compiler) + else: + tuple_value = super().visit(op.tuple_value) + return TupleGetItem(tuple_value, op.index) + + def visit_call(self, call): + """Add compiler_begin and compiler_end annotations to the Call expr""" + if int(hash(call)) in annotator.relay_ids: + new_args = [] + for arg in call.args: + ann = compiler_begin(super().visit(arg), annotator.compiler) + new_args.append(ann) + new_call = relay.Call(call.op, new_args, call.attrs, call.type_args) + return compiler_end(new_call, annotator.compiler) + + else: + return super().visit_call(call) + + return Annotator().visit(func) + + +def annotation(mod, params, target): + """Annotate Relay expression for Vitis-AI DPU accelerators""" + xgraph = pyxir.frontend.tvm.from_relay(mod, params, postprocessing=None) + xgraph = pyxir.partition(xgraph, targets=[target]) + + layers = xgraph.get_layers() + relay_ids = [ + list(np.array(layer.attrs["relay_id"]).flatten()) + for layer in layers + if layer.target == target + ] + relay_ids_flatten = [item for sublist in relay_ids for item in sublist] + mod = VitisAIAnnotationPass("vitis_ai", relay_ids_flatten)(mod) + + return mod diff --git a/src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc b/src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc new file mode 100644 index 000000000000..f74b5306c5f4 --- /dev/null +++ b/src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/vitis_ai/config_vitis_ai.cc + * \brief Register Vitis-AI codegen options. Main codegen is implemented in python. + */ + +#include + +namespace tvm { +namespace relay { +namespace contrib { +namespace vitis_ai { + +/*! \brief The target Vitis-AI accelerator device */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.target", String); +/*! \brief (Optional config) The build directory to be used by Vitis-AI */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.build_dir", String); +/*! \brief (Optional config) The work directory to be used by Vitis-AI */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.work_dir", String); +/*! \brief (Optional config) Export PyXIR runtime module to disk during serialization if provided */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.export_runtime_module", String); +/*! \brief (Optional config) Load PyXIR runtime module from disk */ +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.vitis_ai.options.load_runtime_module", String); + +} // namespace vitis_ai +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc new file mode 100755 index 000000000000..37dc767d31af --- /dev/null +++ b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file vitis_ai_runtime.cc + */ + +#include "vitis_ai_runtime.h" + +#include + +#include +#include +#include +#include + +using namespace pyxir::runtime; + +namespace tvm { +namespace runtime { + +VitisAIRuntime::VitisAIRuntime(const std::string& symbol_name, const Array const_names, + const std::string& serialized_rt_mod, + const std::string& export_rt_mod_path) + : symbol_name_(symbol_name), + const_names_(const_names), + export_rt_mod_path_(export_rt_mod_path) { + std::istringstream sstream(serialized_rt_mod); + rt_mod_.reset(new RuntimeModule()); + rt_mod_->deserialize(sstream); + in_tensor_names_ = rt_mod_->get_in_tensor_names(); + out_tensor_names_ = rt_mod_->get_out_tensor_names(); +} + +VitisAIRuntime::VitisAIRuntime(const std::string& symbol_name, const std::string& xgraph_str, + const Array const_names, const std::string& target, + const std::string& build_dir, const std::string& work_dir, + const std::string& export_rt_mod_path) + : symbol_name_(symbol_name), + const_names_(const_names), + export_rt_mod_path_(export_rt_mod_path) { + std::istringstream xgraph_sstream(xgraph_str); + pyxir::XGraphHolder xgraph = std::make_shared(""); + pyxir::read(xgraph, xgraph_sstream); + in_tensor_names_ = xgraph->get_input_names(); + out_tensor_names_ = xgraph->get_meta_attr("tvm_out_tensors").get_strings(); + + pyxir::partition(xgraph, std::vector{target}, ""); + + pyxir::RunOptionsHolder run_options(new pyxir::runtime::RunOptions()); + run_options->on_the_fly_quantization = true; + run_options->build_dir = build_dir; + if (!work_dir.empty()) run_options->work_dir = work_dir; + rt_mod_ = + pyxir::build_rt(xgraph, target, in_tensor_names_, out_tensor_names_, "vai", run_options); +} + +Module VitisAIRuntimeCreate(const std::string& name, const std::string& xgraph_str, + const std::string& target, const std::string& build_dir, + const std::string& work_dir, const std::string& export_rt_mod_path) { + Array const_vars; + auto exec = make_object(name, xgraph_str, const_vars, target, build_dir, work_dir, + export_rt_mod_path); + return Module(exec); +} + +TVM_REGISTER_GLOBAL("tvm.vitis_ai_runtime.from_xgraph").set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = VitisAIRuntimeCreate(args[0], args[1], args[2], args[3], args[4], args[5]); +}); + +Module VitisAIRuntimeCreate(const std::string& name, const std::string& serialized_rt_mod, + const std::string& export_rt_mod_path) { + Array const_vars; + auto exec = make_object(name, const_vars, serialized_rt_mod, export_rt_mod_path); + return Module(exec); +} + +TVM_REGISTER_GLOBAL("tvm.vitis_ai_runtime.from_rt_mod").set_body([](TVMArgs args, TVMRetValue* rv) { + std::string load_rt_mod_path = args[1]; + assert(!load_rt_mod_path.empty()); + std::ifstream in_file(load_rt_mod_path); + std::stringstream buffer; + buffer << in_file.rdbuf(); + std::string serialized_rt_mod = buffer.str(); + in_file.close(); + *rv = VitisAIRuntimeCreate(args[0], serialized_rt_mod, args[2]); +}); + +Module VitisAIRuntimeLoadFromBinary(void* strm) { + dmlc::Stream* stream = static_cast(strm); + std::string symbol_name; + std::vector const_vars; + std::string serialized_rt_mod; + std::string export_rt_mod_path; + stream->Read(&serialized_rt_mod); + stream->Read(&export_rt_mod_path); + stream->Read(&symbol_name); + stream->Read(&const_vars); + Array const_names; + for (const auto& it : const_vars) { + const_names.push_back(it); + } + auto exec = + make_object(symbol_name, const_names, serialized_rt_mod, export_rt_mod_path); + return Module(exec); +} + 
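+// Register the deserialization hook: when a saved library containing a
+// VitisAIRuntime blob is loaded back (e.g. via tvm.runtime.load_module),
+// TVM dispatches to this function based on the module's type key.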
+TVM_REGISTER_GLOBAL("runtime.module.loadbinary_VitisAIRuntime") + .set_body_typed(VitisAIRuntimeLoadFromBinary); + +void VitisAIRuntime::SaveToBinary(dmlc::Stream* stream) { + std::ostringstream sstream; + rt_mod_->serialize(sstream); + stream->Write(sstream.str()); + stream->Write(export_rt_mod_path_); + stream->Write(symbol_name_); + std::vector consts; + for (const auto& it : const_names_) { + consts.push_back(it); + } + stream->Write(consts); + + // If export_rt_mod_path_ member variable is set, we will additionally export the PyXIR + // runtime_module to the specified file + if (!export_rt_mod_path_.empty()) { + std::ofstream out_file(export_rt_mod_path_); + out_file << sstream.str(); + out_file.close(); + } +} + +PackedFunc VitisAIRuntime::GetFunction(const std::string& name, + const ObjectPtr& sptr_to_self) { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if ("__init_" + this->symbol_name_ == name) { + // The function to initialize constant tensors. + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size(), 1U); + this->initialized_ = true; + *rv = 0; + }); + } else if (this->symbol_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + // Initialize input tensors + DLTensor* inputs = args[0]; + std::vector in_tensors; + std::vector in_shape; + for (int i = 0; i < inputs->ndim; ++i) in_shape.push_back(inputs->shape[i]); + in_tensors.push_back(std::shared_ptr( + new pyxir::XBuffer(reinterpret_cast(static_cast(inputs->data)), 4, "f", + in_shape.size(), in_shape, false, false))); + + // Initialize output tensors + std::vector out_tensors; + for (unsigned i = 0; i < out_tensor_names_.size(); ++i) { + DLTensor* output_tensor = args[args.size() - out_tensor_names_.size() + i]; + std::vector out_shape; + for (int i = 0; i < output_tensor->ndim; ++i) out_shape.push_back(output_tensor->shape[i]); + void* output_data = reinterpret_cast(static_cast(output_tensor->data)); + out_tensors.push_back(std::shared_ptr( + new pyxir::XBuffer(output_data, 4, "f", out_shape.size(), out_shape, false, false))); + } + + // Execute the subgraph. + rt_mod_->execute(in_tensors, out_tensors); + }); + } else { + return PackedFunc(); + } +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/vitis_ai/vitis_ai_runtime.h b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.h new file mode 100755 index 000000000000..1092bc0ba27b --- /dev/null +++ b/src/runtime/contrib/vitis_ai/vitis_ai_runtime.h @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \brief Vitis-AI runtime that can run model + * containing only tvm PackedFunc. + * \file vitis_ai_runtime.h + */ +#ifndef TVM_RUNTIME_CONTRIB_VITIS_AI_VITIS_AI_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_VITIS_AI_VITIS_AI_RUNTIME_H_ +#include +#include +#include +// clang-format off +#include +#include +#include +// clang-format on +#include +#include + +namespace tvm { +namespace runtime { + +/*! + * \brief VAI runtime. + * + * This runtime can be accessed in various language via + * TVM runtime PackedFunc API. + */ +class VitisAIRuntime : public ModuleNode { + public: + /*! + * \brief Create VitisAI runtime from serialized XGraph + * \param symbol_name The name of the function. + * \param const_names The names of each constant in the sub-graph. + * \param serialized_rt_mod The serialized runtime module. + * \param export_rt_mod_path The path to the file to be used for exporting the + * PyXIR runtime module. + */ + VitisAIRuntime(const std::string& symbol_name, const Array const_names, + const std::string& serialized_rt_mod, const std::string& export_rt_mod); + + /*! + * \brief Create VitisAI runtime from serialized XGraph + * \param symbol_name The name of the function. + * \param xgraph_str serialized XGraph representation + * \param const_names The names of each constant in the sub-graph. + * \param target The Vitis-AI device target (e.g. DPUCADX8G, DPUCZDX8G). + * \param build_dir The directory to be used for Vitis-AI build files. + * \param work_dir The directory to be used for Vitis-AI work files. + * \param export_rt_mod_path The path to the file to be used for exporting the + * PyXIR runtime module. + */ + VitisAIRuntime(const std::string& symbol_name, const std::string& xgraph_str, + const Array const_names, const std::string& target, + const std::string& build_dir, const std::string& work_dir, + const std::string& export_runtime_module_path); + + /*! + * \brief Get member function to front-end. + * \param name The name of the function. + * \param sptr_to_self The pointer to the module node. + * \return The corresponding member function. + */ + virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self); + + /*! + * \return The type key of the executor. + */ + const char* type_key() const { return "VitisAIRuntime"; } + + /*! + * \brief Serialize the content of the pyxir directory and save it to + * binary stream. + * \param stream The binary stream to save to. + */ + void SaveToBinary(dmlc::Stream* stream) final; + + private: + /*! \brief The only subgraph name for this module */ + std::string symbol_name_; + /*! \brief The required constant names */ + Array const_names_; + /*! \brief The runtime module */ + pyxir::RtModHolder rt_mod_; + /*! \brief The XGraph input tensor names in the order as provided by TVM */ + std::vector in_tensor_names_; + /*! \brief The XGraph output tensor names in the order as provided by TVM */ + std::vector out_tensor_names_; + /*! \brief The file path for exporting the runtime module if set */ + std::string export_rt_mod_path_; + /*! 
\brief Whether constant tensors have been initialized */ + bool initialized_{false}; +}; + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_CONTRIB_VITIS_AI_VITIS_AI_RUNTIME_H_ diff --git a/tests/python/contrib/test_vitis_ai/__init__.py b/tests/python/contrib/test_vitis_ai/__init__.py new file mode 100644 index 000000000000..c5fe1539b059 --- /dev/null +++ b/tests/python/contrib/test_vitis_ai/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Infrastructure and tests for Vitis-AI codegen """ diff --git a/tests/python/contrib/test_vitis_ai/infrastructure.py b/tests/python/contrib/test_vitis_ai/infrastructure.py new file mode 100644 index 000000000000..df7836a37647 --- /dev/null +++ b/tests/python/contrib/test_vitis_ai/infrastructure.py @@ -0,0 +1,171 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
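+
+# Shared helpers for the Vitis-AI tests. A typical test builds a small Relay
+# module and checks both the partitioning and the produced runtime modules,
+# e.g.:
+#
+#     verify_codegen(mod, params=params, dpu_target="DPUCADX8G")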
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, W0611, C0413 + +"""Expose Vitis-AI test functions to the Python frontend""" + +import sys +import numpy as np + +import pytest + +pytest.importorskip("pyxir") +import pyxir.contrib.target.DPUCADX8G +import pyxir.contrib.target.DPUCZDX8G + +import tvm +from tvm import relay +from tvm import runtime +from tvm.relay import transform +from tvm.relay.op.contrib.vitis_ai import annotation +from tvm.relay.build_module import bind_params_by_name +from tvm.contrib.target import vitis_ai +from tvm.contrib import graph_runtime +from tvm.contrib import utils + + +def get_cpu_op_count(mod): + """Traverse graph counting ops offloaded to TVM.""" + + class Counter(tvm.relay.ExprVisitor): + def __init__(self): + super().__init__() + self.count = 0 + + def visit_call(self, call): + if isinstance(call.op, tvm.ir.Op): + self.count += 1 + + super().visit_call(call) + + c = Counter() + c.visit(mod["main"]) + return c.count + + +def skip_test(): + """Skip test if it requires the Vitis-AI codegen and it's not present.""" + if not tvm.get_global_func("relay.ext.vitis_ai", True): + print("Skip test because Vitis-AI codegen is not available.") + return True + return False + + +def build_module( + mod, + target, + dpu_target="DPUCADX8G", + params=None, + enable_vitis_ai=True, + tvm_ops=0, + vitis_ai_partitions=1, +): + """Build module for Vitis-AI codegen.""" + if isinstance(mod, tvm.relay.expr.Call): + mod = tvm.IRModule.from_expr(mod) + if params is None: + params = {} + + with tvm.transform.PassContext( + opt_level=3, config={"relay.ext.vitis_ai.options.target": dpu_target} + ): + if enable_vitis_ai: + mod["main"] = bind_params_by_name(mod["main"], params) + mod = annotation(mod, params, dpu_target) + mod = transform.MergeCompilerRegions()(mod) + mod = transform.PartitionGraph()(mod) + tvm_op_count = get_cpu_op_count(mod) + assert tvm_op_count == tvm_ops, "Got {} TVM operators, expected {}".format( + tvm_op_count, tvm_ops + ) + partition_count = 0 + for global_var in mod.get_global_vars(): + if "vitis_ai" in global_var.name_hint: + partition_count += 1 + + assert ( + vitis_ai_partitions == partition_count + ), "Got {} Vitis-AI partitions, expected {}".format( + partition_count, vitis_ai_partitions + ) + relay.backend.compile_engine.get().clear() + return relay.build(mod, target, params=params) + + +def update_lib(lib, cross_compile=None): + tmp_path = utils.tempdir() + lib_name = "lib.so" + lib_path = tmp_path.relpath(lib_name) + if cross_compile: + lib.export_library(lib_path, cc=cross_compile) + else: + lib.export_library(lib_path) + lib = runtime.load_module(lib_path) + return lib + + +def extract_vitis_ai_modules(module): + """Get the Vits-AI runtime module from llvm module.""" + return list( + filter(lambda mod: mod.type_key == "VitisAIRuntime", module.get_lib().imported_modules) + ) + + +def verify_codegen( + module, num_vitis_ai_modules=1, params=None, target="llvm", dpu_target="DPUCADX8G" +): + """Check Vitis-AI codegen against a known good output.""" + module = build_module(module, target, params=params, dpu_target=dpu_target) + vitis_ai_modules = extract_vitis_ai_modules(module) + + assert len(vitis_ai_modules) == num_vitis_ai_modules, ( + f"The number of Vitis-AI modules produced ({len(vitis_ai_modules)}) does not " + f"match the expected value ({num_vitis_ai_modules})." 
+    )
+
+
+def verify_result(
+    mod,
+    map_inputs,
+    out_shape,
+    result,
+    tol=1e-5,
+    target="llvm",
+    ctx=tvm.cpu(),
+    params=None,
+    dpu_target="DPUCADX8G",
+    tvm_ops=0,
+):
+    """Compare the output of the BYOC Vitis-AI flow against a reference result."""
+
+    lib = build_module(mod, target, params=params, dpu_target=dpu_target, tvm_ops=tvm_ops)
+    lib = update_lib(lib)
+    ctx = tvm.cpu()
+    rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
+
+    for name, data in map_inputs.items():
+        rt_mod.set_input(name, data)
+    rt_mod.set_input(**params)
+    rt_mod.run()
+
+    out_shapes = out_shape if isinstance(out_shape, list) else [out_shape]
+    results = result if isinstance(result, list) else [result]
+
+    for idx, shape in enumerate(out_shapes):
+        out = tvm.nd.empty(shape, ctx=ctx)
+        out = rt_mod.get_output(idx, out)
+        tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol)
diff --git a/tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py b/tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py
new file mode 100644
index 000000000000..4d5d5dc92c41
--- /dev/null
+++ b/tests/python/contrib/test_vitis_ai/test_vitis_ai_codegen.py
@@ -0,0 +1,336 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, W0611, C0413 + +"""Vitis-AI codegen tests""" + +import sys +import numpy as np + +import pytest + +pytest.importorskip("pyxir") +import pyxir.contrib.target.DPUCADX8G +import pyxir.contrib.target.DPUCZDX8G + +import tvm +from tvm import relay +from tvm.relay import transform +from tvm.relay.op.contrib.vitis_ai import annotation +from tvm.relay.build_module import bind_params_by_name +from tvm.contrib.target import vitis_ai + +from .infrastructure import skip_test, verify_codegen + + +def set_func_attr(func, compile_name, symbol_name): + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", compile_name) + func = func.with_attr("global_symbol", symbol_name) + return func + + +def test_conv2d(): + """Test conv2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + x = relay.var("x", shape=(1, 3, 224, 224)) + w = relay.const(np.zeros((16, 3, 3, 3), dtype="float32")) + y = relay.nn.conv2d(x, w, strides=[2, 2], padding=[1, 1, 1, 1], kernel_size=[3, 3]) + func = relay.Function([x], y) + params = {} + params["x"] = np.zeros((1, 3, 224, 224), dtype="float32") + params["w"] = np.random.rand(16, 3, 3, 3).astype("float32") + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_depthwise_conv(): + """Test depthwise_conv operator for Vitis-AI DPUCZDX8G-zcu104 target""" + + dtype = "float32" + ishape = (1, 32, 14, 14) + wshape = (32, 1, 3, 3) + data = relay.var("data", shape=(ishape), dtype=dtype) + weights = relay.var("weights", shape=(wshape), dtype=dtype) + depthwise_conv2d = relay.nn.conv2d(data, weights, kernel_size=(3, 3), padding=(1, 1), groups=32) + func = relay.Function([data, weights], depthwise_conv2d) + params = {} + params["weights"] = np.random.randn(32, 1, 3, 3).astype(dtype) + params["data"] = np.random.randn(1, 32, 14, 14).astype(dtype) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_bias_add(): + """Test bias_add operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + dtype = "float32" + ishape = (1, 32, 14, 14) + data = relay.var("data", shape=(ishape), dtype=dtype) + bias = relay.var("bias", relay.TensorType((32,), dtype)) + out = relay.nn.bias_add(data, bias) + func = relay.Function([data, bias], out) + params = {} + params["bias"] = np.random.randn(32).astype(dtype) + params["data"] = np.random.randn(1, 32, 14, 14).astype(dtype) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_relu(): + """Test relu operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + shape = (10, 10) + x = relay.var("x", shape=shape) + y = relay.nn.relu(x) + func = relay.Function([x], y) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, dpu_target="DPUCADX8G") + verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104") + + +def test_batchnorm(): + """Test batchnorm operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + data = relay.var("data", shape=(1, 16, 112, 112)) + bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32")) + bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32")) + bn_mmean = 
relay.var("bn_mean", relay.TensorType((16,), "float32"))
+    bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32"))
+    bn_output = relay.nn.batch_norm(data, bn_gamma, bn_beta, bn_mmean, bn_mvar)
+    func = relay.Function([data, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output[0])
+    params = {}
+    params["data"] = np.zeros((1, 16, 112, 112), dtype="float32")
+    params["bn_gamma"] = np.random.rand(16).astype("float32")
+    params["bn_beta"] = np.random.rand(16).astype("float32")
+    params["bn_mean"] = np.random.rand(16).astype("float32")
+    params["bn_var"] = np.random.rand(16).astype("float32")
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, params=params, dpu_target="DPUCADX8G")
+    verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_add():
+    """Test add operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    shape = (10, 10)
+    x = relay.var("x", shape=shape)
+    y = x + x
+    func = relay.Function([x], y)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, dpu_target="DPUCADX8G")
+    verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_global_avg_pool2d():
+    """Test global_avg_pool2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    shape = (10, 10, 7, 7)
+    x = relay.var("x", shape=shape)
+    y = relay.nn.global_avg_pool2d(x)
+    func = relay.Function([x], y)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, dpu_target="DPUCADX8G")
+    verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_avg_pool2d():
+    """Test avg_pool2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    shape = (10, 10, 10, 10)
+    x = relay.var("x", shape=shape)
+    y = relay.nn.avg_pool2d(x, pool_size=(3, 3))
+    func = relay.Function([x], y)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, dpu_target="DPUCADX8G")
+    verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_max_pool2d():
+    """Test max_pool2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    shape = (64, 512, 10, 10)
+    x = relay.var("x", shape=shape)
+    y = relay.nn.max_pool2d(x, pool_size=(3, 3))
+    func = relay.Function([x], y)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, dpu_target="DPUCADX8G")
+    verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_global_max_pool2d():
+    """Test global_max_pool2d operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    shape = (1, 512, 7, 7)
+    x = relay.var("x", shape=shape)
+    y = relay.nn.global_max_pool2d(x)
+    func = relay.Function([x], y)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, dpu_target="DPUCADX8G")
+    verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_upsampling():
+    """Test upsampling operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    shape = (64, 512, 10, 10)
+    x = relay.var("x", shape=shape)
+    y = relay.nn.upsampling(x, scale_h=2, scale_w=2)
+    func = relay.Function([x], y)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    verify_codegen(mod, dpu_target="DPUCADX8G")
+    verify_codegen(mod, dpu_target="DPUCZDX8G-zcu104")
+
+
+def test_conv2d_transpose():
+    """Test conv2d_transpose operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets"""
+
+    dshape = (1, 3, 18, 18)
+    kshape = (3, 10, 3, 3)
+    x = relay.var("x", shape=dshape)
+    w = relay.const(np.zeros(kshape, dtype="float32"))
+    y = relay.nn.conv2d_transpose(
+        x, w, channels=10, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1)
+    )
+    func = relay.Function([x], y)
+    params = {}
+    dtype = "float32"
params["x"] = np.random.uniform(size=dshape).astype(dtype) + params["w"] = np.random.uniform(size=kshape).astype(dtype) + mod = tvm.IRModule() + mod["main"] = func + verify_codegen(mod, params=params, dpu_target="DPUCADX8G") + verify_codegen(mod, params=params, dpu_target="DPUCZDX8G-zcu104") + + +def test_annotate(): + """Test annotation operator for Vitis-AI DPUCADX8G and DPUCZDX8G-zcu104 targets""" + + def partition(dpu_target): + data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) + weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32")) + bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32")) + bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32")) + bn_mmean = relay.var("bn_mean", relay.TensorType((16,), "float32")) + bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32")) + + conv = relay.nn.conv2d( + data=data, weight=weight, kernel_size=(3, 3), channels=16, padding=(1, 1) + ) + bn_output = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean, bn_mvar) + + func = relay.Function( + [data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output.astuple() + ) + mod = tvm.IRModule() + mod["main"] = func + params = {} + params["weight"] = np.random.rand(16, 3, 3, 3).astype("float32") + params["bn_gamma"] = np.random.rand(16).astype("float32") + params["bn_beta"] = np.random.rand(16).astype("float32") + params["bn_mean"] = np.random.rand(16).astype("float32") + params["bn_var"] = np.random.rand(16).astype("float32") + mod = annotation(mod, params, dpu_target) + + opt_pass = tvm.transform.Sequential( + [ + transform.MergeCompilerRegions(), + transform.PartitionGraph(), + ] + ) + + with tvm.transform.PassContext(opt_level=3): + mod = opt_pass(mod) + + return mod + + def expected(): + # function variables for conv2d + data0 = relay.var("data0", relay.TensorType((1, 3, 224, 224), "float32")) + weight0 = relay.var("weight0", relay.TensorType((16, 3, 3, 3), "float32")) + conv = relay.nn.conv2d( + data=data0, weight=weight0, kernel_size=(3, 3), channels=16, padding=(1, 1) + ) + + # function variables for batch_norm + bn_gamma0 = relay.var("bn_gamma0", relay.TensorType((16,), "float32")) + bn_beta0 = relay.var("bn_beta0", relay.TensorType((16,), "float32")) + bn_mmean0 = relay.var("bn_mean0", relay.TensorType((16,), "float32")) + bn_mvar0 = relay.var("bn_var0", relay.TensorType((16,), "float32")) + bn = relay.nn.batch_norm(conv, bn_gamma0, bn_beta0, bn_mmean0, bn_mvar0) + func0 = relay.Function( + [data0, weight0, bn_gamma0, bn_beta0, bn_mmean0, bn_mvar0], bn.astuple() + ) + func0 = set_func_attr(func0, "vitis_ai", "vitis_ai_0") + gv0 = relay.GlobalVar("vitis_ai_0") + mod = tvm.IRModule() + mod[gv0] = func0 + mod = relay.transform.InferType()(mod) + + # main function + data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) + weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32")) + bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32")) + bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32")) + bn_mmean = relay.var("bn_mean", relay.TensorType((16,), "float32")) + bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32")) + call0 = gv0(data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar) + mod["main"] = relay.Function([data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar], call0) + mod = relay.transform.InferType()(mod) + return mod + + partitioned_dpuczdx8g_zcu104 = partition("DPUCZDX8G-zcu104") + partitioned_dpucadx8g = partition("DPUCADX8G") + + ref_mod = 
expected()
+
+    assert tvm.ir.structural_equal(partitioned_dpuczdx8g_zcu104, ref_mod, map_free_vars=True)
+    assert tvm.ir.structural_equal(partitioned_dpucadx8g, ref_mod, map_free_vars=True)
+
+
+if __name__ == "__main__":
+    if sys.platform == "win32":
+        print("Skip test on Windows for now")
+        sys.exit(0)
+
+    test_conv2d()
+    test_depthwise_conv()
+    test_bias_add()
+    test_relu()
+    test_add()
+    test_max_pool2d()
+    test_global_max_pool2d()
+    test_batchnorm()
+    test_global_avg_pool2d()
+    test_avg_pool2d()
+    test_upsampling()
+    test_conv2d_transpose()
+    test_annotate()
diff --git a/tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py b/tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py
new file mode 100644
index 000000000000..030dda372cfe
--- /dev/null
+++ b/tests/python/contrib/test_vitis_ai/test_vitis_ai_runtime_cpu_part.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, W0611, C0413
+
+"""Vitis-AI runtime test for the CPU-only part
+
+This test verifies, as far as possible, whether a model can be correctly offloaded
+and executed for Vitis-AI acceleration. This entails:
+    - Annotating and partitioning the model for Vitis-AI acceleration
+    - Building a Vitis-AI PyXIR runtime module with on-the-fly quantization enabled
+    - Running the first iteration of the on-the-fly quantization flow. This will always
+      be run on the CPU, as the first N inputs (N is a configurable parameter) are used
+      for collecting calibration data for quantization.
+
+NOTE This is not a full end-to-end test, as that requires the full Vitis-AI docker
+environment and access to an FPGA instance. This test verifies the Vitis-AI flow as much
+as possible without requiring access to a dedicated docker environment and/or hardware
+setup.
+NOTE Quantization is not being tested (we need to be inside the Vitis-AI docker
+environment for that), but the internal representation used for quantization is
+generated and functionally tested (on CPU).
+""" + +import sys +import numpy as np + +import pytest + +pytest.importorskip("pyxir") +import pyxir.contrib.target.DPUCADX8G + +import tvm +import tvm.relay.testing +from tvm import relay + +from .infrastructure import skip_test, verify_result + + +def test_extern_vitis_ai_resnet18(): + """Test first part of Vitis-AI on-the-fly quantization runtime with ResNet 18 model""" + if skip_test(): + return + + dtype = "float32" + ishape = (1, 3, 224, 224) + mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=1) + ref_mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=1) + + ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + i_data = np.random.uniform(0, 1, ishape).astype(dtype) + + ref_res = ref_ex.evaluate()(i_data, **params) + verify_result( + mod, + {"data": i_data}, + (1, 1000), + ref_res.asnumpy(), + tol=1e-5, + params=params, + dpu_target="DPUCADX8G", + tvm_ops=4, + ) + + +if __name__ == "__main__": + if sys.platform == "win32": + print("Skip test on Windows for now") + sys.exit(0) + test_extern_vitis_ai_resnet18() diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index 6fc64966c0ab..9a009b6a4a78 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -44,3 +44,4 @@ echo set\(USE_TENSORFLOW_PATH \"/tensorflow\"\) >> config.cmake echo set\(USE_FLATBUFFERS_PATH \"/flatbuffers\"\) >> config.cmake echo set\(USE_ETHOSN /opt/arm/ethosn-driver\) >> config.cmake echo set\(USE_ETHOSN_HW OFF\) >> config.cmake +echo set\(USE_VITIS_AI ON\) >> config.cmake
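For reference, the test infrastructure added in this patch drives the new BYOC flow roughly as sketched below. This is an illustrative sketch only (not part of the patch): it mirrors build_module() in tests/python/contrib/test_vitis_ai/infrastructure.py, assumes PyXIR is installed and TVM was built with USE_VITIS_AI=ON, and uses relay.testing's ResNet-18 purely as a placeholder model:

    import tvm
    import tvm.relay.testing
    from tvm import relay
    from tvm.relay import transform
    from tvm.relay.op.contrib.vitis_ai import annotation
    from tvm.relay.build_module import bind_params_by_name
    from tvm.contrib.target import vitis_ai  # imported for its side effect: registers relay.ext.vitis_ai

    # Placeholder model; any Relay module with supported operators would do.
    dpu_target = "DPUCADX8G"  # or e.g. "DPUCZDX8G-zcu104" for the edge DPU
    mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=1)

    with tvm.transform.PassContext(
        opt_level=3, config={"relay.ext.vitis_ai.options.target": dpu_target}
    ):
        mod["main"] = bind_params_by_name(mod["main"], params)
        mod = annotation(mod, params, dpu_target)    # annotate operators supported by the DPU target
        mod = transform.MergeCompilerRegions()(mod)  # merge adjacent annotated regions
        mod = transform.PartitionGraph()(mod)        # split annotated regions into vitis_ai functions
        lib = relay.build(mod, "llvm", params=params)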