diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd84e67fce5a..62aa6db6e46d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -119,8 +119,9 @@ else(MSVC)
 endif(MSVC)
 
 # add source group
-FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc")
-FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h")
+FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "nnvm/src/*.cc")
+FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h"
+  "nnvm/src/*.h" "nnvm/include/*.h")
 assign_source_group("Source" ${GROUP_SOURCE})
 assign_source_group("Include" ${GROUP_INCLUDE})
 
@@ -169,6 +170,19 @@ endif(USE_VM_PROFILER)
 file(GLOB DATATYPE_SRCS src/codegen/datatype/*.cc)
 list(APPEND COMPILER_SRCS ${DATATYPE_SRCS})
 
+if(NOT MSVC)
+  file(GLOB COMPILER_VERILOG_SRCS src/codegen/verilog/*.cc)
+  list(APPEND COMPILER_SRCS ${COMPILER_VERILOG_SRCS})
+endif()
+
+file(GLOB_RECURSE NNVM_COMPILER_SRCS
+  nnvm/src/c_api/*.cc
+  nnvm/src/core/*.cc
+  nnvm/src/pass/*.cc
+  nnvm/src/compiler/*.cc
+  nnvm/src/top/*.cc
+)
+
 file(GLOB TOPI_SRCS
     topi/src/*.cc
 )
@@ -241,8 +255,6 @@ include(cmake/modules/LLVM.cmake)
 include(cmake/modules/Micro.cmake)
 include(cmake/modules/ANTLR.cmake)
 include(cmake/modules/contrib/BLAS.cmake)
-include(cmake/modules/contrib/CODEGENC.cmake)
-include(cmake/modules/contrib/DNNL.cmake)
 include(cmake/modules/contrib/Random.cmake)
 include(cmake/modules/contrib/MicroStandaloneRuntime.cmake)
 include(cmake/modules/contrib/Sort.cmake)
@@ -283,6 +295,7 @@ if(NOT USE_SGX STREQUAL "OFF")
   add_dependencies(tvm_runtime sgx_edl tvm_t)
   install(TARGETS tvm_t ARCHIVE DESTINATION lib${LIB_SUFFIX})
 endif()
+add_library(nnvm_compiler SHARED ${NNVM_COMPILER_SRCS})
 
 if(USE_THREADS)
   message(STATUS "Build with thread support...")
@@ -292,11 +305,14 @@ if(USE_THREADS)
   target_link_libraries(tvm Threads::Threads)
   target_link_libraries(tvm_topi Threads::Threads)
   target_link_libraries(tvm_runtime Threads::Threads)
+  target_link_libraries(nnvm_compiler Threads::Threads)
 endif(USE_THREADS)
 
 target_link_libraries(tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
 target_link_libraries(tvm_topi tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
 target_link_libraries(tvm_runtime ${TVM_RUNTIME_LINKER_LIBS})
+target_link_libraries(tvm_runtime_static ${TVM_RUNTIME_LINKER_LIBS})
+target_link_libraries(nnvm_compiler tvm)
 
 if (HIDE_PRIVATE_SYMBOLS AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
   set(HIDE_SYMBOLS_LINKER_FLAGS "-Wl,--exclude-libs,ALL")
@@ -306,6 +322,7 @@ if (HIDE_PRIVATE_SYMBOLS AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
   target_link_libraries(tvm ${HIDE_SYMBOLS_LINKER_FLAGS})
   target_link_libraries(tvm_topi ${HIDE_SYMBOLS_LINKER_FLAGS})
   target_link_libraries(tvm_runtime ${HIDE_SYMBOLS_LINKER_FLAGS})
+  target_link_libraries(nnvm_compiler ${HIDE_SYMBOLS_LINKER_FLAGS})
 endif()
 
 # Related headers
@@ -315,7 +332,10 @@ target_include_directories(
 target_include_directories(
   tvm_topi
   PUBLIC "topi/include")
-
+target_include_directories(
+  nnvm_compiler
+  PUBLIC "nnvm/include"
+  PUBLIC "topi/include")
 
 # Tests
 set(TEST_EXECS "")
@@ -354,6 +374,8 @@ add_custom_target(runtime DEPENDS tvm_runtime)
 install(TARGETS tvm DESTINATION lib${LIB_SUFFIX})
 install(TARGETS tvm_topi DESTINATION lib${LIB_SUFFIX})
 install(TARGETS tvm_runtime DESTINATION lib${LIB_SUFFIX})
+install(TARGETS tvm_runtime_static DESTINATION lib${LIB_SUFFIX})
+install(TARGETS nnvm_compiler DESTINATION lib${LIB_SUFFIX})
 
 if (INSTALL_DEV)
   install(
@@ -376,6 +398,11 @@ if (INSTALL_DEV)
     FILES_MATCHING
     PATTERN "*.h"
     )
+  install(
+    DIRECTORY "nnvm/include/." DESTINATION "include"
+    FILES_MATCHING
+    PATTERN "*.h"
+    )
 else(INSTALL_DEV)
   install(
     DIRECTORY "include/tvm/runtime/." DESTINATION "include/tvm/runtime"
@@ -388,4 +415,5 @@ endif(INSTALL_DEV)
 if(MSVC)
   target_compile_definitions(tvm PRIVATE -DTVM_EXPORTS)
   target_compile_definitions(tvm_runtime PRIVATE -DTVM_EXPORTS)
+  target_compile_definitions(nnvm_compiler PRIVATE -DNNVM_EXPORTS)
 endif()
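The new `nnvm_compiler` target above produces `libnnvm_compiler.so` next to `libtvm.so`. As a quick sanity check after building, a minimal sketch (the default in-tree `build/` directory is an assumption; adjust the path for out-of-tree builds):

```python
import ctypes
import os

# Assumed default CMake build directory for this repo.
lib_path = os.path.join("build", "libnnvm_compiler.so")
ctypes.CDLL(lib_path)  # raises OSError if the library or its dependencies are missing
print("loaded", lib_path)
```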
DESTINATION "include" + FILES_MATCHING + PATTERN "*.h" + ) else(INSTALL_DEV) install( DIRECTORY "include/tvm/runtime/." DESTINATION "include/tvm/runtime" @@ -388,4 +415,5 @@ endif(INSTALL_DEV) if(MSVC) target_compile_definitions(tvm PRIVATE -DTVM_EXPORTS) target_compile_definitions(tvm_runtime PRIVATE -DTVM_EXPORTS) + target_compile_definitions(nnvm_compiler PRIVATE -DNNVM_EXPORTS) endif() diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 99b6a35c576e..b402e72b5e53 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -69,7 +69,6 @@ We do encourage everyone to work anything they are interested in. - [Liangfu Chen](https://github.com/liangfu): @liangfu - [Wei Chen](https://github.com/wweic): @wweic - [Zhi Chen](https://github.com/zhiics): @zhiics -- [Neo Chien](https://github.com/cchung100m): @cchung100m - [Meghan Cowan](https://github.com/cowanmeg): @cowanmeg - [Balint Cristian](https://github.com/cbalint13): @cbalint13 - [Sergei Grechanik](https://github.com/sgrechanik-h): @sgrechanik-h @@ -121,3 +120,4 @@ We do encourage everyone to work anything they are interested in. - [Cody Hao Yu](https://github.com/comaniac) - [Chris Nuernberger](https://github.com/cnuernber) - [Shoubhik Bhattacharya](https://github.com/shoubhik) +- [Neo Chien](https://github.com/cchung100m) diff --git a/Jenkinsfile b/Jenkinsfile index ec79b9718a67..43118590629b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,7 +57,7 @@ tvm_multilib = "build/libtvm.so, " + "build/libvta_tsim.so, " + "build/libvta_fsim.so, " + "build/libtvm_topi.so, " + - tvm_runtime + "build/libnnvm_compiler.so, " + tvm_runtime // command to start a docker container docker_run = 'docker/bash.sh' @@ -309,15 +309,14 @@ stage('Integration Test') { } } }, - 'docs: GPU': { + 'legacy: GPU': { node('GPU') { - ws(per_exec_ws("tvm/docs-python-gpu")) { + ws(per_exec_ws("tvm/legacy-python-gpu")) { init_git() unpack_lib('gpu', tvm_multilib) timeout(time: max_time, unit: 'MINUTES') { - sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_docs.sh" + sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_legacy.sh" } - pack_lib('mydocs', 'docs.tgz') } } } diff --git a/Makefile b/Makefile index d34fbe4c9d88..d3ad1030b9f2 100644 --- a/Makefile +++ b/Makefile @@ -69,12 +69,14 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc cpplint: python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include; + python3 3rdparty/dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src; python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src \ examples/extension/src examples/graph_executor/src pylint: python3 -m pylint python/tvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc python3 -m pylint topi/python/topi --rcfile=$(ROOTDIR)/tests/lint/pylintrc + python3 -m pylint nnvm/python/nnvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc jnilint: diff --git a/NEWS.md b/NEWS.md index 438bd3059ef5..658848b3e3bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,899 +26,6 @@ Refer to the Roadmap issue for complete list on on-going version features. If you check in something that is not reflected in Roadmap issue, please reply to that issue so it can get added. -## 0.6 - -### Relay in Production -Relay is a functional, differentiable programming language designed to be an expressive intermediate representation for machine learning systems. 
-
-* Algebraic Data Types (ADT) support (#2442, #2575). ADT provides an expressive, efficient, and safe way to realize recursive computation (e.g., RNN). Refer to https://docs.tvm.ai/langref/relay_adt.html for more information.
-* Pass manager for Relay (#2546, #3226, #3234, #3191)
-* Most frameworks are now supported in Relay, including ONNX, Keras, Tensorflow, Caffe2, CoreML, NNVMv1, and MXNet (#2246).
-* Explicitly manifest memory and tensor allocations in Relay. (#3560)
-
-### Relay Virtual Machine
-The Relay Virtual Machine (Relay VM) is a new generation of runtime that strikes a balance between performance and flexibility when deploying and executing Relay programs. Previously, the graph runtime was able to utilize the fully static nature of the input graphs to perform aggressive optimizations such as fully static allocation and optimal memory reuse. When we introduce models which make use of control flow, recursion, dynamic shapes, and dynamic allocation, we must change how execution works.
-
-Relay VM is now usable and achieves decent performance for a variety of models and targets.
-
-* Design (#2810, #2915) and a first version of implementation (#2889)
-* Add VM runtime for Relay and compiler support (#3120, #3121, #2889, #3139)
-* Relay VM (pattern matching #3470, port to python #3391, serialization #3647)
-* Relay VM Profiler (#3727)
-* Support execution on devices for Relay VM (#3678)
-* [Relay][VM] Add more passes to VMCompiler (#4058)
-* [relay][vm] Separate VM runtime with executable (#4100)
-* Port VM, VM compiler, and Object into Python (#3391)
-* VM: Add AllocTensor instruction and better instruction printer (#3306)
-* [Relay][VM][Interpreter] Enable first-class constructors in VM and interpreter via eta expansion. (#4218)
-* [Relay][VM] Clean up the VM and VM profiler code (#4391)
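For reference, a minimal sketch of running a program through the VM executor with the v0.6-era API (`relay.create_executor` with kind `"vm"`; exact names are version-dependent assumptions):

```python
import numpy as np
import tvm
from tvm import relay

x = relay.var("x", shape=(2, 3), dtype="float32")
f = relay.Function([x], x + relay.const(1.0))
mod = relay.Module.from_expr(f)

# Compile and run via the Relay VM instead of the graph runtime.
ex = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(0), target="llvm")
print(ex.evaluate()(np.ones((2, 3), dtype="float32")))
```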
-
-### Training
-Relay is designed to natively support first-order and higher-order differentiation. The automatic differentiation infrastructure is now usable, and a number of operators with gradient support are available in the v0.6 release.
-
-* Higher order reverse mode automatic differentiation that works with control flow (#2496)
-* Higher order continuation passing style (#3456, #3485)
-* Relay gradient registration (clip #3509, `max_pool2d` and `avg_pool2d` #3601)
-* Relay AD algorithm (#3585)
-* Relay Training - allow gradient to return a tuple (#3600), numerical gradient check (#3630)
-* Improve AD for concatenate (#3729)
-* [Relay][Training] Add missing gradient check to gradient pass (#4169)
-* As a part of Relay's automatic differentiation system, we are adding primal gradients for Relay operators. Please refer to #2562 for tracking the progress.
-* Gradient for Conv2d (#3636)
-* Add gradient operators (#3857, #3894, #3901, #3915)
-* Add gradient for log-softmax (#4069)
-* [Relay][Training] Add gradient for Crossentropy (#3925)
-* [Relay][Training] Add and fix gradients (#4126)
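A minimal sketch of the AD entry point as it looked around v0.6 (`relay.transform.gradient`; the mode argument and the need to run type inference first are assumptions based on that era's API):

```python
import tvm
from tvm import relay

x = relay.var("x", shape=(4,), dtype="float32")
fwd = relay.Function([x], x * x)

mod = relay.Module.from_expr(fwd)
mod = relay.transform.InferType()(mod)  # gradient expects a typed function

# Returns a function computing (forward value, gradients w.r.t. inputs).
bwd = relay.transform.gradient(mod["main"], mod=mod, mode="first_order")
print(bwd)
```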
-
-### Quantization
-
-Low-bit inference is getting more and more popular as it benefits both performance and storage usage. TVM now supports two types of quantization. 1. Automatic quantization takes a floating-point model, does per-layer calibration, and generates a low-bit model. 2. TVM also imports pre-quantized models from Tensorflow and MXNet; a new dialect, QNN, is introduced to handle further lowering to normal operators.
-
-* Automatic Quantization
-  - Low-bit automatic quantization supported. (#2116). The workflow includes annotation, calibration and transformation.
-  - Refactor quantization codebase and fix model accuracy. (#3543)
-  - KL-divergence-based per-layer calibration. (#3538)
-  - Add option to select which convolution layers are quantized. (#3173)
-  - [Relay][Quantize] Integrate data-aware calibration into quantization. (#4295)
-* Pre-quantized model support (QNN operators and legalize pass).
-  - Add a legalize pass to Relay (#3672)
-  - Qnn Concatenate, quantize, dequantize and requantize operators (#3819, #3730, #3745, #3531)
-  - QNNtoRelay & QNNLegalize Pass utility (#3838, #3782)
-  - Requantize: Optimize lowering for some corner cases. (#3864)
-  - New quantized operator support: conv2d, add, dense (#3580, #3736, #3896, #3910)
-  - Do type checking for the input and kernel in the qnn conv2d (#3904)
-  - Legalize and AlterOpLayout for Intel int8. (#3961)
-  - Renaming tests to follow the Relay nomenclature. (#3975)
-  - Fix padding changes due to #3739 (#3989)
-  - Memorizing quantize node mapping to avoid duplicated simulated quantization (#3233)
-  - Infrastructure to support pre-quantized models (QNN) (#3971).
-  - [Relay][AlterOp] NHWC to NCHWc support for Pool, concatenate, sum. (#4059)
-  - [TOPI][x86] Cascade lake support. (#4123)
-  - [TOPI][x86] Legalize - Support int8xint8 convolution to use VNNI inst (#4196)
-  - Qnn dequantize with min max using Mxnet flavor to support Mxnet prequantized models. (#3945)
-  - Improve the lowering of Qnn Dense (#4213)
-  - Adding support for dequantizing from int32 to float32. (#4130)
-  - [QNN] Refactor fixed point multiplication in requantize (#4073)
-  - [Relay][Quantize] Use fixed point multiplications (#4160)
-  - Add support for quantized multiply to Relay (#4141)
-  - Use legalize to handle NHWC layout for `arm_cpu` (#3754)
-  - [QNN][Legalize] Specialize for Platforms w/o fast Int8 support (#4307)
-  - [QNN] Use Int16 upcast in Fallback Conv2D. (#4329)
-  - Retain input kernel scales in QNN dialect (#4292)
-  - [QNN] Lowering for Depthwise Convolution. (#4351)
-  - [QNN][TFLite] Parsing QNN Add op. Adding MobilenetV2. (#4142)
-  - [QNN][TFLite] Parsing TFLite quantized models. (#3900)
-  - Added tflite frontend support for quantized mean. (#4339)
-  - [Relay][Legalize] Legalize `conv2d_transpose` for NHWC (#4399)
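A minimal sketch of the automatic-quantization entry point circa v0.6 (`relay.quantize.qconfig`/`quantize`; the configuration knob shown and the toy network are illustrative assumptions, not the full calibration workflow):

```python
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 3, 32, 32), dtype="float32")
weight = relay.var("weight", shape=(8, 3, 3, 3), dtype="float32")
conv = relay.nn.conv2d(data, weight, kernel_size=(3, 3),
                       padding=(1, 1), channels=8)
mod = relay.Module.from_expr(relay.Function([data, weight], conv))

# Annotate, calibrate (here with a simple global scale), and transform.
with relay.quantize.qconfig(global_scale=8.0):
    qmod = relay.quantize.quantize(mod, params={})
print(qmod)
```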
-
-### Accelerator and Microcontroller Support
-
-TSIM is introduced to improve software and hardware integration and simulation accuracy. It integrates the hardware development process into the software stack. TSIM enables VTA to provide more accurate performance feedback, i.e., clock cycles, compared to the traditional functional model of a hardware accelerator. Moreover, a Chisel implementation for VTA is available, and it runs on top of TSIM.
-
-There has been a proliferation of resource-constrained and embedded devices that do not have operating systems or a mature software stack. MicroTVM is intended to support TVM on such bare-metal devices.
-
-* [TSIM] Enabling Cycle-Accurate Hardware Simulation for VTA (#3010, #3206, #3242)
-* Chisel implementation for VTA that runs on top of TSIM (#3258, #3347)
-* MicroTVM (#3227)
-* Relay Compilation + AutoTVM compatible operator libraries for VTA (#3135)
-* ChangeBatch pass for batched VTA compilation (#3656, #3660)
-* VTA fast simulator statistics (#3481)
-* TSIM improvements and fixes (#3505)
-* Chisel VTA enhancements and fixes (32bit support #3558, alu instruction generation #3592, coherence support #3593, separate types #3605, tensor issue/commit #3637, uop load request #3643, uop dma requests #3654)
-* VTA Runtime refactor for non-shared memory FPGAs (#3590)
-* VTA HLS codebase refactor for Ultra96 (#3496)
-* VTA support for batched inference (#3661)
-* VTA bitstream compilation for Intel FPGA (#3494)
-* TSIM: Introduce Virtual Memory for TSIM Driver (#3686)
-* Parallel TSIM hardware compilation with macOS and debug support (#3797)
-* Chisel: scale dram base address in hardware instead of runtime (#3772)
-* Chisel: run all unittests by default (#3766)
-* Chisel: improved Data Gen, Added ALU Test (#3743)
-* Chisel dependencies for TSIM CI (#3721)
-* Chisel: Added Module Unit Test Infrastructure (#3698)
-* Add ISA BitPat generation (#3891)
-* de10-nano driver (#3394)
-* Extending Vision model coverage compilation for VTA (#3740)
-* Conv2d transpose (deconvolution) operator support (#3777)
-* Support TLPP in function simulator. (#3555)
-* [VTA][Chisel] TSIM VTA Source Refactor (#4163)
-* [VTA][TSIM] Serial GEMM Application Added (#4082)
-
-### Rust Support
-Rust language support in TVM includes two parts. 1. The frontend wraps the current C API and exposes a Rust programming model. 2. The backend serves as an alternative to the C++ runtime. It provides a standalone WASM module and security support, e.g., SGX.
-
-* Rust frontend (#2292).
-* Unify types between bindings and pure Rust impl (#2616)
-* Rust: load syslib modules at compile time (#3274)
-* Rustify PackedFunc & Friends (#2969)
-* Rust DSO module (#2976)
-
-### Operator Support
-* A special operator `annotation.stop_fusion` to prevent it being fused with previous expressions (#2624).
-* `batch_matmul` supported (#2561).
-* `reverse_reshape` supported (#2503).
-* Faster-RCNN proposal operator for CUDA (#2420).
-* Vision operator for YOLO `yolo_reorg` (#1941).
-* `slice` operator for MXNet (#2662).
-* `arange` supported (#2621).
-* Vision operator `roi_align` (#2618).
-* `where` operator for MXNet (#2647).
-* Deformable conv2d (#2908)
-* Faster-RCNN Proposal OP (#2725)
-* ROI Pool operator (#2811)
-* Gluoncv SSD support on CPU (#2353)
-* shape, reverse, and sign op (#2749, #2800, #2775)
-* tile and repeat op (#2720)
-* logical operators (#2743, #2453)
-* stack op (#2729)
-* NCHWc upsampling (#2806)
-* clip and wrap mode support in take (#2858)
-* AlterLayout support for `intel_graphics` conv2d, depthwise conv2d (#2729, #2806)
-* Add foldr1 operator (#2928)
-* Add rsqrt operator (#2949)
-* `Gather_nd` exposed to relay (#2945)
-* `bitserial_conv2d` move to autotvm template and updates (#2819)
-* Port x86 NCHWc to AutoTVM for Task Extraction (#2664)
-* Implement relay `nn.bias_add` compute in C++ (#3027)
-* Rename output tensors for better readability (#3006)
-* int8 dense on CUDA & Dense op quantization (#2877)
-* Bitserial dense operators for CPU (#3051)
-* Enhance upsample operator to adapt onnx opset v9 (#2968)
-* Add adaptive pooling operator (#3085)
-* Add all operator (#3124)
-* Add cblas `batch_matmul` (#3210)
-* Add packing for int8 1x1 convolution and support the int8 group convolution on X86 (#2991)
-* Add op size (#3094)
-* x86 TOPI (`roi_align` #3475, `conv2d_transpose` #3491)
-* Intel INT8 (dilation in conv2d #3510, type checking #3516)
-* Reinterpretation of tensor elements (#3599)
-* Sparse-Dense for block-sparse multiplication (#3566)
-* Winograd matrix computation (#3553)
-* CUDA schedule for `pool_grad` (#3622), `group_conv2d` (#3663)
-* Bitserial operations conv2d, dense and bitpack (#3844)
-* Improve numeric gradient check (#3856)
-* Resize rework (#3788)
-* Improve `conv2d_transpose` CUDA schedule template (#3796)
-* SpaceToDepth and MirrorPad Operators (#3718)
-* Add variance and layer norm op (#3700)
-* Add `sparse_transpose` for Square CSR matrices (#3707)
-* TOPI: Memoize winograd matrix (#3687)
-* New TOPI operators: `erf`, `logical_and`, `logical_or`, `logical_not`, `isnan` (#3702, #3929, #3979)
-* Improve `ceil_divide` in tile/split (#3842)
-* [Relay][Frontend][TF] Add tensor array ops (#3798, #4309)
-* [TF][Op] Op where (#4045)
-* [TOPI] Add op argwhere (#3994)
-* [Relay] `crossentropy_with_logits` and its gradient (#4075)
-* [Relay][Op] Enhance Upsample Operator to support float scales (#4206)
-* [Relay][Op] Add instance norm op (#4004)
-
-### Frontend and User Interface
-* Frontend darknet (#2773)
-* Support tf.gather (#2935)
-* Support tf.where (#2936)
-* Adding ADD operator to tflite frontend for compiling the MobileNetV2 (#2919)
-* Support SpaceToBatchND/BatchToSpaceND in Tensorflow frontend (#2943)
-* Simplify TF `get_output_names` (#3025)
-* TF Tile Round Sign Pow Exp Reverse (#2960)
-* Gluoncv SSD support on the GPU (#2784)
-* Allow an op as loop var in Tensorflow (#3056)
-* Add `FULLY_CONNECTED` op into tflite frontend (#3019)
-* Add MXNet converter for RNN layer ops (#3125)
-* Add log op in tf frontend (#3111)
-* Add SoftPlus Sqrt in Tensorflow frontend (#3187)
-* Add onnx elemwise greater/less (#3186)
-* Add PlaceholderWithDefault (limited) implementation in TensorFlow (#3184)
-* Support `tf.math.reduce_prod` (#3166)
-* Better shape inference in TensorFlow Frontend (#3176)
-* Get list of unsupported ONNX operators (#2995)
-* Implement ONNX MaxPool-v8 and MaxPool-v10 (#3114)
-* Convert TFLite NCHW to NHWC (#3141)
-* Add Crop op converter (#3241)
-* TFLite frontend operator support: PAD, RESIZE, MUL, Reduce (min, max, mean, prod), LOGISTIC, elemwise operators (Sub, Divide, Power, Max, Min) (#3310, #3370, #3304, #3421, #3313, #3357)
-* Tensorflow frontend operator support: Abs, FloorDiv, GatherND, LeftShift, LogSoftmax, Max, Min, Mod, RightShift, ZerosLike, TruncateMod, Neg, ClipByValue, ResizeNearestNeighbor (#3270, #3211, #3393)
-* TFLite: Add `fused_activation_function` for ADD, SUB, MUL, DIV (#3372)
-* Support bidirectional RNN layer for MXNet (#3397)
-* TFLite operator support (pack #3521, split #3520)
-* Keras operator support (permute, softmax #3618)
-* TF operator support (BatchMatMul #3634)
-* TFLite frontend operator support: tile, transpose (#3814, #3705)
-* ONNX frontend operator support: PReLU for NNVM, Not, Sign, Equal (#3813, #3836, #3760)
-* Keras frontend operator support: Dot (#3668)
-* Add more cases to Keras `_convert_reshape` (#3846)
-* TensorFlow frontend operator support: OneHot, log1p, cos, sin (#3781, #3614)
-* Support BatchMatMul with input dimensions larger than 3 for TensorFlow (#3732)
-* ONNX new operator support: And, Tile, Erf (#3878, #3941, #3988)
-* MXNet new operator support: pad, conv1d, deconv1d (#3739)
-* TFLite new operator support: `batch_to_space_nd`, `space_to_batch_nd`, tanh, greater, relu (#3850, #3996, #3963, #4022)
-* TFLite: Support depthwise convolution multiplier greater than 1 (#3922)
-* Keras: Fix a case missed by the ReLU converter (#3917)
-* Keras: frontend upsample and 1 channel conv2d fixes (#3937)
-* Tensorflow: Convert scalar Const into tvm.relay.const (#3885)
-* TensorFlow: Add support for SquaredDifference (#3930)
-* [relay][frontend] clean up tf frontend (#3710)
-* [Relay][Topi][TensorFlow][ONNX][Lang] Add support for Any op (#4205)
-* [Relay][Frontend][ONNX] Add support for op Where (#4184)
-* [Relay][TopHub] Add switch to disable TopHub download (#4015)
-* Add parser support for CAST tflite operator (#4096)
-* Add parser support for `zeros_like` tflite operator (#4042)
-* Add parser support for SUM tflite operator (#4182)
-* Add support for tf.assert (as no-op) and `tf.no_op` to TF Relay frontend. (#4172)
-* [Relay][Frontend][ONNX] New Operators and Opsets to Support BERT (#4197)
-* [Relay][Params] Add APIs for storing and retrieving parameters from individual functions. (#4194)
-* Add `build_create_shared_func` to tvm/contrib/cc.py (#3840)
-* Tensorflow saved model support for NNVM (#2493) and Relay (#2586).
-* Introduced `HybridModule` (#2477) so that a normal TVM schedule can be compiled to the hybrid target, run, and dumped to Hybrid Script.
-* [Relay][Frontend][Tensorflow] add operator `add_n` (#4181)
-* [Relay][Frontend][Tensorflow] StopGradient (#4238)
-* [Relay][Frontend][ONNX] Add support for broadcasting to Where and MatMul (#4267)
-* [TFLite] Support PRelu (#4298)
-* [Frontend][MxNet] support mxnet cond op (#4311)
-* Add support for `quant.mul` operator in tflite frontend (#4283)
-* [Relay][Frontend][ONNX] operator support: DepthToSpace, SpaceToDepth (#4271)
-* [Relay][Frontend][Tensorflow] Add `conv2d_transpose`. (#4300)
-* [Frontend] Add TensorFlow FloorMod (#4308)
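As a concrete entry point, a minimal sketch of importing a model through one of these frontends (the MXNet importer shown here mirrors the `relay.frontend.from_mxnet` call used elsewhere in this diff; the model choice is illustrative):

```python
from mxnet.gluon.model_zoo.vision import get_model
from tvm import relay

block = get_model("mobilenet0.25", pretrained=True)
shape_dict = {"data": (1, 3, 224, 224)}
mod, params = relay.frontend.from_mxnet(block, shape_dict)
print(mod["main"])  # the imported network as a Relay function
```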
-
-### Runtime and Backend Support
-* Make external library extend TVM's NDArray more easily (#2613).
-* Improvements for NNPACK integration, including CI tests and winograd (#2846, #2868, #2856, #2721)
-* Improvements for OpenCL runtime (#2741, #2737)
-* GraphRuntime: Enable sharing parameters of a model among multiple threads (#3384)
-* Android runtime argsort support (#3472)
-* GraphRuntime enhancements (`set_input_zero_copy` #3416)
-* A new minimal runtime implementation (~12kb .text on ARMv7/x86) for TVM.
-* Add AVX512VNNI support for TVM (#3388)
-* Enable miopen Group Convolution (#3987)
-* Minimal runtime (~12kb .text on ARMv7/x86) for subset of TVM models (#3567)
-* [RUNTIME] Separate runtime related contrib into runtime/contrib (#4207)
-* [topi] add ARM v8.2 udot (uint8) support (#3978)
-* [codegen] Add multiple operands and function support when using fp16 compilation (#4056)
-* [TOPI] Added support for Mali Bifrost target (#4047)
-* [topi] enable fp16 sort for arm (#4084)
-* Add OpenOCD Low-Level Device (RISC-V Support) (#3756)
-* Add wave 32 bc for AMD ROCm backend (#3984)
-* [RUNTIME] Support C++ RPC (#4281)
-* [TOPI][OP] Support Faster-RCNN Proposal OP on CPU (#4297)
-* [TVM][RUNTIME] A minimum example to generate external library wrappers for DSOModule (#4280)
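For context, a minimal end-to-end sketch of the standard graph-runtime flow these items improve (v0.6-era `tvm.contrib.graph_runtime` API; names are version-dependent):

```python
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_runtime

# Build a tiny Relay function to LLVM.
x = relay.var("x", shape=(1, 8), dtype="float32")
func = relay.Function([x], relay.nn.softmax(x))
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(func, target="llvm")

# Deploy it through the graph runtime.
module = graph_runtime.create(graph, lib, tvm.cpu(0))
module.set_input("x", np.random.rand(1, 8).astype("float32"))
module.run()
print(module.get_output(0).asnumpy())
```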
-
-### Language and Architecture
-* Support custom datatypes (#2900)
-* Add the acc16 intrinsic support (#3081)
-* Handle float16 constants & fix BatchNorm (#3260)
-* Structural hash - incorporate the var type into its hash (#3267)
-* Relay C++ Build Module (#3082, #3144, #3174)
-* Enable decorating python class to be a Relay Pass (#3364)
-* Make Partial Eval support interprocedural optimization and termination check. (#3033)
-* Introduce feature manager to Relay. (#3236)
-* Use Relay parser to define the Relay prelude (#3043)
-* Mechanism to detect incomplete expression match in Relay (#3203)
-* EQ/NE operators support for StringImm expressions (#3283)
-* Introduce CanonicalizeCast pass to formally reduce memory overhead introduced by fused cast operations (#3280)
-* Support overloading comparison operations in Relay (#3168)
-* Mac count: provide a pass to calculate the number of multiply-accumulate operations in a network (#2609).
-  - support for `conv_2d_transpose` (#3469)
-  - [Relay][Pass] Count MAC for BatchMatMul (#4157)
-  - Detect depthwise conv2d in `mac_count` pass (#3083)
-* Add Tuple pattern (#3596)
-* Text format support for ADTs and prelude (#3863, #3939)
-* Add new IR pass CombineParallelDense (#3862)
-* Add support for `EQ` op in the deduce bound and the loop partition (#3775)
-* Introduce base-class IRMutatorWithAnalyzer (#3969)
-* Define more standard global functions in the prelude of relay program, including foldr1, hd, tl, nth, list update (#2928, #2917, #2771, #2866)
-* Add SkipVectorize pass (#3222, #3228)
-* [Relay][Pass] Add pass to remove unused functions in relay module (#4334)
-
-### Symbolic shape enhancement
-* Add shape function for symbolic shape. It enables certain cases for broadcast with symbolic shapes. (#3606)
-* [tvm][any] broadcast with values other than one (#3967)
-* Symbolic shape support (broadcast op #3389)
-* Support reshape for dynamic shape in tf converter (#4185)
-* Runtime Shape Functions (#4179)
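A minimal sketch of what the symbolic-shape work enables, using `relay.Any()` for an unknown dimension (v0.6-era API; treat exact names as assumptions):

```python
import tvm
from tvm import relay

# First dimension unknown at compile time.
x = relay.var("x", shape=(relay.Any(), 4), dtype="float32")
y = relay.var("y", shape=(1, 4), dtype="float32")
f = relay.Function([x, y], relay.add(x, y))  # broadcast over a symbolic dim

mod = relay.Module.from_expr(f)
mod = relay.transform.InferType()(mod)
print(mod["main"].ret_type)  # e.g. Tensor[(?, 4), float32]
```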
-
-### Language and Architecture
-* An optimization pass to eliminate expressions which have the same functionality and same inputs (#2639).
-* Refactor text printer to add stream-like API and FunctionType support (#2605, #2882)
-* Build a scaffold for structured error handling (#2838). The new mechanism detects and rewrites error messages so that C++ and Python stack traces are unified and not redundant. Guidelines and conventions for error handling are also discussed.
-* Higher order reverse mode automatic differentiation that works with control flow (#2496)
-* Integer arithmetic analyzers, including modular set analysis, const integer bound analysis and rewrite simplifier (#2904, #2851, #2768, #2722, #2668, #2860)
-* Improve operator fusion for TupleGetItem in relay (#2914, #2929)
-* Compute FLOP of autotvm template for int8 models (#2776)
-* Common subexpression elimination pass in Relay (#2639)
-* Improve quantization in Relay (#2723)
-* Refactor `build_func` in measure module of autotvm to better support cross compiler (#2927)
-* Quantize all fields of concatenate (#2913)
-* Remove stale verilog generator (#2964)
-* Improve Relay printing (#2984, #2881, #3030, #3041)
-* Add `min_num_branches` option in CombineParallelConv2D (#2961)
-* Add `expr_visitor`, fix `expr_functor` exponential blowup problem (#2988)
-* Support Deriving channels when it is not provided in AlterLayout. (#2972)
-* Enhance BoundDeduce algorithm (#2795)
-* Enhance loop partition algorithm (#2956)
-* Better tuple fusion implementation (#3092)
-* Enhance fusion rule that starts from elemwise and broadcast (#2932)
-* Remove `on_device` op after annotation in heterogeneous pass (#3204)
-* Improve canonical and rewrite simplifier (#3132, #3149)
-* Capture constant external python variables in hybrid script (#3157)
-* Remove Peano nats from the prelude (#3045)
-* Macro to define NodeRef methods, constructor style example (#3224)
-* Consistent RAII scoping API (#3231)
-* Register all operators' attributes in Python (#3175)
-* Add module support in relay.build (#3424)
-* Relay pass infrastructure improvement (#3319, #3336, #3430, #3353)
-* Migrate Relay passes to pass manager (#3323, #3289, #3251, #3406)
-* Improve heterogeneous annotation by using visitor (#3261)
-* Support export ADT value in Python (#3299)
-* Extend TensorComputeOp to allow scalar inputs (#3300)
-* Transitioning low-level IR away from HalideIR (#3533, #3535)
-* Tags for ADT constructors (#3369)
-* IR dumping for debugging (#3493)
-* Pretty printer and parser roundtrip (#3460, #3536)
-* Relay type checking (conv2d weight dimension #3511, any shape #3221)
-* Relay Module enhancements (remove free variables #3476)
-* LLVM DWARF debug information (#3420)
-* Printer for Layout/BijectiveLayout (#3582)
-* Type inference escape hatch (#3571)
-* Making iterators compatible with constructors of STL containers (#3624)
-* Moving Conv, Dense, Concatenate InferTypes to header (#3783)
-* Simplify casts of constants 0 and 1 (#3758)
-* Conditionally replace reduction init axis. (#3408)
-* Improve Partial Evaluator (#3749, #3703)
-* Strict mode in Relay pattern matching (#3620)
-* Quit and clean when TVM is interrupted (#3640)
-* Make Type Relation catch more errors (#3899, #3699)
-* Refactor the way we interface between different modules of Relay (#3906)
-* Introduce `schedule_injective_from_existing` and unify external schedules for all targets (#3983)
-* [NODE][REFACTOR] Refactor reflection system in node. (#4189)
-* Unify node system and object (#4161, #4115, #4128)
-* [Relay][Refactor] Rename Datatype to ADT (#4156)
-* [Relay] fix exponential blowup in interpreter (#3559)
-* [Relay] Fix memory leak in the interpreter (#4155)
-* [rpc] use callback func to do send & recv (#4147)
-* Add `lift_if_then_else` pass to improve loop partitioning (#3865)
-* Decrease the complexity of CalcDep from exponential to linear (#4053)
-* [IR] Make iterators compatible with constructors of STL containers (#3624)
-* [Relay][Pass] Avoid FoldConstant folding some ops (#4245)
-* [Relay][Prelude] More dtypes support in `tensor_t` (#4233)
-* [NODE][REFACTOR] Rename IRFunctor->NodeFunctor, use func pointer (#4247)
-* [RUNTIME][REFACTOR] Use object protocol to support runtime::Module (#4289)
-* [CodeGen] Add build config option `disable_assert` to control whether to generate assert. (#4340)
-
-### Arithmetic Analysis
-* Formalize Integer Arithmetic Analysis (RFC: #2588). It aims to perform better context-dependent analysis, bound analysis, centralized arithmetic logic, and arithmetic simplification. (#3272, #3463, #3464, #3368, #3503, #3504, #3502, #3479, #3568)
-* Introduce FloorDiv/Mod, TruncDiv/Mod, and IndexDiv/Mod for better arithmetic simplification (#3976, #3986, #4000, #4014, #4008, #4028)
-* [ARITH] Use floordiv for the deduce bound (#4025)
-* [Simplifier] Rewrite simplification rule to eliminate unnecessary conditionals. (#4076)
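A small sketch of the simplifier at work on an index expression, via the v0.6-era `tvm.ir_pass.Simplify` helper and the newly introduced `tvm.floordiv` (the exact namespaces moved in later releases, so treat both as assumptions):

```python
import tvm

n = tvm.var("n")
expr = tvm.floordiv(n * 4 + 8, 4)  # floordiv comes from the work listed above
print(tvm.ir_pass.Simplify(expr))  # expect something equivalent to n + 2
```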
-
-### Runtime and Backend Support
-* Provide error msg for failure function call in tvm4j (#2967)
-* Expose backtrace symbols in Debug mode (#3001)
-* C++ GraphRuntimeCodegen, Deprecate Python2 (#2986)
-* Ensure interpreted functions can take values that are not TensorValues (#3015)
-* Make OpenCL runtime Compatible with OpenCL2.0 (#2897)
-* Handle INF and NAN in CUDA and OpenCL (#3194)
-* Update debug graph runtime for more precise layerwise timing (#3232)
-* ROCM support (llvm printing #3662, ld.lld finding #3664, save to file #3665)
-* Threadpool: make `spin_count` configurable (#3577)
-* RPC worker children termination (#3669)
-* Vulkan runtime reimplementation (stream approach) (#3849)
-* Vulkan backend supports Call::reinterpret and vectorized comparison (#3795)
-* Support MKL on Windows (#3837)
-* Vulkan IR builder (bool to float #3513)
-* Force `code_object_v2` for amd gpu backend (#4099)
-* [Codegen][cuda-fp16] fallback to fp32 simulation when cuda arch < sm53 (#4268)
-* Fix and refactoring for AMD gpu backend (#4305, #4321, #4341, #4342)
-* [Debugger] Sorting op-time breakdown for quicker analysis. (#4352)
-* [nvcc] enable multiple arch in one fatbin (#4377)
-* [RUNTIME] Move module export to the function level. (#4405)
-
-### Frontend and User Interface
-* Relay now supports saving and loading parameter dictionaries. (#2620)
-* Add `max_num_threads` to Hybrid Script, which allows users to get the max number of threads for GPU targets (#2672).
-* Improvements for tensorflow frontend (#2830, #2757, #2586), including decompiling tf control flow (#2830)
-* Improvements for mxnet frontend (#2844, #2777, #2772, #2706, #2704, #2709, #2739)
-* Improvements for keras frontend (#2842, #2854)
-* Improvements for DarkNet frontend (#2673)
-* Improvements for ONNX frontend (#2843, #2840)
-* Better profile result dump in Chrome Tracing format (#2922, #2863)
-* Unified error handling in NNVM and Relay frontends (#2828)
-* Improve NNVM to Relay conversion (#2734)
-* Remove `input_0d_mismatch` special handling for TF Frontend (#3087)
-* Bumped ONNX version from 1.1.0 to 1.4.1 (#3286)
-* Simplify parameter handling in Tensorflow frontend (#2993)
-* CoreML improvement for image scaler and padding (#3800)
-* Clean up TensorFlow frontend (#3710)
-* Darknet: Solve tvm parsing darknet resnext failure bug (#3778)
-* Frontend changes in `get_workload` (#3483)
-* [TF][Relay][Op] Pass module when infer shape (#4287)
-
-### AutoTVM
-* Support override in `register_topi_compute` and `register_topi_schedule`. (#3292)
-* Improve graph tuner dealing with Tuple. (#3649)
-* Add AutoTVM template for conv2d Intel int8. (#3955)
-* Add AutoTVM template for dense on CUDA. (#3923)
-* Add AutoTVM template for conv2d on Intel graphics. (#3839)
-* Optimizing autotvm task extraction speed. (#4138)
-* [AutoTVM] Add `batch_matmul` to tunable operations. (#4242)
-* Selecting tuning templates when extracting task. (#4338)
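For orientation, a compressed sketch of the AutoTVM flow these changes touch, assuming `mod` and `params` come from a frontend importer as shown earlier (API names reflect the v0.6 era; `tuning.log` is a placeholder file of tuning records):

```python
from tvm import autotvm, relay

# Extract tunable conv2d tasks from a Relay program.
tasks = autotvm.task.extract_from_program(
    mod["main"], target="llvm", params=params, ops=(relay.op.nn.conv2d,))

# ... tune `tasks` and append the measured records to tuning.log ...

# Compile, applying the best records found during tuning.
with autotvm.apply_history_best("tuning.log"):
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(mod, target="llvm", params=params)
```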
-
-### Performance Improvements
-* Enable AlterOpLayout pass for x86 on Relay (#2585). It is essential to get decent performance for CNN-based models on Intel CPUs.
-* Better intrinsic matching for x86 CPU and ARM CPU, including variants of vcvtph2ps and vmlal.s16 (#2925, #2748).
-* Improve injective schedule for ARM CPU (#2801)
-* Core functionality for Graph tuner (#2184)
-* Fast tanh implementation (#3255)
-* Improve multi-batch conv2d on x86 (#3308)
-* Improve `non_max_suppression` and `get_valid_counts` for CPU (#3305)
-* Improve `roi_align` performance for CPU (#3296)
-* Improve `nms` and `get_valid_count` performance (#3282)
-* Graph tuner for multiple subgraph (#3490)
-* For sparsity, fast transpose for square CSR matrices has now been merged, which is a good starting point for more general sparse type support.
-* Reduce `set_input` and `set_input_zero_copy` overhead (#3805)
-* Parallelize batch axis for ARM (#3931)
-* Support cuBLAS BatchMatMul (#3936)
-* Add AVX512VNNI support for TVM (#3388)
-* Enhance tuning space of split (#3949)
-* Enable miopen transpose convolution and fp16 support (#3952)
-* Improve `conv2d_transpose` schedule on X86 and CUDA (#3948)
-* Expose llvm.nearbyint intrinsic (#4001)
-* [TOPI][X86] Pool operator parallel support. (#4090)
-* Improve layout for several operators (#4103, #4040, #4080)
-* [Relay][VM] Fix constant folding issue in VM compiler (#4077)
-* [relay][vm] Reuse allocated device memory (#4170)
-* [Runtime] Enable option to use OpenMP thread pool (#4089)
-* [PERF] Parallelize reduction for CPU (#4158)
-* [TOPI] Tunable Template for Conv2D HWCN on CUDA (#4168)
-* [TOPI] Add valid auto tvm for Intel Graphics (#4078)
-* [TOPI] FIFO buffer op, to accelerate sequence modeling with dilated convolutions (#4039)
-* TensorCore Support using Intrinsic (#4136)
-* Auto TensorCore CodeGen (#4234)
-* Use cblas for dense and `batch_matmul` (#3787)
-* Update TOPI softmax compute and CPU schedule (#3680)
-* [VTA] Performance optimize, remove unnecessary contiguous memory use. (#4246)
-* [TOPI][AlterOpLayout][ARM] Enabling NHWC to NCHW layout transformation. (#4249)
-* [ThreadPool] Solve thread transitions issue (#4344)
-
-### Documentation
-* Tutorials for deep learning frameworks support in Relay.
-* Tutorial for running AutoTVM with Relay (#2594).
-* Document for Algebraic Data Types (#2575).
-* Move NNVM tutorials to Relay (#2783, #2785, #2766, #2693)
-* Documentation on operators (#2761)
-* Add gradient operator tutorial docs (#2751)
-* Add compiler pass tutorial docs (#2746)
-* Add Android Tutorial (#2977)
-* Developer documentation for InferBound pass (#3126)
-* Add missing targets to `target_name` documentation (#3128)
-* Various documentation improvements (#3133)
-* Add VM doc (#3188)
-* Update documents for TSim (#3409, #3318, #3302, #3343, #3206)
-* Improve tvm4j document describing LLVM support (#3404)
-* Tutorial migration to Python3 (#3498)
-* Android RPC README (#3500)
-* Documentation for Relay opcode (#3522)
-* Tutorial for pass manager (#3515)
-* Minimum version of Python in docs (#3588)
-* Relay pass infra (#3583)
-* X86 Autotune tutorial improvements (#3609)
-* YOLOv3 tiny Darknet tutorial (#3674)
-* SSD doc to avoid confusion (#3677)
-* Tutorial: Build a Graph Convolutional Network on TVM (#3681)
-* Add docs for analysis namespace (#3985)
-* [tutorial] Relay pass infra tutorial (#4083)
-* [DOCS] Add TensorFlow frontend docs (#4154)
-* Tutorial: update Building a Graph Convolutional Network tutorial (#4060)
-* [Docs] Add dependency of compilation with LLVM (#4117)
-* [Documentation] Fix example code in comment of `tvm.build_module.build()` (#4195)
-* TSIM: add virtual memory support to examples (#3868)
-* Fix the TF tutorial to run against TF2.0 and TF1.x (#4104)
-* Add `topi.nn.fifo_buffer` to TVM doc (#4343)
-* License statement (#4345, #4359, #4401, #4402, #4408, #4409, #4410, #4414, #4431)
-
-### Build and Test
-* Increase the robustness of CI tests (#2841, #2798, #2793, #2788, #2781, #2727, #2710, #2711, #2923)
-* Improve conda build (#2742)
-* Add caffe2 nnvm frontend to CI (#3018)
-* Use bridge network and expose port on macOS when launching docker image (#3086)
-* Run DarkNet tests (#2673)
-* Add file type check (#3116)
-* Always run cpptest during build to ensure library correctness (#3147)
-* Handle more file types in ASF header (#3235)
-* Add `test_forward_ssd_mobilenet_v1` to `tflite/test_forward` (#3350)
-* Add Azure build pipeline (#3458, #3459)
-* Update ci-gpu to v0.52 (#3374)
-* Enable more visible symbols by default (#3365)
-* Separate out legacy as a stage in CI (#3337)
-* Simplify build script, remove python 2 support (#3419)
-* Ignore rust cargo lock files in rat (#3314)
-* Improve CUDA Conda package build (#3281)
-* Update CMakeLists.txt to be more flexible to find third-party libraries (#3354)
-* Docker update conda package (#3344), requests and pillow (#3495), Android demo (#3499), rat install (#3527), ARM support (#3546), LLVM (#3590)
-* Relay-to-Python testing (#3156)
-* Code refactoring/remove (#3523, #3667)
-* Zero-rank testing (#3612)
-* CMake compilation (#3611, #3650, google test #3628)
-* Standalone wheel build for TOPI (#3657)
-* Fixing performance issues in PassUpDomain when fusing and splitting axes (#3073)
-* conda recipe (#3791)
-* Allow users to specify download directory (#3803)
-* Update docs for installation for CUDA (#3832)
-* Update `hybrid_script.rst` (#3799)
-* Acknowledge Halide attributions (#3824)
-* Add psutil dependency (#3780)
-* Temporary disable rust test (#3809)
-* Solve occasional CI issue when pad value is all 0 (#3801)
-* Towards TSIM CI testing (#3704)
-* Use pip3 for python3 (#3742)
-* Update docker image `ci_cpu,i386` to include verilator (#3738)
-* Remove sccache from Rust install (#3728)
-* Update dmlc-core to the latest commit (#3716)
-* Update GPU docker (#3709)
-* Add an option to build with -pthread (#3671)
-* Add DGL to `{ci_gpu, demo_cpu, demo_gpu}` docker images (#3692)
-* Use pytest instead of nosetest (#3524)
-* Enable NHWC of `relay.testing.mobilenet` (#3886)
-* Add .hsaco save/load for `tensor_expr` Tutorial (#3852)
-* Support LLVM trunk (#3907)
-* Remove GTest cmake flag from install docs (#3953)
-* Allow `USE_LLVM` to take extra arguments (#3954)
-* [CI] Pin NNPack pthreadtools version (#4152)
-* [TOPI] Fix flaky testcase for check round (#4211)
-* [CI] Move gpu docker binary to cuda10 (#4229)
-* [CI] use llvm9 for the gpu tests (#4224)
-* [CI] Update GPU docker to cuda10 (#4228)
-* [Relay] Install Relay Prelude program in package install (#4227)
-* [relay] use `time_evaluator` for measurement (#4191)
-* [Relay] Improve build error when no lowered funcs are produced (#4132)
-* [llvm] switch to use Align for llvm trunk (#4051)
-* [CUDA] Update `have_int8` condition to run on compute capability 7.x devices (#4214)
-* [DOCKER] Pin torchvision==0.4.1 (#4140)
-* [DOCKER] torch install depends on future package (#4098)
-* [CodeGen] Disable -mfloat-abi hard option for LLVM < 6.0 (#4071)
-* Add a python how-to example of deploying a tvm module with the tvm runtime only (#4094)
-* Hide symbols from dependent libraries if `HIDE_PRIVATE_SYMBOLS` is ON. (#4041)
-* [BUILD] Disable utvm standalone runtime by default (#4240)
-* Fix TSIM compile error in Linux (add missing -fPIC flag) (#3876)
-* Add scalafmt and format existing scala codebase (#3880)
-* Update TFLite wheel version to 1.13.1 (#3435)
-* Remove PEP498 f-string new feature to support python3.5 (#4250)
-* Require LLVM >= 9 for AMDGPU backend (#4253)
-* Rename ml.dmlc.tvm to org.apache.tvm (#4290)
-* [Test][TF][Relay] Fix argument preparation for vm test mode (#4296)
-* Add test for the `qnn_add` operator (#4282)
-* [CI][DOCKER] Add ONNX runtime dep (#4314)
-* [CI][DOCKER] Upgrade image to include onnx runtime (#4313)
-* [CI] Set workspace to be per executor (#4336)
-* [Build][Windows] Fix Windows build by including cctype (#4319)
-* [Contrib] Add MKL DNN option (#4323)
-* [Test][Relay][Pass] Add test case for lambda lift (#4317)
-* Remove Python imp module as it is deprecated (#4275)
-* Bump up CUDA log version in tophub.py (#4347)
-* Add rule for clean in APPs (#4364)
-* [Relay tests] Temporary Attr Update for Order-Independent Testing (#4357)
-* [CI] Avoid content-length request in test data download (#4375)
-* Compare all outputs in TFLite `test_forward_ssd_mobilenet_v1` (#4373)
-
-### Bug Fixes
-* [RELAY] Fix `get_int_tuple`. (#2691)
-* [ARITH] Select support for integer set analysis. (#2687)
-* [Relay] Fix error in ANF (too aggressively inlining atomic expressions and creating free variables). (#2665)
-* [Hybrid Script] Fix name conflict and attached scope problem. (#2649)
-* [Relay] Fix ANF for reference and pattern matching. (#2637)
-* [Relay] Fix fusion bug when call symbol that is not an operator. (#2630)
-* Fix missing header file. (#2629)
-* [Relay] Fix the bug in heterogeneous annotation which mistakenly steps into the fused op. (#2622)
-* [AutoTVM] Fix incorrect localhost usage in RPC mode. (#2619)
-* [NNVM] Fix incorrectly getting layout attribute as a tuple. (#2610)
-* [Relay] Fix mutating IF expression. (#2601)
-* [Tutorial] Fix downloaded file path. (#2590)
-* [Storage] Fix int32 overflow bug when input is big. (#2580)
-* [NNVM] Fix non-identity problem for FInplaceIdentity. (#2572)
-* [Golang] Fix compilation error. (#2558)
-* [Tensor Expression] Fix missing reduction init predicates. (#2495)
-* [Relay] Fix missing argument for NCHWc in Relay. (#2627)
-* [TOPI] Fix `Nms_ir` data race. (#2600)
-* Fix `compute_inline` with multiple outputs (#2934)
-* [TEXPR][PASS] Fix thread all reduce to avoid write-after-read hazard (#2937)
-* [FRONTEND][TENSORFLOW] bug fix for tensorflow official slim models. (#2864)
-* [FRONTEND][ONNX] Some bug fixes and Shape operator fixed for relay. (#2850)
-* Turn on `USE_SORT` by default (#2916)
-* [DOCKER] Upgrade ci-cpu to latest v0.50 (#2901)
-* [TESTS] Import script robustness (set -u) (#2896)
-* [Relay] Fix name of bias in testing.mlp (#2892)
-* [TESTS] Improve script robustness (#2893)
-* Add dense schedules to `__init__` for cpu (#2855)
-* [Apps] [howto_deploy] fix cxx-flags order and build directory (#2888)
-* [Relay] Add TVM_DLL for ANF/GNF conversion (#2883)
-* [Relay] Fix Relay ARM CPU depthwise spatial pack schedule alter op layout issue. (#2861)
-* Fix setting up hints for getaddrinfo (#2872)
-* Add missing sgx includes (#2878)
-* Fix error reporting for missing axis (#2835)
-* Fix an OrderedDict initialization bug. (#2862)
-* Fix Xcode 10 metal compile error (#2836)
-* tvmrpc: Fix includes (#2825)
-* Fix `init_proj.py`: Team ID expected (#2824)
-* [DOCKER] Fix git clone failure. (#2816)
-* Upgrade java style-check due to CVE-2019-9658 (#2817)
-* [Relay][Quantization] Fix duplicated simulated quantization (#2803)
-* [Bugfix] Repeat and tile bug fixed, relay tests added (#2804)
-* Fix caffe2 relay frontend (#2733)
-* Fix a bug in nnvm to relay converter. (#2756)
-* Ensure loop count is a constant before trying to unroll. (#2797)
-* xcode.py: Decode bytes before output (#2833)
-* [WIN] Fix a bug in `find_llvm` when specifying llvm-config (#2758)
-* [DLPACK] fix flaky ctypes support (#2759)
-* [Bugfix][Relay][Frontend] Fix bug in mxnet converter for `slice_like` (#2744)
-* [DOCS] Fix tutorial (#2724)
-* [TOPI][Relay] Fix default `out_dtype` for `conv2d_NCHWc` and Relay (#2702)
-* [Relay] fix checkwellform (#2705)
-* Fix prelu so it can be used on 2d input, and add a test (#2875)
-* [CODEGEN][OPENCL] Fix compile error about ternary expression. (#2821)
-* Fix Placeholder issue (#2834)
-* Fix makedirs() condition in contrib (#2942)
-* Add missing #!/bin/bash directive (#2951)
-* Bilinear resize bug fix from PR #2777 (#2857)
-* Fix `bias_add` default axis (#2829)
-* Remove empty ty.rs (#2958)
-* fix undefined reference to dlopen, etc (#2957)
-* Removed deprecated `std::unary_function` (#2962)
-* Add output format to ndk build func (#2999)
-* Fix java checkstyle version (#2998)
-* Fix relay invariant error message (#3011)
-* Fix for caffe2 nnvm frontend (#2996)
-* Fix rust resnet example (#3000)
-* Fix x||!x for comparisons in rewrite simplifier (#3029)
-* Fix BatchMatMulRel typerelation (#3032)
-* Update dmlc-core, fix default ctors of NodeEntry (#3017)
-* Fix Fuse (#3035)
-* Fix PostOrderVisit signature (#3048)
-* Fix winograd nnpack fp16 (#3046)
-* Fix some typos (#3063, #3112)
-* Fix `group_conv2d` unit test (#3113)
-* Fix bug in ONNX importer (#3084)
-* Fixing a doc nit (#3123)
-* Fix type code error for StringImm (#3050)
-* Fix bug of wrongly generated `device_map` (#2990)
-* use `unordered_map` instead of map in ANF (#3024)
-* Fix PRelu layout in Relay (#3013)
-* Minor addition to graph runtime debug (#3129)
-* Fix mali conv2d performance regression (#3131)
-* Fix dense autotvm template registration in ROCm (#3136)
-* Fix `conv2d_transpose` (#3138)
-* Fix python lint warnings (#3145)
-* Some fixes for golang latest version compiler (#3119, #3182)
-* Add more syncs to fix flaky test caused by `get_valid_counts` (#3151)
-* Fix AlterLayout Pass (#3155)
-* Fix a multithreaded bug in llvm LazyInitJIT (#3158)
-* Fix a tensorflow test bug. (#3165)
-* Fix concat for ARM (#3061)
-* Handle vectorize for LE statement (#3137)
-* Raise exception `group_conv2d_nchw` not supported (#3195)
-* Quick fix of VTA FPGA Toolchain Installation documentation (#3196)
-* Check file exists before removing it (#3178)
-* Fix a bug of flatten in ONNX to Relay converter (#3180)
-* Fix converter where initializers were not registered as nodes (#3143)
-* Fix bug in cast to bool (#3207)
-* Hotfix `build_module` creation (#3198)
-* Fix sort changing original input data issue (#3212)
-* Fix bug in vta runtime DepPop function (#3208)
-* Fix resize nearest with fractional scaling (#3244)
-* Fix `vta_conv2d` crash issue after change `vta_config.json` (#3213)
-* Fix a memory leak in OpManager (#3263)
-* PkgConfig cause crash in PYNQ board due to link library (#3257)
-* Fix Error messages in tflite.py (#3320)
-* Fix typos in docs and comments (#3309, #3376)
-* Bugfix min/max const canonicalize rule (#3386)
-* Return module from frontend for autotvm (#3401)
-* Fix constant and reshape in ONNX (#3387)
-* Default verilator location fix (#3324)
-* Fix autodiff for conditional expression (#3453)
-* Grammatical improvements to `tensor_expr_get_started` (#3330)
-* Fix AutoTVM data structure bug (#3462)
-* Fix MXNet RNN without providing state initialization as input (#3326)
-* Fix flaky test on topk and quantize pass (#3362)
-* Add VTA PYNQ `metal_test` bitstream program logic and fix compilation issue. (#3400)
-* Fix VTA function Vivado Compile Error. (#3375)
-* Fix VTA DRAM functionality issue. (#3278)
-* Fix reshape precompute and type error in ONNX frontend (#3230)
-* Fix interpreter argument conversion for tuples. (#3349)
-* Fix code generation for packed functions + tuples in VM (#3287)
-* Fix memory leak in Relay interpreter (#3448)
-* Fix x86 depthwise conv2d `alter_op_layout` (#3264)
-* Create closure object for GlobalVar (#3411)
-* Fix getting global var in prelude (#3405)
-* Fix rfactor bugs related to predicate and loop partition (#3382, #3444)
-* Fix the bug in AutoTVM where SimulatedAnnealingOptimizer sometimes finds useless candidates (#3413)
-* Fix name conflict in PartialEval (#3402)
-* Fix int bound analysis bug for modular (#3288)
-* Check arg positiveness for modular rules (#3279)
-* Fixes failure of `sum` and `all` on `axis=0` (#3422)
-* Fix package path in tflite test (#3427)
-* Fix Windows build (#3429)
-* Fix `LSTMBlockCell` in Tensorflow frontend (#3410)
-* TF fix where output index is ignored (#3622)
-* Runtime fix for custom datatypes (#3471)
-* Relay build module warnings (#3452)
-* Relay partial evaluator (#3482)
-* Pynq AutoTVM tracker (#3497, #3578)
-* A normal form test (#3525)
-* Lint issue (#3519, #3615)
-* Any shape testing (#3528)
-* Android `posix_memalign` (#3532)
-* Quantization `add_rewrite` and UnifyDTypeScale (#3534)
-* Bound inference fix (#3526)
-* Tensorflow NCHW data format (#3514)
-* First order gradient (#3550)
-* JS load module example (#3556)
-* Build error (#3552)
-* Relay VM debug statements (#3565)
-* C++ lambda expr (#3570)
-* Handling of tempdir if subprocess is killed (#3574)
-* Remove tabs in Chisel source (#3603)
-* Relay VM DataTypeObject (#3604)
-* Removing prints (#3616)
-* Average Pool2D Bug (#3607)
-* Missing header in `cuda_device_api.cc` (#3621)
-* Tensorflow frontend fix where `output_shape` is None (#3632)
-* Winograd accuracy fix (#3644)
-* Fix comment (#3646)
-* Zero-input op fix for recursive traversals (#3623)
-* Python 3.5 compatibility (#3675)
-* Fix infinite recursive `device_api.ext_dev` call in VTA. (#3843)
-* Fix `depth_mult` for TensorFlow frontend (#3676)
-* Fix database APIs for AutoTVM (#3821)
-* Fix axis of softmax in Keras (#3834)
-* Fix VTA TensorLoad module (#3841)
-* Fix inconsistent python/cpp API behavior for `if_then_else`, power (#3829)
-* Fix code comment of operators in ONNX frontend (#3830)
-* Added repo for llvm-9 to fix missing dependency issue (#3826)
-* Fix typo in Relay text parser (#3785)
-* Fix tvm const warnings (#3817)
-* Add gfx906 bc (#3808)
-* Fixed onnx test failures when run on a cpu backend (#3764)
-* Fix ArgBinder assert order (#3794)
-* Fix for NoneType Target for quantization (#3792)
-* Fix out-of-date quantization realize (#3790)
-* Fix Qnn concatenate InferType (#3779)
-* Fix dense tuning (#3768)
-* Fix `visit_pattern` in ExprMutator (#3769)
-* Fix Chisel Scala style (#3765)
-* Fix some pass docs (#3767)
-* Fix mistype in rpc tutorial (#3763)
-* Fix tvm.scan followed by tvm.compute segfault (#3723)
-* Fix the potential index overflow in where operator (#3751)
-* Revert `compile_cmd` kwarg name change (#3746)
-* Update tophub (#3752)
-* Fix typo in `ir_pass.h` (#3741)
-* Bug fix for VME Shell (#3737)
-* Fix missing apt https transport support (#3735)
-* Take zero-extent loops as NoOp and remove them (#3724)
-* Fix mxnet converter for hybridblock and add `div_sqrt_dim` (#3701)
-* Fix partial eval unit test name (#3719)
-* Fix conv2d schedule code (#3648, #3717)
-* Remove thread related headers (#3713)
-* Fix FunctionPass (#3712)
-* Export tvm::relay::OpRegistry::OpRegistry (#3711)
-* Fix Metal reinterpret (#3706)
-* Fix `gather_nd` in Relay (#3442)
-* Fix error in partial evaluator (#3693)
-* Align the naming rule for OpAttributeUnImplemented (#3695)
-* Enable the sparse schedule (#3651)
-* Fix typo names in Caffe2 frontend (#3685)
-* Make tests multi-process friendly. (#3683)
-* Fix typo in README.md (#3684)
-* Fix doc rendering (#3897)
-* Add test script starter command to document (#3993)
-* Add type solver unit tests for unifying quantified funcs (#3947)
-* Change Vivado install instructions to version 2018.3 (#4003)
-* Add a link to the defining network description of auto-tuning tutorial (#4023)
-* Additional MXNet Convolution and Deconvolution tests (#4026)
-* Adding support to check if an attribute is present or not without having to get the value (#3957)
-* Fix parser for cast. (#3873)
-* Fix operator fusion for multiple output (#3871)
-* Remove extern C wrapper for cuBLAS (#3877)
-* Fix int32 range overflow by using int64 (#3870)
-* Remove duplicate resize (#3902)
-* Fix blas cmake for mac os (#3898)
-* Add another MKL name alias for MKL installed through pypi (#3853)
-* Numpy compatible dtype inference for `tvm.convert` and `tvm.const` (#3861)
-* Remove incorrect check for LLVM in C codegen test (#3921)
-* Fix exponential blowup in interpreter (#3559)
-* Fix CUDA int8x4 vectorize (#3928)
-* Make buffer auto broadcast independent to the order of input args (#3956)
-* Fix benchmark layout in graph tuner (#3926)
-* Fix Android Demo LLVM version (#3962)
-* Cast filepath arguments to string (#3968)
-* Fixes "common" sub crate using nightly and master (#3965)
-* Changes to make tensorize work. These changes also fix the previously broken test. (#3981)
-* Tensorflow control flow has not been fully supported in the frontend converter. -* `topi.floor_div` is inconsistent with floor division semantic when result number is close to an integer. - - -### Depreciations -* Deprecating python2 support in the master branch and following release (v0.6). (#2994, #2986) -* NNVM is deprecated and will be removed in a future version. (#4333, #4368) - ## 0.5 This release features several major improvements. Some of the highlights are: Arbitrary bits quantization algorithm; High-level auto-differentiable programming IR -- Relay. @@ -1172,5 +279,3 @@ We also make major improvements in supporting new backends: ROCm for AMDGPUs and - DLPack integration support - AOT and module system - Basic code structure ready. - - diff --git a/apps/benchmark/util.py b/apps/benchmark/util.py index c7de3a1dda31..0af1669ec364 100644 --- a/apps/benchmark/util.py +++ b/apps/benchmark/util.py @@ -34,8 +34,8 @@ def get_network(name, batch_size, dtype='float32'): Returns ------- - net: relay.Module - The relay function of network definition + net: nnvm.symbol + The NNVM symbol of network definition params: dict The random parameters for benchmark input_shape: tuple diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile index 57e484379a4e..8550a0ee1f00 100644 --- a/apps/bundle_deploy/Makefile +++ b/apps/bundle_deploy/Makefile @@ -16,15 +16,15 @@ # under the License. # Makefile Example to bundle TVM modules. - TVM_ROOT=$(shell cd ../..; pwd) +NNVM_PATH=nnvm DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core -PKG_CFLAGS = -std=c++14 -O2 -fPIC\ +PKG_CFLAGS = -std=c++14 -Oz -fPIC\ -I${TVM_ROOT}/include\ -I${DMLC_CORE}/include\ - -I${TVM_ROOT}/3rdparty/dlpack/include + -I${TVM_ROOT}/3rdparty/dlpack/include\ -PKG_LDFLAGS = -pthread +PKG_LDFLAGS = -L${TVM_ROOT}/build build_dir := build @@ -33,7 +33,7 @@ test: $(build_dir)/demo $(build_dir)/bundle.so $(build_dir)/demo: demo.cc @mkdir -p $(@D) - $(CXX) $(PKG_CFLAGS) -o $@ $^ -ldl + $(CXX) $(PKG_CFLAGS) -o $@ $^ # Serialize our graph.json file. 
$(build_dir)/graph.json.cc: $(build_dir)/graph.json @@ -44,13 +44,13 @@ $(build_dir)/params.bin.cc: $(build_dir)/params.bin xxd -i $^ > $@ $(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin: build_model.py - python3 $< -o $(build_dir) + python $< -o $(build_dir) # Build our bundle against the serialized bundle.cc API, the runtime.cc API, and # the serialized graph.json and params.bin $(build_dir)/bundle.so: bundle.cc runtime.cc $(build_dir)/model.o $(build_dir)/graph.json.cc $(build_dir)/params.bin.cc @mkdir -p $(@D) - $(CXX) -shared $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) + $(CXX) $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) -shared clean: rm -r $(build_dir) diff --git a/apps/bundle_deploy/build_model.py b/apps/bundle_deploy/build_model.py index de9e73522ca2..dc4c14b47a01 100644 --- a/apps/bundle_deploy/build_model.py +++ b/apps/bundle_deploy/build_model.py @@ -18,7 +18,8 @@ import argparse import os -from tvm import relay +import nnvm.compiler +import nnvm.testing import tvm import logging @@ -33,24 +34,22 @@ def main(): dshape = (1, 3, 224, 224) from mxnet.gluon.model_zoo.vision import get_model block = get_model('mobilenet0.25', pretrained=True) - shape_dict = {'data': dshape} - mod, params = relay.frontend.from_mxnet(block, shape_dict) - func = mod["main"] - func = relay.Function(func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs) - - with relay.build_config(opt_level=3): - graph, lib, params = relay.build( - func, 'llvm --system-lib', params=params) + net, params = nnvm.frontend.from_mxnet(block) + net = nnvm.sym.softmax(net) + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build( + net, 'llvm --system-lib', shape={'data': dshape}, params=params) + print(graph.symbol().debug_str()) build_dir = os.path.abspath(opts.out_dir) if not os.path.isdir(build_dir): os.makedirs(build_dir) lib.save(os.path.join(build_dir, 'model.o')) with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json: - f_graph_json.write(graph) + f_graph_json.write(graph.json()) with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params: - f_params.write(relay.save_param_dict(params)) + f_params.write(nnvm.compiler.save_param_dict(params)) if __name__ == '__main__': diff --git a/apps/bundle_deploy/bundle.cc b/apps/bundle_deploy/bundle.cc index 14f0b7edc301..61169f17cf71 100644 --- a/apps/bundle_deploy/bundle.cc +++ b/apps/bundle_deploy/bundle.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at
- *
+ *
 * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -26,9 +26,7 @@ extern unsigned int build_graph_json_len;
extern unsigned char build_params_bin[];
extern unsigned int build_params_bin_len;
-#define TVM_BUNDLE_FUNCTION __attribute__((visibility("default")))
-
-extern "C" {
+#define TVM_BUNDLE_FUNCTION __attribute__((visibility("default")))

extern "C" TVM_BUNDLE_FUNCTION void *tvm_runtime_create() {
  const std::string json_data(&build_graph_json[0],
@@ -66,4 +64,3 @@ TVM_BUNDLE_FUNCTION void tvm_runtime_get_output(void *handle, int index,
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("get_output")(
      index, reinterpret_cast<DLTensor *>(tensor));
}
-}
diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc
index 7a116e89fa88..f1c2ba2f54ec 100644
--- a/apps/bundle_deploy/runtime.cc
+++ b/apps/bundle_deploy/runtime.cc
@@ -25,7 +25,7 @@
 #include "../../src/runtime/c_runtime_api.cc"
 #include "../../src/runtime/cpu_device_api.cc"
 #include "../../src/runtime/workspace_pool.cc"
-#include "../../src/runtime/library_module.cc"
+#include "../../src/runtime/module_util.cc"
 #include "../../src/runtime/module.cc"
 #include "../../src/runtime/registry.cc"
 #include "../../src/runtime/file_util.cc"
@@ -33,5 +33,5 @@
 #include "../../src/runtime/thread_pool.cc"
 #include "../../src/runtime/ndarray.cc"
 #include "../../src/runtime/object.cc"
-#include "../../src/runtime/system_library.cc"
+#include "../../src/runtime/system_lib_module.cc"
 #include "../../src/runtime/graph/graph_runtime.cc"
diff --git a/apps/extension/Makefile b/apps/extension/Makefile
index 1680a003e06f..14c71d92ca20 100644
--- a/apps/extension/Makefile
+++ b/apps/extension/Makefile
@@ -20,7 +20,8 @@ TVM_ROOT=$(shell cd ../..; pwd)
PKG_CFLAGS = -std=c++11 -O2 -fPIC\
 -I${TVM_ROOT}/include\
 -I${TVM_ROOT}/3rdparty/dmlc-core/include\
- -I${TVM_ROOT}/3rdparty/dlpack/include
+ -I${TVM_ROOT}/3rdparty/dlpack/include\
+ -I${TVM_ROOT}/3rdparty/HalideIR/src
PKG_LDFLAGS =-L${TVM_ROOT}/build
UNAME_S := $(shell uname -s)
diff --git a/apps/extension/python/tvm_ext/__init__.py b/apps/extension/python/tvm_ext/__init__.py
index 31b149eb4913..38d511eeb617 100644
--- a/apps/extension/python/tvm_ext/__init__.py
+++ b/apps/extension/python/tvm_ext/__init__.py
@@ -38,9 +38,18 @@ def load_lib():
ivec_create = tvm.get_global_func("tvm_ext.ivec_create")
ivec_get = tvm.get_global_func("tvm_ext.ivec_get")
-@tvm.register_object("tvm_ext.IntVector")
-class IntVec(tvm.Object):
+class IntVec(object):
    """Example for using extension class in c++ """
+    _tvm_tcode = 17
+
+    def __init__(self, handle):
+        self.handle = handle
+
+    def __del__(self):
+        # You can also call your own customized
+        # deleter if you can free it via your own FFI.
+        tvm.nd.free_extension_handle(self.handle, self.__class__._tvm_tcode)
+
    @property
    def _tvm_handle(self):
        return self.handle.value
@@ -48,26 +57,32 @@ def _tvm_handle(self):
    def __getitem__(self, idx):
        return ivec_get(self, idx)
+# Register IntVec extension on python side.
+tvm.register_extension(IntVec, IntVec)
+
nd_create = tvm.get_global_func("tvm_ext.nd_create")
nd_add_two = tvm.get_global_func("tvm_ext.nd_add_two")
-nd_get_additional_info = tvm.get_global_func("tvm_ext.nd_get_additional_info")
+nd_get_addtional_info = tvm.get_global_func("tvm_ext.nd_get_addtional_info")
-@tvm.register_object("tvm_ext.NDSubClass")
class NDSubClass(tvm.nd.NDArrayBase):
    """Example for subclassing TVM's NDArray infrastructure.
    By inheriting TVM's NDArray, external libraries could
    leverage TVM's FFI without any modification.
    """
+    # Should be consistent with the type-trait set in the backend
+    _array_type_code = 1
    @staticmethod
-    def create(additional_info):
-        return nd_create(additional_info)
+    def create(addtional_info):
+        return nd_create(addtional_info)
    @property
-    def additional_info(self):
-        return nd_get_additional_info(self)
+    def addtional_info(self):
+        return nd_get_addtional_info(self)
    def __add__(self, other):
        return nd_add_two(self, other)
+
+tvm.register_extension(NDSubClass, NDSubClass)
diff --git a/apps/extension/src/tvm_ext.cc b/apps/extension/src/tvm_ext.cc
index d57b41b1215b..8655fa7d0c30 100644
--- a/apps/extension/src/tvm_ext.cc
+++ b/apps/extension/src/tvm_ext.cc
@@ -6,9 +6,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
- *
+ *
 *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -29,6 +29,24 @@
 #include
 #include
+namespace tvm_ext {
+using IntVector = std::vector<int>;
+class NDSubClass;
+}  // namespace tvm_ext

+namespace tvm {
+namespace runtime {
+template<>
+struct extension_type_info<tvm_ext::IntVector> {
+  static const int code = 17;
+};
+template<>
+struct array_type_info<tvm_ext::NDSubClass> {
+  static const int code = 1;
+};
+}  // namespace runtime
+}  // namespace tvm
+
 using namespace tvm;
 using namespace tvm::runtime;
@@ -39,95 +57,71 @@ namespace tvm_ext {
 * To use this extension, an external library should
 *
 * 1) Inherit TVM's NDArray and NDArray container,
+ *    and define the trait `array_type_info` for this class.
 *
- * 2) Follow the new object protocol to define new NDArray as a reference class.
+ * 2) Define a constructor in the inherited class that accepts
+ *    a pointer to TVM's Container, which is nullable.
 *
- * 3) On Python frontend, inherit `tvm.nd.NDArray`,
- *    register the type using tvm.register_object
+ * 3) On Python frontend, inherit `tvm.nd.NDArrayBase`,
+ *    define the class attribute `_array_type_code` consistent to
+ *    the C++ type trait, and register the subclass using `tvm.register_extension`.
 */
class NDSubClass : public tvm::runtime::NDArray {
 public:
  class SubContainer : public NDArray::Container {
   public:
-    SubContainer(int additional_info) :
-        additional_info_(additional_info) {
-      type_index_ = SubContainer::RuntimeTypeIndex();
+    SubContainer(int addtional_info) :
+        addtional_info_(addtional_info) {
+      array_type_code_ = array_type_info<NDSubClass>::code;
    }
-    int additional_info_{0};
-
-    static constexpr const uint32_t _type_index = TypeIndex::kDynamic;
-    static constexpr const char* _type_key = "tvm_ext.NDSubClass";
-    TVM_DECLARE_FINAL_OBJECT_INFO(SubContainer, NDArray::Container);
+    static bool Is(NDArray::Container *container) {
+      SubContainer *c = static_cast<SubContainer*>(container);
+      return c->array_type_code_ == array_type_info<NDSubClass>::code;
+    }
+    int addtional_info_{0};
  };
-
-  static void SubContainerDeleter(Object* obj) {
-    auto* ptr = static_cast<SubContainer*>(obj);
-    delete ptr;
+  NDSubClass(NDArray::Container *container) {
+    if (container == nullptr) {
+      data_ = nullptr;
+      return;
+    }
+    CHECK(SubContainer::Is(container));
+    container->IncRef();
+    data_ = container;
  }
-
-  NDSubClass() {}
-  explicit NDSubClass(ObjectPtr<Object> n) : NDArray(n) {}
-  explicit NDSubClass(int additional_info) {
-    SubContainer* ptr = new SubContainer(additional_info);
-    ptr->SetDeleter(SubContainerDeleter);
-    data_ = GetObjectPtr<Object>(ptr);
+  ~NDSubClass() {
+    this->reset();
  }
-
  NDSubClass AddWith(const NDSubClass &other) const {
-    SubContainer *a = static_cast<SubContainer*>(get_mutable());
-    SubContainer *b = static_cast<SubContainer*>(other.get_mutable());
+    SubContainer *a = static_cast<SubContainer*>(data_);
+    SubContainer *b = static_cast<SubContainer*>(other.data_);
    CHECK(a != nullptr && b != nullptr);
-    return NDSubClass(a->additional_info_ + b->additional_info_);
+    return NDSubClass(new SubContainer(a->addtional_info_ + b->addtional_info_));
  }
  int get_additional_info() const {
-    SubContainer *self = static_cast<SubContainer*>(get_mutable());
+    SubContainer *self = static_cast<SubContainer*>(data_);
    CHECK(self != nullptr);
-    return self->additional_info_;
+    return self->addtional_info_;
  }
-  using ContainerType = SubContainer;
-};
-
-TVM_REGISTER_OBJECT_TYPE(NDSubClass::SubContainer);
-
-/*!
- * \brief Introduce additional extension data structures
- *        by sub-classing TVM's object system.
- */
-class IntVectorObj : public Object {
- public:
-  std::vector<int> vec;
-
-  static constexpr const char* _type_key = "tvm_ext.IntVector";
-  TVM_DECLARE_FINAL_OBJECT_INFO(IntVectorObj, Object);
};
-
-/*!
- * \brief Int vector reference class.
- */
-class IntVector : public ObjectRef {
- public:
-  TVM_DEFINE_OBJECT_REF_METHODS(IntVector, ObjectRef, IntVectorObj);
-};
-
-TVM_REGISTER_OBJECT_TYPE(IntVectorObj);
-
}  // namespace tvm_ext

namespace tvm_ext {
+TVM_REGISTER_EXT_TYPE(IntVector);
+
TVM_REGISTER_GLOBAL("tvm_ext.ivec_create")
.set_body([](TVMArgs args, TVMRetValue *rv) {
-    auto n = tvm::runtime::make_object<IntVectorObj>();
+    IntVector vec;
    for (int i = 0; i < args.size(); ++i) {
-      n->vec.push_back(args[i].operator int());
+      vec.push_back(args[i].operator int());
    }
-    *rv = IntVector(n);
+    *rv = vec;
  });

TVM_REGISTER_GLOBAL("tvm_ext.ivec_get")
.set_body([](TVMArgs args, TVMRetValue *rv) {
-    IntVector p = args[0];
-    *rv = p->vec[args[1].operator int()];
+    *rv = args[0].AsExtension<IntVector>()[args[1].operator int()];
  });
@@ -154,10 +148,8 @@ TVM_REGISTER_GLOBAL("device_api.ext_dev")
TVM_REGISTER_GLOBAL("tvm_ext.nd_create")
.set_body([](TVMArgs args, TVMRetValue *rv) {
-  int additional_info = args[0];
-  *rv = NDSubClass(additional_info);
-  CHECK_EQ(rv->type_code(), kNDArrayContainer);
-
+  int addtional_info = args[0];
+  *rv = NDSubClass(new NDSubClass::SubContainer(addtional_info));
});

TVM_REGISTER_GLOBAL("tvm_ext.nd_add_two")
@@ -167,7 +159,7 @@ TVM_REGISTER_GLOBAL("tvm_ext.nd_add_two")
    *rv = a.AddWith(b);
  });
-TVM_REGISTER_GLOBAL("tvm_ext.nd_get_additional_info")
+TVM_REGISTER_GLOBAL("tvm_ext.nd_get_addtional_info")
.set_body([](TVMArgs args, TVMRetValue *rv) {
    NDSubClass a = args[0];
    *rv = a.get_additional_info();
diff --git a/apps/extension/tests/test_ext.py b/apps/extension/tests/test_ext.py
index a5e7e0f69456..e481e82fefb3 100644
--- a/apps/extension/tests/test_ext.py
+++ b/apps/extension/tests/test_ext.py
@@ -87,17 +87,16 @@ def check_llvm():
def test_nd_subclass():
-    a = tvm_ext.NDSubClass.create(additional_info=3)
-    b = tvm_ext.NDSubClass.create(additional_info=5)
-    assert isinstance(a, tvm_ext.NDSubClass)
+    a = tvm_ext.NDSubClass.create(addtional_info=3)
+    b = tvm_ext.NDSubClass.create(addtional_info=5)
    c = a + b
    d = a + a
    e = b + b
-    assert(a.additional_info == 3)
-    assert(b.additional_info == 5)
-    assert(c.additional_info == 8)
-    assert(d.additional_info == 6)
-    assert(e.additional_info == 10)
+    assert(a.addtional_info == 3)
+    assert(b.addtional_info == 5)
+    assert(c.addtional_info == 8)
+    assert(d.addtional_info == 6)
+    assert(e.addtional_info == 10)

if __name__ == "__main__":
diff --git a/apps/howto_deploy/Makefile b/apps/howto_deploy/Makefile
index a260e89bc042..5c4a6d6e89da 100644
--- a/apps/howto_deploy/Makefile
+++ b/apps/howto_deploy/Makefile
@@ -17,6 +17,7 @@
# Makefile Example to deploy TVM modules.
TVM_ROOT=$(shell cd ../..; pwd) +NNVM_PATH=nnvm DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core PKG_CFLAGS = -std=c++11 -O2 -fPIC\ @@ -24,7 +25,7 @@ PKG_CFLAGS = -std=c++11 -O2 -fPIC\ -I${DMLC_CORE}/include\ -I${TVM_ROOT}/3rdparty/dlpack/include\ -PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -pthread +PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -lpthread .PHONY: clean all @@ -38,7 +39,7 @@ lib/libtvm_runtime_pack.o: tvm_runtime_pack.cc # The code library built by TVM lib/test_addone_sys.o: prepare_test_libs.py @mkdir -p $(@D) - python3 prepare_test_libs.py + python prepare_test_libs.py # Deploy using the all in one TVM package library lib/cpp_deploy_pack: cpp_deploy.cc lib/test_addone_sys.o lib/libtvm_runtime_pack.o diff --git a/apps/rocm_rpc/Makefile b/apps/rocm_rpc/Makefile index 36eb41596be8..8d30fb6ef780 100644 --- a/apps/rocm_rpc/Makefile +++ b/apps/rocm_rpc/Makefile @@ -19,6 +19,7 @@ ROCM_PATH=/opt/rocm TVM_ROOT=$(shell cd ../..; pwd) +NNVM_PATH=nnvm DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core PKG_CFLAGS = -std=c++11 -O2 -fPIC\ diff --git a/apps/sgx/README.md b/apps/sgx/README.md index ad87be4e93db..13f72b0629cf 100644 --- a/apps/sgx/README.md +++ b/apps/sgx/README.md @@ -49,7 +49,7 @@ mkdir build && cd build cmake .. -DUSE_LLVM=ON -DUSE_SGX=/opt/sgxsdk -DRUST_SGX_SDK=/opt/rust-sgx-sdk make -j4 cd .. -pip install -e python -e topi/python +pip install -e python -e topi/python -e nnvm/python cd apps/sgx ``` diff --git a/apps/sgx/enclave/src/build_model.py b/apps/sgx/enclave/src/build_model.py index dff571668422..5a6b10cfcd38 100644 --- a/apps/sgx/enclave/src/build_model.py +++ b/apps/sgx/enclave/src/build_model.py @@ -20,8 +20,8 @@ import os from os import path as osp -from tvm import relay -from tvm.relay import testing +import nnvm.compiler +import nnvm.testing import tvm @@ -30,13 +30,14 @@ def main(): parser.add_argument('-o', '--out-dir', default='.') opts = parser.parse_args() + # from tutorials/nnvm_quick_start.py dshape = (1, 3, 224, 224) - net, params = relay.testing.resnet.get_workload( + net, params = nnvm.testing.resnet.get_workload( layers=18, batch_size=dshape[0], image_shape=dshape[1:]) - with relay.build_config(opt_level=3): - graph, lib, params = relay.build( - net, 'llvm --system-lib', params=params) + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build( + net, 'llvm --system-lib', shape={'data': dshape}, params=params) build_dir = osp.abspath(opts.out_dir) if not osp.isdir(build_dir): @@ -44,9 +45,9 @@ def main(): lib.save(osp.join(build_dir, 'model.bc')) with open(osp.join(build_dir, 'graph.json'), 'w') as f_graph_json: - f_graph_json.write(graph) + f_graph_json.write(graph.json()) with open(osp.join(build_dir, 'params.bin'), 'wb') as f_params: - f_params.write(relay.save_param_dict(params)) + f_params.write(nnvm.compiler.save_param_dict(params)) if __name__ == '__main__': diff --git a/cmake/config.cmake b/cmake/config.cmake index 42c19b5277be..dbad944c5459 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -175,10 +175,6 @@ set(USE_SORT ON) # Whether use TensorRT # /path/to/tensorrt that contains include and lib dirs set(USE_TENSORRT OFF) - -# Whether use MKL-DNN (DNNL) codegen -set(USE_DNNL_CODEGEN OFF) - # Build ANTLR parser for Relay text format # Possible values: # - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) diff --git a/conda/tvm/build.sh b/conda/tvm/build.sh index 358e0b91798a..494f90f0afa0 100644 --- a/conda/tvm/build.sh +++ b/conda/tvm/build.sh @@ -6,9 +6,9 @@ # to you 
under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -26,3 +26,7 @@ cd .. cd topi/python $PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt cd ../.. + +cd nnvm/python +$PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt +cd ../.. diff --git a/conda/tvm/meta.yaml b/conda/tvm/meta.yaml index 3ce0f5e4726c..12f9a9698d70 100644 --- a/conda/tvm/meta.yaml +++ b/conda/tvm/meta.yaml @@ -48,6 +48,7 @@ test: imports: - tvm - topi + - nnvm requires: - pytest - scipy diff --git a/dmlc_tvm_commit_id.txt b/dmlc_tvm_commit_id.txt index 86cce8c71a91..461bca727d0a 100644 --- a/dmlc_tvm_commit_id.txt +++ b/dmlc_tvm_commit_id.txt @@ -1 +1 @@ -475158f6285c63b42efe574cb9ba8afec24261be +6e085b40328a99ec59a7e4ff50017edab31eb553 \ No newline at end of file diff --git a/docker/Dockerfile.demo_android b/docker/Dockerfile.demo_android index 13d1a2175b88..4d52411444f7 100644 --- a/docker/Dockerfile.demo_android +++ b/docker/Dockerfile.demo_android @@ -70,5 +70,5 @@ RUN cd /usr && \ make -j10 # Environment variables -ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH} +ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH} ENV ANDROID_HOME=/opt/android-sdk-linux/ diff --git a/docker/Dockerfile.demo_cpu b/docker/Dockerfile.demo_cpu index 6700579bc41b..63dc3a15d088 100644 --- a/docker/Dockerfile.demo_cpu +++ b/docker/Dockerfile.demo_cpu @@ -30,4 +30,4 @@ COPY install/install_tvm_cpu.sh /install/install_tvm_cpu.sh RUN bash /install/install_tvm_cpu.sh # Environment variables -ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH} +ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH} diff --git a/docker/Dockerfile.demo_gpu b/docker/Dockerfile.demo_gpu index 0591050c5270..9be8c00f941c 100644 --- a/docker/Dockerfile.demo_gpu +++ b/docker/Dockerfile.demo_gpu @@ -28,7 +28,7 @@ COPY install/install_tvm_gpu.sh /install/install_tvm_gpu.sh RUN bash /install/install_tvm_gpu.sh # Environment variables -ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH} +ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH} ENV PATH=/usr/local/nvidia/bin:${PATH} ENV PATH=/usr/local/cuda/bin:${PATH} ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} diff --git a/docker/Dockerfile.demo_opencl b/docker/Dockerfile.demo_opencl index bf27eef862f5..7afb2243cb52 100644 --- a/docker/Dockerfile.demo_opencl +++ b/docker/Dockerfile.demo_opencl @@ -76,6 +76,7 @@ RUN mkdir -p ${TVM_BUILD_DIR} && \ make -j6 RUN echo "Building Python package" -ENV PYTHONPATH=${TVM_HOME}/python:${TVM_HOME}/topi/python:${PYTHONPATH} +ENV PYTHONPATH=${TVM_HOME}/python:${TVM_HOME}/topi/python:${TVM_HOME}/nnvm/python:${PYTHONPATH} RUN cd ${TVM_HOME}/python && python3 setup.py install --user RUN cd ${TVM_HOME}/topi/python && python3 setup.py install --user +RUN cd ${TVM_HOME}/nnvm/python && python3 setup.py install --user diff --git a/docker/install/ubuntu_install_iverilog.sh 
b/docker/install/ubuntu_install_iverilog.sh new file mode 100755 index 000000000000..da20730d491e --- /dev/null +++ b/docker/install/ubuntu_install_iverilog.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +set -u +set -o pipefail + +apt-get install -y --no-install-recommends make bison flex +wget -q ftp://icarus.com/pub/eda/verilog/v10/verilog-10.1.tar.gz +tar xf verilog-10.1.tar.gz +cd verilog-10.1 +./configure --prefix=/usr +make install -j8 +cd .. +rm -rf verilog-10.1 verilog-10.1.tar.gz diff --git a/docs/Doxyfile b/docs/Doxyfile index b96678ca1696..c52e14867a82 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -770,7 +770,7 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = include/tvm topi/include/topi vta/include/vta +INPUT = include/tvm topi/include/topi nnvm/include/nnvm vta/include/vta # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -1991,7 +1991,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = DMLC_USE_CXX11 TVM_DLL= __attribute__(x)= +PREDEFINED = DMLC_USE_CXX11 TVM_DLL= NNVM_DLL= __attribute__(x)= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/docs/api/python/autotvm.rst b/docs/api/python/autotvm.rst index 5e8778502457..f6a9ff5f6aa8 100644 --- a/docs/api/python/autotvm.rst +++ b/docs/api/python/autotvm.rst @@ -83,6 +83,9 @@ tvm.autotvm.task .. automodule:: tvm.autotvm.task.topi_integration :members: +.. automodule:: tvm.autotvm.task.nnvm_integration + :members: + tvm.autotvm.record ~~~~~~~~~~~~~~~~~~ .. automodule:: tvm.autotvm.record diff --git a/docs/api/python/index.rst b/docs/api/python/index.rst index 7a8566eec7ba..2773fefedee3 100644 --- a/docs/api/python/index.rst +++ b/docs/api/python/index.rst @@ -40,5 +40,6 @@ Python API dev topi vta/index + nnvm/index hybrid relay/index diff --git a/docs/api/python/nnvm/compiler.rst b/docs/api/python/nnvm/compiler.rst new file mode 100644 index 000000000000..4cf1b083df60 --- /dev/null +++ b/docs/api/python/nnvm/compiler.rst @@ -0,0 +1,40 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +nnvm.compiler +------------- + +.. automodule:: nnvm.compiler + +.. autofunction:: nnvm.compiler.build + +.. autofunction:: nnvm.compiler.build_config + +.. autofunction:: nnvm.compiler.save_param_dict + +.. autofunction:: nnvm.compiler.load_param_dict + +.. autofunction:: nnvm.compiler.optimize + +.. automodule:: nnvm.compiler.graph_util + :members: + +.. automodule:: nnvm.compiler.graph_attr + :members: + +.. automodule:: nnvm.compiler.compile_engine + :members: diff --git a/docs/api/python/nnvm/frontend.rst b/docs/api/python/nnvm/frontend.rst new file mode 100644 index 000000000000..ca8c4088fd08 --- /dev/null +++ b/docs/api/python/nnvm/frontend.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +nnvm.frontend +------------- + +.. automodule:: nnvm.frontend + +.. autofunction:: nnvm.frontend.from_mxnet + +.. autofunction:: nnvm.frontend.from_onnx + +.. autofunction:: nnvm.frontend.from_coreml + +.. autofunction:: nnvm.frontend.from_keras + +.. autofunction:: nnvm.frontend.from_tensorflow + +.. autofunction:: nnvm.frontend.from_darknet diff --git a/docs/api/python/nnvm/graph.rst b/docs/api/python/nnvm/graph.rst new file mode 100644 index 000000000000..e9f667e416e8 --- /dev/null +++ b/docs/api/python/nnvm/graph.rst @@ -0,0 +1,25 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +nnvm.graph +---------- +.. automodule:: nnvm.graph + +.. autofunction:: nnvm.graph.create + +.. autoclass:: nnvm.graph.Graph + :members: diff --git a/docs/api/python/nnvm/index.rst b/docs/api/python/nnvm/index.rst new file mode 100644 index 000000000000..493a8fc1a772 --- /dev/null +++ b/docs/api/python/nnvm/index.rst @@ -0,0 +1,31 @@ +.. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +NNVM API +======== + +This document contains the python API to NNVM compiler toolchain. + +.. toctree:: + :maxdepth: 2 + + compiler + frontend + symbol + graph + top + testing diff --git a/docs/api/python/nnvm/symbol.rst b/docs/api/python/nnvm/symbol.rst new file mode 100644 index 000000000000..46dcac97ddf9 --- /dev/null +++ b/docs/api/python/nnvm/symbol.rst @@ -0,0 +1,27 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +nnvm.symbol +----------- +.. automodule:: nnvm.symbol + +.. autoclass:: nnvm.symbol.Symbol + :members: + +.. autoclass:: nnvm.symbol.Variable + +.. autofunction:: nnvm.symbol.Group diff --git a/docs/api/python/nnvm/testing.rst b/docs/api/python/nnvm/testing.rst new file mode 100644 index 000000000000..9ee72d41b2eb --- /dev/null +++ b/docs/api/python/nnvm/testing.rst @@ -0,0 +1,31 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +nnvm.testing +------------ + +.. automodule:: nnvm.testing + +.. autofunction:: nnvm.testing.ctx_list + +nnvm.testing.check_computation +------------------------------ + +.. automodule:: nnvm.testing.check_computation + :members: + +.. 
include:: testing_new_ops.rst
diff --git a/docs/api/python/nnvm/testing_new_ops.rst b/docs/api/python/nnvm/testing_new_ops.rst
new file mode 100644
index 000000000000..bf80b526a5d5
--- /dev/null
+++ b/docs/api/python/nnvm/testing_new_ops.rst
@@ -0,0 +1,152 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+..  http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+Testing new operations
+----------------------
+
+When adding new operations, it is a good idea to test them. Testing
+should be done with the function ``nnvm.testing.check_function``. You
+should provide it with the symbol representing the result of a
+computation and a reference numpy implementation. By default, it will
+also check analytical gradients against numerical gradients if
+analytical gradients are implemented for your operation. You can also
+pass a reference implementation for the gradients, but numerical
+gradients will still be checked. Numerical gradient checking may be
+switched off explicitly, but doing this is generally not a good idea.
+Here is an example testing the logarithm operation:
+
+.. code:: python
+
+    import numpy as np
+    import nnvm
+    import nnvm.symbol as sym
+    from nnvm.testing.check_computation import check_function
+
+    x = sym.Variable("x")
+    y = sym.log(x)
+
+    def forward(x):
+        return np.log(x)
+
+    def backward(head_grads, x):
+        return [1. / x * head_grads]
+
+    dtype = "float32"
+    shape = {'x': (1, 3, 32, 32)}
+    check_function(y, forward, backward, in_range=(0.001, 2.0), dtype=dtype, shape=shape)
+
+If you run the code above, you might get an ``AssertionError`` in rare
+cases. That’s why it is recommended to run new tests many times.
+
+.. code:: python
+
+    for _ in range(10000):
+        check_function(y, forward, backward, in_range=(0.001, 2.0), dtype=dtype, shape=shape)
+
+If you run the code above then sooner or later you will get an exception
+which may look like this:
+
+.. code-block:: text
+
+    AssertionError: Analytical and numerical grads wrt x differ too much
+    analytical grad = [
+        ...
+    ]
+    numerical grad = [
+        ...
+    ]
+    distance > atol*sqrt(n) + rtol*grad_norm
+    distance 308.50885009765625 > 0.01*55.42562584220407 + 0.1*2167.70703125
+
+It means that either you have a mistake in the ``FGradient`` function or
+the numerical error is too high. Generally, if you look at the printed
+gradients and see that they differ only slightly or just in a single
+position, then it is a numerical error. But if the gradients look
+completely different, especially if many corresponding positions have
+different signs, then it must be something wrong with the analytical
+gradient implementation.
+
+Then try to make this error reproducible, and also try to reduce the
+shape of inputs, but not too much; a vector of 10 elements is a
+reasonable choice. You also won’t need the reference functions ``forward``
+and ``backward``, and restricting the number of targets might also be a
+good idea. Since the error may manifest itself only in rare cases, you
+might want to run it in a loop.
+
+.. code:: python
+
+    shape = {'x': (10,)}
+    np.random.seed(42)
+
+    for _ in range(1000):
+        check_function(y, in_range=(0.001, 2.0), dtype=dtype, shape=shape,
+                       numerical_grads=True, only_targets=['llvm'])
+
+Running this code will result in the following:
+
+.. code-block:: text
+
+    check_function failed while checking gradients numerically, here is the main graph
+    Graph(%x, %head_grads_0) {
+      %x, shape=[10], dtype=0
+      %head_grads_0, shape=[10], dtype=0
+      %1 = log(%x), shape=[10], dtype=0
+      %3 = elemwise_div(%head_grads_0, %x), shape=[10], dtype=0
+      ret %1, %3, %head_grads_0
+    }
+    graph_attr_keys = [layout_inputs, dtype_num_unknown_nodes, dtype, shape_num_unknown_nodes, shape]
+
+    Generated inputs:
+    {'x': array([2.5660574e-01, 1.5313280e+00, 1.0232578e-03, 8.3371508e-01,
+           1.0454979e+00, 1.1021420e-01, 1.9461832e+00, 4.5302454e-01,
+           6.0909325e-01, 6.0858107e-01], dtype=float32), 'head_grads_0': array([0.4616029 , 0.00394617, 1.4589603 , 1.9337242 , 0.44936267,
+           1.3264314 , 1.4840508 , 1.6970023 , 0.84583575, 0.60655886],
+          dtype=float32)}
+
+    ...
+
+    AssertionError: Analytical and numerical grads wrt x differ too much
+    analytical grad = [1.7988799e+00 2.5769596e-03 1.4257993e+03 2.3194065e+00 4.2980734e-01
+     1.2035031e+01 7.6254421e-01 3.7459390e+00 1.3886802e+00 9.9667716e-01]
+    numerical grad = [1.7948151e+00 1.9073486e-03 9.9268610e+02 2.3174286e+00 4.2915344e-01
+     1.1980057e+01 7.6198578e-01 3.7412643e+00 1.3866425e+00 9.9563599e-01]
+    distance > atol*sqrt(n) + rtol*grad_norm
+    distance 433.11322021484375 > 0.01*3.1622776601683795 + 0.1*992.7716674804688
+
+In this case the largest difference is in the 2nd position (starting
+from 0), which corresponds to input value ``1.0232578e-03``. This value
+is too close to the singularity, so the numerical derivative gets too
+imprecise. The solution is to shrink the range for ``x``; here, for
+example, ``(0.002, 2.0)`` turned out to be enough. Don’t forget to run
+lots of tests, so that other people don’t get false positives.
+
+.. code:: python
+
+    for _ in range(100):
+        check_function(y, in_range={x: (0.002, 2.0)}, dtype=dtype, shape=(1, 3, 32, 32),
+                       numerical_grads=True, only_targets=['llvm'])
+
+If you need more precise control over which values get passed to the
+checking function, you can use ``values={x: ...}``:
+
+.. code:: python
+
+    x_val = np.array([1.2594858e+00, 1.0960974e-01, 1.4975418e+00, 6.3585603e-01,
+                      1.2692513e-03, 1.0227472e+00, 9.4656967e-02, 5.5306298e-01,
+                      1.4142460e+00, 1.2631655e-01], dtype=np.float32)
+    check_function(y, values={x: x_val}, dtype=dtype, shape=shape,
+                   numerical_grads=True, only_targets=['llvm'])
diff --git a/docs/api/python/nnvm/top.rst b/docs/api/python/nnvm/top.rst
new file mode 100644
index 000000000000..ff946e7639c8
--- /dev/null
+++ b/docs/api/python/nnvm/top.rst
@@ -0,0 +1,30 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+..  http://www.apache.org/licenses/LICENSE-2.0
+
+..
Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+nnvm.top
+--------
+.. automodule:: nnvm.top
+
+.. autofunction:: register_compute
+
+.. autofunction:: register_schedule
+
+.. autofunction:: register_pattern
+
+
+.. autoclass:: nnvm.top.AttrDict
+   :members:
diff --git a/docs/conf.py b/docs/conf.py
index a098ad4e4d55..c4410e5864f9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -43,6 +43,7 @@
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../python/'))
sys.path.insert(0, os.path.join(curr_path, '../topi/python'))
+sys.path.insert(0, os.path.join(curr_path, '../nnvm/python'))
sys.path.insert(0, os.path.join(curr_path, '../vta/python'))
# -- General configuration ------------------------------------------------
@@ -59,6 +60,7 @@
    '.md': CommonMarkParser
}
os.environ['TVM_BUILD_DOC'] = '1'
+os.environ['NNVM_BUILD_DOC'] = '1'
# Version information.
import tvm
version = tvm.__version__
diff --git a/docs/deploy/android.md b/docs/deploy/android.md
index 788ab412db62..daf023c38042 100644
--- a/docs/deploy/android.md
+++ b/docs/deploy/android.md
@@ -20,15 +20,18 @@
## Build model for Android Target
-Relay compilation of model for android target could follow same approach like android_rpc.
-The code below will save the compilation output which is required on android target.
+NNVM compilation of a model for an Android target can follow the same approach as android_rpc.
+
+A reference example can be found at [chainer-nnvm-example](https://github.com/tkat0/chainer-nnvm-example)
+
+The above example runs the compiled model directly on an RPC target. The modification below at [run_mobile.py](https://github.com/tkat0/chainer-nnvm-example/blob/5b97fd4d41aa4dde4b0aceb0be311054fb5de451/run_mobile.py#L64) will save the compilation output that is required on the Android target.
```
lib.export_library("deploy_lib.so", ndk.create_shared)
with open("deploy_graph.json", "w") as fo:
    fo.write(graph.json())
with open("deploy_param.params", "wb") as fo:
-    fo.write(relay.save_param_dict(params))
+    fo.write(nnvm.compiler.save_param_dict(params))
```
deploy_lib.so, deploy_graph.json, deploy_param.params will go to android target.
diff --git a/docs/deploy/index.rst b/docs/deploy/index.rst
index 9a30b96ca66e..db50865008a2 100644
--- a/docs/deploy/index.rst
+++ b/docs/deploy/index.rst
@@ -67,4 +67,5 @@ target device without relying on RPC. see the following resources on how to do s
   cpp_deploy
   android
+   nnvm
   integrate
diff --git a/docs/deploy/nnvm.md b/docs/deploy/nnvm.md
new file mode 100644
index 000000000000..650912231b12
--- /dev/null
+++ b/docs/deploy/nnvm.md
@@ -0,0 +1,196 @@
+ + + + + + + + + + + + + + + + +
+# Deploy NNVM Modules
+NNVM compiled modules are fully embedded in the TVM runtime as long as the ```GRAPH_RUNTIME``` option
+is enabled in the tvm runtime.
+
+
+In a nutshell, we will need three items to deploy a compiled module.
+Check out our tutorials on getting started with the NNVM compiler for more details.
+
+- The graph json data which contains the execution graph.
+- The tvm module library of compiled functions.
+- The parameter blobs for stored parameters.
+
+We can then use TVM's runtime API to deploy the compiled module.
+Here is an example in python.
+
+```python
+import tvm
+
+# tvm module for compiled functions.
+loaded_lib = tvm.module.load("deploy.so")
+# json graph
+loaded_json = open(temp.relpath("deploy.json")).read()
+# parameters in binary
+loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read())
+
+fcreate = tvm.get_global_func("tvm.graph_runtime.create")
+ctx = tvm.gpu(0)
+gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id)
+set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"]
+set_input("x", tvm.nd.array(x_np))
+gmodule["load_params"](loaded_params)
+run()
+out = tvm.nd.empty(shape)
+get_output(0, out)
+print(out.asnumpy())
+```
+
+An example in C++:
+```cpp
+#include <dlpack/dlpack.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/registry.h>
+#include <tvm/runtime/packed_func.h>
+
+#include <algorithm>
+#include <fstream>
+#include <iterator>
+#include <stdexcept>
+#include <string>
+
+int main()
+{
+    // tvm module for compiled functions
+    tvm::runtime::Module mod_syslib = tvm::runtime::Module::LoadFromFile("deploy.so");
+
+    // json graph
+    std::ifstream json_in("deploy.json", std::ios::in);
+    std::string json_data((std::istreambuf_iterator<char>(json_in)), std::istreambuf_iterator<char>());
+    json_in.close();
+
+    // parameters in binary
+    std::ifstream params_in("deploy.params", std::ios::binary);
+    std::string params_data((std::istreambuf_iterator<char>(params_in)), std::istreambuf_iterator<char>());
+    params_in.close();
+
+    // parameters need to be TVMByteArray type to indicate the binary data
+    TVMByteArray params_arr;
+    params_arr.data = params_data.c_str();
+    params_arr.size = params_data.length();
+
+    int dtype_code = kDLFloat;
+    int dtype_bits = 32;
+    int dtype_lanes = 1;
+    int device_type = kDLCPU;
+    int device_id = 0;
+
+    // get global function module for graph runtime
+    tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))(json_data, mod_syslib, device_type, device_id);
+
+    DLTensor* x;
+    int in_ndim = 4;
+    int64_t in_shape[4] = {1, 3, 224, 224};
+    TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
+    // load image data saved in binary
+    const std::string data_filename = "cat.bin";
+    std::ifstream data_fin(data_filename, std::ios::binary);
+    if(!data_fin) throw std::runtime_error("Could not open: " + data_filename);
+    data_fin.read(static_cast<char*>(x->data), 3 * 224 * 224 * 4);
+
+    // get the function from the module(set input data)
+    tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");
+    set_input("data", x);
+
+    // get the function from the module(load parameters)
+    tvm::runtime::PackedFunc load_params = mod.GetFunction("load_params");
+    load_params(params_arr);
+
+    // get the function from the module(run it)
+    tvm::runtime::PackedFunc run = mod.GetFunction("run");
+    run();
+
+    DLTensor* y;
+    int out_ndim = 2;
+    int64_t out_shape[2] = {1, 1000, };
+    TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
+
+    // get the function from the module(get output data)
+    tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");
+    get_output(0, y);
+
+    // get the maximum position in output vector
+    auto y_iter = static_cast<float*>(y->data);
+    auto max_iter = std::max_element(y_iter, y_iter + 1000);
+    auto max_index = std::distance(y_iter, max_iter);
+    std::cout << "The maximum position in output vector is: " << max_index << std::endl;
+
+    TVMArrayFree(x);
+    TVMArrayFree(y);
+
+    return 0;
+}
+```
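Back in Python, the same three artifacts can also be driven through the convenience wrapper ``tvm.contrib.graph_runtime`` instead of fetching ``tvm.graph_runtime.create`` by hand. A minimal sketch under the same assumptions (the file names and the zero-filled input are placeholders):

```python
import numpy as np
import tvm
from tvm.contrib import graph_runtime

# Same three artifacts as above; the paths are placeholders.
loaded_lib = tvm.module.load("deploy.so")
loaded_json = open("deploy.json").read()
loaded_params = bytearray(open("deploy.params", "rb").read())

# graph_runtime.create wraps the "tvm.graph_runtime.create" global function.
module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
module.load_params(loaded_params)
module.set_input("x", tvm.nd.array(np.zeros((1, 3, 224, 224), dtype="float32")))
module.run()
print(module.get_output(0).asnumpy())
```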
+
+## Deploy as System Module
+C++ additionally supports deployment as a system module.
+This process needs a few additional options, given below, for the NNVM build.
+
+- For an llvm target, append --system-lib: ```target=llvm --system-lib```
+- For a GPU build (or a non-llvm target), the additional option should be given to target_host: ```target_host=llvm --system-lib```
+
+Module export requires an additional option to save instead of compile: ```lib.export_library(path, fcompile=False)```
+
+The output of the above API is a tar-compressed file containing an object file ```(lib.o)``` and a cpp source file ```(devc.cc)``` which embeds the device blob. These two files should be compiled along with other files or objects while building the c++ application.
+Please refer to [Makefile](https://github.com/apache/incubator-tvm/tree/master/apps/howto_deploy/Makefile#L32) for a reference.
+
+The c++ code to load this system module requires the change below.
+
+```cpp
+  // tvm module for compiled functions
+  tvm::runtime::Module mod_syslib = (*tvm::runtime::Registry::Get("module._GetSystemLib"))();
+```
+
+Depending on the build environment, the system object and the device blob source should be included in the final executable. An example with a bazel build is given below.
+```bash
+cc_library(
+    name = "host_module",
+    srcs = ["lib.o"],
+    alwayslink=1
+)
+
+cc_library(
+    name = "device_module",
+    srcs = ["devc.cc"],
+    alwayslink=1
+)
+
+cc_library(
+    name = "tvm_runtime",
+    srcs = ["libtvm_runtime_pack.cc"],
+)
+
+cc_binary(
+    name = "bazel_deploy",
+    srcs = ["cpp_deploy.cc"],
+    deps = [
+        ":tvm_runtime", ":host_module", ":device_module"
+    ],
+    linkopts = [ "-lpthread -ldl" ]
+)
+
+```
+
+This build directive creates
+- a new library ```host_module``` out of ```lib.o```
+- a new library ```device_module``` out of ```devc.cc```
+
+These intermediate modules can be used as dependencies of the final deploy application.
+
+In bazel, ```alwayslink=1``` enforces embedding the entire lib into the application (even though it doesn't call any API from this module).
diff --git a/docs/dev/codebase_walkthrough.rst b/docs/dev/codebase_walkthrough.rst
index 7e78d5753027..ffda632f738f 100644
--- a/docs/dev/codebase_walkthrough.rst
+++ b/docs/dev/codebase_walkthrough.rst
@@ -16,7 +16,7 @@ under the License.
=======================================
-TVM Codebase Walkthrough by Example
+**TVM Codebase Walkthrough by Example**
=======================================
Getting to know a new codebase can be a challenge. This is especially true for a codebase like that of TVM, where different components interact in non-obvious ways. In this guide, we try to illustrate the key elements that comprise a compilation pipeline with a simple example. For each important step, we show where in the codebase it is implemented. The purpose is to let new developers and interested users dive into the codebase more quickly.
@@ -28,13 +28,16 @@ Codebase Structure Overview
At the root of the TVM repository, we have following subdirectories that together comprise a bulk of the codebase.
- ``src`` - C++ code for operator compilation and deployment runtimes.
-- ``src/relay`` - Implementation of Relay, a new functional IR for deep learning framework.
+- ``src/relay`` - Implementation of Relay, a new IR for deep learning frameworks, superseding ``nnvm`` below.
- ``python`` - Python frontend that wraps C++ functions and objects implemented in ``src``.
- ``topi`` - Compute definitions and backend schedules for standard neural network operators.
+- ``nnvm`` - C++ code and Python frontend for graph optimization and compilation. After the introduction of Relay, it remains in the codebase for backward compatibility.
Using standard Deep Learning terminology, ``src/relay`` is the component that manages a computational graph, and nodes in a graph are compiled and executed using infrastructure implemented in the rest of ``src``. ``python`` provides python bindings for the C++ API and driver code that users can use to execute compilation. Operators corresponding to each node are registered in ``src/relay/op``. Implementations of operators are in ``topi``, and they are coded in either C++ or Python.

-When a user invokes graph compilation by ``relay.build(...)``, the following sequence of actions happens for each node in the graph:
+Relay is the new IR for deep networks that is intended to replace NNVM. If you have used NNVM, Relay provides equivalent or better functionality. In fact, Relay goes beyond the traditional way of thinking about deep networks in terms of computational graphs. For the purposes of this document, though, we can think of Relay as a traditional computational graph framework. You can read more about Relay `here `_.
+
+When a user invokes graph compilation by ``relay.build(...)`` (or ``nnvm.compiler.build(...)`` for the older API), the following sequence of actions happens for each node in the graph:

- Look up an operator implementation by querying the operator registry
- Generate a compute expression and a schedule for the operator
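To make the "compute expression and schedule" step above concrete, here is a toy sketch using the low-level TVM API of this release (the elementwise-doubling operator and the names ``A``/``B`` are illustrative only, not taken from the codebase):

.. code:: python

    import tvm

    # A toy "operator": elementwise doubling.
    n = tvm.var("n")
    A = tvm.placeholder((n,), name="A")
    B = tvm.compute((n,), lambda i: A[i] * 2.0, name="B")

    # A trivial schedule for it, then lowering to the IR that
    # the rest of ``src`` compiles for the target.
    s = tvm.create_schedule(B.op)
    print(tvm.lower(s, [A, B], simple_mode=True))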
- ``num_inputs`` - Number of inputs for this node - ``num_outputs`` - Number of outputs this node produces @@ -82,7 +82,7 @@ Example of dumped graph: "name": "relu0", # Name of the node "attrs": { # Attributes of the node "flatten_data": "0", # Whether this data need to be flattened - "func_name": "fuse_l2_normalize_relu", # Fused function name, corresponds to the symbol in the lib generated by compilation process + "func_name": "fuse_l2_normalize_relu", # Fused function name, corresponds to the symbol in the lib generated by NNVM compilation process "num_inputs": "1", # Number of inputs for this node "num_outputs": "1" # Number of outputs this node produces }, @@ -105,8 +105,8 @@ Example of dumped graph: } } -2. Tensor dumping -================= +**2. Tensor dumping** +===================== The tensor received after execution is in ``tvm.ndarray`` type. All the tensors will be saved as binary bytes in serialized format. The result binary bytes can be loaded by the @@ -155,7 +155,7 @@ folder specified while creating the runtime. Sample Output *************************************** -The below is the an example output of the debugger. +The below is the output of running ``tvm/nnvm/tutorials/from_onnx.py`` with debugger. :: diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 9f46c39ce0a7..983d04f93fe3 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -34,4 +34,6 @@ In this part of documentation, we share the rationale for the specific choices m virtual_machine codebase_walkthrough inferbound + nnvm_json_spec + nnvm_overview benchmark diff --git a/docs/dev/nnvm_json_spec.rst b/docs/dev/nnvm_json_spec.rst new file mode 100644 index 000000000000..60d1b9b12bfe --- /dev/null +++ b/docs/dev/nnvm_json_spec.rst @@ -0,0 +1,229 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +NNVM Graph JSON Specification +============================= + +NNVM uses JSON for graph serialization. This allows NNVM graph to be +exported to any backend either natively supported or by third-party +without any dependency such as protobuf. + +Getting started +--------------- + +A serialized NNVM graph in JSON format can be deserialized by any JSON +parser. + +.. code:: python + + # python + import json + with open('model.json', 'r') as f: + graph = json.loads(f.read()) + print(graph.keys()) + +``['nodes', 'arg_nodes', 'heads', 'node_row_ptr']`` + +Actually, the following keys are valid in JSON graph. + ++--------------------------------------+------------+-----------------------------------+ +| Keys | Required | Description | ++======================================+============+===================================+ +| `nodes <#nodes>`__ | Yes | The nodes in graph. 
++-------------------------------------+----------+---------------------------------+
+| Keys                                | Required | Description                     |
++=====================================+==========+=================================+
+| `nodes <#nodes>`__                  | Yes      | The nodes in the graph.         |
++-------------------------------------+----------+---------------------------------+
+| `arg\_nodes <#arg_nodes>`__         | Yes      | Indices of input nodes.         |
++-------------------------------------+----------+---------------------------------+
+| `heads <#heads>`__                  | Yes      | Indices of output nodes.        |
++-------------------------------------+----------+---------------------------------+
+| `node\_row\_ptr <#node_row_ptr>`__  | Optional | Depth first search row indices. |
++-------------------------------------+----------+---------------------------------+
+| `attrs <#attrs>`__                  | Optional | Additional information.         |
++-------------------------------------+----------+---------------------------------+
+
+nodes
+-----
+
+As the name suggests, ``nodes`` are either placeholders or computational
+nodes in the NNVM graph. The ``nodes`` are stored as a list.
+
+.. code:: python
+
+    nodes = graph['nodes']
+    print(len(nodes))
+    print(nodes[0])
+    print(nodes[3])
+
+::
+
+    53
+    {'inputs': [], 'name': 'data', 'op': 'null'}
+    {'inputs': [[0, 0, 0], [1, 0, 0], [2, 0, 0]], 'attrs': {'channels': '64',
+    'padding': '(1, 1)', 'layout': 'NCHW', 'kernel_size': '[3, 3]', 'groups': '1',
+    'strides': '(1, 1)', 'use_bias': 'True', 'dilation': '(1, 1)'},
+    'name': 'conv1_1', 'op': 'conv2d'}
+
+The following keys are valid in each node:
+
++---------------+----------+--------------------------------------------------------------+
+| Keys          | Required | Description                                                  |
++===============+==========+==============================================================+
+| op            | Yes      | The operator type name; 'null' is used if it is a            |
+|               |          | placeholder/variable/input node.                             |
++---------------+----------+--------------------------------------------------------------+
+| name          | Yes      | The given name of the node, defined by the user              |
+|               |          | composing the network.                                       |
++---------------+----------+--------------------------------------------------------------+
+| inputs        | Yes      | List of entries of the input nodes; can be an empty          |
+|               |          | list []. Each entry is a list of [node\_id, index, version]. |
++---------------+----------+--------------------------------------------------------------+
+| attrs         | Optional | Extra attributes for the specific operator.                  |
++---------------+----------+--------------------------------------------------------------+
+| control\_deps | Optional | Control dependencies; left blank unless specifically used.   |
++---------------+----------+--------------------------------------------------------------+
+
+``attrs`` for operators is a dictionary. Key-value pair examples:
+
++----------------+----------+----------+-------------------------------------------------+
+| Keys           | Value    | Operator | Description                                     |
++================+==========+==========+=================================================+
+| 'channels'     | '64'     | conv2d   | Output channels for 2d convolution.             |
++----------------+----------+----------+-------------------------------------------------+
+| 'kernel\_size' | '[3, 3]' | conv2d   | Convolution filter kernel size in (h, w);       |
+|                |          |          | list and tuple both work.                       |
++----------------+----------+----------+-------------------------------------------------+
+| 'use\_bias'    | '1'      | conv2d   | Whether to use bias such that `y = w * x + b`.  |
++----------------+----------+----------+-------------------------------------------------+
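+
+Since keys and values are both stored as strings (see the note below), a
+consumer has to convert attribute values back to numbers or tuples. A minimal
+sketch, reusing the ``conv1_1`` node printed above (the variable names here
+are illustrative only):
+
+.. code:: python
+
+    import ast
+
+    node = graph['nodes'][3]                         # the conv2d node shown above
+    attrs = node['attrs']
+    channels = int(attrs['channels'])                # '64' -> 64
+    kernel = ast.literal_eval(attrs['kernel_size'])  # '[3, 3]' -> [3, 3]
+    strides = ast.literal_eval(attrs['strides'])     # '(1, 1)' -> (1, 1)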
+
+.. note::
+
+   Tips for parsing key-value pairs:
+
+   * Both key and value are stored as strings.
+
+   * Boolean values need extra attention: converting to int first is recommended, since ``bool('0') == True`` in Python.
+
+   * For a full list of operator attributes, please refer to the core operator `documentation `__.
+
+arg\_nodes
+----------
+
+``arg_nodes`` is a list of indices of the nodes that are
+placeholders/variables/inputs to the graph.
+
+.. code:: python
+
+    print(graph['arg_nodes'])
+
+::
+
+    [0, 1, 2, 6, 7, 11, 12, 15, 16, 20, 21, 24, 25, 29, 30, 33, 34, 39, 40, 44, 45, 49, 50]
+
+For example, ``nodes[3]`` is not in ``arg_nodes`` because it's an
+internal node.
+
+heads
+-----
+
+``heads`` is a list of entries that are the outlets/outputs of the graph.
+
+.. code:: python
+
+    print(graph['heads'])
+
+::
+
+    [[52, 0, 0]]
+
+This example indicates that there's only one output in the graph, with
+index 52.
+
+node\_row\_ptr
+--------------
+
+``node_row_ptr`` stores the history of the forward path, so you can skip
+constructing the entire graph in inference tasks.
+
+attrs
+-----
+
+``attrs`` can contain version numbers or similar helpful information.
diff --git a/docs/dev/nnvm_overview.md b/docs/dev/nnvm_overview.md
new file mode 100644
index 000000000000..b4a8ee7ccb9f
--- /dev/null
+++ b/docs/dev/nnvm_overview.md
@@ -0,0 +1,143 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# NNVM Design Overview
+
+NNVM is a reusable graph IR stack for deep learning systems. It provides useful APIs to construct, represent, and transform computation graphs, enabling most of the high-level optimizations needed in deep learning.
+As part of the TVM stack for deep learning, NNVM also provides a shared compiler that deep learning frameworks can use to optimize, compile, and deploy models to different hardware backends via [TVM](https://github.com/apache/incubator-tvm)
+
+## Key Requirements and Design Choices
+
+- Have minimal dependencies in the deployment module.
+- Be able to add new operators to the IR in a decentralized fashion.
+- Be able to add new optimization passes to the IR and apply them to existing graphs.
+
+Items 2 and 3 are particularly interesting if we compare them to a typical compiler IR. A compiler IR usually contains a fixed set of primitives (instructions) and uses them as a contract between optimization pass designers. This design enables easy addition of new optimization passes, but not of new operators (instructions), because every time we add a new instruction, we need to modify the passes to accommodate the change.
+
+Deep learning frameworks usually have a fixed operator interface (schema). Such an interface can contain properties like the shape inference function or whether in-place computation can happen. The operator interface is again a contract that makes it easy to add a new operator, but it makes it hard to add new passes in a decentralized fashion: a new optimization pass usually requires additional information, and this results in frequent changes to the centralized operator interface when we are exploring new optimizations. There is also a drawback for modularization; for example, a graph compiler for FPGA devices may not need the GPU-specific attributes.
+
+During our explorations in graph optimization and compilation, we found that it is important to be able to quickly add both operators and passes to the framework without changing the core library.
+
+Here is a list of key elements in NNVM's design:
+
+- An operator registry system to register and add new operators
+- An operator attribute system to provide properties of operators in a decentralized fashion
+- A reusable IR data structure for optimization passes.
+
+The above list is the generic, language-like part of NNVM; besides that, we also provide a collection of core operator primitives and graph optimization passes. The core tensor operator primitives and optimizations already cover common deep learning workloads. This design allows the NNVM compiler to be used directly as the optimization and compilation stack for frameworks. The extensible nature of NNVM makes new adjustments easy without constraining the backend providers.
+
+## Minimum Registration for a Symbolic Front-End
+To use NNVM to build a language front end, a developer only needs to register minimal information about each operator.
+
+```c++
+NNVM_REGISTER_OP(add)
+.describe("add two data together")
+.set_num_inputs(2);
+
+NNVM_REGISTER_OP(conv2d)
+.describe("take 2d convolution of input")
+.set_num_inputs(2);
+
+NNVM_REGISTER_OP(assign)
+.describe("assign second input argument to the first one")
+.set_num_inputs(2);
+```
+
+After compiling the code against the NNVM library, users can compose the computation graph in Python, as in the following code.
+
+```python
+import nnvm.symbol as nn
+
+# symbolic variable
+x = nn.Variable('x')
+y = nn.Variable('y')
+w = nn.Variable('w')
+
+z = nn.conv2d(nn.elemwise_add(x, y), w, kernel_size=(2,2), name='conv1')
+```
+
+The graph structure is interchangeable between the front end and the backend. A Python interface is supported currently; more language support can easily be added in the future.
+
+## Operator Attribute for More Extensions
+
+The minimal information provided by the operator is enough to build a front end. However, we need more knowledge about each operator to transform and execute the graph.
+A typical difference between a neural net's computation graph and a traditional compiler IR is that there are a lot more high-level operators. We cannot fix the set of operators in the IR.
+
+NNVM allows developers to register attributes of each operator. The attributes can include the shape inference function, whether the operator can perform in-place calculation, etc.
+
+This design of having an operator attribute registry is not uncommon in deep learning systems.
+For example, MXNet has an ```OpProperty``` class, TensorFlow has an ```OpDef``` and Caffe2 has an ```OperatorSchema``` class.
+However, the operator attribute interface in these frameworks only supports a fixed set of attributes of interest to the system. If we want to extend the framework to add a new attribute to each operator, we need to change the operator registry.
+Eventually, the operator interface grows to be very big and has to evolve in the centralized repo.
+
+In NNVM, we decided to change the design and support arbitrary types of operator attributes, without changing the interface registry. The minimal interface also makes it easier to share across multiple projects.
+
+Users can register a new attribute, such as an in-place property checking function, as follows.
+```c++
+using FInplaceOption = std::function<
+  std::vector<std::pair<int, int> > (const NodeAttrs& attrs)>;
+
+// we can register attributes from multiple places.
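+// (note: set_attr is templated on the attribute type, so each registration
+//  below type-checks against the FInplaceOption signature declared above;
+//  these calls may live in separate files or even separate projects.)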
+NNVM_REGISTER_OP(elemwise_add)
+.set_num_inputs(2);
+
+// register to tell that the first input can be computed in place with the first output
+NNVM_REGISTER_OP(add)
+.set_attr<FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs) {
+  return std::vector<std::pair<int, int> >{{0, 0}};
+});
+
+NNVM_REGISTER_OP(exp)
+.set_num_inputs(1)
+.set_attr<FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs) {
+  return std::vector<std::pair<int, int> >{{0, 0}};
+});
+```
+
+We can query these attributes from arbitrary parts of the code, as in the following example. Under the hood, each attribute is stored in a columnar store that can easily be retrieved as a table for quick lookups.
+
+```c++
+void MyFunction() {
+  const Op* add = Op::Get("add");
+  // if we need quick query, we can use static variable
+  // attribute map contains attributes of all operators.
+  static auto& finplace_option_map = Op::GetAttr<FInplaceOption>("FInplaceOption");
+
+  // quick lookup of add's attribute: O(1) time, vector index lookup internally.
+  auto add_inplace = finplace_option_map[add];
+}
+```
+Besides keeping the code minimal, this attribute store enables decentralization of projects.
+Before, all the attributes of an operator had to sit in a centralized interface class.
+Now, everyone can register attributes of their own, and take the other attributes they need from another project, without changing the operator interface and core library.
+
+
+## Graph and Pass
+
+We can use the additional information in the attribute registry to do optimizations and get more information about the graph. The Graph is the unit we manipulate in these steps. A Graph in NNVM contains
+two parts:
+- The computation graph structure
+- An attribute map from string to any type: ```map<string, shared_ptr<any> >```
+
+The second part, the attribute map, is quite important, as we may need different kinds
+of information about the graph during the transformation process, be it the
+shapes of each tensor, the types of each tensor, or the storage allocation plan.
+
+A ```Pass``` can take a graph with existing attribute information
+and transform it into the same graph structure with more graph attributes, or into another graph.
diff --git a/docs/faq.md b/docs/faq.md
index f070ed59a575..3161e3bff082 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -26,7 +26,7 @@ See [Installation](http://docs.tvm.ai/install/)
 TVM's relation to Other IR/DSL Projects
 ---------------------------------------
 There are usually two levels of abstractions of IR in the deep learning systems.
-TensorFlow's XLA and Intel's ngraph uses computation graph representation.
+NNVM, TensorFlow's XLA and Intel's ngraph use a computation graph representation.
 This representation is high level, and can be helpful to perform generic optimizations
 such as memory reuse, layout transformation and automatic differentiation.
diff --git a/docs/frontend/tensorflow.rst b/docs/frontend/tensorflow.rst
index 87341ab6b7c6..c2fefedfebf7 100644
--- a/docs/frontend/tensorflow.rst
+++ b/docs/frontend/tensorflow.rst
@@ -97,7 +97,7 @@ Import the Model
 Explicit Shape:
 ~~~~~~~~~~~~~~~

-To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases `_ for examples.
+To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases `_ for examples.
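+
+A minimal sketch of the above (the ``graph_def`` and the input name ``'input'`` here are placeholders for your own model):
+
+.. code:: python
+
+    import tvm.relay as relay
+
+    # map the graph's input tensor name to its concrete shape
+    shape_dict = {'input': (1, 224, 224, 3)}
+    mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict)
+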
Data Layout
~~~~~~~~~~~
diff --git a/docs/index.rst b/docs/index.rst
index 258547a34acd..f02dcc7c91e2 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -45,6 +45,7 @@ Developer Guide
    :maxdepth: 2

    dev/index
+   nnvm_top

 Frontends
----------------
diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst
index b1b780b28c0d..acbd9b413d74 100644
--- a/docs/install/from_source.rst
+++ b/docs/install/from_source.rst
@@ -62,7 +62,8 @@ The minimal building requirements are
 - CMake 3.5 or higher
 - We highly recommend to build with LLVM to enable all the features.
 - If you want to use CUDA, CUDA toolkit version >= 8.0 is required. If you are upgrading from an older version, make sure you purge the older version and reboot after installation.
-
+- It is possible to build TVM without the LLVM dependency if you only want to use CUDA/OpenCL.
+- If you want to use the NNVM compiler, then LLVM is required.

 We use cmake to build the library.
 The configuration of TVM can be modified by `config.cmake`.
@@ -106,14 +107,6 @@ The configuration of TVM can be modified by `config.cmake`.
       cmake ..
       make -j4
-
-  You can also use Ninja build system instead of Unix Makefiles. It can be faster to build than using Makefiles.
-
-  .. code:: bash
-
-      cd build
-      cmake .. -G Ninja
-      ninja
-
 If everything goes well, we can go to :ref:`python-package-installation`

Building on Windows
@@ -131,6 +124,7 @@ In order to generate the VS solution file using cmake, make sure you have a rece
 This will generate the VS project using the MSVC 14 64 bit generator. Open the .sln file in the build directory and build with Visual Studio.
 In order to build with LLVM in windows, you will need to build LLVM from source.
+You need to build nnvm by running the same script under the nnvm folder.

Building ROCm support
~~~~~~~~~~~~~~~~~~~~~
@@ -163,7 +157,7 @@ Method 1

    .. code:: bash

       export TVM_HOME=/path/to/tvm
-      export PYTHONPATH=$TVM_HOME/python:$TVM_HOME/topi/python:${PYTHONPATH}
+      export PYTHONPATH=$TVM_HOME/python:$TVM_HOME/topi/python:$TVM_HOME/nnvm/python:${PYTHONPATH}

 Method 2
@@ -178,6 +172,7 @@ Method 2
       export MACOSX_DEPLOYMENT_TARGET=10.9  # This is required for mac to avoid symbol conflicts with libstdc++
       cd python; python setup.py install --user; cd ..
       cd topi/python; python setup.py install --user; cd ../..
+      cd nnvm/python; python setup.py install --user; cd ../..

 Python dependencies
diff --git a/docs/langref/relay_op.rst b/docs/langref/relay_op.rst
index 1fabd704482c..fc77869a6261 100644
--- a/docs/langref/relay_op.rst
+++ b/docs/langref/relay_op.rst
@@ -71,13 +71,10 @@ This level enables typical convnet models.
    tvm.relay.nn.conv2d_transpose
    tvm.relay.nn.dense
    tvm.relay.nn.max_pool2d
-   tvm.relay.nn.max_pool3d
    tvm.relay.nn.avg_pool2d
-   tvm.relay.nn.avg_pool3d
    tvm.relay.nn.global_max_pool2d
    tvm.relay.nn.global_avg_pool2d
    tvm.relay.nn.upsampling
-   tvm.relay.nn.upsampling3d
    tvm.relay.nn.batch_flatten
    tvm.relay.nn.pad
    tvm.relay.nn.lrn
@@ -249,13 +246,10 @@ Level 2 Definitions
.. autofunction:: tvm.relay.nn.conv2d_transpose
.. autofunction:: tvm.relay.nn.dense
.. autofunction:: tvm.relay.nn.max_pool2d
-.. autofunction:: tvm.relay.nn.max_pool3d
.. autofunction:: tvm.relay.nn.avg_pool2d
-.. autofunction:: tvm.relay.nn.avg_pool3d
.. autofunction:: tvm.relay.nn.global_max_pool2d
.. autofunction:: tvm.relay.nn.global_avg_pool2d
.. autofunction:: tvm.relay.nn.upsampling
-.. autofunction:: tvm.relay.nn.upsampling3d
.. autofunction:: tvm.relay.nn.batch_flatten
.. autofunction:: tvm.relay.nn.pad
.. autofunction:: tvm.relay.nn.lrn
diff --git a/docs/nnvm_top.rst b/docs/nnvm_top.rst
new file mode 100644
index 000000000000..8679cae9b256
--- /dev/null
+++ b/docs/nnvm_top.rst
@@ -0,0 +1,297 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+NNVM Core Tensor Operators
+==========================
+
+This page contains the list of core tensor operator primitives pre-defined in NNVM.
+The core tensor operator primitives (``nnvm.top``) cover typical workloads in deep learning.
+They can represent workloads in front-end frameworks, and provide basic building blocks for optimization.
+Since deep learning is a fast-evolving field, it is possible to encounter operators that are not listed here.
+NNVM is designed for this problem and can easily accommodate new operators without changing the core library.
+
+.. note::
+
+   Each operator node in the graph IR contains the following two kinds of parameters.
+
+   - inputs: positional list of input tensors
+   - attrs: attributes of the operator (e.g. kernel_size in conv2d)
+
+   This document lists both inputs and attributes in the parameter field. You can distinguish them by the marked type. The inputs are of type Tensor, while the remaining parameters are attributes.
+   To construct the graph with the NNVM Python API, a user can pass in the input Tensors as positional arguments, and attributes as keyword arguments.
+
+
+Overview of Operators
+---------------------
+**Level 1: Basic Operators**
+
+This level enables fully connected multi-layer perceptron.
+
+.. autosummary::
+   :nosignatures:
+
+   nnvm.symbol.dense
+   nnvm.symbol.relu
+   nnvm.symbol.prelu
+   nnvm.symbol.tanh
+   nnvm.symbol.sigmoid
+   nnvm.symbol.exp
+   nnvm.symbol.log
+   nnvm.symbol.sqrt
+   nnvm.symbol.logical_and
+   nnvm.symbol.logical_or
+   nnvm.symbol.logical_not
+   nnvm.symbol.elemwise_add
+   nnvm.symbol.elemwise_sub
+   nnvm.symbol.elemwise_mul
+   nnvm.symbol.elemwise_div
+   nnvm.symbol.elemwise_sum
+   nnvm.symbol.elemwise_mod
+   nnvm.symbol.elemwise_pow
+   nnvm.symbol.flatten
+   nnvm.symbol.concatenate
+   nnvm.symbol.expand_dims
+   nnvm.symbol.squeeze
+   nnvm.symbol.split
+   nnvm.symbol.dropout
+   nnvm.symbol.batch_norm
+   nnvm.symbol.softmax
+   nnvm.symbol.log_softmax
+   nnvm.symbol.pad
+   nnvm.symbol.block_grad
+   nnvm.symbol.matmul
+   nnvm.symbol.resize
+   nnvm.symbol.upsampling
+   nnvm.symbol.take
+   nnvm.symbol.l2_normalize
+   nnvm.symbol.flip
+   nnvm.symbol.lrn
+   nnvm.symbol.where
+   nnvm.symbol.gather_nd
+
+
+**Level 2: Convolutions**
+
+This level enables typical convnet models.
+
+.. autosummary::
+   :nosignatures:
+
+   nnvm.symbol.conv2d
+   nnvm.symbol.conv2d_transpose
+   nnvm.symbol.max_pool2d
+   nnvm.symbol.avg_pool2d
+   nnvm.symbol.global_max_pool2d
+   nnvm.symbol.global_avg_pool2d
+
+
+**Level 3: Additional Tensor Ops**
+
+.. 
autosummary:: + :nosignatures: + + nnvm.symbol.reshape + nnvm.symbol.copy + nnvm.symbol.negative + nnvm.symbol.floor + nnvm.symbol.ceil + nnvm.symbol.round + nnvm.symbol.trunc + nnvm.symbol.abs + nnvm.symbol.leaky_relu + nnvm.symbol.__add_scalar__ + nnvm.symbol.__sub_scalar__ + nnvm.symbol.__rsub_scalar__ + nnvm.symbol.__mul_scalar__ + nnvm.symbol.__div_scalar__ + nnvm.symbol.__rdiv_scalar__ + nnvm.symbol.__pow_scalar__ + nnvm.symbol.__rpow_scalar__ + nnvm.symbol.__lshift_scalar__ + nnvm.symbol.__rshift_scalar__ + + +**Level 4: Broadcast and Reductions** + +.. autosummary:: + :nosignatures: + + nnvm.symbol.transpose + nnvm.symbol.broadcast_to + nnvm.symbol.sum + nnvm.symbol.min + nnvm.symbol.max + nnvm.symbol.mean + nnvm.symbol.prod + nnvm.symbol.broadcast_add + nnvm.symbol.broadcast_sub + nnvm.symbol.broadcast_mul + nnvm.symbol.broadcast_div + nnvm.symbol.clip + nnvm.symbol.greater + nnvm.symbol.less + nnvm.symbol.expand_like + nnvm.symbol.reshape_like + nnvm.symbol.full + nnvm.symbol.full_like + nnvm.symbol.ones + nnvm.symbol.ones_like + nnvm.symbol.zeros + nnvm.symbol.zeros_like + nnvm.symbol.slice_like + nnvm.symbol.strided_slice + nnvm.symbol.argmax + nnvm.symbol.argmin + nnvm.symbol.collapse_sum + nnvm.symbol.broadcast_equal + nnvm.symbol.broadcast_greater_equal + nnvm.symbol.broadcast_greater + nnvm.symbol.broadcast_left_shift + nnvm.symbol.broadcast_less_equal + nnvm.symbol.broadcast_less + nnvm.symbol.broadcast_max + nnvm.symbol.broadcast_min + nnvm.symbol.broadcast_mod + nnvm.symbol.broadcast_not_equal + nnvm.symbol.broadcast_pow + nnvm.symbol.broadcast_right_shift + + +**Level 5: Vision Operators** + +.. autosummary:: + :nosignatures: + + nnvm.symbol.multibox_prior + nnvm.symbol.multibox_transform_loc + nnvm.symbol.nms + nnvm.symbol.yolo_region + nnvm.symbol.yolo_reorg + +Detailed Definitions +-------------------- +.. autofunction:: nnvm.symbol.dense +.. autofunction:: nnvm.symbol.relu +.. autofunction:: nnvm.symbol.prelu +.. autofunction:: nnvm.symbol.tanh +.. autofunction:: nnvm.symbol.sigmoid +.. autofunction:: nnvm.symbol.exp +.. autofunction:: nnvm.symbol.log +.. autofunction:: nnvm.symbol.sqrt +.. autofunction:: nnvm.symbol.logical_and +.. autofunction:: nnvm.symbol.logical_or +.. autofunction:: nnvm.symbol.logical_not +.. autofunction:: nnvm.symbol.elemwise_add +.. autofunction:: nnvm.symbol.elemwise_sub +.. autofunction:: nnvm.symbol.elemwise_mul +.. autofunction:: nnvm.symbol.elemwise_div +.. autofunction:: nnvm.symbol.elemwise_sum +.. autofunction:: nnvm.symbol.elemwise_mod +.. autofunction:: nnvm.symbol.elemwise_pow +.. autofunction:: nnvm.symbol.flatten +.. autofunction:: nnvm.symbol.concatenate +.. autofunction:: nnvm.symbol.expand_dims +.. autofunction:: nnvm.symbol.squeeze +.. autofunction:: nnvm.symbol.split +.. autofunction:: nnvm.symbol.dropout +.. autofunction:: nnvm.symbol.batch_norm +.. autofunction:: nnvm.symbol.softmax +.. autofunction:: nnvm.symbol.log_softmax +.. autofunction:: nnvm.symbol.pad +.. autofunction:: nnvm.symbol.block_grad +.. autofunction:: nnvm.symbol.matmul +.. autofunction:: nnvm.symbol.resize +.. autofunction:: nnvm.symbol.upsampling +.. autofunction:: nnvm.symbol.take +.. autofunction:: nnvm.symbol.l2_normalize +.. autofunction:: nnvm.symbol.flip +.. autofunction:: nnvm.symbol.lrn +.. autofunction:: nnvm.symbol.where +.. autofunction:: nnvm.symbol.gather_nd + +.. autofunction:: nnvm.symbol.conv2d +.. autofunction:: nnvm.symbol.conv2d_transpose +.. autofunction:: nnvm.symbol.max_pool2d +.. autofunction:: nnvm.symbol.avg_pool2d +.. 
autofunction:: nnvm.symbol.global_max_pool2d +.. autofunction:: nnvm.symbol.global_avg_pool2d + +.. autofunction:: nnvm.symbol.reshape +.. autofunction:: nnvm.symbol.copy +.. autofunction:: nnvm.symbol.negative +.. autofunction:: nnvm.symbol.floor +.. autofunction:: nnvm.symbol.ceil +.. autofunction:: nnvm.symbol.round +.. autofunction:: nnvm.symbol.trunc +.. autofunction:: nnvm.symbol.abs +.. autofunction:: nnvm.symbol.leaky_relu +.. autofunction:: nnvm.symbol.__add_scalar__ +.. autofunction:: nnvm.symbol.__sub_scalar__ +.. autofunction:: nnvm.symbol.__rsub_scalar__ +.. autofunction:: nnvm.symbol.__mul_scalar__ +.. autofunction:: nnvm.symbol.__div_scalar__ +.. autofunction:: nnvm.symbol.__rdiv_scalar__ +.. autofunction:: nnvm.symbol.__pow_scalar__ +.. autofunction:: nnvm.symbol.__rpow_scalar__ +.. autofunction:: nnvm.symbol.__lshift_scalar__ +.. autofunction:: nnvm.symbol.__rshift_scalar__ + +.. autofunction:: nnvm.symbol.transpose +.. autofunction:: nnvm.symbol.broadcast_to +.. autofunction:: nnvm.symbol.sum +.. autofunction:: nnvm.symbol.min +.. autofunction:: nnvm.symbol.max +.. autofunction:: nnvm.symbol.mean +.. autofunction:: nnvm.symbol.prod +.. autofunction:: nnvm.symbol.broadcast_add +.. autofunction:: nnvm.symbol.broadcast_sub +.. autofunction:: nnvm.symbol.broadcast_mul +.. autofunction:: nnvm.symbol.broadcast_div +.. autofunction:: nnvm.symbol.clip +.. autofunction:: nnvm.symbol.greater +.. autofunction:: nnvm.symbol.less +.. autofunction:: nnvm.symbol.expand_like +.. autofunction:: nnvm.symbol.reshape_like +.. autofunction:: nnvm.symbol.full +.. autofunction:: nnvm.symbol.full_like +.. autofunction:: nnvm.symbol.ones +.. autofunction:: nnvm.symbol.ones_like +.. autofunction:: nnvm.symbol.zeros +.. autofunction:: nnvm.symbol.zeros_like +.. autofunction:: nnvm.symbol.slice_like +.. autofunction:: nnvm.symbol.strided_slice +.. autofunction:: nnvm.symbol.argmax +.. autofunction:: nnvm.symbol.argmin +.. autofunction:: nnvm.symbol.collapse_sum +.. autofunction:: nnvm.symbol.broadcast_equal +.. autofunction:: nnvm.symbol.broadcast_greater_equal +.. autofunction:: nnvm.symbol.broadcast_greater +.. autofunction:: nnvm.symbol.broadcast_left_shift +.. autofunction:: nnvm.symbol.broadcast_less_equal +.. autofunction:: nnvm.symbol.broadcast_less +.. autofunction:: nnvm.symbol.broadcast_max +.. autofunction:: nnvm.symbol.broadcast_min +.. autofunction:: nnvm.symbol.broadcast_mod +.. autofunction:: nnvm.symbol.broadcast_not_equal +.. autofunction:: nnvm.symbol.broadcast_pow +.. autofunction:: nnvm.symbol.broadcast_right_shift + +.. autofunction:: nnvm.symbol.multibox_prior +.. autofunction:: nnvm.symbol.multibox_transform_loc +.. autofunction:: nnvm.symbol.nms +.. autofunction:: nnvm.symbol.yolo_region +.. 
autofunction:: nnvm.symbol.yolo_reorg diff --git a/include/tvm/attrs.h b/include/tvm/attrs.h index 8810c4e4a0df..2fbb9e6a866e 100644 --- a/include/tvm/attrs.h +++ b/include/tvm/attrs.h @@ -159,7 +159,7 @@ class AttrsEqual { bool operator()(const std::string& lhs, const std::string& rhs) const { return lhs == rhs; } - bool operator()(const DataType& lhs, const DataType& rhs) const { + bool operator()(const Type& lhs, const Type& rhs) const { return lhs == rhs; } // node comparator @@ -506,8 +506,8 @@ inline void SetValue(std::string* ptr, const TVMArgValue& val) { } } template<> -inline void SetValue(DataType* ptr, const TVMArgValue& val) { - *ptr = val.operator DataType(); +inline void SetValue(Type* ptr, const TVMArgValue& val) { + *ptr = val.operator Type(); } template<> inline void SetValue(double* ptr, const TVMArgValue& val) { @@ -611,7 +611,7 @@ struct TypeName { }; template<> -struct TypeName { +struct TypeName { static constexpr const char* value = "Type"; }; diff --git a/include/tvm/buffer.h b/include/tvm/buffer.h index fac18a9b1753..d2c2b40661e2 100644 --- a/include/tvm/buffer.h +++ b/include/tvm/buffer.h @@ -74,16 +74,14 @@ class Buffer : public NodeRef { * \param content_lanes The number of lanes for the (data) type. * \param offset The offset of ptr. */ - TVM_DLL Expr access_ptr(int access_mask, - DataType ptr_type = DataType::Handle(), - int content_lanes = 1, - Expr offset = make_const(DataType::Int(32), 0)) const; + TVM_DLL Expr access_ptr(int access_mask, Type ptr_type = Handle(), + int content_lanes = 1, Expr offset = make_const(Int(32), 0)) const; /*! * \brief Create an Expr that does a vector load at begin index. * \param begin The beginning index * \param dtype The data type to be loaded. */ - TVM_DLL Expr vload(Array begin, DataType dtype) const; + TVM_DLL Expr vload(Array begin, Type dtype) const; /*! * \brief Create a Stmt that does a vector store at begin index. * \param begin The beginning index @@ -110,7 +108,7 @@ class BufferNode : public Node { */ Var data; /*! \brief data type in the content of the tensor */ - DataType dtype; + Type dtype; /*! \brief The shape of the buffer */ Array shape; /*! @@ -151,14 +149,14 @@ class BufferNode : public Node { } /*! \return preferred index type for this buffer node */ - DataType DefaultIndexType() const { - return shape.size() != 0 ? shape[0].dtype() : DataType::Int(32); + Type DefaultIndexType() const { + return shape.size() != 0 ? shape[0].type() : Int(32); } // User can specify data_alignment and offset_factor to be 0 // A default value will be picked. TVM_DLL static Buffer make(Var ptr, - DataType dtype, + Type dtype, Array shape, Array strides, Expr elem_offset, @@ -185,7 +183,7 @@ inline const BufferNode* Buffer::operator->() const { * \sa BufferNode::make for complete constructor. */ TVM_DLL Buffer decl_buffer(Array shape, - DataType dtype = DataType::Float(32), + Type dtype = Float(32), std::string name = "buffer"); } // namespace tvm #endif // TVM_BUFFER_H_ diff --git a/include/tvm/build_module.h b/include/tvm/build_module.h index fba929cda1be..a83288ce3662 100644 --- a/include/tvm/build_module.h +++ b/include/tvm/build_module.h @@ -170,9 +170,6 @@ TVM_DLL Target intel_graphics(const std::vector& options = TVM_DLL Target stackvm(const std::vector& options = std::vector()); -/*! \return A target for external device */ -TVM_DLL Target ext_dev(const std::vector& options = - std::vector()); } // namespace target /*! 
diff --git a/include/tvm/channel.h b/include/tvm/channel.h new file mode 100644 index 000000000000..3a40a787d891 --- /dev/null +++ b/include/tvm/channel.h @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/channel.h + * \brief Channel object for pipeline. + */ +#ifndef TVM_CHANNEL_H_ +#define TVM_CHANNEL_H_ + +#include + +namespace tvm { +// Node container of channel +struct ChannelNode; + +/*! \brief The data channel. */ +class Channel : public NodeRef { + public: + /*! \brief default constructor */ + Channel() {} + explicit Channel(ObjectPtr n) : NodeRef(n) {} + /*! + * \brief access the internal node container + * \return the pointer to the internal node container + */ + inline const ChannelNode* operator->() const; + // The container type + using ContainerType = ChannelNode; +}; + +/*! + * \brief Generalized FIFO channel. + */ +struct ChannelNode : public Node { + /*! \brief Variable to channel handle */ + Var handle_var; + /*! \brief default data type in read/write */ + Type dtype; + // visit all attributes + void VisitAttrs(AttrVisitor* v) { + v->Visit("handle_var", &handle_var); + v->Visit("dtype", &dtype); + } + + static Channel make(Var handle_var, Type dtype); + static constexpr const char* _type_key = "Channel"; + + TVM_DECLARE_NODE_TYPE_INFO(ChannelNode, Node); +}; + +// Inline implementations +inline const ChannelNode* Channel::operator->() const { + return static_cast(get()); +} +} // namespace tvm +#endif // TVM_CHANNEL_H_ diff --git a/include/tvm/runtime/data_type.h b/include/tvm/dtype.h similarity index 57% rename from include/tvm/runtime/data_type.h rename to include/tvm/dtype.h index 5b222ac6b442..9f7902deb960 100644 --- a/include/tvm/runtime/data_type.h +++ b/include/tvm/dtype.h @@ -17,35 +17,23 @@ * under the License. */ /* - * \file tvm/runtime/data_type.h - * \brief Primitive runtime data type. + * \file tvm/dtype.h + * \brief Data type used in IR. */ // Acknowledgement: DataType structure design originates from Halide. -#ifndef TVM_RUNTIME_DATA_TYPE_H_ -#define TVM_RUNTIME_DATA_TYPE_H_ - -#include -#include -#include +#ifndef TVM_DTYPE_H_ +#define TVM_DTYPE_H_ +#include "runtime/packed_func.h" namespace tvm { -namespace runtime { +class Expr; + /*! - * \brief Runtime primitive data type. - * - * This class is a thin wrapper of DLDataType. - * We also make use of DataType in compiler to store quick hint + * \brief Primitive data types in tvm. */ class DataType { public: - /*! \brief Type code for the DataType. */ - enum TypeCode { - kInt = kDLInt, - kUInt = kDLUInt, - kFloat = kDLFloat, - kHandle = TVMTypeCode::kHandle, - }; /*! \brief default constructor */ DataType() {} /*! @@ -87,23 +75,23 @@ class DataType { } /*! \return whether type is a scalar type. 
*/ bool is_bool() const { - return code() == DataType::kUInt && bits() == 1; + return code() == kDLUInt && bits() == 1; } /*! \return whether type is a float type. */ bool is_float() const { - return code() == DataType::kFloat; + return code() == kDLFloat; } /*! \return whether type is an int type. */ bool is_int() const { - return code() == DataType::kInt; + return code() == kDLInt; } /*! \return whether type is an uint type. */ bool is_uint() const { - return code() == DataType::kUInt; + return code() == kDLUInt; } /*! \return whether type is a handle type. */ bool is_handle() const { - return code() == DataType::kHandle; + return code() == kHandle; } /*! \return whether type is a vector type. */ bool is_vector() const { @@ -132,93 +120,107 @@ class DataType { DataType element_of() const { return with_lanes(1); } - /*! - * \brief Equal comparator. - * \param other The data type to compre against. - * \return The comparison resilt. - */ + // operator overloadings bool operator==(const DataType& other) const { return data_.code == other.data_.code && data_.bits == other.data_.bits && data_.lanes == other.data_.lanes; } - /*! - * \brief NotEqual comparator. - * \param other The data type to compre against. - * \return The comparison resilt. - */ bool operator!=(const DataType& other) const { return !operator==(other); } - /*! - * \brief Converter to DLDataType - * \return the result. - */ operator DLDataType () const { return data_; } - - /*! - * \brief Construct an int type. - * \param bits The number of bits in the type. - * \param lanes The number of lanes. - * \return The constructed data type. - */ - static DataType Int(int bits, int lanes = 1) { - return DataType(kDLInt, bits, lanes); - } - /*! - * \brief Construct an uint type. - * \param bits The number of bits in the type. - * \param lanes The number of lanes - * \return The constructed data type. - */ - static DataType UInt(int bits, int lanes = 1) { - return DataType(kDLUInt, bits, lanes); - } - /*! - * \brief Construct an uint type. - * \param bits The number of bits in the type. - * \param lanes The number of lanes - * \return The constructed data type. - */ - static DataType Float(int bits, int lanes = 1) { - return DataType(kDLFloat, bits, lanes); - } - /*! - * \brief Construct a bool type. - * \param lanes The number of lanes - * \return The constructed data type. - */ - static DataType Bool(int lanes = 1) { - return DataType::UInt(1, lanes); - } - /*! - * \brief Construct a handle type. - * \param bits The number of bits in the type. - * \param lanes The number of lanes - * \return The constructed data type. - */ - static DataType Handle(int bits = 64, int lanes = 1) { - return DataType(kHandle, bits, lanes); - } - /*! - * \brief Get the corresponding type of TVMShapeIndex. - * \return The type of TVM shape index. - */ - static DataType ShapeIndex() { - if (std::is_signed::value) { - return DataType::Int(sizeof(tvm_index_t) * 8); - } else { - return DataType::UInt(sizeof(tvm_index_t) * 8); - } - } + /*! \return the maximum possible value in this format. */ + TVM_DLL Expr max() const; + /*! \return the minimum possible value in this format. */ + TVM_DLL Expr min() const; private: DLDataType data_; }; +/*! + * \brief Construct an int type. + * \param bits The number of bits in the type. + * \param lanes The number of lanes. + * \return The constructed data type. + */ +inline DataType Int(int bits, int lanes = 1) { + return DataType(kDLInt, bits, lanes); +} + +/*! + * \brief Construct an uint type. 
+ * \param bits The number of bits in the type.
+ * \param lanes The number of lanes
+ * \return The constructed data type.
+ */
+inline DataType UInt(int bits, int lanes = 1) {
+  return DataType(kDLUInt, bits, lanes);
+}
+
+/*!
+ * \brief Construct a bool type.
+ * \param lanes The number of lanes
+ * \return The constructed data type.
+ */
+inline DataType Bool(int lanes = 1) {
+  return UInt(1, lanes);
+}
+
+/*!
+ * \brief Construct a float type.
+ * \param bits The number of bits in the type.
+ * \param lanes The number of lanes
+ * \return The constructed data type.
+ */
+inline DataType Float(int bits, int lanes = 1) {
+  return DataType(kDLFloat, bits, lanes);
+}
+
+/*!
+ * \brief Construct a handle type.
+ * \param bits The number of bits in the type.
+ * \param lanes The number of lanes
+ * \return The constructed data type.
+ */
+inline DataType Handle(int bits = 64, int lanes = 1) {
+  return DataType(kHandle, bits, lanes);
+}
+
+/*!
+ * \brief Get the corresponding type of TVMShapeIndex.
+ * \return The type of TVM shape index.
+ */
+inline DataType TVMShapeIndexType() {
+  if (std::is_signed::value) {
+    return Int(sizeof(tvm_index_t) * 8);
+  } else {
+    return UInt(sizeof(tvm_index_t) * 8);
+  }
+}
+
+/*!
+ * \brief Convert DLDataType to DataType.
+ * \param t The original type.
+ * \return The conversion result.
+ */
+inline DataType TVMType2Type(DLDataType t) {
+  return DataType(t.code, t.bits, t.lanes);
+}
+
+/*!
+ * \brief Convert DataType to DLDataType.
+ * \param t The original type.
+ * \return The conversion result.
+ */
+inline DLDataType Type2TVMType(DataType t) {
+  return t.operator DLDataType();
+}
+
 /*!
  * \brief Get the number of bytes needed in a vector.
  * \param dtype The data type.
@@ -227,15 +229,19 @@ class DataType {
 inline int GetVectorBytes(DataType dtype) {
   int data_bits = dtype.bits() * dtype.lanes();
   // allow bool to exist
-  if (dtype == DataType::Bool()) return 1;
+  if (dtype == Bool()) return 1;
   CHECK_EQ(data_bits % 8, 0U)
       << "Need to load/store by multiple of bytes";
   return data_bits / 8;
 }
-}  // namespace runtime
-
-using DataType = runtime::DataType;
+// Overload print function.
+inline std::ostream& operator<<(std::ostream& os, DataType dtype) { // NOLINT(*)
+  using namespace tvm::runtime;
+  return os << dtype.operator DLDataType();
+}
+// Backward compatibility
+using Type = DataType;
 }  // namespace tvm
-#endif  // TVM_RUNTIME_DATA_TYPE_H_
+#endif  // TVM_DTYPE_H_
diff --git a/include/tvm/expr.h b/include/tvm/expr.h
index f27cb9879fb7..fc52421d903b 100644
--- a/include/tvm/expr.h
+++ b/include/tvm/expr.h
@@ -29,11 +29,11 @@
 #include
 #include
 #include "base.h"
+#include "dtype.h"
 #include "node/node.h"
 #include "node/container.h"
 #include "node/functor.h"
 #include "runtime/c_runtime_api.h"
-#include "runtime/data_type.h"

 namespace tvm {

@@ -41,7 +41,7 @@
 class ExprNode : public Node {
  public:
   /*! \brief The data type of the expression. */
-  DataType dtype;
+  DataType type;

   static constexpr const char* _type_key = "Expr";
   TVM_DECLARE_BASE_NODE_INFO(ExprNode, Node);
@@ -69,8 +69,8 @@
   TVM_DLL Expr(std::string str);  // NOLINT(*)

   /*! \return the data type of this expression. */
-  DataType dtype() const {
-    return static_cast(get())->dtype;
+  DataType type() const {
+    return static_cast(get())->type;
   }
   /*! 
\brief type indicate the container type */ @@ -113,7 +113,7 @@ class Variable : public ExprNode { static Var make(DataType dtype, std::string name_hint); void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("name", &name_hint); } @@ -126,14 +126,14 @@ class Var : public Expr { public: explicit Var(ObjectPtr n) : Expr(n) {} TVM_DLL explicit Var(std::string name_hint = "v", - DataType t = DataType::Int(32)); + Type t = Int(32)); /*! * \brief Make a new copy of var with same type, append suffix * \param suffix The suffix to be appended. * \return the new Var copy */ Var copy_with_suffix(const std::string& suffix) const { - return Var((*this)->name_hint + suffix, (*this)->dtype); + return Var((*this)->name_hint + suffix, (*this)->type); } /*! * \brief Get pointer to the internal value. @@ -167,7 +167,7 @@ class IntImm : public ExprNode { int64_t value; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("value", &value); } @@ -452,7 +452,7 @@ inline const char* IterVarType2String(IterVarType t) { * \param name_hint The name hint for the expression * \param t The type of the expression */ -TVM_DLL Var var(std::string name_hint, DataType t = DataType::Int(32)); +TVM_DLL Var var(std::string name_hint, Type t = Int(32)); /* * \brief Template function to convert Map to unordered_map diff --git a/include/tvm/expr_operator.h b/include/tvm/expr_operator.h index 41e7aa5b7796..625ee8e49286 100644 --- a/include/tvm/expr_operator.h +++ b/include/tvm/expr_operator.h @@ -44,20 +44,20 @@ namespace tvm { */ template::value>::type> -inline Expr make_const(DataType t, ValueType value); +inline Expr make_const(Type t, ValueType value); /*! * \brief Make a const zero expr. * \param t The target type. * \return the result expression. */ -inline Expr make_zero(DataType t); +inline Expr make_zero(Type t); /*! * \brief Make a constant true expression. * \param lanes The number of lanes in the bool * \return The result expression. */ inline Expr const_true(int lanes = 1) { - return make_const(DataType::UInt(1, lanes), 1); + return make_const(UInt(1, lanes), 1); } /*! * \brief Make a constant false expression. @@ -65,7 +65,7 @@ inline Expr const_true(int lanes = 1) { * \return The result expression. */ inline Expr const_false(int lanes = 1) { - return make_const(DataType::UInt(1, lanes), 0); + return make_const(UInt(1, lanes), 0); } /*! * \brief Get x as constant int expression. @@ -139,20 +139,6 @@ inline bool is_zero(const Expr& x) { */ inline bool is_const(const Expr& x); -/*! - * Query the maximum possible value of dtype. - * \param dtype The data type. - * \return the maximum possible value in this format. - */ -TVM_DLL Expr max_value(const DataType& dtype); - -/*! - * Query the minimum possible value of dtype. - * \param dtype The data type. - * \return the minimum possible value in this format. - */ -TVM_DLL Expr min_value(const DataType& dtype); - /*! * \brief Check whether x is a constant power of two * If x is power of two, write the power to the shift. @@ -171,7 +157,7 @@ TVM_DLL bool is_const_power_of_two_integer(const Expr& x, int* shift); * \return The result expression. * \note This function may return value if the type is the same. */ -TVM_DLL Expr cast(const DataType& t, Expr value); +TVM_DLL Expr cast(const Type& t, Expr value); /*! * \brief perform reinterpret cast value to type. * @@ -180,7 +166,7 @@ TVM_DLL Expr cast(const DataType& t, Expr value); * \return The result expression. 
* \note This function may return value if the type is the same. */ -TVM_DLL Expr reinterpret(const DataType& t, Expr value); +TVM_DLL Expr reinterpret(const Type& t, Expr value); /*! * \brief add operator * @@ -600,7 +586,7 @@ TVM_DLL Expr trunc(Expr x); // Intrinsic operators #define TVM_DECLARE_INTRIN_UNARY(OpName) \ inline Expr OpName(Expr x) { \ - return ir::Call::make(x.dtype(), #OpName, {x}, ir::Call::PureIntrinsic); \ + return ir::Call::make(x.type(), #OpName, {x}, ir::Call::PureIntrinsic); \ } \ TVM_DECLARE_INTRIN_UNARY(exp); @@ -671,7 +657,7 @@ inline bool is_no_op(const Stmt& stmt) { } template -inline Expr MakeConstScalar(DataType t, ValueType value) { +inline Expr MakeConstScalar(Type t, ValueType value) { if (t.is_int()) return ir::IntImm::make(t, static_cast(value)); if (t.is_uint()) return ir::UIntImm::make(t, static_cast(value)); if (t.is_float()) return ir::FloatImm::make(t, static_cast(value)); @@ -686,7 +672,7 @@ inline Expr MakeConstScalar(DataType t, ValueType value) { } template -inline Expr make_const(DataType t, ValueType value) { +inline Expr make_const(Type t, ValueType value) { if (t.lanes() == 1) { return MakeConstScalar(t, value); } else { @@ -695,9 +681,9 @@ inline Expr make_const(DataType t, ValueType value) { } } -inline Expr make_zero(DataType t) { +inline Expr make_zero(Type t) { if (t.is_handle()) { - return reinterpret(t, make_const(DataType::UInt(64), 0)); + return reinterpret(t, make_const(UInt(64), 0)); } return make_const(t, 0); } @@ -717,13 +703,13 @@ inline Expr make_zero(DataType t) { return Name(Expr(a), b); \ } \ inline Expr Name(int a, const Expr& b) { \ - return Name(make_const(b.dtype(), a), b); \ + return Name(make_const(b.type(), a), b); \ } \ inline Expr Name(const Expr& a, int b) { \ - return Name(a, make_const(a.dtype(), b)); \ + return Name(a, make_const(a.type(), b)); \ } \ inline Expr Name(const Expr& a, double b) { \ - return Name(a, make_const(DataType::Float(64), b)); \ + return Name(a, make_const(Float(64), b)); \ } #define TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD(Name) \ @@ -736,10 +722,10 @@ inline Expr make_zero(DataType t) { #define TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD(Name) \ inline Expr Name(const Expr& a, int b) { \ - return Name(a, make_const(a.dtype(), b)); \ + return Name(a, make_const(a.type(), b)); \ } \ inline Expr Name(int a, const Expr& b) { \ - return Name(make_const(b.dtype(), a), b); \ + return Name(make_const(b.type(), a), b); \ } diff --git a/include/tvm/ir.h b/include/tvm/ir.h index 33aa72b50805..226d6f83dcc7 100644 --- a/include/tvm/ir.h +++ b/include/tvm/ir.h @@ -46,11 +46,11 @@ class UIntImm : public ExprNode { uint64_t value; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("value", &value); } - TVM_DLL static Expr make(DataType t, uint64_t value); + TVM_DLL static Expr make(Type t, uint64_t value); static constexpr const char* _type_key = "UIntImm"; TVM_DECLARE_NODE_TYPE_INFO(UIntImm, ExprNode); @@ -63,11 +63,11 @@ class FloatImm : public ExprNode { double value; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("value", &value); } - TVM_DLL static Expr make(DataType t, double value); + TVM_DLL static Expr make(Type t, double value); static constexpr const char* _type_key = "FloatImm"; TVM_DECLARE_NODE_TYPE_INFO(FloatImm, ExprNode); @@ -80,7 +80,7 @@ class StringImm : public ExprNode { std::string value; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); 
v->Visit("value", &value); } @@ -100,11 +100,11 @@ class Cast : public ExprNode { Expr value; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("value", &value); } - TVM_DLL static Expr make(DataType t, Expr v); + TVM_DLL static Expr make(Type t, Expr v); static constexpr const char* _type_key = "Cast"; TVM_DECLARE_NODE_TYPE_INFO(Cast, ExprNode); @@ -123,7 +123,7 @@ class BinaryOpNode : public ExprNode { Expr b; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &(this->dtype)); + v->Visit("dtype", &(this->type)); v->Visit("a", &a); v->Visit("b", &b); } @@ -131,9 +131,9 @@ class BinaryOpNode : public ExprNode { static Expr make(Expr a, Expr b) { CHECK(a.defined()) << "ValueError: a is undefined\n"; CHECK(b.defined()) << "ValueError: b is undefined\n"; - CHECK(a.dtype() == b.dtype()) << "TypeError: mismatched types\n"; + CHECK(a.type() == b.type()) << "TypeError: mismatched types\n"; NodePtr node = make_node(); - node->dtype = a.dtype(); + node->type = a.type(); node->a = std::move(a); node->b = std::move(b); return Expr(node); @@ -215,7 +215,7 @@ class CmpOpNode : public ExprNode { Expr b; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &(this->dtype)); + v->Visit("dtype", &(this->type)); v->Visit("a", &a); v->Visit("b", &b); } @@ -223,9 +223,9 @@ class CmpOpNode : public ExprNode { static Expr make(Expr a, Expr b) { CHECK(a.defined()) << "ValueError: a is undefined\n"; CHECK(b.defined()) << "ValueError: b is undefined\n"; - CHECK(a.dtype() == b.dtype()) << "TypeError: mismatched types\n"; + CHECK(a.type() == b.type()) << "TypeError: mismatched types\n"; NodePtr node = make_node(); - node->dtype = DataType::Bool(a.dtype().lanes()); + node->type = Bool(a.type().lanes()); node->a = std::move(a); node->b = std::move(b); return Expr(node); @@ -279,7 +279,7 @@ class And : public ExprNode { Expr b; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &(this->dtype)); + v->Visit("dtype", &(this->type)); v->Visit("a", &a); v->Visit("b", &b); } @@ -299,7 +299,7 @@ class Or : public ExprNode { Expr b; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("a", &a); v->Visit("b", &b); } @@ -317,7 +317,7 @@ class Not : public ExprNode { Expr a; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("a", &a); } @@ -344,7 +344,7 @@ class Select : public ExprNode { Expr false_value; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("condition", &condition); v->Visit("true_value", &true_value); v->Visit("false_value", &false_value); @@ -381,13 +381,13 @@ class Load : public ExprNode { Expr predicate; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("buffer_var", &buffer_var); v->Visit("index", &index); v->Visit("predicate", &predicate); } - TVM_DLL static Expr make(DataType dtype, Var buffer_var, Expr index, Expr predicate); + TVM_DLL static Expr make(Type type, Var buffer_var, Expr index, Expr predicate); static constexpr const char* _type_key = "Load"; TVM_DECLARE_NODE_TYPE_INFO(Load, ExprNode); @@ -412,7 +412,7 @@ class Ramp : public ExprNode { int lanes; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("base", &base); v->Visit("stride", &stride); v->Visit("lanes", &lanes); @@ -433,7 +433,7 @@ class Broadcast : public ExprNode { int lanes; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + 
v->Visit("dtype", &type); v->Visit("value", &value); v->Visit("lanes", &lanes); } @@ -457,7 +457,7 @@ class Let : public ExprNode { Expr body; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("var", &var); v->Visit("value", &value); v->Visit("body", &body); @@ -523,7 +523,7 @@ class Call : public ExprNode { int value_index{0}; void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("name", &name); v->Visit("args", &args); v->Visit("call_type", &call_type); @@ -531,7 +531,7 @@ class Call : public ExprNode { v->Visit("value_index", &value_index); } - TVM_DLL static Expr make(DataType dtype, + TVM_DLL static Expr make(Type type, std::string name, Array args, CallType call_type, @@ -695,7 +695,7 @@ class Reduce : public ExprNode { int value_index); void VisitAttrs(AttrVisitor* v) { - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("combiner", &combiner); v->Visit("source", &source); v->Visit("axis", &axis); @@ -713,7 +713,7 @@ class Any : public ExprNode { void VisitAttrs(AttrVisitor* v) {} /*! \brief Convert to var. */ Var ToVar() const { - return Variable::make(DataType::Int(32), "any_dim"); + return Variable::make(Int(32), "any_dim"); } TVM_DLL static Expr make(); @@ -917,7 +917,7 @@ class Allocate : public StmtNode { /*! \brief The buffer variable. */ Var buffer_var; /*! \brief The type of the buffer. */ - DataType dtype; + DataType type; /*! \brief The extents of the buffer. */ Array extents; /*! \brief Only allocate buffer when condition is satisfied. */ @@ -931,14 +931,14 @@ class Allocate : public StmtNode { void VisitAttrs(AttrVisitor* v) { v->Visit("buffer_var", &buffer_var); - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("extents", &extents); v->Visit("condition", &condition); v->Visit("body", &body); } TVM_DLL static Stmt make(Var buffer_var, - DataType dtype, + DataType type, Array extents, Expr condition, Stmt body, @@ -993,7 +993,7 @@ class Realize : public StmtNode { /*! \brief The output value index if func's value is a tuple. */ int value_index; /*! \brief The data type of the array. */ - DataType dtype; + DataType type; /*! \brief Bounds to be realized. */ Region bounds; /*! \brief Only realize if condition holds. */ @@ -1004,7 +1004,7 @@ class Realize : public StmtNode { void VisitAttrs(AttrVisitor* v) { v->Visit("func", &func); v->Visit("value_index", &value_index); - v->Visit("dtype", &dtype); + v->Visit("dtype", &type); v->Visit("bounds", &bounds); v->Visit("condition", &condition); v->Visit("body", &body); @@ -1012,7 +1012,7 @@ class Realize : public StmtNode { TVM_DLL static Stmt make(FunctionRef func, int value_index, - DataType dtype, + DataType type, Region bounds, Expr condition, Stmt body); @@ -1165,20 +1165,20 @@ class Prefetch : public StmtNode { /*! \brief The output value index if func's value is a tuple. */ int value_index; /*! \brief The data type of the array. */ - DataType dtype; + DataType type; /*! \brief Bounds to be prefetched. 
*/
  Region bounds;

  void VisitAttrs(AttrVisitor* v) {
    v->Visit("func", &func);
    v->Visit("value_index", &value_index);
-    v->Visit("dtype", &dtype);
+    v->Visit("type", &type);
    v->Visit("bounds", &bounds);
  }

  TVM_DLL static Stmt make(FunctionRef func,
                           int value_index,
-                           DataType dtype,
+                           DataType type,
                           Region bounds);

  static constexpr const char* _type_key = "Prefetch";
@@ -1620,7 +1620,7 @@ constexpr const char* tvm_store_matrix_sync = "tvm_store_matrix_sync";
 * \param dtype The data type
 * \return Expr a expression with dtype.
 */
-inline Expr TypeAnnotation(DataType dtype) {
+inline Expr TypeAnnotation(Type dtype) {
  return ir::Call::make(dtype,
                        "type_annotation", {},
                        ir::Call::PureIntrinsic);
diff --git a/include/tvm/ir_pass.h b/include/tvm/ir_pass.h
index b0b13df729cc..5c5c4bb2f452 100644
--- a/include/tvm/ir_pass.h
+++ b/include/tvm/ir_pass.h
@@ -236,6 +236,21 @@ bool VerifyCompactBuffer(Stmt stmt);
 */
Stmt RemoveNoOp(Stmt stmt);

+/*!
+ * \brief Split statement into pipeline stages.
+ * \param stmt The stmt to be split
+ * \param split_load Whether to split load into its own stage.
+ * \return Transformed stmt.
+ */
+Stmt SplitPipeline(Stmt stmt, bool split_load);
+
+/*!
+ * \brief Narrow channel access to smaller range.
+ * \param stmt The stmt to do access rewriting.
+ * \return Transformed stmt.
+ */
+Stmt NarrowChannelAccess(Stmt stmt);
+
/*!
 * \brief unroll the constant loop marked by unroll.
 This pass also automatically attach pragma unroll tag to loops which meets the standard.
diff --git a/include/tvm/node/container.h b/include/tvm/node/container.h
index 1a276ae695fc..41b47d3a679e 100644
--- a/include/tvm/node/container.h
+++ b/include/tvm/node/container.h
@@ -23,14 +23,14 @@
 #ifndef TVM_NODE_CONTAINER_H_
 #define TVM_NODE_CONTAINER_H_

-#include
-
 #include
 #include
 #include
 #include
 #include
 #include
+#include "node.h"
+#include "memory.h"

 namespace tvm {
diff --git a/include/tvm/node/reflection.h b/include/tvm/node/reflection.h
index daffeb859668..35a8e1d4a657 100644
--- a/include/tvm/node/reflection.h
+++ b/include/tvm/node/reflection.h
@@ -28,7 +28,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

@@ -36,6 +35,8 @@
 namespace tvm {
 // forward declaration

+class DataType;
+
 using runtime::Object;
 using runtime::ObjectPtr;
 using runtime::ObjectRef;
diff --git a/include/tvm/operation.h b/include/tvm/operation.h
index 34f584b63261..f53c1ce56a93 100644
--- a/include/tvm/operation.h
+++ b/include/tvm/operation.h
@@ -75,7 +75,7 @@ class OperationNode : public ir::FunctionBaseNode {
   * \param i The output index.
   * \return type of i-th output.
   */
-  virtual DataType output_dtype(size_t i) const = 0;
+  virtual Type output_dtype(size_t i) const = 0;
  /*!
   * \brief Get shape of i-th output tensor.
   * \param i The output index.
@@ -160,11 +160,11 @@ class PlaceholderOpNode : public OperationNode {
  /*! \brief The shape of the input */
  Array shape;
  /*! \brief The data type of the input. */
-  DataType dtype;
+  Type dtype;
  // override behavior. 
int num_outputs() const final; Array root_iter_vars() const final; - DataType output_dtype(size_t i) const final; + Type output_dtype(size_t i) const final; Array output_shape(size_t i) const final; Array InputTensors() const final; Operation ReplaceInputs( @@ -197,7 +197,7 @@ class PlaceholderOpNode : public OperationNode { } static Operation make(std::string name, Array shape, - DataType dtype); + Type dtype); static constexpr const char* _type_key = "PlaceholderOp"; TVM_DECLARE_NODE_TYPE_INFO(PlaceholderOpNode, OperationNode); @@ -243,7 +243,7 @@ class TVM_DLL ComputeOpNode : public BaseComputeOpNode { ComputeOpNode() {} // override functions int num_outputs() const final; - DataType output_dtype(size_t i) const final; + Type output_dtype(size_t i) const final; Array InputTensors() const final; Operation ReplaceInputs( const Operation& self, @@ -296,7 +296,7 @@ class TensorComputeOpNode : public BaseComputeOpNode { TensorComputeOpNode() {} // override functions int num_outputs() const final; - DataType output_dtype(size_t i) const final; + Type output_dtype(size_t i) const final; Array InputTensors() const final; Operation ReplaceInputs( const Operation& self, @@ -370,7 +370,7 @@ class ScanOpNode : public OperationNode { // override behavior. int num_outputs() const final; Array root_iter_vars() const final; - DataType output_dtype(size_t i) const final; + Type output_dtype(size_t i) const final; Array output_shape(size_t i) const final; Array InputTensors() const final; Operation ReplaceInputs( @@ -437,7 +437,7 @@ class ExternOpNode : public OperationNode { // override functions int num_outputs() const final; Array root_iter_vars() const final; - DataType output_dtype(size_t i) const final; + Type output_dtype(size_t i) const final; Array output_shape(size_t i) const final; Array InputTensors() const final; Operation ReplaceInputs( @@ -505,7 +505,7 @@ class HybridOpNode : public OperationNode { // override functions int num_outputs() const final; Array root_iter_vars() const final; - DataType output_dtype(size_t i) const final; + Type output_dtype(size_t i) const final; Array output_shape(size_t i) const final; Array InputTensors() const final; Operation ReplaceInputs( @@ -562,7 +562,7 @@ using FBatchCompute = std::function (const Array& i)>; * \param name The name of the Tensor. */ TVM_DLL Tensor placeholder(Array shape, - DataType dtype = DataType::Float(32), + Type dtype = Float(32), std::string name = "placeholder"); /*! diff --git a/include/tvm/packed_func_ext.h b/include/tvm/packed_func_ext.h index c9f7a580621f..71f8f55b2655 100644 --- a/include/tvm/packed_func_ext.h +++ b/include/tvm/packed_func_ext.h @@ -25,6 +25,7 @@ #ifndef TVM_PACKED_FUNC_EXT_H_ #define TVM_PACKED_FUNC_EXT_H_ +#include #include #include #include @@ -42,7 +43,22 @@ using runtime::TVMRetValue; using runtime::PackedFunc; namespace runtime { - +/*! + * \brief Runtime type checker for node type. + * \tparam T the type to be checked. 
+ */ +template +struct ObjectTypeChecker { + static bool Check(const Object* ptr) { + using ContainerType = typename T::ContainerType; + if (ptr == nullptr) return true; + return ptr->IsInstance(); + } + static void PrintName(std::ostream& os) { // NOLINT(*) + using ContainerType = typename T::ContainerType; + os << ContainerType::_type_key; + } +}; template struct ObjectTypeChecker > { @@ -57,8 +73,10 @@ struct ObjectTypeChecker > { } return true; } - static std::string TypeName() { - return "List[" + ObjectTypeChecker::TypeName() + "]"; + static void PrintName(std::ostream& os) { // NOLINT(*) + os << "List["; + ObjectTypeChecker::PrintName(os); + os << "]"; } }; @@ -73,9 +91,11 @@ struct ObjectTypeChecker > { } return true; } - static std::string TypeName() { - return "Map[str, " + - ObjectTypeChecker::TypeName()+ ']'; + static void PrintName(std::ostream& os) { // NOLINT(*) + os << "Map[str"; + os << ','; + ObjectTypeChecker::PrintName(os); + os << ']'; } }; @@ -91,16 +111,39 @@ struct ObjectTypeChecker > { } return true; } - static std::string TypeName() { - return "Map[" + - ObjectTypeChecker::TypeName() + - ", " + - ObjectTypeChecker::TypeName()+ ']'; + static void PrintName(std::ostream& os) { // NOLINT(*) + os << "Map["; + ObjectTypeChecker::PrintName(os); + os << ','; + ObjectTypeChecker::PrintName(os); + os << ']'; } }; +template +inline std::string ObjectTypeName() { + std::ostringstream os; + ObjectTypeChecker::PrintName(os); + return os.str(); +} + // extensions for tvm arg value -inline TVMPODValue_::operator tvm::Expr() const { + +template +inline TObjectRef TVMArgValue::AsObjectRef() const { + static_assert( + std::is_base_of::value, + "Conversion only works for ObjectRef"); + if (type_code_ == kNull) return TObjectRef(NodePtr(nullptr)); + TVM_CHECK_TYPE_CODE(type_code_, kObjectHandle); + Object* ptr = static_cast(value_.v_handle); + CHECK(ObjectTypeChecker::Check(ptr)) + << "Expected type " << ObjectTypeName() + << " but get " << ptr->GetTypeKey(); + return TObjectRef(ObjectPtr(ptr)); +} + +inline TVMArgValue::operator tvm::Expr() const { if (type_code_ == kNull) return Expr(); if (type_code_ == kDLInt) { CHECK_LE(value_.v_int64, std::numeric_limits::max()); @@ -121,12 +164,12 @@ inline TVMPODValue_::operator tvm::Expr() const { return Tensor(ObjectPtr(ptr))(); } CHECK(ObjectTypeChecker::Check(ptr)) - << "Expect type " << ObjectTypeChecker::TypeName() + << "Expected type " << ObjectTypeName() << " but get " << ptr->GetTypeKey(); return Expr(ObjectPtr(ptr)); } -inline TVMPODValue_::operator tvm::Integer() const { +inline TVMArgValue::operator tvm::Integer() const { if (type_code_ == kNull) return Integer(); if (type_code_ == kDLInt) { CHECK_LE(value_.v_int64, std::numeric_limits::max()); @@ -136,10 +179,52 @@ inline TVMPODValue_::operator tvm::Integer() const { TVM_CHECK_TYPE_CODE(type_code_, kObjectHandle); Object* ptr = static_cast(value_.v_handle); CHECK(ObjectTypeChecker::Check(ptr)) - << "Expect type " << ObjectTypeChecker::TypeName() + << "Expected type " << ObjectTypeName() << " but get " << ptr->GetTypeKey(); return Integer(ObjectPtr(ptr)); } + +template +inline bool TVMPODValue_::IsObjectRef() const { + TVM_CHECK_TYPE_CODE(type_code_, kObjectHandle); + Object* ptr = static_cast(value_.v_handle); + return ObjectTypeChecker::Check(ptr); +} + +// extensions for TVMRetValue +template +inline TObjectRef TVMRetValue::AsObjectRef() const { + static_assert( + std::is_base_of::value, + "Conversion only works for ObjectRef"); + if (type_code_ == kNull) return
TObjectRef(); + TVM_CHECK_TYPE_CODE(type_code_, kObjectHandle); + + Object* ptr = static_cast(value_.v_handle); + + CHECK(ObjectTypeChecker::Check(ptr)) + << "Expected type " << ObjectTypeName() + << " but get " << ptr->GetTypeKey(); + return TObjectRef(ObjectPtr(ptr)); +} + +// type related stuffs +inline TVMRetValue& TVMRetValue::operator=(const DataType& t) { + return this->operator=(t.operator DLDataType()); +} + +inline TVMRetValue::operator tvm::DataType() const { + return DataType(operator DLDataType()); +} + +inline TVMArgValue::operator tvm::DataType() const { + return DataType(operator DLDataType()); +} + +inline void TVMArgsSetter::operator()( + size_t i, const DataType& t) const { + this->operator()(i, t.operator DLDataType()); +} } // namespace runtime } // namespace tvm #endif // TVM_PACKED_FUNC_EXT_H_ diff --git a/include/tvm/relay/attrs/memory.h b/include/tvm/relay/attrs/memory.h index c74b6487de54..2e279a56bbde 100644 --- a/include/tvm/relay/attrs/memory.h +++ b/include/tvm/relay/attrs/memory.h @@ -43,7 +43,7 @@ struct AllocTensorAttrs : public tvm::AttrsNode { TVM_ATTR_FIELD(dtype) .describe( "The dtype of the tensor to allocate.") - .set_default(DataType::Float(32, 1)); + .set_default(Float(32, 1)); TVM_ATTR_FIELD(const_shape) .describe( "The shape of constant used to aid in type inference."); diff --git a/include/tvm/relay/attrs/nn.h b/include/tvm/relay/attrs/nn.h index d724f8173832..4422fce250c2 100644 --- a/include/tvm/relay/attrs/nn.h +++ b/include/tvm/relay/attrs/nn.h @@ -315,64 +315,6 @@ struct Conv2DTransposeAttrs : public tvm::AttrsNode { } }; -/*! \brief Attributes used in 1D transposed convolution operator */ -struct Conv1DTransposeAttrs : public tvm::AttrsNode { - IndexExpr channels; - Array kernel_size; - Array strides; - Array padding; - Array output_padding; - Array dilation; - int groups; - std::string data_layout; - std::string kernel_layout; - std::string out_layout; - DataType out_dtype; - - TVM_DECLARE_ATTRS(Conv1DTransposeAttrs, "relay.attrs.Conv1DTransposeAttrs") { - TVM_ATTR_FIELD(channels) - .set_default(NullValue()) - .describe("The dimensionality of the output space" - "i.e. the number of output channels in the convolution."); - TVM_ATTR_FIELD(kernel_size) - .describe("The dimensions of the convolution window.") - .set_default(NullValue >()); - TVM_ATTR_FIELD(strides).set_default(Array({1})) - .describe("The strides of the convolution."); - TVM_ATTR_FIELD(output_padding).set_default(Array({0})) - .describe("Zero-padding added to one side of the output."); - TVM_ATTR_FIELD(padding).set_default(Array({0})) - .describe("Symmetric or asymmetric padding." - "Single value: the input is implicitly zero-padded on both sides." - "Two values: padding[0] is used for left input padding, " - "padding[1] is used for right input padding,"); - TVM_ATTR_FIELD(dilation).set_default(Array({1})) - .describe("Specifies the dilation rate to use for dilated convolution."); - TVM_ATTR_FIELD(groups).set_default(1) - .describe("Controls the connections between inputs and outputs." - "At groups=1, all inputs are convolved to all outputs." - "At groups=2, the operation becomes equivalent to having two convolution" - "layers side by side, each seeing half the input channels, and producing" - "half the output channels, and both subsequently concatenated."); - TVM_ATTR_FIELD(data_layout).set_default("NCW") - .describe("Dimension ordering of data. Can be 'NCW', 'NWC', etc." - "'N', 'C', 'W' stands for batch, channel, and width" - "dimensions respectively. 
Convolution is applied on the" - "'W' dimension."); - TVM_ATTR_FIELD(kernel_layout).set_default("OIW") - .describe("Dimension ordering of data and weight. Can be 'OIW', 'OIW16o16i', etc." - "'O', 'I', 'W' stands for num_filter, input_channel, and width" - "dimensions respectively."); - TVM_ATTR_FIELD(out_layout).set_default("") - .describe("Dimension ordering of output. Can be 'NCW', 'NWC', etc." - "'N', 'C', 'W' stands for batch, channel, and width" - "dimensions respectively. Default to be same as input layout."); - TVM_ATTR_FIELD(out_dtype) - .set_default(NullValue()) - .describe("Output data type, set to explicit type under mixed precision setting"); - } -}; - /*! \brief Attributes for max pool operator */ struct MaxPool2DAttrs : public tvm::AttrsNode { Array pool_size; @@ -589,39 +531,6 @@ struct UpSamplingAttrs : public tvm::AttrsNode { } }; -/*! \brief Attributes for upsampling3d operator */ -struct UpSampling3DAttrs : public tvm::AttrsNode { - double scale_d; - double scale_h; - double scale_w; - std::string layout; - std::string method; - std::string coordinate_transformation_mode; - - TVM_DECLARE_ATTRS(UpSampling3DAttrs, "relay.attrs.UpSampling3DAttrs") { - TVM_ATTR_FIELD(scale_d) - .describe("The upsampling factor for depth"); - TVM_ATTR_FIELD(scale_h) - .describe("The upsampling factor for height"); - TVM_ATTR_FIELD(scale_w) - .describe("The upsampling factor for width"); - TVM_ATTR_FIELD(layout).set_default("NCDHW") - .describe("Dimension ordering of input data. Can be 'NCDHW', 'NDHWC', etc." - "'N', 'C', 'D', 'H', 'W' stands for batch, channel, depth, height, and width" - "dimensions respectively. Upsampling is applied on the 'D', 'H' and" - "'W' dimensions."); - TVM_ATTR_FIELD(method).set_default("nearest_neighbor") - .describe("Specify the mode to use for scaling." - "nearest_neighbor - Nearest Neighbor" - "trilinear - Trilinear Interpolation"); - TVM_ATTR_FIELD(coordinate_transformation_mode).set_default("half_pixel") - .describe("Describes how to transform the coordinate in the resized tensor" - "to the coordinate in the original tensor." - "Refer to the ONNX Resize operator specification for details" - "Available options are half_pixel, align_corners and asymmetric"); - } -}; - /*! \brief Attributes used for the padding operator */ struct PadAttrs : public tvm::AttrsNode { double pad_value; @@ -854,26 +763,6 @@ struct DeformableConv2DAttrs : public tvm::AttrsNode { } }; -/*! \brief Attributes used in subpixel operators */ -struct SubPixelAttrs : public tvm::AttrsNode { - int block_size; - std::string layout; - std::string mode; - - TVM_DECLARE_ATTRS(SubPixelAttrs, "relay.attrs.SubPixelAttrs") { - TVM_ATTR_FIELD(block_size) - .describe("The size of subpixel blocks to compose or decompose.") - .set_default(1); - TVM_ATTR_FIELD(layout).set_default("NCHW").describe( - "Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively."); - TVM_ATTR_FIELD(mode).set_default("DCR").describe( - "Indicates order in which channels are accessed. Must be one of" - "DCR or CDR."); - } -}; // struct SubPixelAttrs - } // namespace relay } // namespace tvm #endif // TVM_RELAY_ATTRS_NN_H_ diff --git a/include/tvm/relay/base.h b/include/tvm/relay/base.h index 32f9c32f468a..42a01f009b10 100644 --- a/include/tvm/relay/base.h +++ b/include/tvm/relay/base.h @@ -63,7 +63,7 @@ using NodeRef = tvm::NodeRef; /*! * \brief Content data type. 
*/ -using DataType = ::tvm::DataType; +using DataType = ::tvm::Type; /*! * \brief Symbolic expression for tensor shape. diff --git a/include/tvm/relay/expr.h b/include/tvm/relay/expr.h index 01a73d5396cc..2aa88099a69c 100644 --- a/include/tvm/relay/expr.h +++ b/include/tvm/relay/expr.h @@ -268,15 +268,6 @@ class FunctionNode : public ExprNode { */ bool IsPrimitive() const; - /*! - * \brief Check whether the function should use the TVM default compiler to build, or - * use other compilers. - * - * \return Whether the function will be compiled using the default compiler - * (e.g. those are used in the TVM stack). - */ - bool UseDefaultCompiler() const; - TVM_DLL static Function make(tvm::Array params, Expr body, Type ret_type, @@ -597,25 +588,6 @@ std::string AsText(const NodeRef& node, bool show_meta_data = true, runtime::TypedPackedFunc annotate = nullptr); -/*! \brief namespace of the attributes that are attached to a function. */ -namespace attr { -/*! \brief Mark the function as a primitive function. */ -constexpr const char* kPrimitive = "Primitive"; -/*! - * \brief Indicate the compiler that should be used for builing this function. - * When this is unset or set to "default", the default compilation pipeline will be used. - */ -constexpr const char* kCompiler = "Compiler"; -/*! \brief Indicate if the function is a closure. */ -constexpr const char* kClosure = "Closure"; -/*! \brief Store a Var to parameter/Constant mapping on a Function. */ -constexpr const char* kParams = "__params__"; -/*! \brief Store the unique external symbol for external compilers. */ -constexpr const char* kExternalSymbol = "ExternalSymbol"; -/*! \brief Mark if the function should be avoided being optimized. */ -constexpr const char* kSkipOptimization = "SkipOptimization"; -} // namespace attr - } // namespace relay } // namespace tvm #endif // TVM_RELAY_EXPR_H_ diff --git a/include/tvm/relay/op.h b/include/tvm/relay/op.h index 90f2937c929b..7f1ef456b59b 100644 --- a/include/tvm/relay/op.h +++ b/include/tvm/relay/op.h @@ -594,11 +594,12 @@ inline ValueType OpMap::get(const Expr& expr, return map_.get(expr, def_value); } + /*! - * \brief Check that an expression is a "primitive operator". + * \brief Check that an expression is a "primitive operator". * * Will return true if the expression is an operator which - * matches the form of primitive operators registered directly + * matches the form of primitive operators registered directly * by the Relay codebase. * * That is the arguments are all type variables, and there is a single diff --git a/include/tvm/relay/op_attr_types.h b/include/tvm/relay/op_attr_types.h index 54ea707905e5..741e8b478828 100644 --- a/include/tvm/relay/op_attr_types.h +++ b/include/tvm/relay/op_attr_types.h @@ -29,7 +29,6 @@ #include #include #include -#include namespace tvm { namespace relay { @@ -133,22 +132,6 @@ using FTVMAlterOpLayout = runtime::TypedPackedFunc< const Array& args, const Array& tinfos)>; -/*! - * \brief Convert the layout of operators or replace the - * operator with other expressions. This function will be invoked - * in ConvertLayout pass. - * \param attrs The attribute of the original node. - * \param inputs The input symbols of the original node. - * \param tinfos An array of placeholders, use for getting the inferred shape - * and dtype of the inputs. - * \param desired_layout The desired layout. - * \return new_expr The modified expression.
- */ -using FTVMConvertOpLayout = runtime::TypedPackedFunc< - Expr(const Attrs& attrs, - const Array& args, - const Array& tinfos, - const std::string& desired_layout)>; /*! * \brief Legalizes an expression with another expression. This function will be * invoked in Legalize pass. It is a target-dependent pass. diff --git a/include/tvm/relay/transform.h b/include/tvm/relay/transform.h index 52be6a0f3781..ddadbe4fc31d 100644 --- a/include/tvm/relay/transform.h +++ b/include/tvm/relay/transform.h @@ -532,26 +532,6 @@ TVM_DLL Pass CanonicalizeOps(); */ TVM_DLL Pass AlterOpLayout(); -/*! - * \brief Given a dest layout, this pass transforms the expr such that most of the ops input data - * layout is changed to the dest layout. In ideal situation, there are only 2 layout transforms, one - * at the start and one at the end. - * - * This pass is not a part of relay.build and is expected to be called between framework-relay - * parser and relay.build call. This is very helpful for hardware backends that support/prefer only - * type of data layout. - * - * RFC - https://discuss.tvm.ai/t/layout-conversion-pass/4009 - * - * This pass uses most of the AlterOpLayout and InferCorrectLayout infrastructure. We can define new - * layouts for conv2d ops for now. Most of the other operators try to adapt to their input layout - * using the InferCorrectLayout infrastructure. - * - * \param desired_layout The desired layout. - * \return The pass. - */ -TVM_DLL Pass ConvertLayout(const std::string& desired_layout); - /*! * \brief Legalizes an expr with another expression. * \param legalize_map_attr_name The Op's attr name which corresponds to the legalize rule function. diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 8cb86bf725bc..5053326058bc 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -136,7 +136,7 @@ typedef DLDataType TVMType; typedef DLContext TVMContext; /*! - * \brief The tensor array structure to TVM API. + * \brief The tensor array structure to TVM API. */ typedef DLTensor TVMArray; @@ -234,6 +234,14 @@ TVM_DLL int TVMModGetFunction(TVMModuleHandle mod, int query_imports, TVMFunctionHandle *out); +/*! + * \brief Free front-end extension type resource. + * \param handle The extension handle. + * \param type_code The type code of the extension type. + * \return 0 when success, -1 when failure happens + */ +TVM_DLL int TVMExtTypeFree(void* handle, int type_code); + /*! * \brief Free the Module * \param mod The module to be freed. diff --git a/include/tvm/runtime/container.h b/include/tvm/runtime/container.h index 4dc07f4a3a04..dbe827812fc3 100644 --- a/include/tvm/runtime/container.h +++ b/include/tvm/runtime/container.h @@ -23,7 +23,6 @@ */ #ifndef TVM_RUNTIME_CONTAINER_H_ #define TVM_RUNTIME_CONTAINER_H_ - #include #include #include
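The runtime/ndarray.h hunks that follow replace the ObjectRef-backed NDArray with a standalone handle that owns a raw Container pointer, and both of its assignment operators are written with the copy-and-swap idiom. As a standalone illustration of that idiom (a hypothetical Handle class, not TVM code):

```cpp
#include <algorithm>
#include <cstdio>

// Hypothetical Handle class (not TVM code): the copy-and-swap idiom used by
// the restored NDArray assignment operators below.
class Handle {
 public:
  Handle() = default;
  explicit Handle(int id) : id_(new int(id)) {}
  Handle(const Handle& other)
      : id_(other.id_ ? new int(*other.id_) : nullptr) {}
  Handle(Handle&& other) noexcept : id_(other.id_) { other.id_ = nullptr; }
  ~Handle() { delete id_; }
  void swap(Handle& other) noexcept { std::swap(id_, other.id_); }
  // Taking the argument by value covers both copy- and move-assignment:
  // the temporary owns the new state, swap() transfers it into *this, and
  // the temporary's destructor releases whatever *this held before.
  Handle& operator=(Handle other) noexcept {
    swap(other);
    return *this;
  }

 private:
  int* id_{nullptr};
};

int main() {
  Handle a(1), b(2);
  a = b;          // copy-assignment path
  a = Handle(3);  // move-assignment path
  std::printf("copy-and-swap done\n");
  return 0;
}
```

The diff below spells the two operators out separately, each constructing a temporary and swapping with *this; the effect is the same, and either way the old state is released exactly once.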
diff --git a/include/tvm/runtime/ndarray.h b/include/tvm/runtime/ndarray.h index 090cacff5c3a..993295179842 100644 --- a/include/tvm/runtime/ndarray.h +++ b/include/tvm/runtime/ndarray.h @@ -24,13 +24,11 @@ #ifndef TVM_RUNTIME_NDARRAY_H_ #define TVM_RUNTIME_NDARRAY_H_ -#include -#include -#include - #include #include #include +#include "c_runtime_api.h" +#include "serializer.h" namespace tvm { namespace runtime { @@ -39,23 +37,72 @@ namespace runtime { * \brief Managed NDArray. * The array is backed by reference counted blocks. */ -class NDArray : public ObjectRef { +class NDArray { public: - /*! \brief ContainerBase used to back the TVMArrayHandle */ - class ContainerBase; - /*! \brief NDArray internal container type */ + // internal container type class Container; - /*! \brief Container type for Object system. */ - using ContainerType = Container; /*! \brief default constructor */ NDArray() {} /*! - * \brief constructor. - * \param data ObjectPtr to the data container. + * \brief construct an NDArray that refers to data + * \param data The data this NDArray refers to */ - explicit NDArray(ObjectPtr data) - : ObjectRef(data) {} - + explicit inline NDArray(Container* data); + /*! + * \brief copy constructor. + * + * It does not make a copy, but the reference count of the input NDArray is incremented + * + * \param other NDArray that shares internal data with the input NDArray. + */ + inline NDArray(const NDArray& other); // NOLINT(*) + /*! + * \brief move constructor + * \param other The value to be moved + */ + NDArray(NDArray&& other) // NOLINT(*) + : data_(other.data_) { + other.data_ = nullptr; + } + /*! \brief destructor */ + ~NDArray() { + this->reset(); + } + /*! + * \brief Swap this array with another NDArray + * \param other The other NDArray + */ + void swap(NDArray& other) { // NOLINT(*) + std::swap(data_, other.data_); + } + /*! + * \brief copy assignment + * \param other The value to be assigned. + * \return reference to self. + */ + NDArray& operator=(const NDArray& other) { // NOLINT(*) + // copy-and-swap idiom + NDArray(other).swap(*this); // NOLINT(*) + return *this; + } + /*! + * \brief move assignment + * \param other The value to be assigned. + * \return reference to self. + */ + NDArray& operator=(NDArray&& other) { // NOLINT(*) + // copy-and-swap idiom + NDArray(std::move(other)).swap(*this); // NOLINT(*) + return *this; + } + /*! \return If NDArray is defined */ + bool defined() const { + return data_ != nullptr; + } + /*! \return If both NDArray reference the same container */ + bool same_as(const NDArray& other) const { + return data_ == other.data_; + } /*! \brief reset the content of NDArray to be nullptr */ inline void reset(); /*! @@ -71,7 +118,7 @@ class NDArray : public ObjectRef { * \note The copy may happen asynchronously if it involves a GPU context. * TVMSynchronize is necessary. */ - inline void CopyFrom(const DLTensor* other); + inline void CopyFrom(DLTensor* other); inline void CopyFrom(const NDArray& other); /*! * \brief Copy data content into another array. @@ -141,43 +188,39 @@ class NDArray : public ObjectRef { * \param stream The stream used in copy. */ TVM_DLL static void CopyFromTo( - const DLTensor* from, DLTensor* to, TVMStreamHandle stream = nullptr); + DLTensor* from, DLTensor* to, TVMStreamHandle stream = nullptr); TVM_DLL std::vector Shape() const; + // internal namespace struct Internal; - protected: + /*! \brief Internal Data content */ + Container* data_{nullptr}; + // enable internal functions + friend struct Internal; friend class TVMPODValue_; + friend class TVMArgValue; friend class TVMRetValue; friend class TVMArgsSetter; - /*! - * \brief Get mutable internal container pointer. - * \return a mutable container pointer. - */ - inline Container* get_mutable() const; - // Helper functions for FFI handling. - /*! - * \brief Construct NDArray's Data field from array handle in FFI. - * \param handle The array handle. - * \return The corresponding ObjectPtr to the constructed container object. - * - * \note We keep a special calling convention for NDArray by passing - * ContainerBase pointer in FFI. - * As a result, the argument is compatible to DLTensor*.
- */ - inline static ObjectPtr FFIDataFromHandle(TVMArrayHandle handle); - /*! - * \brief DecRef resource managed by an FFI array handle. - * \param handle The array handle. - */ - inline static void FFIDecRef(TVMArrayHandle handle); - /*! - * \brief Get FFI Array handle from ndarray. - * \param nd The object with ndarray type. - * \return The result array handle. - */ - inline static TVMArrayHandle FFIGetHandle(const ObjectRef& nd); +}; + +/*! + * \brief The type trait indicates subclass of TVM's NDArray. + * For irrelevant classes, code = -1. + * For TVM NDArray itself, code = 0. + * All subclasses of NDArray should override code > 0. + */ +template +struct array_type_info { + /*! \brief the value of the traits */ + static const int code = -1; +}; + +// Overrides the type trait for tvm's NDArray. +template<> +struct array_type_info { + static const int code = 0; }; /*! @@ -188,14 +231,19 @@ class NDArray : public ObjectRef { inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor); /*! - * \brief The container base structure - * contains all the fields except for the Object header. + * \brief Reference counted Container object used to back NDArray. * - * \note We explicitly declare this structure in order to pass - * PackedFunc argument using ContainerBase*. + * This object is DLTensor compatible: + * the pointer to the NDArrayContainer can be directly + * interpreted as a DLTensor* + * + * \note do not use this function directly, use NDArray. */ -class NDArray::ContainerBase { +class NDArray::Container { public: + // NOTE: the first part of this structure is the same as + // DLManagedTensor, note that, however, the deleter + // is only called when the reference counter goes to 0 /*! * \brief The corresponding dl_tensor field. * \note it is important that the first field is DLTensor */ DLTensor dl_tensor; /*! * \brief additional context, reserved for recycling * \note We can attach additional content here * which the current container depends on * (e.g. reference to original memory when creating views). */ void* manager_ctx{nullptr}; + /*! + * \brief Customized deleter + * + * \note The customized deleter is helpful to enable + * different ways of memory allocator that are not + * currently defined by the system. + */ + void (*deleter)(Container* self) = nullptr; protected: + friend class NDArray; + friend class TVMPODValue_; + friend class TVMArgValue; + friend class TVMRetValue; + friend class RPCWrappedFunc; + /*! + * \brief Type flag used to indicate subclass. + * Default value 0 means normal NDArray::Container. + * + * We can extend a more specialized NDArray::Container + * and use the array_type_code_ to indicate + * the specific array subclass. + */ + int32_t array_type_code_{0}; + /*! \brief The internal reference counter */ + std::atomic ref_counter_{0}; + /*! * \brief The shape container, * can be used for shape data. */ std::vector shape_; -}; -/*! - * \brief Object container class that backs NDArray. - * \note do not use this function directly, use NDArray. - */ -class NDArray::Container : - public Object, - public NDArray::ContainerBase { public: /*! \brief default constructor */ Container() { - // Initialize the type index.
- type_index_ = Container::RuntimeTypeIndex(); dl_tensor.data = data; shape_ = std::move(shape); dl_tensor.ndim = static_cast(shape_.size()); dl_tensor.shape = dmlc::BeginPtr(shape_); dl_tensor.dtype = dtype; dl_tensor.strides = nullptr; dl_tensor.byte_offset = 0; dl_tensor.ctx = ctx; } - /*! - * \brief Set the deleter field. - * \param deleter The deleter. - */ - void SetDeleter(FDeleter deleter) { - deleter_ = deleter; + + /*! \brief developer function, increases reference counter */ + void IncRef() { + ref_counter_.fetch_add(1, std::memory_order_relaxed); + } + /*! \brief developer function, decreases reference counter */ + void DecRef() { + if (ref_counter_.fetch_sub(1, std::memory_order_release) == 1) { + std::atomic_thread_fence(std::memory_order_acquire); + if (this->deleter != nullptr) { + (*this->deleter)(this); + } + } } +}; - // Expose DecRef and IncRef as public function - // NOTE: they are only for developer purposes only. - using Object::DecRef; - using Object::IncRef; +// implementations of inline functions +// the usages of functions are documented in place. +inline NDArray::NDArray(Container* data) + : data_(data) { + if (data != nullptr) { + data_->IncRef(); + } +} - // Information for object protocol. - static constexpr const uint32_t _type_index = TypeIndex::kDynamic; - static constexpr const uint32_t _type_child_slots = 0; - static constexpr const uint32_t _type_child_slots_can_overflow = true; - static constexpr const char* _type_key = "NDArray"; - TVM_DECLARE_BASE_OBJECT_INFO(NDArray::Container, Object); +inline NDArray::NDArray(const NDArray& other) + : data_(other.data_) { + if (data_ != nullptr) { + data_->IncRef(); + } +} - protected: - friend class RPCWrappedFunc; - friend class NDArray; -}; +inline void NDArray::reset() { + if (data_ != nullptr) { + data_->DecRef(); + data_ = nullptr; + } +} -// implementations of inline functions -/*! - * \brief return the size of data the DLTensor hold, in term of number of bytes +/*! \brief return the size of data the DLTensor holds, in terms of number of bytes * * \param arr the input DLTensor + * * \return number of bytes of data in the DLTensor.
*/ inline size_t GetDataSize(const DLTensor& arr) { @@ -295,26 +369,26 @@ inline size_t GetDataSize(const DLTensor& arr) { return size; } -inline void NDArray::CopyFrom(const DLTensor* other) { +inline void NDArray::CopyFrom(DLTensor* other) { CHECK(data_ != nullptr); - CopyFromTo(other, &(get_mutable()->dl_tensor)); + CopyFromTo(other, &(data_->dl_tensor)); } inline void NDArray::CopyFrom(const NDArray& other) { CHECK(data_ != nullptr); CHECK(other.data_ != nullptr); - CopyFromTo(&(other.get_mutable()->dl_tensor), &(get_mutable()->dl_tensor)); + CopyFromTo(&(other.data_->dl_tensor), &(data_->dl_tensor)); } inline void NDArray::CopyTo(DLTensor* other) const { CHECK(data_ != nullptr); - CopyFromTo(&(get_mutable()->dl_tensor), other); + CopyFromTo(&(data_->dl_tensor), other); } inline void NDArray::CopyTo(const NDArray& other) const { CHECK(data_ != nullptr); CHECK(other.data_ != nullptr); - CopyFromTo(&(get_mutable()->dl_tensor), &(other.get_mutable()->dl_tensor)); + CopyFromTo(&(data_->dl_tensor), &(other.data_->dl_tensor)); } inline NDArray NDArray::CopyTo(const DLContext& ctx) const { @@ -327,46 +401,19 @@ inline NDArray NDArray::CopyTo(const DLContext& ctx) const { } inline int NDArray::use_count() const { - return data_.use_count(); + if (data_ == nullptr) return 0; + return data_->ref_counter_.load(std::memory_order_relaxed); } inline const DLTensor* NDArray::operator->() const { - return &(get_mutable()->dl_tensor); -} - -inline NDArray::Container* NDArray::get_mutable() const { - return static_cast(data_.get()); -} - -inline ObjectPtr NDArray::FFIDataFromHandle(TVMArrayHandle handle) { - return GetObjectPtr(static_cast( - reinterpret_cast(handle))); -} - -inline TVMArrayHandle NDArray::FFIGetHandle(const ObjectRef& nd) { - // NOTE: it is necessary to cast to container then to base - // so that the FFI handle uses the ContainerBase address. - return reinterpret_cast( - static_cast( - static_cast( - const_cast(nd.get())))); -} - -inline void NDArray::FFIDecRef(TVMArrayHandle handle) { - static_cast( - reinterpret_cast(handle))->DecRef(); -} - -inline Object* TVMArrayHandleToObjectHandle(TVMArrayHandle handle) { - return static_cast( - reinterpret_cast(handle)); + return &(data_->dl_tensor); } /*! \brief Magic number for NDArray file */ constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F; inline bool SaveDLTensor(dmlc::Stream* strm, - const DLTensor* tensor) { + DLTensor* tensor) { uint64_t header = kTVMNDArrayMagic, reserved = 0; strm->Write(header); strm->Write(reserved); @@ -404,7 +451,7 @@ inline bool SaveDLTensor(dmlc::Stream* strm, } else { std::vector bytes(data_byte_size); CHECK_EQ(TVMArrayCopyToBytes( - const_cast(tensor), dmlc::BeginPtr(bytes), data_byte_size), 0) + tensor, dmlc::BeginPtr(bytes), data_byte_size), 0) << TVMGetLastError(); if (!DMLC_IO_NO_ENDIAN_SWAP) { dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems); @@ -415,7 +462,7 @@ inline bool SaveDLTensor(dmlc::Stream* strm, } inline void NDArray::Save(dmlc::Stream* strm) const { - SaveDLTensor(strm, operator->()); + SaveDLTensor(strm, const_cast(operator->())); } inline bool NDArray::Load(dmlc::Stream* strm) { diff --git a/include/tvm/runtime/object.h b/include/tvm/runtime/object.h index 96215daf4a7a..20e6b5a0fb63 100644 --- a/include/tvm/runtime/object.h +++ b/include/tvm/runtime/object.h @@ -24,11 +24,10 @@ #define TVM_RUNTIME_OBJECT_H_ #include -#include #include #include #include - +#include "c_runtime_api.h" /*! * \brief Whether or not use atomic reference counter. 
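The restored NDArray::Container above counts references by hand rather than through the Object protocol: IncRef is a relaxed fetch_add, while DecRef pairs a release fetch_sub with an acquire fence before invoking the deleter. A minimal standalone sketch of the same protocol (hypothetical RefCounted and Blob classes, with `delete this` standing in for the diff's customizable deleter callback):

```cpp
#include <atomic>
#include <cstdio>

// Hypothetical RefCounted base (not TVM code): relaxed increments, and a
// release decrement paired with an acquire fence before destruction.
class RefCounted {
 public:
  void IncRef() { ref_counter_.fetch_add(1, std::memory_order_relaxed); }
  void DecRef() {
    if (ref_counter_.fetch_sub(1, std::memory_order_release) == 1) {
      // The fence makes all writes from other releasing threads visible
      // before the destructor runs.
      std::atomic_thread_fence(std::memory_order_acquire);
      delete this;  // the diff calls a customizable deleter here instead
    }
  }

 protected:
  virtual ~RefCounted() = default;

 private:
  std::atomic<int> ref_counter_{0};
};

class Blob : public RefCounted {
 public:
  ~Blob() override { std::printf("Blob destroyed\n"); }
};

int main() {
  Blob* blob = new Blob();
  blob->IncRef();  // first handle
  blob->IncRef();  // second handle
  blob->DecRef();  // first handle released
  blob->DecRef();  // last handle released: fence, then delete
  return 0;
}
```

The release/acquire pairing is the standard way to make the final decrementing thread observe every write the other owners made before they released their references.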
@@ -581,14 +580,6 @@ class ObjectRef { static T DowncastNoCheck(ObjectRef ref) { return T(std::move(ref.data_)); } - /*! - * \brief Clear the object ref data field without DecRef - * after we successfully moved the field. - * \param ref The reference data. - */ - static void FFIClearAfterMove(ObjectRef* ref) { - ref->data_.data_ = nullptr; - } /*! * \brief Internal helper function get data_ as ObjectPtr of ObjectType. * \note only used for internal dev purpose. @@ -657,7 +648,7 @@ struct ObjectEqual { return _GetOrAllocRuntimeTypeIndex(); \ } \ static const uint32_t _GetOrAllocRuntimeTypeIndex() { \ - static uint32_t tidx = Object::GetOrAllocRuntimeTypeIndex( \ + static uint32_t tidx = GetOrAllocRuntimeTypeIndex( \ TypeName::_type_key, \ TypeName::_type_index, \ ParentType::_GetOrAllocRuntimeTypeIndex(), \ @@ -677,19 +668,6 @@ struct ObjectEqual { TVM_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType) \ -/*! \brief helper macro to supress unused warning */ -#if defined(__GNUC__) -#define TVM_ATTRIBUTE_UNUSED __attribute__((unused)) -#else -#define TVM_ATTRIBUTE_UNUSED -#endif - -#define TVM_STR_CONCAT_(__x, __y) __x##__y -#define TVM_STR_CONCAT(__x, __y) TVM_STR_CONCAT_(__x, __y) - -#define TVM_OBJECT_REG_VAR_DEF \ - static TVM_ATTRIBUTE_UNUSED uint32_t __make_Object_tid - /*! * \brief Helper macro to register the object type to runtime. * Makes sure that the runtime type table is correctly populated. * Use this macro in the cc file for each terminal class. */ #define TVM_REGISTER_OBJECT_TYPE(TypeName) \ - TVM_STR_CONCAT(TVM_OBJECT_REG_VAR_DEF, __COUNTER__) = \ + static DMLC_ATTRIBUTE_UNUSED uint32_t __make_Object_tidx ## _ ## TypeName ## __ = \ TypeName::_GetOrAllocRuntimeTypeIndex() @@ -713,14 +691,14 @@ struct ObjectEqual { using ContainerType = ObjectName; #define TVM_DEFINE_OBJECT_REF_METHODS_MUT(TypeName, ParentType, ObjectName) \ - TypeName() {} \ - explicit TypeName( \ - ::tvm::runtime::ObjectPtr<::tvm::runtime::Object> n) \ - : ParentType(n) {} \ - ObjectName* operator->() { \ - return static_cast(data_.get()); \ - } \ - operator bool() const { return data_ != nullptr; } \ + TypeName() {} \ + explicit TypeName( \ + ::tvm::runtime::ObjectPtr<::tvm::runtime::Object> n) \ + : ParentType(n) {} \ + ObjectName* operator->() { \ + return static_cast(data_.get()); \ + } \ + operator bool() const { return data_ != nullptr; } \ using ContainerType = ObjectName; // Implementations details below
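The TVM_REGISTER_OBJECT_TYPE variants above both rely on the same trick: the macro expands to an unused static variable whose initializer runs before main() and registers the type as a side effect, with token pasting (or __COUNTER__) keeping each variable name unique. A standalone sketch of the pattern (hypothetical DEMO_REGISTER_TYPE macro and registry, not TVM code):

```cpp
#include <cstdio>
#include <string>
#include <unordered_map>

// Hypothetical registry (not TVM code): a static variable whose initializer
// runs before main() performs the registration as a side effect, which is
// exactly what TVM_REGISTER_OBJECT_TYPE relies on.
static std::unordered_map<std::string, unsigned>& TypeTable() {
  static std::unordered_map<std::string, unsigned> table;  // init on first use
  return table;
}

static unsigned RegisterType(const char* key) {
  unsigned index = static_cast<unsigned>(TypeTable().size());
  TypeTable().emplace(key, index);
  return index;
}

// Token pasting produces a unique variable per type name, mirroring
// __make_Object_tidx ## _ ## TypeName ## __ in the hunk above;
// [[maybe_unused]] plays the role of DMLC_ATTRIBUTE_UNUSED.
#define DEMO_REGISTER_TYPE(TypeName) \
  [[maybe_unused]] static unsigned demo_tidx_##TypeName = \
      RegisterType(#TypeName)

DEMO_REGISTER_TYPE(FooNode);
DEMO_REGISTER_TYPE(BarNode);

int main() {
  for (const auto& entry : TypeTable()) {
    std::printf("%s -> %u\n", entry.first.c_str(), entry.second);
  }
  return 0;
}
```

Routing the table through a function-local static avoids the static initialization order problem, since registrations from any translation unit construct the table on first use.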
diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h index 5650db6f909c..57c4291907c0 100644 --- a/include/tvm/runtime/packed_func.h +++ b/include/tvm/runtime/packed_func.h @@ -28,11 +28,6 @@ #include #endif #include -#include -#include -#include -#include -#include #include #include #include @@ -41,7 +36,10 @@ #include #include #include - +#include "c_runtime_api.h" +#include "module.h" +#include "ndarray.h" +#include "object.h" // Whether use TVM runtime in header only mode. #ifndef TVM_RUNTIME_HEADER_ONLY @@ -51,6 +49,7 @@ namespace tvm { // forward declarations class Integer; +class DataType; class Expr; namespace runtime { @@ -389,20 +388,47 @@ inline std::string TVMType2String(TVMType t); << TypeCode2Str(T) << " but get " << TypeCode2Str(CODE) \ /*! - * \brief Type traits for runtime type check during FFI conversion. - * \tparam T the type to be checked. + * \brief Type traits to mark if a class is tvm extension type. + * + * To enable an extension type in C++, it must be registered via the macro + * TVM_REGISTER_EXT_TYPE(TypeName) after defining this trait. + * + * Extension classes can be passed and returned via PackedFunc in all tvm runtimes. + * Internally the extension class is stored as T*. + * + * \tparam T the typename */ template -struct ObjectTypeChecker { - static bool Check(const Object* ptr) { - using ContainerType = typename T::ContainerType; - if (ptr == nullptr) return true; - return ptr->IsInstance(); - } - static std::string TypeName() { - using ContainerType = typename T::ContainerType; - return ContainerType::_type_key; - } +struct extension_type_info { + static const int code = 0; +}; + +/*! + * \brief Runtime function table about extension type. + */ +class ExtTypeVTable { + public: + /*! \brief function to be called to delete a handle */ + void (*destroy)(void* handle); + /*! \brief function to be called when cloning a handle */ + void* (*clone)(void* handle); + /*! + * \brief Register type + * \tparam T The type to be registered. + * \return The registered vtable. + */ + template + static inline ExtTypeVTable* Register_(); + /*! + * \brief Get a vtable based on type code. + * \param type_code The type code + * \return The registered vtable. + */ + TVM_DLL static ExtTypeVTable* Get(int type_code); + + private: + // Internal registration function. + TVM_DLL static ExtTypeVTable* RegisterInternal(int type_code, const ExtTypeVTable& vt); }; /*! @@ -451,17 +477,24 @@ class TVMPODValue_ { return static_cast(value_.v_handle); } else { if (type_code_ == kNull) return nullptr; - LOG(FATAL) << "Expect " + LOG(FATAL) << "Expected " << "DLTensor* or NDArray but get " << TypeCode2Str(type_code_); return nullptr; } } operator NDArray() const { - if (type_code_ == kNull) return NDArray(ObjectPtr(nullptr)); + if (type_code_ == kNull) return NDArray(); TVM_CHECK_TYPE_CODE(type_code_, kNDArrayContainer); - return NDArray(NDArray::FFIDataFromHandle( - static_cast(value_.v_handle))); + return NDArray(static_cast(value_.v_handle)); + } + operator ObjectRef() const { + if (type_code_ == kNull) { + return ObjectRef(ObjectPtr(nullptr)); + } + TVM_CHECK_TYPE_CODE(type_code_, kObjectHandle); + return ObjectRef( + ObjectPtr(static_cast(value_.v_handle))); } operator Module() const { if (type_code_ == kNull) { @@ -475,9 +508,28 @@ class TVMPODValue_ { TVM_CHECK_TYPE_CODE(type_code_, kTVMContext); return value_.v_ctx; } + template::value>::type> + TNDArray AsNDArray() const { + if (type_code_ == kNull) return TNDArray(nullptr); + auto *container = static_cast(value_.v_handle); + CHECK_EQ(container->array_type_code_, array_type_info::code); + return TNDArray(container); + } + template + const TExtension& AsExtension() const { + CHECK_LT(type_code_, kExtEnd); + return static_cast(value_.v_handle)[0]; + } + template::value>::type> + inline bool IsObjectRef() const; int type_code() const { return type_code_; } + /*! * \brief return handle as specific pointer type. * \tparam T the data type.
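ExtTypeVTable above reduces an extension type to two function pointers, clone and destroy, looked up by type code whenever a value crosses the PackedFunc boundary. A standalone sketch of a two-entry vtable driving a type-erased box (hypothetical VTable and Box types, not TVM code):

```cpp
#include <cstdio>
#include <string>
#include <utility>

// Hypothetical type-erased box (not TVM code): the same two-entry vtable
// (clone + destroy) that ExtTypeVTable keeps per extension type code.
struct VTable {
  void* (*clone)(void* handle);
  void (*destroy)(void* handle);
};

template <typename T>
const VTable* VTableFor() {
  // Captureless lambdas convert to plain function pointers.
  static const VTable vt = {
      [](void* h) -> void* { return new T(*static_cast<T*>(h)); },
      [](void* h) { delete static_cast<T*>(h); },
  };
  return &vt;
}

class Box {
 public:
  template <typename T>
  explicit Box(T value)
      : handle_(new T(std::move(value))), vtable_(VTableFor<T>()) {}
  Box(const Box& other)
      : handle_(other.vtable_->clone(other.handle_)), vtable_(other.vtable_) {}
  ~Box() { vtable_->destroy(handle_); }
  Box& operator=(const Box&) = delete;  // kept minimal for the sketch

 private:
  void* handle_;
  const VTable* vtable_;
};

int main() {
  Box a(std::string("extension payload"));
  Box b = a;  // deep copy through the clone entry
  std::printf("copied through the vtable\n");
  return 0;
}
```

Storing the payload as a bare void* keeps the FFI value union small; the vtable carries just enough behavior to copy and free the value without knowing its static type.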
@@ -487,16 +539,6 @@ class TVMPODValue_ { T* ptr() const { return static_cast(value_.v_handle); } - // ObjectRef handling - template::value>::type> - inline bool IsObjectRef() const; - template - inline TObjectRef AsObjectRef() const; - // ObjectRef Specializations - inline operator tvm::Expr() const; - inline operator tvm::Integer() const; protected: friend class TVMArgsSetter; @@ -539,11 +581,9 @@ class TVMArgValue : public TVMPODValue_ { using TVMPODValue_::operator DLTensor*; using TVMPODValue_::operator NDArray; using TVMPODValue_::operator TVMContext; + using TVMPODValue_::operator ObjectRef; using TVMPODValue_::operator Module; using TVMPODValue_::IsObjectRef; - using TVMPODValue_::AsObjectRef; - using TVMPODValue_::operator tvm::Expr; - using TVMPODValue_::operator tvm::Integer; // conversion operator. operator std::string() const { @@ -570,9 +610,6 @@ class TVMArgValue : public TVMPODValue_ { TVM_CHECK_TYPE_CODE(type_code_, kTVMType); return value_.v_type; } - operator DataType() const { - return DataType(operator DLDataType()); - } operator PackedFunc() const { if (type_code_ == kNull) return PackedFunc(); TVM_CHECK_TYPE_CODE(type_code_, kFuncHandle); @@ -585,10 +622,16 @@ class TVMArgValue : public TVMPODValue_ { const TVMValue& value() const { return value_; } + // Deferred extension handler. + template + inline TObjectRef AsObjectRef() const; template::value>::type> + std::is_class::value>::type> inline operator T() const; + inline operator tvm::DataType() const; + inline operator tvm::Expr() const; + inline operator tvm::Integer() const; }; /*! @@ -626,11 +669,9 @@ class TVMRetValue : public TVMPODValue_ { using TVMPODValue_::operator DLTensor*; using TVMPODValue_::operator TVMContext; using TVMPODValue_::operator NDArray; + using TVMPODValue_::operator ObjectRef; using TVMPODValue_::operator Module; using TVMPODValue_::IsObjectRef; - using TVMPODValue_::AsObjectRef; - using TVMPODValue_::operator tvm::Expr; - using TVMPODValue_::operator tvm::Integer; TVMRetValue(const TVMRetValue& other) : TVMPODValue_() { this->Assign(other); @@ -652,9 +693,6 @@ class TVMRetValue : public TVMPODValue_ { TVM_CHECK_TYPE_CODE(type_code_, kTVMType); return value_.v_type; } - operator DataType() const { - return DataType(operator DLDataType()); - } operator PackedFunc() const { if (type_code_ == kNull) return PackedFunc(); TVM_CHECK_TYPE_CODE(type_code_, kFuncHandle); @@ -707,9 +745,6 @@ class TVMRetValue : public TVMPODValue_ { value_.v_type = t; return *this; } - TVMRetValue& operator=(const DataType& other) { - return operator=(other.operator DLDataType()); - } TVMRetValue& operator=(bool value) { this->SwitchToPOD(kDLInt); value_.v_int64 = value; @@ -724,20 +759,24 @@ class TVMRetValue : public TVMPODValue_ { return *this; } TVMRetValue& operator=(NDArray other) { - if (other.data_ != nullptr) { - this->Clear(); - type_code_ = kNDArrayContainer; - value_.v_handle = NDArray::FFIGetHandle(other); - ObjectRef::FFIClearAfterMove(&other); - } else { - SwitchToPOD(kNull); - } + this->Clear(); + type_code_ = kNDArrayContainer; + value_.v_handle = other.data_; + other.data_ = nullptr; return *this; } + TVMRetValue& operator=(ObjectRef other) { + return operator=(std::move(other.data_)); + } TVMRetValue& operator=(Module m) { SwitchToObject(kModuleHandle, std::move(m.data_)); return *this; } + template + TVMRetValue& operator=(ObjectPtr other) { + SwitchToObject(kObjectHandle, std::move(other)); + return *this; + } TVMRetValue& operator=(PackedFunc f) { this->SwitchToClass(kFuncHandle, f); return 
*this; @@ -754,6 +793,14 @@ class TVMRetValue : public TVMPODValue_ { this->Assign(other); return *this; } + template::code != 0>::type> + TVMRetValue& operator=(const T& other) { + this->SwitchToClass( + extension_type_info::code, other); + return *this; + } /*! * \brief Move the value back to front-end via C API. * This marks the current container as null. @@ -779,15 +826,16 @@ class TVMRetValue : public TVMPODValue_ { type_code_ != kStr) << "TVMRetValue.value can only be used for POD data"; return value_; } - // ObjectRef handling - template::value>::type> - inline TVMRetValue& operator=(TObjectRef other); + // ObjectRef related extenstions: in tvm/packed_func_ext.h template::value>::type> inline operator T() const; + template + inline TObjectRef AsObjectRef() const; + // type related + inline operator tvm::DataType() const; + inline TVMRetValue& operator=(const tvm::DataType& other); private: template @@ -814,15 +862,24 @@ class TVMRetValue : public TVMPODValue_ { break; } case kObjectHandle: { - // Avoid operator ObjectRef as we already know it is not NDArray/Module - SwitchToObject( - kObjectHandle, GetObjectPtr( - static_cast(other.value_.v_handle))); + *this = other.operator ObjectRef(); break; } default: { - SwitchToPOD(other.type_code()); - value_ = other.value_; + if (other.type_code() < kExtBegin) { + SwitchToPOD(other.type_code()); + value_ = other.value_; + } else { +#if TVM_RUNTIME_HEADER_ONLY + LOG(FATAL) << "Header only mode do not support ext type"; +#else + this->Clear(); + type_code_ = other.type_code(); + value_.v_handle = + (*(ExtTypeVTable::Get(other.type_code())->clone))( + other.value().v_handle); +#endif + } break; } } @@ -861,7 +918,7 @@ class TVMRetValue : public TVMPODValue_ { case kStr: delete ptr(); break; case kFuncHandle: delete ptr(); break; case kNDArrayContainer: { - NDArray::FFIDecRef(static_cast(value_.v_handle)); + static_cast(value_.v_handle)->DecRef(); break; } case kModuleHandle: { @@ -873,6 +930,13 @@ class TVMRetValue : public TVMPODValue_ { break; } } + if (type_code_ > kExtBegin) { +#if TVM_RUNTIME_HEADER_ONLY + LOG(FATAL) << "Header only mode do not support ext type"; +#else + (*(ExtTypeVTable::Get(type_code_)->destroy))(value_.v_handle); +#endif + } type_code_ = kNull; } }; @@ -893,7 +957,7 @@ inline const char* TypeCode2Str(int type_code) { case kFuncHandle: return "FunctionHandle"; case kModuleHandle: return "ModuleHandle"; case kNDArrayContainer: return "NDArrayContainer"; - case kObjectHandle: return "Object"; + case kObjectHandle: return "ObjectCell"; default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); return ""; } @@ -917,10 +981,6 @@ inline std::ostream& operator<<(std::ostream& os, TVMType t) { // NOLINT(*) return os; } -inline std::ostream& operator<<(std::ostream& os, const DataType& dtype) { // NOLINT(*) - return os << dtype.operator DLDataType(); -} - #endif inline std::string TVMType2String(TVMType t) { @@ -1080,31 +1140,50 @@ class TVMArgsSetter { values_[i].v_type = value; type_codes_[i] = kTVMType; } - void operator()(size_t i, DataType dtype) const { - operator()(i, dtype.operator DLDataType()); - } void operator()(size_t i, const char* value) const { values_[i].v_str = value; type_codes_[i] = kStr; } - // setters for container types - void operator()(size_t i, const std::string& value) const { + // setters for container type + // They must be reference(instead of const ref) + // to make sure they are alive in the tuple(instead of getting converted) + void operator()(size_t i, const std::string& value) 
const { // NOLINT(*) values_[i].v_str = value.c_str(); type_codes_[i] = kStr; } - void operator()(size_t i, const TVMByteArray& value) const { + void operator()(size_t i, const TVMByteArray& value) const { // NOLINT(*) values_[i].v_handle = const_cast(&value); type_codes_[i] = kBytes; } - void operator()(size_t i, const PackedFunc& value) const { + void operator()(size_t i, const PackedFunc& value) const { // NOLINT(*) values_[i].v_handle = const_cast(&value); type_codes_[i] = kFuncHandle; } template - void operator()(size_t i, const TypedPackedFunc& value) const { + void operator()(size_t i, const TypedPackedFunc& value) const { // NOLINT(*) operator()(i, value.packed()); } - void operator()(size_t i, const TVMRetValue& value) const { + void operator()(size_t i, const Module& value) const { // NOLINT(*) + if (value.defined()) { + values_[i].v_handle = value.data_.data_; + type_codes_[i] = kModuleHandle; + } else { + type_codes_[i] = kNull; + } + } + void operator()(size_t i, const NDArray& value) const { // NOLINT(*) + values_[i].v_handle = value.data_; + type_codes_[i] = kNDArrayContainer; + } + void operator()(size_t i, const ObjectRef& value) const { // NOLINT(*) + if (value.defined()) { + values_[i].v_handle = value.data_.data_; + type_codes_[i] = kObjectHandle; + } else { + type_codes_[i] = kNull; + } + } + void operator()(size_t i, const TVMRetValue& value) const { // NOLINT(*) if (value.type_code() == kStr) { values_[i].v_str = value.ptr()->c_str(); type_codes_[i] = kStr; @@ -1114,11 +1193,12 @@ class TVMArgsSetter { type_codes_[i] = value.type_code(); } } - // ObjectRef handling - template::value>::type> - inline void operator()(size_t i, const TObjectRef& value) const; + extension_type_info::code != 0>::type> + inline void operator()(size_t i, const T& value) const; + inline void operator()(size_t i, const tvm::DataType& t) const; private: /*! \brief The values fields */ @@ -1230,131 +1310,78 @@ inline R TypedPackedFunc::operator()(Args... args) const { ::run(packed_, std::forward(args)...); } -// ObjectRef related conversion handling -// Object can have three possible type codes: -// kNDArrayContainer, kModuleHandle, kObjectHandle -// -// We use type traits to eliminate un-necessary checks. -template -inline void TVMArgsSetter::operator()(size_t i, const TObjectRef& value) const { - if (value.defined()) { - Object* ptr = value.data_.data_; - if (std::is_base_of::value || - (std::is_base_of::value && - ptr->IsInstance())) { - values_[i].v_handle = NDArray::FFIGetHandle(value); - type_codes_[i] = kNDArrayContainer; - } else if (std::is_base_of::value || - (std::is_base_of::value && - ptr->IsInstance())) { - values_[i].v_handle = ptr; - type_codes_[i] = kModuleHandle; - } else { - values_[i].v_handle = ptr; - type_codes_[i] = kObjectHandle; - } - } else { - type_codes_[i] = kNull; +// extension and node type handling +namespace detail { +template +struct TVMValueCast { + static T Apply(const TSrc* self) { + static_assert(!is_ext && !is_nd, "The default case accepts only non-extensions"); + return self->template AsObjectRef(); } -} - -template -inline bool TVMPODValue_::IsObjectRef() const { - using ContainerType = typename TObjectRef::ContainerType; - // NOTE: the following code can be optimized by constant folding. 
- if (std::is_base_of::value) { - return type_code_ == kNDArrayContainer && - TVMArrayHandleToObjectHandle( - static_cast(value_.v_handle))->IsInstance(); - } - if (std::is_base_of::value) { - return type_code_ == kModuleHandle && - static_cast(value_.v_handle)->IsInstance(); - } - return - (std::is_base_of::value && type_code_ == kNDArrayContainer) || - (std::is_base_of::value && type_code_ == kModuleHandle) || - (type_code_ == kObjectHandle && - ObjectTypeChecker::Check(static_cast(value_.v_handle))); -} +}; -template -inline TObjectRef TVMPODValue_::AsObjectRef() const { - static_assert( - std::is_base_of::value, - "Conversion only works for ObjectRef"); - using ContainerType = typename TObjectRef::ContainerType; - if (type_code_ == kNull) return TObjectRef(ObjectPtr(nullptr)); - // NOTE: the following code can be optimized by constant folding. - if (std::is_base_of::value) { - // Casting to a sub-class of NDArray - TVM_CHECK_TYPE_CODE(type_code_, kNDArrayContainer); - ObjectPtr data = NDArray::FFIDataFromHandle( - static_cast(value_.v_handle)); - CHECK(data->IsInstance()) - << "Expect " << ContainerType::_type_key << " but get " << data->GetTypeKey(); - return TObjectRef(data); - } - if (std::is_base_of::value) { - // Casting to a sub-class of Module - TVM_CHECK_TYPE_CODE(type_code_, kModuleHandle); - ObjectPtr data = GetObjectPtr(static_cast(value_.v_handle)); - CHECK(data->IsInstance()) - << "Expect " << ContainerType::_type_key << " but get " << data->GetTypeKey(); - return TObjectRef(data); - } - if (type_code_ == kObjectHandle) { - // normal object type check. - Object* ptr = static_cast(value_.v_handle); - CHECK(ObjectTypeChecker::Check(ptr)) - << "Expect " << ObjectTypeChecker::TypeName() - << " but get " << ptr->GetTypeKey(); - return TObjectRef(GetObjectPtr(ptr)); - } else if (std::is_base_of::value && - type_code_ == kNDArrayContainer) { - // Casting to a base class that NDArray can sub-class - ObjectPtr data = NDArray::FFIDataFromHandle( - static_cast(value_.v_handle)); - return TObjectRef(data); - } else if (std::is_base_of::value && - type_code_ == kModuleHandle) { - // Casting to a base class that Module can sub-class - return TObjectRef(GetObjectPtr(static_cast(value_.v_handle))); - } else { - TVM_CHECK_TYPE_CODE(type_code_, kObjectHandle); - return TObjectRef(ObjectPtr(nullptr)); +template +struct TVMValueCast { + static T Apply(const TSrc* self) { + return self->template AsExtension(); } -} +}; -template -inline TVMRetValue& TVMRetValue::operator=(TObjectRef other) { - const Object* ptr = other.get(); - if (ptr != nullptr) { - if (std::is_base_of::value || - (std::is_base_of::value && - ptr->IsInstance())) { - return operator=(NDArray(std::move(other.data_))); - } - if (std::is_base_of::value || - (std::is_base_of::value && - ptr->IsInstance())) { - return operator=(Module(std::move(other.data_))); - } - SwitchToObject(kObjectHandle, std::move(other.data_)); - } else { - SwitchToPOD(kNull); +template +struct TVMValueCast { + static T Apply(const TSrc* self) { + return self->template AsNDArray(); } - return *this; -} +}; + +} // namespace detail template inline TVMArgValue::operator T() const { - return AsObjectRef(); + return detail:: + TVMValueCast::code != 0), + (array_type_info::code > 0)> + ::Apply(this); } template inline TVMRetValue::operator T() const { - return AsObjectRef(); + return detail:: + TVMValueCast::code != 0), + (array_type_info::code > 0)> + ::Apply(this); +} + +template +inline void TVMArgsSetter::operator()(size_t i, const T& value) const { + 
static_assert(extension_type_info::code != 0, + "Need to have extension code"); + type_codes_[i] = extension_type_info::code; + values_[i].v_handle = const_cast(&value); +} + +// extension type handling +template +struct ExtTypeInfo { + static void destroy(void* handle) { + delete static_cast(handle); + } + static void* clone(void* handle) { + return new T(*static_cast(handle)); + } +}; + +template +inline ExtTypeVTable* ExtTypeVTable::Register_() { + const int code = extension_type_info::code; + static_assert(code != 0, + "require extension_type_info traits to be declared with non-zero code"); + ExtTypeVTable vt; + vt.clone = ExtTypeInfo::clone; + vt.destroy = ExtTypeInfo::destroy; + return ExtTypeVTable::RegisterInternal(code, vt); } inline PackedFunc Module::GetFunction(const std::string& name, bool query_imports) { diff --git a/include/tvm/runtime/registry.h b/include/tvm/runtime/registry.h index e51b806ea81f..d668984f50e2 100644 --- a/include/tvm/runtime/registry.h +++ b/include/tvm/runtime/registry.h @@ -43,9 +43,9 @@ #ifndef TVM_RUNTIME_REGISTRY_H_ #define TVM_RUNTIME_REGISTRY_H_ -#include #include #include +#include "packed_func.h" namespace tvm { namespace runtime { @@ -283,9 +283,22 @@ class Registry { friend struct Manager; }; +/*! \brief helper macro to suppress unused warning */ +#if defined(__GNUC__) +#define TVM_ATTRIBUTE_UNUSED __attribute__((unused)) +#else +#define TVM_ATTRIBUTE_UNUSED +#endif + +#define TVM_STR_CONCAT_(__x, __y) __x##__y +#define TVM_STR_CONCAT(__x, __y) TVM_STR_CONCAT_(__x, __y) + #define TVM_FUNC_REG_VAR_DEF \ static TVM_ATTRIBUTE_UNUSED ::tvm::runtime::Registry& __mk_ ## TVM +#define TVM_TYPE_REG_VAR_DEF \ + static TVM_ATTRIBUTE_UNUSED ::tvm::runtime::ExtTypeVTable* __mk_ ## TVMT + /*! * \brief Register a function globally. * \code @@ -298,6 +311,15 @@ class Registry { TVM_STR_CONCAT(TVM_FUNC_REG_VAR_DEF, __COUNTER__) = \ ::tvm::runtime::Registry::Register(OpName) +/*! + * \brief Macro to register extension type. + * This must be registered in a cc file + * after the trait extension_type_info is defined. + */ +#define TVM_REGISTER_EXT_TYPE(T) \ + TVM_STR_CONCAT(TVM_TYPE_REG_VAR_DEF, __COUNTER__) = \ + ::tvm::runtime::ExtTypeVTable::Register_() + } // namespace runtime } // namespace tvm #endif // TVM_RUNTIME_REGISTRY_H_ diff --git a/include/tvm/tensor.h b/include/tvm/tensor.h index f44498a0aa7a..599d6ff657d1 100644 --- a/include/tvm/tensor.h +++ b/include/tvm/tensor.h @@ -163,7 +163,7 @@ class TensorNode : public Node { /*! \brief The shape of the tensor */ Array shape; /*! \brief data type in the content of the tensor */ - DataType dtype; + Type dtype; /*! \brief the source operation, can be None */ Operation op; /*! \brief the output index from source operation */ @@ -178,7 +178,7 @@ v->Visit("value_index", &value_index); } TVM_DLL static Tensor make(Array shape, - DataType dtype, + Type dtype, Operation op, int value_index);
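Putting the pieces together, the extension_type_info trait (packed_func.h hunk above) plus the TVM_REGISTER_EXT_TYPE macro (registry.h hunk above) is all a client needs to make a C++ class passable through PackedFunc. A sketch of how a .cc file would use them at this revision; MyGraph and its type code slot are hypothetical, and the chosen code must not collide with other registered extensions:

```cpp
// Sketch only -- assumes the headers introduced in this diff
// (tvm/runtime/packed_func.h and tvm/runtime/registry.h at this revision);
// MyGraph and the chosen type code are illustrative, not part of the patch.
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

struct MyGraph {
  int node_count{0};
};

namespace tvm {
namespace runtime {
// A non-zero code marks MyGraph as an extension type; it must live in the
// extension range so the FFI does not confuse it with built-in type codes.
template <>
struct extension_type_info<MyGraph> {
  static const int code = kExtBegin + 1;  // hypothetical slot
};
}  // namespace runtime
}  // namespace tvm

// Populates the ExtTypeVTable (clone/destroy) for MyGraph so values of this
// type can be passed to and returned from PackedFunc.
TVM_REGISTER_EXT_TYPE(MyGraph);
```

Registration happens through the same unused-static-variable trick as TVM_REGISTER_FUNC, so simply linking the .cc file is enough to install the vtable.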
* @param libmod The module of the corresponding function. * @param ctx The local or remote context to deploy the module. * @return Runtime graph module that can be used to execute the graph. diff --git a/neo-tools/sync-with-dmlc.py b/neo-tools/sync-with-dmlc.py index 74881eacdf39..a1967444ffb6 100755 --- a/neo-tools/sync-with-dmlc.py +++ b/neo-tools/sync-with-dmlc.py @@ -58,7 +58,7 @@ def main(): # Add dmlc/tvm to remote 'upstream' if not repo = Repo() - add_remote(repo, 'upstream', 'https://github.com/apache/incubator-tvm.git') + add_remote(repo, 'upstream', 'git@github.com:dmlc/tvm.git') # Fetch 'upstream' remote logging.info("Fetching remote upstrean") diff --git a/nnvm/Makefile b/nnvm/Makefile index 14af3b294e73..39763cb59db8 100644 --- a/nnvm/Makefile +++ b/nnvm/Makefile @@ -30,6 +30,7 @@ TVMPATH = .. export LDFLAGS = -pthread -lm export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC +CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/3rdparty/dlpack/include -I$(TVMPATH)/3rdparty/HalideIR/src -I$(TVMPATH)/topi/include ifdef DMLC_CORE_PATH CFLAGS += -I$(DMLC_CORE_PATH)/include @@ -65,7 +66,7 @@ else NO_WHOLE_ARCH= --no-whole-archive endif -all: lib/libnnvm.a lib/libnnvm.$(SHARED_LIBRARY_SUFFIX) +all: lib/libnnvm.a lib/libnnvm_compiler.$(SHARED_LIBRARY_SUFFIX) SRC = $(wildcard src/*.cc src/c_api/*.cc src/core/*.cc src/pass/*.cc) SRC_COMPILER = $(wildcard src/top/*/*.cc wildcard src/top/vision/*/*.cc src/compiler/*.cc src/compiler/*/*.cc) @@ -86,7 +87,7 @@ lib/libnnvm.a: $(ALL_DEP) @mkdir -p $(@D) $(AR) crv $@ $(filter %.o, $?) -lib/libnnvm.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ} +lib/libnnvm_compiler.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ} @mkdir -p $(@D) $(CXX) $(CFLAGS) -shared -o $@ $(filter %.o, $^) $(LDFLAGS) -Wl,${WHOLE_ARCH} lib/libnnvm.a -Wl,${NO_WHOLE_ARCH} diff --git a/nnvm/README.md b/nnvm/README.md index 54caa17e2ce3..e3b451d63dcd 100644 --- a/nnvm/README.md +++ b/nnvm/README.md @@ -15,8 +15,38 @@ -# NNVM +# NNVM Compiler Module of TVM Stack -NNVM is a graph level IR for neural networks. -We are moving towards Relay IR, a better unified IR that support wider range of programs. -Please use relay instead. +```python +import tvm +from tvm.contrib import graph_runtime, rpc +import nnvm.frontend +import nnvm.compiler + +# GET model from frameworks +# change xyz to supported framework name. +graph, params = nnvm.frontend.from_xyz(...) 
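+# for example, a sketch for an MXNet model (mx_sym, arg_params and
+# aux_params are hypothetical variables coming from the source framework):
+# graph, params = nnvm.frontend.from_mxnet(mx_sym, arg_params, aux_params)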
+
+# OPTIMIZE and COMPILE the graph to get a deployable module
+# target can be "opencl", "llvm", "metal" or any target supported by tvm
+target = "cuda"
+graph, lib, params = nnvm.compiler.build(graph, target, {"data": data_shape}, params=params)
+
+# DEPLOY and run on gpu(0)
+module = graph_runtime.create(graph, lib, tvm.gpu(0))
+module.set_input(**params)
+module.run(data=data_array)
+output = tvm.nd.empty(out_shape, ctx=tvm.gpu(0))
+module.get_output(0, output)
+
+# DEPLOY to REMOTE mobile/rasp/browser with minimum tvm rpc runtime
+# useful for quick experiments on mobile devices
+remote = rpc.connect(remote_host, remote_port)
+lib.export_library("mylib.so")
+remote.upload("mylib.so")
+rlib = remote.load_module("mylib.so")
+# run on remote device
+rmodule = graph_runtime.create(graph, rlib, remote.gpu(0))
+rmodule.set_input(**params)
+rmodule.run()
+```
diff --git a/nnvm/include/nnvm/base.h b/nnvm/include/nnvm/base.h
index 678ed4d4a942..2fd71c7d087e 100644
--- a/nnvm/include/nnvm/base.h
+++ b/nnvm/include/nnvm/base.h
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -46,24 +46,6 @@ using dmlc::get;
 /*!\brief "unsafe" getter function of any type */
 using dmlc::unsafe_get;

-enum TypeFlag {
-  kFloat32 = 0,
-  kFloat64 = 1,
-  kFloat16 = 2,
-  kUint8 = 3,
-  kInt32 = 4,
-  kInt8 = 5,
-  kInt64 = 6,
-  // kBool = 7,
-  // 7 is reserved for kBool, in order to keep consistency with MXNet TypeFlag defined in
-  // https://github.com/apache/incubator-mxnet/blob/master/3rdparty/mshadow/mshadow/base.h#L314
-  kInt16 = 8,
-  kUint16 = 9,
-  kUint32 = 10,
-  kUint64 = 11,
-  kBfloat16 = 12,
-};
-
 }  // namespace nnvm

 // describe op registration point
diff --git a/nnvm/include/nnvm/compiler/op_attr_types.h b/nnvm/include/nnvm/compiler/op_attr_types.h
new file mode 100644
index 000000000000..12b4415850d4
--- /dev/null
+++ b/nnvm/include/nnvm/compiler/op_attr_types.h
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file nnvm/compiler/op_attr_types.h
+ * \brief Operator attribute types used by the NNVM compiler.
+ */
+#ifndef NNVM_COMPILER_OP_ATTR_TYPES_H_
+#define NNVM_COMPILER_OP_ATTR_TYPES_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "packed_func_ext.h"
+
+namespace nnvm {
+namespace compiler {
+
+using ::tvm::Array;
+using ::tvm::Tensor;
+using ::tvm::Schedule;
+
+/*!
\brief operator pattern used in graph fusion */
+enum OpPatternKind {
+  // Elementwise operation
+  kElemWise = 0,
+  // Broadcasting operator, can always map output axis to the input in order.
+  // for example :code:`out[i, ax1, j, ax2] = input[i, j]`.
+  // Note that the axes need to be in order so transpose is not a bcast operator.
+  kBroadcast = 1,
+  // Injective operator, can always injectively map output axis to a single input axis.
+  // All injective operators can still be safely fused to injective and reduction.
+  kInjective = 2,
+  // Commutative reduction operator.
+  kCommReduce = 3,
+  // Complex operation, can still fuse elemwise operations into its output,
+  // but cannot chain another complex op
+  kOutEWiseFusable = 4,
+  // Opaque operation, cannot fuse anything.
+  kOpaque = 8
+};
+
+/*! \brief the operator pattern */
+using TOpPattern = int;
+
+/*!
+ * \brief Computation description interface
+ * \param attrs The attribute of the node.
+ * \param inputs The input tensors (placeholders)
+ * \param out_info Tensors holding shape/type information about output,
+ *  these are always placeholders.
+ * \return The output description of the tensor.
+ */
+using FTVMCompute = std::function<
+  Array(const NodeAttrs& attrs,
+        const Array& inputs,
+        const Array& out_info)>;
+
+/*!
+ * \brief Build the computation schedule for
+ *  the op whose root is at the current op.
+ * \param attrs The attribute of the node.
+ * \param outs The output tensors.
+ * \param target The build target.
+ * \return schedule The computation schedule.
+ */
+using FTVMSchedule = std::function<
+  Schedule(const NodeAttrs& attrs,
+           const Array& outs,
+           const std::string& target)>;
+
+/*!
+ * \brief Modify the op node to alter its input layout.
+ *  It is invoked in the AlterOpLayout pass.
+ * \param attrs The attribute of the original node.
+ * \param inputs The input symbols of the original node.
+ * \param tinfos The inferred shape and dtype of the inputs.
+ * \param ret The replaced operator.
+ * \return Whether to replace the current operator.
+ */
+using FTVMAlterOpLayout = std::function<
+  bool(const NodeAttrs& attrs,
+       const Symbol& inputs,
+       const Array& tinfos,
+       Symbol* ret)>;
+
+/*!
+ * \brief Transform from normal operator to vectorized operator
+ * \param node The source node.
+ * \return Transformed vectorized op.
+ */
+using FTVMVectorizedOp = std::function;
+
+}  // namespace compiler
+}  // namespace nnvm
+#endif  // NNVM_COMPILER_OP_ATTR_TYPES_H_
diff --git a/nnvm/include/nnvm/compiler/packed_func_ext.h b/nnvm/include/nnvm/compiler/packed_func_ext.h
new file mode 100644
index 000000000000..67a43a7b4104
--- /dev/null
+++ b/nnvm/include/nnvm/compiler/packed_func_ext.h
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file nnvm/compiler/packed_func_ext.h
+ * \brief Extension to enable packed functions for nnvm types
+ */
+#ifndef NNVM_COMPILER_PACKED_FUNC_EXT_H_
+#define NNVM_COMPILER_PACKED_FUNC_EXT_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace nnvm {
+namespace compiler {
+
+using tvm::runtime::PackedFunc;
+
+using AttrDict = std::unordered_map;
+
+/*!
+ * \brief Get a PackedFunc from the global registry and
+ *  report an error if it does not exist
+ * \param name The name of the function.
+ * \return The created PackedFunc.
+ */
+inline const PackedFunc& GetPackedFunc(const std::string& name) {
+  const PackedFunc* pf = tvm::runtime::Registry::Get(name);
+  CHECK(pf != nullptr) << "Cannot find function " << name << " in registry";
+  return *pf;
+}
+}  // namespace compiler
+}  // namespace nnvm
+
+// Enable the graph and symbol object exchange.
+namespace tvm {
+namespace runtime {
+
+template<>
+struct extension_type_info {
+  static const int code = 16;
+};
+
+template<>
+struct extension_type_info {
+  static const int code = 17;
+};
+
+template<>
+struct extension_type_info {
+  static const int code = 18;
+};
+
+}  // namespace runtime
+}  // namespace tvm
+#endif  // NNVM_COMPILER_PACKED_FUNC_EXT_H_
diff --git a/nnvm/include/nnvm/compiler/util.h b/nnvm/include/nnvm/compiler/util.h
new file mode 100644
index 000000000000..f108ff131d66
--- /dev/null
+++ b/nnvm/include/nnvm/compiler/util.h
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file nnvm/compiler/util.h
+ * \brief Utility functions for nnvm compiler
+ */
+#ifndef NNVM_COMPILER_UTIL_H_
+#define NNVM_COMPILER_UTIL_H_
+
+#include
+#include
+
+namespace nnvm {
+namespace compiler {
+
+/*
+ * \brief Helper function to convert TShape to TVM array. Useful for
+ * passing data from NNVM param structures to TOPI ops.
+ *
+ * \param shape The shape to convert
+ *
+ * \return An Array of Expr, where each element is a constant int32
+ */
+inline tvm::Array ShapeToArray(TShape shape) {
+  tvm::Array result;
+  for (auto i : shape) {
+    result.push_back(tvm::make_const(tvm::Int(32), i));
+  }
+  return result;
+}
+
+/*
+ * \brief Helper function to convert TShape to TVM array. Useful for
+ * passing data from NNVM param structures to TOPI ops.
+ *
+ * \param shape The shape to convert
+ *
+ * \return An Array of Integer, where each element is a constant int32
+ */
+inline tvm::Array ShapeToIntArray(TShape shape) {
+  return tvm::Downcast >(ShapeToArray(shape));
+}
+}  // namespace compiler
+}  // namespace nnvm
+#endif  // NNVM_COMPILER_UTIL_H_
diff --git a/nnvm/include/nnvm/top/README b/nnvm/include/nnvm/top/README
new file mode 100644
index 000000000000..09a4d6fc387f
--- /dev/null
+++ b/nnvm/include/nnvm/top/README
@@ -0,0 +1 @@
+NNVM Core Operator and Compiler
diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h
new file mode 100644
index 000000000000..f2a3e81472e1
--- /dev/null
+++ b/nnvm/include/nnvm/top/nn.h
@@ -0,0 +1,555 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file nnvm/top/nn.h
+ * \brief Auxiliary param for tensor primitive.
+ */
+#ifndef NNVM_TOP_NN_H_
+#define NNVM_TOP_NN_H_
+
+#include
+#include
+#include
+#include
+#include
+#include "tensor.h"
+
+namespace nnvm {
+namespace top {
+
+struct DenseParam : public dmlc::Parameter {
+  int units;
+  bool use_bias;
+
+  DMLC_DECLARE_PARAMETER(DenseParam) {
+    DMLC_DECLARE_FIELD(units).set_lower_bound(1)
+    .describe("Number of hidden units of the dense transformation.");
+    DMLC_DECLARE_FIELD(use_bias).set_default(true)
+    .describe("Whether to use bias parameter");
+  }
+  // constants
+  static const constexpr int kData = 0;
+  static const constexpr int kWeight = 1;
+  static const constexpr int kBias = 2;
+};
+
+struct DropoutParam : public dmlc::Parameter {
+  float rate;
+
+  DMLC_DECLARE_PARAMETER(DropoutParam) {
+    DMLC_DECLARE_FIELD(rate).set_default(0.5)
+    .set_range(0, 1)
+    .describe("Fraction of the input that gets dropped out during training time.");
+  }
+};
+
+struct BatchNormParam : public dmlc::Parameter {
+  int axis;
+  double epsilon;
+  double momentum;
+  bool center;
+  bool scale;
+
+  DMLC_DECLARE_PARAMETER(BatchNormParam) {
+    DMLC_DECLARE_FIELD(axis).set_default(1)
+    .describe("Specify which shape axis the channel is specified.");
+    DMLC_DECLARE_FIELD(epsilon).set_default(1e-5)
+    .describe("Small float added to variance to avoid dividing by zero.");
+    DMLC_DECLARE_FIELD(center).set_default(true)
+    .describe("If True, add offset of `beta` to normalized tensor."
+              "If False, `beta` is ignored.");
+    DMLC_DECLARE_FIELD(scale).set_default(true)
+    .describe("If True, multiply by `gamma`. If False, `gamma` is not used."
+              "When the next layer is piecewise linear (also e.g. 
`nn.relu`)," + "this can be disabled since the scaling" + "will be done by the next layer."); + } + // constants + static const constexpr int kData = 0; + static const constexpr int kGamma = 1; + static const constexpr int kBeta = 2; + static const constexpr int kMovingMean = 3; + static const constexpr int kMovingVariance = 4; +}; + + +// Shared by softmax and log_softmax +struct SoftmaxParam : public dmlc::Parameter { + int axis; + + DMLC_DECLARE_PARAMETER(SoftmaxParam) { + DMLC_DECLARE_FIELD(axis).set_default(-1) + .describe("The axis to sum over when computing softmax."); + } +}; + +struct LeakyReLUParam : public dmlc::Parameter { + double alpha; + + DMLC_DECLARE_PARAMETER(LeakyReLUParam) { + DMLC_DECLARE_FIELD(alpha).set_lower_bound(0.0).set_default(0.25) + .describe("slope coefficient for the negative half axis."); + } +}; + +struct PReLUParam : public dmlc::Parameter { + int axis; + DMLC_DECLARE_PARAMETER(PReLUParam) { + DMLC_DECLARE_FIELD(axis).set_default(1) + .describe("Specify which shape axis the channel is specified."); + } +}; + +struct PadParam : public dmlc::Parameter { + float pad_value; + Tuple > pad_width; + + DMLC_DECLARE_PARAMETER(PadParam) { + DMLC_DECLARE_FIELD(pad_value).set_default(0.0) + .describe("The value to be padded."); + DMLC_DECLARE_FIELD(pad_width) + .describe("Number of values padded to the edges of each axis, " + "in the format of ((before_1, after_1), ... (before_N, after_N))"); + } +}; + + +struct Conv2DParam : public dmlc::Parameter { + int channels; + TShape kernel_size; + TShape strides; + TShape padding; + TShape dilation; + int groups; + std::string layout; + std::string kernel_layout; + std::string out_layout; + int out_dtype; + bool use_bias; + + DMLC_DECLARE_PARAMETER(Conv2DParam) { + DMLC_DECLARE_FIELD(channels) + .describe("The dimensionality of the output space" + "i.e. the number of output channels in the convolution."); + DMLC_DECLARE_FIELD(kernel_size) + .describe("Specifies the dimensions of the convolution window."); + DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) + .describe("Specifies the strides of the convolution."); + DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) + .describe("If padding is non-zero, then the input is implicitly zero-padded" + "on both sides for padding number of points"); + DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1})) + .describe("Specifies the dilation rate to use for dilated convolution."); + DMLC_DECLARE_FIELD(groups).set_default(1) + .describe("Controls the connections between inputs and outputs." + "At groups=1, all inputs are convolved to all outputs." + "At groups=2, the operation becomes equivalent to having two convolution" + "layers side by side, each seeing half the input channels, and producing" + "half the output channels, and both subsequently concatenated."); + DMLC_DECLARE_FIELD(layout).set_default("NCHW") + .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Convolution is applied on the 'H' and" + "'W' dimensions."); + DMLC_DECLARE_FIELD(out_layout).set_default("__undef__") + .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Default to be same as input layout."); + DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW") + .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc." 
+ "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" + "dimensions respectively."); + DMLC_DECLARE_DTYPE_FIELD(out_dtype) + .add_enum("same", -1) + .set_default(-1) + .describe("Output data type, set to explicit type under mixed precision setting"); + + DMLC_DECLARE_FIELD(use_bias).set_default(true) + .describe("Whether the layer uses a bias vector."); + } + // constants + static const constexpr int kData = 0; + static const constexpr int kWeight = 1; + static const constexpr int kBias = 2; +}; + +struct WinogradWeightTransformParam : public dmlc::Parameter { + int tile_size; + + DMLC_DECLARE_PARAMETER(WinogradWeightTransformParam) { + DMLC_DECLARE_FIELD(tile_size) + .describe("Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)"); + } + + static const constexpr int kWeight = 0; +}; + +struct WinogradNNPACKWeightTransformParam + : public dmlc::Parameter { + int convolution_algorithm; + int out_dtype; + + DMLC_DECLARE_PARAMETER(WinogradNNPACKWeightTransformParam) { + DMLC_DECLARE_FIELD(convolution_algorithm) + .describe( + "The convolution algorithm for Winograd NNPACK. " + "E.g. tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8 for WT_8x8, " + "tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8_FP16 for WT_8x8_FP16"); + DMLC_DECLARE_DTYPE_FIELD(out_dtype) + .add_enum("same", -1) + .set_default(-1) + .describe("Output data type, set to explicit type under mixed precision setting"); + } + + static const constexpr int kWeight = 0; +}; + +struct WinogradConv2DParam : public dmlc::Parameter { + int channels; + TShape kernel_size; + TShape strides; + TShape padding; + TShape dilation; + int groups; + std::string layout; + std::string kernel_layout; + std::string out_layout; + int out_dtype; + bool use_bias; + int tile_size; + + DMLC_DECLARE_PARAMETER(WinogradConv2DParam) { + DMLC_DECLARE_FIELD(channels) + .describe("The dimensionality of the output space" + "i.e. the number of output channels in the convolution."); + DMLC_DECLARE_FIELD(kernel_size) + .describe("Specifies the dimensions of the convolution window."); + DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) + .describe("Specifies the strides of the convolution."); + DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) + .describe("If padding is non-zero, then the input is implicitly zero-padded" + "on both sides for padding number of points"); + DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1})) + .describe("Specifies the dilation rate to use for dilated convolution."); + DMLC_DECLARE_FIELD(groups).set_default(1) + .describe("Controls the connections between inputs and outputs." + "At groups=1, all inputs are convolved to all outputs." + "At groups=2, the operation becomes equivalent to having two convolution" + "layers side by side, each seeing half the input channels, and producing" + "half the output channels, and both subsequently concatenated."); + DMLC_DECLARE_FIELD(layout).set_default("NCHW") + .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Convolution is applied on the 'H' and" + "'W' dimensions."); + DMLC_DECLARE_FIELD(out_layout).set_default("__undef__") + .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Default to be same as input layout."); + DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW") + .describe("Dimension ordering of weight. 
Can be 'OIHW', 'OIHW16o16i', etc." + "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" + "dimensions respectively."); + DMLC_DECLARE_DTYPE_FIELD(out_dtype) + .add_enum("same", -1) + .set_default(-1) + .describe("Output data type, set to explicit type under mixed precision setting"); + DMLC_DECLARE_FIELD(use_bias).set_default(true) + .describe("Whether the layer uses a bias vector."); + DMLC_DECLARE_FIELD(tile_size) + .describe("Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)"); + } + // constants + static const constexpr int kData = 0; + static const constexpr int kWeight = 1; + static const constexpr int kBias = 2; +}; + +struct Conv2DTransposeParam : public dmlc::Parameter { + int channels; + TShape kernel_size; + TShape strides; + TShape padding; + TShape output_padding; + TShape dilation; + int groups; + std::string layout; + std::string kernel_layout; + int out_dtype; + bool use_bias; + + DMLC_DECLARE_PARAMETER(Conv2DTransposeParam) { + DMLC_DECLARE_FIELD(channels) + .describe("The dimensionality of the output space" + "i.e. the number of output channels in the convolution."); + DMLC_DECLARE_FIELD(kernel_size) + .describe("Specifies the dimensions of the convolution window."); + DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) + .describe("Specifies the strides of the convolution."); + DMLC_DECLARE_FIELD(output_padding).set_default(TShape({0, 0})) + .describe("Zero-padding added to one side of the output."); + DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) + .describe("If padding is non-zero, then the input is implicitly zero-padded" + "on both sides for padding number of points"); + DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1})) + .describe("Specifies the dilation rate to use for dilated convolution."); + DMLC_DECLARE_FIELD(groups).set_default(1) + .describe("Controls the connections between inputs and outputs." + "At groups=1, all inputs are convolved to all outputs." + "At groups=2, the operation becomes equivalent to having two convolution" + "layers side by side, each seeing half the input channels, and producing" + "half the output channels, and both subsequently concatenated."); + DMLC_DECLARE_FIELD(layout).set_default("NCHW") + .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Convolution is applied on the 'H' and" + "'W' dimensions."); + DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW") + .describe("Dimension ordering of data and weight. Can be 'OIHW', 'OIHW16o16i', etc." 
+ "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" + "dimensions respectively."); + DMLC_DECLARE_DTYPE_FIELD(out_dtype) + .add_enum("same", -1) + .set_default(-1) + .describe("Output data type, set to explicit type under mixed precision setting"); + DMLC_DECLARE_FIELD(use_bias).set_default(true) + .describe("Whether the layer uses a bias vector."); + } + // constants + static const constexpr int kData = 0; + static const constexpr int kWeight = 1; + static const constexpr int kBias = 2; +}; + + +struct MaxPool2DParam : public dmlc::Parameter { + TShape pool_size; + TShape strides; + TShape padding; + std::string layout; + bool ceil_mode; + + DMLC_DECLARE_PARAMETER(MaxPool2DParam) { + DMLC_DECLARE_FIELD(pool_size) + .describe("Size of the pooling windows.."); + DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) + .describe("Specifies the strides of the convolution."); + DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) + .describe("If padding is non-zero, then the input is implicitly zero-padded" + "Padding support both symmetric and asymmetric as" + "one int : same padding used on all sides" + "two int : bottom, right will use same padding as top, left" + "four int : padding width in the order of (top, left, bottom, right)"); + DMLC_DECLARE_FIELD(layout).set_default("NCHW") + .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Convolution is applied on the 'H' and" + "'W' dimensions."); + DMLC_DECLARE_FIELD(ceil_mode).set_default(false) + .describe("When true, will use ceil instead of floor to compute the output shape."); + } +}; + + +struct AvgPool2DParam : public dmlc::Parameter { + TShape pool_size; + TShape strides; + TShape padding; + std::string layout; + bool ceil_mode; + bool count_include_pad; + + DMLC_DECLARE_PARAMETER(AvgPool2DParam) { + DMLC_DECLARE_FIELD(pool_size) + .describe("Size of the pooling windows.."); + DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) + .describe("Specifies the strides of the convolution."); + DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) + .describe("If padding is non-zero, then the input is implicitly zero-padded" + "Padding support both symmetric and asymmetric as" + "one int : same padding used on all sides" + "two int : bottom, right will use same padding as top, left" + "four int : padding width in the order of (top, left, bottom, right)"); + DMLC_DECLARE_FIELD(layout).set_default("NCHW") + .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Convolution is applied on the 'H' and" + "'W' dimensions."); + DMLC_DECLARE_FIELD(ceil_mode).set_default(false) + .describe("When true, will use ceil instead of floor to compute the output shape."); + DMLC_DECLARE_FIELD(count_include_pad).set_default(false) + .describe("When true, will include padding to compute the average"); + } +}; + + +struct GlobalPool2DParam : public dmlc::Parameter { + std::string layout; + + DMLC_DECLARE_PARAMETER(GlobalPool2DParam) { + DMLC_DECLARE_FIELD(layout).set_default("NCHW") + .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. 
Pooling is applied on the 'H' and"
+                "'W' dimensions.");
+  }
+};
+
+struct UpSamplingParam : public dmlc::Parameter {
+  int scale;
+  std::string layout;
+  std::string method;
+
+  DMLC_DECLARE_PARAMETER(UpSamplingParam) {
+    DMLC_DECLARE_FIELD(scale)
+      .describe("upsampling scaling factor");
+    DMLC_DECLARE_FIELD(layout)
+      .set_default("NCHW")
+      .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc."
+                "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
+                "dimensions respectively. Upsampling is applied on the 'H' and"
+                "'W' dimensions.");
+    DMLC_DECLARE_FIELD(method)
+      .set_default("NEAREST_NEIGHBOR")
+      .describe("Specify the mode to use for scaling."
+                "NEAREST_NEIGHBOR - Nearest Neighbor"
+                "BILINEAR - Bilinear Interpolation");
+  }
+};
+
+struct LayoutTransformParam : public dmlc::Parameter {
+  std::string src_layout;
+  std::string dst_layout;
+
+  DMLC_DECLARE_PARAMETER(LayoutTransformParam) {
+    DMLC_DECLARE_FIELD(src_layout).set_default("__undef__")
+    .describe("Dimension ordering of data");
+    DMLC_DECLARE_FIELD(dst_layout).set_default("__undef__")
+    .describe("Dimension ordering of data.");
+  }
+};
+
+struct MultiBoxPriorParam : public dmlc::Parameter {
+  Tuple sizes;
+  Tuple ratios;
+  Tuple steps;
+  Tuple offsets;
+  bool clip;
+
+  DMLC_DECLARE_PARAMETER(MultiBoxPriorParam) {
+    DMLC_DECLARE_FIELD(sizes).set_default(Tuple({1.0}))
+      .describe("List of sizes of generated MultiBoxPriors.");
+    DMLC_DECLARE_FIELD(ratios).set_default(Tuple({1.0}))
+      .describe("List of aspect ratios of generated MultiBoxPriors.");
+    DMLC_DECLARE_FIELD(steps).set_default(Tuple({-1.0, -1.0}))
+      .describe("Priorbox step across y and x, -1 for auto calculation.");
+    DMLC_DECLARE_FIELD(offsets).set_default(Tuple({0.5, 0.5}))
+      .describe("Priorbox center offsets, y and x respectively.");
+    DMLC_DECLARE_FIELD(clip).set_default(false)
+      .describe("Whether to clip out-of-boundary boxes.");
+  }
+};
+
+struct MultiBoxTransformLocParam : public dmlc::Parameter {
+  bool clip;
+  float threshold;
+  Tuple variances;
+  DMLC_DECLARE_PARAMETER(MultiBoxTransformLocParam) {
+    DMLC_DECLARE_FIELD(clip).set_default(true)
+      .describe("Clip out-of-boundary boxes.");
+    DMLC_DECLARE_FIELD(threshold).set_default(0.01)
+      .describe("Threshold to be a positive prediction.");
+    DMLC_DECLARE_FIELD(variances).set_default(Tuple({0.1f, 0.1f, 0.2f, 0.2f}))
+      .describe("Variances to be decoded from box regression output.");
+  }
+};
+
+struct NonMaximumSuppressionParam : public dmlc::Parameter {
+  bool return_indices;
+  float iou_threshold;
+  bool force_suppress;
+  int top_k;
+  int id_index;
+  int coord_start;
+  int score_index;
+  int max_output_size;
+  bool invalid_to_bottom;
+  DMLC_DECLARE_PARAMETER(NonMaximumSuppressionParam) {
+    DMLC_DECLARE_FIELD(max_output_size).set_default(-1)
+      .describe("Max number of output valid boxes for each instance."
+ "By default all valid boxes are returned."); + DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) + .describe("Non-maximum suppression threshold."); + DMLC_DECLARE_FIELD(force_suppress).set_default(false) + .describe("Suppress all detections regardless of class_id."); + DMLC_DECLARE_FIELD(top_k).set_default(-1) + .describe("Keep maximum top k detections before nms, -1 for no limit."); + DMLC_DECLARE_FIELD(coord_start).set_default(2) + .describe("Start index of the consecutive 4 coordinates."); + DMLC_DECLARE_FIELD(score_index).set_default(1) + .describe("Index of the scores/confidence of boxes."); + DMLC_DECLARE_FIELD(id_index).set_default(0) + .describe("Axis index of id."); + DMLC_DECLARE_FIELD(return_indices).set_default(true) + .describe("Whether to return box indices in input data."); + DMLC_DECLARE_FIELD(invalid_to_bottom).set_default(false) + .describe("Whether to move all invalid bounding boxes to the bottom."); + } +}; + +struct LRNParam : public dmlc::Parameter { + int size; + int axis; + float alpha; + float beta; + float bias; + + DMLC_DECLARE_PARAMETER(LRNParam) { + DMLC_DECLARE_FIELD(size) + .describe("The size of the local region to be considered for normalization."); + DMLC_DECLARE_FIELD(axis) + .describe("input data layout channel axis"); + DMLC_DECLARE_FIELD(alpha) + .describe("The scaling parameter."); + DMLC_DECLARE_FIELD(beta) + .describe("The exponent parameter."); + DMLC_DECLARE_FIELD(bias) + .describe("The offset parameter."); + } + // constants + static const constexpr int kData = 0; +}; + +struct L2NormalizeParam : public dmlc::Parameter { + float eps; + Tuple axis; + + DMLC_DECLARE_PARAMETER(L2NormalizeParam) { + DMLC_DECLARE_FIELD(eps) + .describe("float type epsilon value."); + DMLC_DECLARE_FIELD(axis) + .describe("axis over the normalization applied"); + } +}; + +} // namespace top +} // namespace nnvm + +#endif // NNVM_TOP_NN_H_ diff --git a/nnvm/include/nnvm/top/tensor.h b/nnvm/include/nnvm/top/tensor.h index b8e245dbdfe2..f2dc1b6c8b01 100644 --- a/nnvm/include/nnvm/top/tensor.h +++ b/nnvm/include/nnvm/top/tensor.h @@ -100,14 +100,10 @@ enum TypeFlag { kInt32 = 4, kInt8 = 5, kInt64 = 6, - // kBool = 7, - // 7 is reserved for kBool, in order to keep consistency with MXNet TypeFlag defined in - // https://github.com/apache/incubator-mxnet/blob/master/3rdparty/mshadow/mshadow/base.h#L314 - kInt16 = 8, - kUint16 = 9, - kUint32 = 10, - kUint64 = 11, - kBfloat16 = 12, + kInt16 = 7, + kUint16 = 8, + kUint32 = 9, + kUint64 = 10, }; enum IndicatorRuleFlag { @@ -129,8 +125,7 @@ enum IndicatorRuleFlag { .add_enum("int8", kInt8) \ .add_enum("int16", kInt16) \ .add_enum("int32", kInt32) \ - .add_enum("int64", kInt64) \ - .add_enum("bfloat16", kBfloat16) + .add_enum("int64", kInt64) struct CastParam : public dmlc::Parameter { int dtype; diff --git a/nnvm/python/.gitignore b/nnvm/python/.gitignore new file mode 100644 index 000000000000..40d7cb4cc13a --- /dev/null +++ b/nnvm/python/.gitignore @@ -0,0 +1,2 @@ +*.c +*.cpp diff --git a/nnvm/python/nnvm/__init__.py b/nnvm/python/nnvm/__init__.py new file mode 100644 index 000000000000..450058449e3a --- /dev/null +++ b/nnvm/python/nnvm/__init__.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#!/usr/bin/env python
+# coding: utf-8
+"""NNVM Python API, for ease of use and to help new frameworks establish a Python API. """
+from __future__ import absolute_import as _abs
+import warnings
+
+from . import _base
+from . import symbol as sym
+from . import symbol
+from ._base import NNVMError
+from . import frontend
+
+__version__ = _base.__version__
+
+warnings.warn("NNVM is deprecated and will be removed in a future version. Use Relay instead.",
+              FutureWarning)
diff --git a/nnvm/python/nnvm/_base.py b/nnvm/python/nnvm/_base.py
new file mode 100644
index 000000000000..420392f17e92
--- /dev/null
+++ b/nnvm/python/nnvm/_base.py
@@ -0,0 +1,215 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# coding: utf-8
+# pylint: disable=invalid-name, unused-import
+""" ctypes library of nnvm and helper functions """
+from __future__ import absolute_import
+
+import os
+import sys
+import ctypes
+import numpy as np
+from . 
import libinfo
+
+try:
+    import tvm
+except ImportError:
+    pass
+
+#----------------------------
+# library loading
+#----------------------------
+if sys.version_info[0] == 3:
+    string_types = str
+    numeric_types = (float, int, np.float32, np.int32)
+    # this function is needed for python3
+    # to convert ctypes.char_p .value back to python str
+    py_str = lambda x: x.decode('utf-8')
+else:
+    string_types = basestring
+    numeric_types = (float, int, long, np.float32, np.int32)
+    py_str = lambda x: x
+
+
+class NNVMError(Exception):
+    """Error that will be thrown by all nnvm functions"""
+
+
+def _load_lib():
+    """Load library by searching possible paths."""
+    lib_path = libinfo.find_lib_path()
+    lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_LOCAL)
+    # error reporting
+    lib.NNGetLastError.restype = ctypes.c_char_p
+    return lib
+
+# version number
+__version__ = libinfo.__version__
+# library instance of nnvm
+_LIB = _load_lib()
+# The FFI mode of TVM
+_FFI_MODE = os.environ.get("TVM_FFI", "auto")
+
+# type definitions
+nn_uint = ctypes.c_uint
+OpHandle = ctypes.c_void_p
+SymbolHandle = ctypes.c_void_p
+GraphHandle = ctypes.c_void_p
+
+# Global dict of str to symbol to initialize variables
+_all_var_init = {}
+
+#----------------------------
+# helper function definition
+#----------------------------
+def check_call(ret):
+    """Check the return value of a C API call
+
+    This function will raise an exception when an error occurs.
+    Wrap every API call with this function.
+
+    Parameters
+    ----------
+    ret : int
+        return value from API calls
+    """
+    if ret != 0:
+        raise NNVMError(py_str(_LIB.NNGetLastError()))
+
+def c_str(string):
+    """Create ctypes char * from a python string
+
+    Parameters
+    ----------
+    string : string type
+        python string
+
+    Returns
+    -------
+    str : c_char_p
+        A char pointer that can be passed to C API
+    """
+    return ctypes.c_char_p(string.encode('utf-8'))
+
+
+def c_array(ctype, values):
+    """Create ctypes array from a python array
+
+    Parameters
+    ----------
+    ctype : ctypes data type
+        data type of the array we want to convert to
+
+    values : tuple or list
+        data content
+
+    Returns
+    -------
+    out : ctypes array
+        Created ctypes array
+    """
+    return (ctype * len(values))(*values)
+
+def ctypes2buffer(cptr, length):
+    """Convert ctypes pointer to buffer type. 
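+
+    The bytes are copied out of the C-side buffer with ctypes.memmove,
+    so the returned bytearray stays valid after the C memory is freed.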
+
+    Parameters
+    ----------
+    cptr : ctypes.POINTER(ctypes.c_char)
+        pointer to the raw memory region
+    length : int
+        the length of the buffer
+
+    Returns
+    -------
+    buffer : bytearray
+        The raw byte memory buffer
+    """
+    if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)):
+        raise TypeError('expected char pointer')
+    res = bytearray(length)
+    rptr = (ctypes.c_char * length).from_buffer(res)
+    if not ctypes.memmove(rptr, cptr, length):
+        raise RuntimeError('memmove failed')
+    return res
+
+def ctypes2numpy_shared(cptr, shape):
+    """Convert a ctypes pointer to a numpy array
+
+    The result numpy array shares the memory with the pointer
+
+    Parameters
+    ----------
+    cptr : ctypes.POINTER(ctypes.c_float)
+        pointer to the memory region
+
+    shape : tuple
+        shape of target ndarray
+
+    Returns
+    -------
+    out : numpy_array
+        The converted numpy array
+    """
+    if not isinstance(cptr, ctypes.POINTER(ctypes.c_float)):
+        raise RuntimeError('expected float pointer')
+    size = 1
+    for s in shape:
+        size *= s
+    dbuffer = (ctypes.c_float * size).from_address(ctypes.addressof(cptr.contents))
+    return np.frombuffer(dbuffer, dtype=np.float32).reshape(shape)
+
+
+def ctypes2docstring(num_args, arg_names, arg_types, arg_descs, remove_dup=True):
+    """Convert ctypes returned doc string information into parameters docstring.
+
+    Parameters
+    ----------
+    num_args : nn_uint
+        Number of arguments.
+
+    arg_names : ctypes.POINTER(ctypes.c_char_p)
+        Argument names.
+
+    arg_types : ctypes.POINTER(ctypes.c_char_p)
+        Argument type information.
+
+    arg_descs : ctypes.POINTER(ctypes.c_char_p)
+        Argument description information.
+
+    remove_dup : boolean, optional
+        Whether to remove duplicates or not.
+
+    Returns
+    -------
+    docstr : str
+        Python docstring of parameter sections.
+    """
+    param_keys = set()
+    param_str = []
+    for i in range(num_args.value):
+        key = py_str(arg_names[i])
+        if key in param_keys and remove_dup:
+            continue
+        param_keys.add(key)
+        type_info = py_str(arg_types[i])
+        ret = '%s : %s' % (key, type_info)
+        if arg_descs[i]:
+            ret += '\n    ' + py_str(arg_descs[i])
+        param_str.append(ret)
+    doc_str = ('Parameters\n' +
+               '----------\n' +
+               '%s\n')
+    doc_str = doc_str % ('\n'.join(param_str))
+    return doc_str
diff --git a/nnvm/python/nnvm/_ctypes/README b/nnvm/python/nnvm/_ctypes/README
new file mode 100644
index 000000000000..6e82cb962f99
--- /dev/null
+++ b/nnvm/python/nnvm/_ctypes/README
@@ -0,0 +1 @@
+Ctypes specific implementation of certain modules
\ No newline at end of file
diff --git a/cmake/modules/contrib/CODEGENC.cmake b/nnvm/python/nnvm/_ctypes/__init__.py
similarity index 84%
rename from cmake/modules/contrib/CODEGENC.cmake
rename to nnvm/python/nnvm/_ctypes/__init__.py
index bb53621f1a11..ea196643ae2f 100644
--- a/cmake/modules/contrib/CODEGENC.cmake
+++ b/nnvm/python/nnvm/_ctypes/__init__.py
@@ -15,6 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.

-file(GLOB CSOURCE_RELAY_CONTRIB_SRC src/relay/backend/contrib/codegen_c/codegen.cc)
-list(APPEND COMPILER_SRCS ${CSOURCE_RELAY_CONTRIB_SRC})
-
+"""Ctypes implementation of the Symbol"""
diff --git a/nnvm/python/nnvm/_ctypes/symbol.py b/nnvm/python/nnvm/_ctypes/symbol.py
new file mode 100644
index 000000000000..8c7d58a65920
--- /dev/null
+++ b/nnvm/python/nnvm/_ctypes/symbol.py
@@ -0,0 +1,242 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. 
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# coding: utf-8
+# pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines,
+# pylint: disable=len-as-condition, consider-iterating-dictionary
+"""Symbolic configuration API."""
+from __future__ import absolute_import as _abs
+
+import copy
+import ctypes
+import sys
+from .._base import _LIB
+from .._base import c_array, c_str, nn_uint, py_str
+from .._base import SymbolHandle, OpHandle
+from .._base import check_call, ctypes2docstring
+from ..name import NameManager
+from ..attribute import AttrScope
+
+class SymbolBase(object):
+    """Symbol is a symbolic graph."""
+    __slots__ = ["handle"]
+    # pylint: disable=no-member
+    def __init__(self, handle):
+        """Initialize the function with handle
+
+        Parameters
+        ----------
+        handle : SymbolHandle
+            the handle to the underlying C++ Symbol
+        """
+        self.handle = handle
+
+    def __del__(self):
+        check_call(_LIB.NNSymbolFree(self.handle))
+
+    def __call__(self, *args, **kwargs):
+        """Invoke symbol as function on inputs.
+
+        Parameters
+        ----------
+        args:
+            provide positional arguments
+
+        kwargs:
+            provide keyword arguments
+        Returns
+        -------
+        the resulting symbol
+        """
+        s = copy.deepcopy(self)
+        s._compose(*args, **kwargs)
+        return s
+
+    def _compose(self, *args, **kwargs):
+        """Compose symbol on inputs.
+
+        This call mutates the current symbol.
+
+        Parameters
+        ----------
+        args:
+            provide positional arguments
+
+        kwargs:
+            provide keyword arguments
+
+        Returns
+        -------
+        the resulting symbol
+        """
+        name = kwargs.pop('name', None)
+
+        if name:
+            name = c_str(name)
+        if len(args) != 0 and len(kwargs) != 0:
+            raise TypeError('compose only accepts input Symbols \
+                either as positional or keyword arguments, not both')
+
+        for arg in args:
+            if not isinstance(arg, SymbolBase):
+                raise TypeError('Compose expects `Symbol` as arguments')
+        for val in kwargs.values():
+            if not isinstance(val, SymbolBase):
+                raise TypeError('Compose expects `Symbol` as arguments')
+
+        num_args = len(args) + len(kwargs)
+        if len(kwargs) != 0:
+            keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()])
+            args = c_array(SymbolHandle, [s.handle for s in kwargs.values()])
+        else:
+            keys = None
+            args = c_array(SymbolHandle, [s.handle for s in args])
+        check_call(_LIB.NNSymbolCompose(
+            self.handle, name, num_args, keys, args))
+
+    def _set_attr(self, **kwargs):
+        """Set the attribute of the symbol. 
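+
+        This mutates the symbol in place: values are stringified and passed
+        to the C API through NNSymbolSetAttrs.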
+
+        Parameters
+        ----------
+        **kwargs
+            The attributes to set
+        """
+        keys = c_array(ctypes.c_char_p,
+                       [c_str(key) for key in kwargs.keys()])
+        vals = c_array(ctypes.c_char_p,
+                       [c_str(str(val)) for val in kwargs.values()])
+        num_args = nn_uint(len(kwargs))
+        check_call(_LIB.NNSymbolSetAttrs(
+            self.handle, num_args, keys, vals))
+
+
+_symbol_cls = SymbolBase
+
+def _set_symbol_class(cls):
+    global _symbol_cls
+    _symbol_cls = cls
+
+
+def _make_atomic_symbol_function(handle, name):
+    """Create an atomic symbol function by handle and function name."""
+    real_name = ctypes.c_char_p()
+    desc = ctypes.c_char_p()
+    num_args = nn_uint()
+    arg_names = ctypes.POINTER(ctypes.c_char_p)()
+    arg_types = ctypes.POINTER(ctypes.c_char_p)()
+    arg_descs = ctypes.POINTER(ctypes.c_char_p)()
+    ret_type = ctypes.c_char_p()
+
+    check_call(_LIB.NNGetOpInfo(
+        handle, ctypes.byref(real_name), ctypes.byref(desc),
+        ctypes.byref(num_args),
+        ctypes.byref(arg_names),
+        ctypes.byref(arg_types),
+        ctypes.byref(arg_descs),
+        ctypes.byref(ret_type)))
+    param_str = ctypes2docstring(num_args, arg_names, arg_types, arg_descs)
+    func_name = name
+    desc = py_str(desc.value)
+
+    doc_str = ('%s\n\n' +
+               '%s\n' +
+               'Returns\n' +
+               '-------\n' +
+               'result: Tensor\n' +
+               '    The result Tensor.')
+    doc_str = doc_str % (desc, param_str)
+
+    def creator(*args, **kwargs):
+        """Activation Operator of Neural Net.
+        The parameters listed below can be passed in as keyword arguments.
+
+        Parameters
+        ----------
+        name : string, required.
+            Name of the resulting symbol.
+
+        Returns
+        -------
+        symbol: Symbol
+            the resulting symbol
+        """
+        param_keys = []
+        param_vals = []
+        symbol_kwargs = {}
+        name = kwargs.pop('name', None)
+        attr = kwargs.pop('attr', None)
+
+        for k, v in kwargs.items():
+            if isinstance(v, SymbolBase):
+                symbol_kwargs[k] = v
+            else:
+                param_keys.append(c_str(k))
+                param_vals.append(c_str(str(v)))
+        # create atomic symbol
+        param_keys = c_array(ctypes.c_char_p, param_keys)
+        param_vals = c_array(ctypes.c_char_p, param_vals)
+        sym_handle = SymbolHandle()
+        check_call(_LIB.NNSymbolCreateAtomicSymbol(
+            handle,
+            nn_uint(len(param_keys)),
+            param_keys, param_vals,
+            ctypes.byref(sym_handle)))
+
+        if len(args) != 0 and len(symbol_kwargs) != 0:
+            raise TypeError(
+                '%s can only accept input '
+                'Symbols either as positional or keyword arguments, not both' % func_name)
+        s = _symbol_cls(sym_handle)
+        attr = AttrScope.current.get(attr)
+        if attr:
+            s._set_attr(**attr)
+        hint = func_name.lower()
+        name = NameManager.current.get(name, hint)
+        s._compose(*args, name=name, **symbol_kwargs)
+        return s
+
+    creator.__name__ = func_name
+    creator.__doc__ = doc_str
+    return creator
+
+
+def _init_symbol_module(symbol_class, root_namespace):
+    """List and add all the atomic symbol functions to current module."""
+    _set_symbol_class(symbol_class)
+    plist = ctypes.POINTER(ctypes.c_char_p)()
+    size = ctypes.c_uint()
+
+    check_call(_LIB.NNListAllOpNames(ctypes.byref(size),
+                                     ctypes.byref(plist)))
+    op_names = []
+    for i in range(size.value):
+        op_names.append(py_str(plist[i]))
+
+    module_obj = sys.modules["%s.symbol" % root_namespace]
+    module_obj_contrib = sys.modules["%s.contrib" % root_namespace]
+    module_internal = sys.modules["%s._symbol_internal" % root_namespace]
+    for name in op_names:
+        hdl = OpHandle()
+        check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
+        function = _make_atomic_symbol_function(hdl, name)
+        if function.__name__.startswith('_contrib_'):
+            setattr(module_obj_contrib, 
function.__name__.split('_contrib_')[1], function) + elif function.__name__.startswith('_'): + setattr(module_internal, function.__name__, function) + setattr(module_obj, function.__name__, function) + else: + setattr(module_obj, function.__name__, function) diff --git a/nnvm/python/nnvm/_cy2/README b/nnvm/python/nnvm/_cy2/README new file mode 100644 index 000000000000..ed4639b674a0 --- /dev/null +++ b/nnvm/python/nnvm/_cy2/README @@ -0,0 +1 @@ +This folder is by default empty and will hold DLLs generated by cython. diff --git a/nnvm/python/nnvm/_cy2/__init__.py b/nnvm/python/nnvm/_cy2/__init__.py new file mode 100644 index 000000000000..1961cd9ff613 --- /dev/null +++ b/nnvm/python/nnvm/_cy2/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for cython generated modules for python2""" diff --git a/nnvm/python/nnvm/_cy3/README b/nnvm/python/nnvm/_cy3/README new file mode 100644 index 000000000000..dc3a57603782 --- /dev/null +++ b/nnvm/python/nnvm/_cy3/README @@ -0,0 +1 @@ +This folder is by default empty and will hold DLLs generated by cython. \ No newline at end of file diff --git a/nnvm/python/nnvm/_cy3/__init__.py b/nnvm/python/nnvm/_cy3/__init__.py new file mode 100644 index 000000000000..c9a495225351 --- /dev/null +++ b/nnvm/python/nnvm/_cy3/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Cython generated modules""" diff --git a/nnvm/python/nnvm/_symbol_internal.py b/nnvm/python/nnvm/_symbol_internal.py new file mode 100644 index 000000000000..de2f85aa2f29 --- /dev/null +++ b/nnvm/python/nnvm/_symbol_internal.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Module space to register internal functions. Leave empty"""
diff --git a/nnvm/python/nnvm/attribute.py b/nnvm/python/nnvm/attribute.py
new file mode 100644
index 000000000000..14341794bb64
--- /dev/null
+++ b/nnvm/python/nnvm/attribute.py
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# coding: utf-8
+"""Attribute scoping support for symbolic API."""
+from __future__ import absolute_import
+
+from ._base import string_types
+
+class AttrScope(object):
+    """Attribute manager for scoping.
+
+    Users can also inherit this object to change naming behavior.
+
+    Parameters
+    ----------
+    kwargs
+        The attributes to set for all symbol creations in the scope.
+    """
+    current = None
+
+    def __init__(self, **kwargs):
+        self._old_scope = None
+        for value in kwargs.values():
+            if not isinstance(value, string_types):
+                raise ValueError("Attributes need to be strings")
+        self._attr = kwargs
+
+    def get(self, attr):
+        """
+        Get the attribute dict given the attribute set by the symbol.
+
+        Parameters
+        ----------
+        attr : dict of string to string
+            The attribute passed in by user during symbol creation.
+
+        Returns
+        -------
+        attr : dict of string to string
+            Updated attributes to add other scope related attributes.
+        """
+        if self._attr:
+            ret = self._attr.copy()
+            if attr:
+                ret.update(attr)
+            return ret
+        return attr
+
+    def __enter__(self):
+        # pylint: disable=protected-access
+        self._old_scope = AttrScope.current
+        attr = AttrScope.current._attr.copy()
+        attr.update(self._attr)
+        self._attr = attr
+        AttrScope.current = self
+        return self
+
+    def __exit__(self, ptype, value, trace):
+        assert self._old_scope
+        AttrScope.current = self._old_scope
+
+AttrScope.current = AttrScope()
diff --git a/nnvm/python/nnvm/compiler/__init__.py b/nnvm/python/nnvm/compiler/__init__.py
new file mode 100644
index 000000000000..6a3e846c4496
--- /dev/null
+++ b/nnvm/python/nnvm/compiler/__init__.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""NNVM compiler toolchain.
+
+Users only need :any:`build` and :any:`build_config` to do the compilation,
+and :any:`save_param_dict` to save the parameters into bytes.
+The other APIs are for more advanced interaction with the compiler toolchain.
+"""
+from __future__ import absolute_import
+
+import tvm
+
+from . import build_module
+from . build_module import build, optimize, build_config
+from . compile_engine import engine, graph_key
+from . param_dict import save_param_dict, load_param_dict
+
+from .. import symbol as _symbol
+from .. import graph as _graph
+
+from .. import top as _top
+
+
+tvm.register_extension(_symbol.Symbol, _symbol.Symbol)
+tvm.register_extension(_graph.Graph, _graph.Graph)
diff --git a/nnvm/python/nnvm/compiler/compile_engine.py b/nnvm/python/nnvm/compiler/compile_engine.py
new file mode 100644
index 000000000000..d7799bf7b0e7
--- /dev/null
+++ b/nnvm/python/nnvm/compiler/compile_engine.py
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name
+"""Compiler engine interface to the internal engine
+
+You can get the engine singleton at ``nnvm.compiler.engine``
+"""
+import tvm
+
+_list_cache_items = tvm.get_global_func("nnvm.compiler.ListCacheItems")
+_clear_cache = tvm.get_global_func("nnvm.compiler.ClearCache")
+_get_cache_item = tvm.get_global_func("nnvm.compiler.GetCacheItem")
+_set_cache_item = tvm.get_global_func("nnvm.compiler.SetCacheItem")
+_graph_key_get_graph = tvm.get_global_func("nnvm.compiler.GraphKeyGetGraph")
+_make_graph_key = tvm.get_global_func("nnvm.compiler.MakeGraphKey")
+
+@tvm.register_node
+class GraphKey(tvm.node.NodeBase):
+    """Key of a graph compilation context"""
+    @property
+    def graph(self):
+        return _graph_key_get_graph(self)
+
+
+@tvm.register_node
+class GraphCacheEntry(tvm.node.NodeBase):
+    """CacheEntry of compilation into a TVM Function"""
+
+
+@tvm.register_node
+class GraphFunc(tvm.node.NodeBase):
+    """Compiled result of a graph into a TVM Function"""
+
+
+class Engine(object):
+    """Global singleton compilation engine.
+
+    You can get the singleton at ``nnvm.compiler.engine``
+    """
+    def items(self):
+        """List the available cache key value pairs. 
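+
+        The engine cache maps graph keys to compiled functions; this
+        flattens the internal (key, entry) pairs into a list.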
+
+        Returns
+        -------
+        item_list : list of (GraphKey, GraphCacheEntry)
+            The existing cache items
+        """
+        res = _list_cache_items()
+        assert len(res) % 2 == 0
+        return [(res[2*i], res[2*i+1]) for i in range(len(res) // 2)]
+
+    def clear_cache(self):
+        """Clear the existing cached functions."""
+        _clear_cache()
+
+    def __setitem__(self, key, value):
+        """Set the cache entry for the given graph key."""
+        if isinstance(value, GraphCacheEntry):
+            _set_cache_item(key, value.graph_func)
+        else:
+            _set_cache_item(key, value)
+
+    def __getitem__(self, key):
+        """Get the cached entry for the given graph key."""
+        return _get_cache_item(key)
+
+    def dump(self):
+        """Return a string representation of engine dump
+
+        Returns
+        -------
+        dump : str
+            The dumped string representation
+        """
+        items = self.items()
+        res = "====================================\n"
+        res += "CompilerEngine dump, %d items cached\n" % len(items)
+        for key, value in items:
+            res += "------------------------------------\n"
+            res += "target={}\n".format(key.target)
+            res += "inputs={}\n".format(key.inputs)
+            res += "use_count={}\n".format(value.use_count)
+            res += "func_name={}\n".format(value.graph_func.func_name)
+            res += key.graph.ir() + "\n"
+            res += "====================================\n"
+        return res
+
+engine = Engine()
+
+
+def graph_key(graph, inputs, target):
+    """Construct a new graph key.
+
+    Parameters
+    ----------
+    graph : Graph
+        The computation graph structure
+
+    inputs : list of Tensor(placeholder)
+        The input requirement to the graph.
+
+    target : str
+        The target of compilation.
+
+    Returns
+    -------
+    key : GraphKey
+        The constructed key.
+    """
+    return _make_graph_key(graph, inputs, target)
diff --git a/nnvm/python/nnvm/compiler/graph_attr.py b/nnvm/python/nnvm/compiler/graph_attr.py
new file mode 100644
index 000000000000..de557cce78b3
--- /dev/null
+++ b/nnvm/python/nnvm/compiler/graph_attr.py
@@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name
+"""Utilities to access graph attributes"""
+from __future__ import absolute_import as _abs
+
+import tvm
+
+def set_shape_inputs(g, shape):
+    """Set the shape of input graph nodes in the graph attribute.
+
+    Parameters
+    ----------
+    g : Graph
+        The input graph
+
+    shape : dict of str to tuple
+        The input shape
+
+    Returns
+    -------
+    g : Graph
+        The updated graph with updated shape.
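+
+    Examples
+    --------
+    A minimal usage sketch; the graph ``g`` is assumed to have a single
+    input named "data":
+
+    .. code-block:: python
+
+        g = set_shape_inputs(g, {"data": (1, 3, 224, 224)})
+        g = g.apply("InferShape")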
+ """ + list_shape = [ + shape.get(name, ()) for name in g.index.input_names] + g._set_json_attr("shape_inputs", list_shape, 'list_shape') + return g + + +DTYPE_TO_TCODE = { + "default": -1, + "float32": 0, + "float64": 1, + "float16": 2, + "uint8": 3, + "int32": 4, + "int8": 5, + "int64": 6, + "int16": 7, + "uint16": 8, + "uint32": 9, + "uint64": 10, + "bool": 11, +} + +TCODE_TO_DTYPE = { + -1: None, + 0: "float32", + 1: "float64", + 2: "float16", + 3: "uint8", + 4: "int32", + 5: "int8", + 6: "int64", + 7: "int16", + 8: "uint16", + 9: "uint32", + 10: "uint64", + 11: "bool", +} + +def set_dtype_inputs(g, dtype): + """Set the dtype inputs of graph nodes + + Parameters + ---------- + g : Graph + The input graph + + dtype : dict of str to str or str + The input dtype + + Returns + ------- + g : Graph + The updated graph with updated dtype. + """ + if isinstance(dtype, dict): + list_dtype = [ + DTYPE_TO_TCODE[str(dtype.get(name, "default"))] + for name in g.index.input_names] + else: + list_dtype = [DTYPE_TO_TCODE[dtype]] * len(g.index.input_names) + g._set_json_attr("dtype_inputs", list_dtype, "list_int") + return g + + +def set_layout_inputs(g, layout): + """Set the layout inputs of graph nodes + + Parameters + ---------- + g : Graph + The input graph + + layout : dict of str to str or str + The input layout + + Returns + ------- + g : Graph + The updated graph with updated layout. + """ + if isinstance(layout, dict): + list_layout = [ + layout.get(name, "__undef__") for name in g.index.input_names] + elif isinstance(layout, str): + list_layout = ["__undef__"] * len(g.index.input_names) + list_layout[0] = layout + else: + raise ValueError("Input layout must be str or dict") + last_inferred_layouts = g.json_attr("layout") + if last_inferred_layouts: + input_layout = [last_inferred_layouts[g.index.entry_id(x)] for x in g.index.input_names] + for i, layout_stored in enumerate(input_layout): + list_layout[i] = list_layout[i] if list_layout[i] != '__undef__' else layout_stored + g._set_json_attr("layout_inputs", list_layout, 'list_layout') + return g + +_move_out_module = tvm.get_global_func("nnvm.graph._move_module") +_move_out_graph = tvm.get_global_func("nnvm.graph._move_graph") diff --git a/nnvm/python/nnvm/compiler/graph_pass.py b/nnvm/python/nnvm/compiler/graph_pass.py new file mode 100644 index 000000000000..a11a80e43fe4 --- /dev/null +++ b/nnvm/python/nnvm/compiler/graph_pass.py @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name +"""Namespace of graph pass. + +Principle: +- Graph in, graph out: always takes in graph as first argument and returns a graph +- Composable API: break graph transformation pass as segments of small transformations. 
+""" +from __future__ import absolute_import as _abs diff --git a/nnvm/python/nnvm/compiler/graph_util.py b/nnvm/python/nnvm/compiler/graph_util.py new file mode 100644 index 000000000000..3ce38dacacc3 --- /dev/null +++ b/nnvm/python/nnvm/compiler/graph_util.py @@ -0,0 +1,164 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name +"""Utility function to get information from graph.""" +from __future__ import absolute_import as _abs + +import tvm +from . import graph_attr + +from ..graph import create +from ..symbol import Group, ones_like + +def infer_shape(graph, **shape): + """Infer the shape given the shape of inputs. + + Parameters + ---------- + graph : Graph + The graph to perform shape inference from + + shape : dict of str to tuple + The specific input shape. + + Returns + ------- + in_shape : list of tuple + Shape of inputs + + out_shape: list of tuple + Shape of outputs + """ + graph = graph_attr.set_shape_inputs(graph, shape) + graph = graph.apply("InferShape") + shape = graph.json_attr("shape") + index = graph.index + input_shape = [shape[index.entry_id(x)] for x in index.input_names] + output_shape = [shape[index.entry_id(x)] for x in index.output_entries] + return input_shape, output_shape + + +def infer_dtype(graph, **dtype): + """Infer the type given the typeS of inputs. + + Parameters + ---------- + graph : Graph + The graph to perform type inference from + + dtype : dict of str to dtype + The specific input data type. + + Returns + ------- + in_dtype : list of tuple + Dtype of inputs + + out_dtype: list of tuple + Dtype of outputs + """ + graph = graph_attr.set_dtype_inputs(graph, dtype) + graph = graph.apply("InferType") + dtype = graph.json_attr("dtype") + index = graph.index + input_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]] + for x in index.input_names] + output_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]] + for x in index.output_entries] + return input_dtype, output_dtype + + +_deep_compare = tvm.get_global_func("nnvm.graph.DeepCompare") + +def check_graph_equal(grapha, graphb, compare_variable_attrs=False): + """Check if two graphs have equal structure. + + Parameters + ---------- + grapha : Graph + The first graph + + graphb : Graph + The second graph + + compare_variable_attrs : bool, optional + Whether we want to compare attributes(names) on variables. + Usually it is safe to skip it unless we want input name + to exactly match + + Raises + ------ + ValueError + ValueError is raised with error message when graph not equal + """ + err = _deep_compare(grapha, graphb, compare_variable_attrs) + if err: + raise ValueError("Graph compare error: " + err) + +def get_gradient_graph(ys, xs, grad_ys=None): + """Create gradient graph of ys with respect to xs. 
+ + Parameters + ---------- + ys : Symbol or list of Symbol + Symbols from which the gradient is calculated. + xs : Symbol or list of Symbol + Symbols the gradient respect to. + For group symbol, gradients for all outputs will be calculated. + grad_ys : Symbol or list of Symbol + Head gradients for ys. + + Returns + ------- + ret : Graph + Generated gradient graph. + """ + if isinstance(ys, list): + ys = Group(ys) + g = create(ys) + g._set_symbol_list_attr('grad_ys', ys) + g._set_symbol_list_attr('grad_xs', xs) + ny = len(ys.list_output_names()) + if grad_ys is None: + grad_ys = [ones_like(ys[i]) for i in range(ny)] + g._set_symbol_list_attr('grad_ys_out_grad', grad_ys) + return g.apply('Gradient') + +def gradients(ys, xs, grad_ys=None): + """Create gradient symbol of ys respect to xs. + + Parameters + ---------- + ys : Symbol or list of Symbol + Symbols from which the gradient is calculated. + xs : Symbol or list of Symbol + Symbols the gradient respect to. + For group symbol, gradients for all outputs will be calculated. + grad_ys : Symbol or list of Symbol + Head gradients for ys. + + Returns + ------- + ret : list of Symbol + Generated gradient symbol. For each xs, + all gradients from ys are merged into a single symbol. + """ + grad_g = get_gradient_graph(ys, xs, grad_ys) + nx = len(Group(xs).list_output_names()) \ + if isinstance(xs, list) else len(xs.list_output_names()) + ret = [grad_g.symbol[i] for i in range(nx)] + return ret diff --git a/nnvm/python/nnvm/compiler/lr_scheduler.py b/nnvm/python/nnvm/compiler/lr_scheduler.py new file mode 100644 index 000000000000..3a33f390b6f4 --- /dev/null +++ b/nnvm/python/nnvm/compiler/lr_scheduler.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=too-few-public-methods, no-member +"""API for scheduling learning rate.""" +from .. import symbol as sym + +class LRScheduler(object): + """Base class of a learning rate scheduler. + + A scheduler returns a new learning rate based on the number of updates that have + been performed. + + Parameters + ---------- + base_lr : float, optional + The initial learning rate. + """ + def __init__(self, base_lr=0.01, name='LRScheduler'): + self.name = name + self.base_lr = base_lr + + def __call__(self, num_update): + """Return a new learning rate based on number of updates. + + Parameters + ---------- + num_update: nnvm Symbol + the number of updates applied to weight. + """ + raise NotImplementedError("__call__ method must be overridden.") + +class FactorScheduler(LRScheduler): + """Reduce the learning rate by a factor for every *n* steps. + + It returns a new learning rate by:: + + base_lr * pow(factor, num_update/step) + + Parameters + ---------- + step : int + Changes the learning rate for every n updates. 
+    factor : float, optional
+        The factor to change the learning rate.
+    stop_factor_lr : float, optional
+        Stop updating the learning rate if it is less than this value.
+    """
+    def __init__(self, step, factor=1, stop_factor_lr=1e-8, name='FactorScheduler', **kwargs):
+        super(FactorScheduler, self).__init__(name=name, **kwargs)
+        if step < 1:
+            raise ValueError("Schedule step must be greater than or equal to 1")
+        if factor > 1.0:
+            raise ValueError("Factor must be no greater than 1 for the learning rate to decrease")
+        self.step = step
+        self.factor = factor
+        self.stop_factor_lr = stop_factor_lr
+
+    def __call__(self, num_update):
+        updated_lr = self.base_lr * self.factor ** (num_update / self.step)
+        return sym.clip(updated_lr, a_min=self.stop_factor_lr, a_max=self.base_lr)
diff --git a/nnvm/python/nnvm/compiler/optimizer.py b/nnvm/python/nnvm/compiler/optimizer.py
new file mode 100644
index 000000000000..ba739b8c7056
--- /dev/null
+++ b/nnvm/python/nnvm/compiler/optimizer.py
@@ -0,0 +1,147 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, no-member, too-few-public-methods, too-many-arguments, too-many-locals, protected-access
+"""Optimizer API"""
+from . import graph_util
+from .. import symbol as sym
+
+class Optimizer(object):
+    """Base class inherited by all optimizers.
+
+    Parameters
+    ----------
+    learning_rate : float, optional
+        The initial learning rate.
+
+    lr_scheduler : LRScheduler, optional
+        The learning rate scheduler.
+
+    rescale_grad : float, optional
+        Multiply the gradient by `rescale_grad` before updating. Often
+        chosen to be ``1.0/batch_size``.
+
+    clip_gradient : float, optional
+        Clip the gradient by projecting onto the box ``[-clip_gradient, clip_gradient]``.
+
+    wd : float, optional
+        The weight decay (or L2 regularization) coefficient. Modifies the objective
+        by adding a penalty for having large weights.
+
+    name : string, optional
+        The name of the optimizer.
+    """
+    def __init__(self, learning_rate=0.01, lr_scheduler=None,
+                 rescale_grad=1, clip_gradient=None, wd=0, name="Optimizer"):
+        self.name = name
+        self.lr = learning_rate
+        self.lr_scheduler = lr_scheduler
+        self.rescale_grad = rescale_grad
+        self.clip_gradient = clip_gradient
+        self.wd = wd
+        init_update_t = sym.Variable(name+'_t', init=sym.zeros(shape=(1,), dtype="int32"))
+        self.update_t = sym._assign(init_update_t, init_update_t + 1)
+
+    def minimize(self, obj, var=None):
+        """Minimize the given obj symbol with respect to var. If var is not set,
+        all input variables of obj will be used.
+
+        Parameters
+        ----------
+        obj : nnvm Symbol or list of nnvm Symbols
+            Symbols to be minimized.
+        var : nnvm Symbol or list of nnvm Symbols, optional
+            Symbols to take the gradient with respect to.
+
+        Returns
+        -------
+        group_sym : nnvm Symbol
+            Group symbol representing the update symbols.
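+
+        Examples
+        --------
+        A minimal usage sketch; assumes ``loss`` is a scalar nnvm symbol
+        built from the network outputs:
+
+        .. code-block:: python
+
+            opt = SGD(learning_rate=0.1)
+            update = opt.minimize(loss)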
+ """ + raise NotImplementedError() + + def _get_lr(self): + """Gets the learning rate with learning rate scheduler. + + Returns + ------- + lr : float + Learning rate. + """ + if self.lr_scheduler is not None: + lr = self.lr_scheduler(self.update_t) + else: + lr = self.lr + return lr + + +class SGD(Optimizer): + """The SGD optimizer + """ + def __init__(self, name='SGD', **kwargs): + super(SGD, self).__init__(name=name, **kwargs) + + def minimize(self, obj, var=None): + variables = var or obj.list_input_variables() + if not isinstance(variables, list): + variables = [variables] + grads = graph_util.gradients(obj, variables) + updates = [] + lr_t = self._get_lr() + for v, g in zip(variables, grads): + g = self.rescale_grad * g + if self.clip_gradient is not None: + g = sym.clip(g, a_min=-1 * self.clip_gradient, a_max=self.clip_gradient) + updates.append(sym._assign(v, v - lr_t * (g + self.wd * v))) + return sym.Group(updates) + + +class Adam(Optimizer): + """The Adam optimizer. + + This class implements the optimizer described in *Adam: A Method for + Stochastic Optimization*, available at http://arxiv.org/abs/1412.6980. + """ + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, + epsilon=1e-8, name='Adam', **kwargs): + super(Adam, self).__init__(learning_rate=learning_rate, name=name, **kwargs) + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.m = [] + self.v = [] + + def minimize(self, obj, var=None): + variables = var or obj.list_input_variables() + if not isinstance(variables, list): + variables = [variables] + grads = graph_util.gradients(obj, variables) + updates = [] + for i, v in enumerate(variables): + self.m.append(sym.Variable(self.name + '_m' + str(i), init=sym.zeros_like(v))) + self.v.append(sym.Variable(self.name + '_v' + str(i), init=sym.zeros_like(v))) + rate = sym.sqrt(1 - self.beta2 ** self.update_t) / (1 - self.beta1 ** self.update_t) + lr_t = self._get_lr() * rate + for variable, g, m, v in zip(variables, grads, self.m, self.v): + g = self.rescale_grad * g + if self.clip_gradient is not None: + g = sym.clip(g, a_min=-1 * self.clip_gradient, a_max=self.clip_gradient) + update_m = sym._assign(m, self.beta1 * m + (1 - self.beta1) * g) + update_v = sym._assign(v, self.beta2 * v + (1 - self.beta2) * g * g) + update_var = sym._assign(variable, variable - lr_t * (update_m / (sym.sqrt(update_v) \ + + self.epsilon) + self.wd * variable)) + updates.append(update_var) + return sym.Group(updates) diff --git a/nnvm/python/nnvm/compiler/param_dict.py b/nnvm/python/nnvm/compiler/param_dict.py new file mode 100644 index 000000000000..a543e0a827b3 --- /dev/null +++ b/nnvm/python/nnvm/compiler/param_dict.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name
+"""Helper utility to save parameter dict"""
+import tvm
+
+_save_param_dict = tvm.get_global_func("nnvm.compiler._save_param_dict")
+_load_param_dict = tvm.get_global_func("nnvm.compiler._load_param_dict")
+
+def save_param_dict(params):
+    """Save parameter dictionary to binary bytes.
+
+    The resulting binary bytes can be loaded by the
+    GraphModule with the API "load_params".
+
+    Parameters
+    ----------
+    params : dict of str to NDArray
+        The parameter dictionary.
+
+    Returns
+    -------
+    param_bytes : bytearray
+        Serialized parameters.
+
+    Examples
+    --------
+    .. code-block:: python
+
+        # compile the graph into a deployable module.
+        graph, lib, params = nnvm.compiler.build(
+            graph, target, shape={"data": data_shape}, params=params)
+        module = graph_runtime.create(graph, lib, tvm.gpu(0))
+        # save the parameters as byte array
+        param_bytes = nnvm.compiler.save_param_dict(params)
+        # We can serialize the param_bytes and load it back later.
+        # Pass in byte array to module to directly set parameters
+        module["load_params"](param_bytes)
+    """
+    args = []
+    for k, v in params.items():
+        args.append(k)
+        args.append(tvm.nd.array(v))
+    return _save_param_dict(*args)
+
+
+def load_param_dict(param_bytes):
+    """Load parameter dictionary from binary bytes.
+
+    Parameters
+    ----------
+    param_bytes : bytearray
+        Serialized parameters.
+
+    Returns
+    -------
+    params : dict of str to NDArray
+        The parameter dictionary.
+    """
+    if isinstance(param_bytes, (bytes, str)):
+        param_bytes = bytearray(param_bytes)
+    load_arr = _load_param_dict(param_bytes)
+    return {v.name : v.array for v in load_arr}
diff --git a/nnvm/python/nnvm/contrib.py b/nnvm/python/nnvm/contrib.py
new file mode 100644
index 000000000000..c3e943682db5
--- /dev/null
+++ b/nnvm/python/nnvm/contrib.py
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Module space to register contrib functions. Leave empty"""
diff --git a/nnvm/python/nnvm/cython/README b/nnvm/python/nnvm/cython/README
new file mode 100644
index 000000000000..d9deab1abca9
--- /dev/null
+++ b/nnvm/python/nnvm/cython/README
@@ -0,0 +1 @@
+Cython specific implementation of certain modules
\ No newline at end of file
diff --git a/nnvm/python/nnvm/cython/base.pyi b/nnvm/python/nnvm/cython/base.pyi
new file mode 100644
index 000000000000..40ef71a20546
--- /dev/null
+++ b/nnvm/python/nnvm/cython/base.pyi
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ctypedef void* SymbolHandle +ctypedef void* OpHandle +ctypedef unsigned nn_uint + +cdef py_str(const char* x): + if PY_MAJOR_VERSION < 3: + return x + else: + return x.decode("utf-8") + + +cdef c_str(pystr): + """Create ctypes char * from a python string + Parameters + ---------- + string : string type + python string + + Returns + ------- + str : c_char_p + A char pointer that can be passed to C API + """ + return pystr.encode("utf-8") + + +cdef CALL(int ret): + if ret != 0: + raise NNVMError(NNGetLastError()) + + +cdef const char** CBeginPtr(vector[const char*]& vec): + if (vec.size() != 0): + return &vec[0] + else: + return NULL + +cdef vector[const char*] SVec2Ptr(vector[string]& vec): + cdef vector[const char*] svec + svec.resize(vec.size()) + for i in range(vec.size()): + svec[i] = vec[i].c_str() + return svec + + +cdef BuildDoc(nn_uint num_args, + const char** arg_names, + const char** arg_types, + const char** arg_descs, + remove_dup=True): + """Convert ctypes returned doc string information into parameters docstring. + + num_args : nn_uint + Number of arguments. + + arg_names : ctypes.POINTER(ctypes.c_char_p) + Argument names. + + arg_types : ctypes.POINTER(ctypes.c_char_p) + Argument type information. + + arg_descs : ctypes.POINTER(ctypes.c_char_p) + Argument description information. + + remove_dup : boolean, optional + Whether remove duplication or not. + + Returns + ------- + docstr : str + Python docstring of parameter sections. + """ + param_keys = set() + param_str = [] + for i in range(num_args): + key = arg_names[i] + if key in param_keys and remove_dup: + continue + param_keys.add(key) + type_info = arg_types[i] + ret = '%s : %s' % (key, type_info) + if len(arg_descs[i]) != 0: + ret += '\n ' + py_str(arg_descs[i]) + param_str.append(ret) + doc_str = ('Parameters\n' + + '----------\n' + + '%s\n') + doc_str = doc_str % ('\n'.join(param_str)) + return doc_str diff --git a/nnvm/python/nnvm/cython/symbol.pyx b/nnvm/python/nnvm/cython/symbol.pyx new file mode 100644 index 000000000000..eedf2afbbc2a --- /dev/null +++ b/nnvm/python/nnvm/cython/symbol.pyx @@ -0,0 +1,233 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from __future__ import absolute_import as _abs
+
+import sys as _sys
+import ctypes as _ctypes
+from numbers import Number as _Number
+from .._base import NNVMError
+from ..name import NameManager
+from ..attribute import AttrScope
+from libcpp.vector cimport vector
+from libcpp.string cimport string
+from cpython.version cimport PY_MAJOR_VERSION
+
+include "./base.pyi"
+
+cdef extern from "nnvm/c_api.h":
+    const char* NNGetLastError();
+    int NNListAllOpNames(nn_uint *out_size,
+                         const char ***out_array);
+    int NNGetOpHandle(const char *op_name,
+                      OpHandle *handle);
+    int NNGetOpInfo(OpHandle op,
+                    const char **name,
+                    const char **description,
+                    nn_uint *num_doc_args,
+                    const char ***arg_names,
+                    const char ***arg_type_infos,
+                    const char ***arg_descriptions,
+                    const char **return_type);
+    int NNListOpNames(nn_uint *out_size,
+                      const char ***out_array);
+    int NNSymbolCreateAtomicSymbol(OpHandle op,
+                                   nn_uint num_param,
+                                   const char **keys,
+                                   const char **vals,
+                                   SymbolHandle *out);
+    int NNSymbolFree(SymbolHandle symbol);
+    int NNSymbolSetAttrs(SymbolHandle symbol,
+                         nn_uint num_param,
+                         const char** keys,
+                         const char** values);
+    int NNSymbolCompose(SymbolHandle sym,
+                        const char* name,
+                        nn_uint num_args,
+                        const char** keys,
+                        SymbolHandle* args);
+
+cdef class SymbolBase:
+    """Symbol is symbolic graph."""
+    # handle for symbolic operator.
+    cdef SymbolHandle handle
+
+    def __init__(self, handle):
+        cdef unsigned long ptr
+        if handle is None:
+            self.handle = NULL
+        else:
+            ptr = handle.value
+            self.handle = <SymbolHandle>(ptr)
+
+    def __dealloc__(self):
+        CALL(NNSymbolFree(self.handle))
+
+    @property
+    def handle(self):
+        return _ctypes.cast(<unsigned long>self.handle, _ctypes.c_void_p)
+
+    def _set_attr(self, **kwargs):
+        """Set the attribute of the symbol.
+
+        Parameters
+        ----------
+        **kwargs
+            The attributes to set
+        """
+        SymbolSetAttr(self.handle, kwargs)
+
+
+cdef SymbolSetAttr(SymbolHandle handle, dict kwargs):
+    cdef vector[string] sparam_keys
+    cdef vector[string] sparam_vals
+    cdef nn_uint num_args
+    for k, v in kwargs.items():
+        sparam_keys.push_back(c_str(k))
+        sparam_vals.push_back(c_str(str(v)))
+    # keep strings in vector
+    cdef vector[const char*] param_keys = SVec2Ptr(sparam_keys)
+    cdef vector[const char*] param_vals = SVec2Ptr(sparam_vals)
+    num_args = param_keys.size()
+    CALL(NNSymbolSetAttrs(
+        handle, num_args, CBeginPtr(param_keys), CBeginPtr(param_vals)))
+
+
+_symbol_cls = SymbolBase
+
+cdef _set_symbol_class(cls):
+    global _symbol_cls
+    _symbol_cls = cls
+
+cdef NewSymbol(SymbolHandle handle):
+    """Create a new symbol given handle"""
+    sym = _symbol_cls(None)
+    (<SymbolBase>sym).handle = handle
+    return sym
+
+cdef _make_atomic_symbol_function(OpHandle handle, string name):
+    """Create an atomic symbol function by handle and function name."""
+    cdef const char *real_name
+    cdef const char *desc
+    cdef nn_uint num_args
+    cdef const char** arg_names
+    cdef const char** arg_types
+    cdef const char** arg_descs
+    cdef const char* return_type
+
+    CALL(NNGetOpInfo(
+        handle, &real_name, &desc,
+        &num_args, &arg_names,
+        &arg_types, &arg_descs,
+        &return_type))
+
+    param_str = BuildDoc(num_args, arg_names, arg_types, arg_descs)
+    func_name = py_str(name.c_str())
+    doc_str = ('%s\n\n' +
+               '%s\n' +
+               'Returns\n' +
+               '-------\n' +
+               'result: Tensor\n' +
+               '    The result Tensor.')
+    doc_str = doc_str % (desc, param_str)
+    func_hint = func_name.lower()
+
+    def creator(*args, **kwargs):
+        cdef vector[string] sparam_keys
+        cdef vector[string] sparam_vals
+        cdef vector[SymbolHandle] symbol_args
+        cdef vector[string] ssymbol_keys
+        cdef SymbolHandle ret_handle
+
+        name = kwargs.pop("name", None)
+        attr = kwargs.pop("attr", None)
+
+        if len(kwargs) != 0:
+            for k, v in kwargs.items():
+                if isinstance(v, SymbolBase):
+                    ssymbol_keys.push_back(c_str(k))
+                    symbol_args.push_back((<SymbolBase>v).handle)
+                else:
+                    sparam_keys.push_back(c_str(k))
+                    sparam_vals.push_back(c_str(str(v)))
+
+        if len(args) != 0:
+            if symbol_args.size() != 0:
+                raise TypeError("compose only accepts input Symbols either as "
+                                "positional or keyword arguments, not both")
+            for v in args:
+                if not isinstance(v, SymbolBase):
+                    raise TypeError('Compose expect `Symbol` as arguments')
+                symbol_args.push_back((<SymbolBase>v).handle)
+
+        cdef vector[const char*] param_keys = SVec2Ptr(sparam_keys)
+        cdef vector[const char*] param_vals = SVec2Ptr(sparam_vals)
+        cdef vector[const char*] symbol_keys = SVec2Ptr(ssymbol_keys)
+
+        CALL(NNSymbolCreateAtomicSymbol(
+            handle,
+            param_keys.size(),
+            CBeginPtr(param_keys),
+            CBeginPtr(param_vals),
+            &ret_handle))
+        num_args = <nn_uint>(symbol_args.size())
+
+        attr = AttrScope.current.get(attr)
+        if attr:
+            SymbolSetAttr(ret_handle, attr)
+        name = NameManager.current.get(name, func_hint)
+
+        cdef const char* c_name = NULL
+
+        if name:
+            name = c_str(name)
+            c_name = name
+
+        CALL(NNSymbolCompose(
+            ret_handle,
+            c_name,
+            num_args,
+            &symbol_keys[0] if symbol_keys.size() != 0 else NULL,
+            &symbol_args[0] if symbol_args.size() != 0 else NULL))
+        return NewSymbol(ret_handle)
+
+    creator.__name__ = func_name
+    creator.__doc__ = doc_str
+    return creator
+
+
+def _init_symbol_module(symbol_class, root_namespace):
+    """List and add all the atomic symbol functions to current module."""
+    cdef const char** op_name_ptrs
+    cdef nn_uint size
+    cdef vector[string] op_names
+    cdef OpHandle handle
+
+    _set_symbol_class(symbol_class)
+    CALL(NNListAllOpNames(&size, &op_name_ptrs))
+    for i in range(size):
+        op_names.push_back(string(op_name_ptrs[i]));
+    module_obj = _sys.modules["%s.symbol" % root_namespace]
+    module_internal = _sys.modules["%s._symbol_internal" % root_namespace]
+    for i in range(op_names.size()):
+        CALL(NNGetOpHandle(op_names[i].c_str(), &handle))
+        function = _make_atomic_symbol_function(handle, op_names[i])
+        if function.__name__.startswith('_'):
+            setattr(module_internal, function.__name__, function)
+            setattr(module_obj, function.__name__, function)
+        else:
+            setattr(module_obj, function.__name__, function)
diff --git a/nnvm/python/nnvm/frontend/__init__.py b/nnvm/python/nnvm/frontend/__init__.py
new file mode 100644
index 000000000000..61c294f2606f
--- /dev/null
+++ b/nnvm/python/nnvm/frontend/__init__.py
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +"""NNVM frontends.""" +from __future__ import absolute_import +from .mxnet import from_mxnet +from .onnx import from_onnx +from .coreml import from_coreml +from .keras import from_keras +from .darknet import from_darknet +from .tensorflow import from_tensorflow +from .caffe2 import from_caffe2 diff --git a/nnvm/python/nnvm/frontend/caffe2.py b/nnvm/python/nnvm/frontend/caffe2.py new file mode 100644 index 000000000000..f951db66b5a6 --- /dev/null +++ b/nnvm/python/nnvm/frontend/caffe2.py @@ -0,0 +1,471 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=import-self, invalid-name, line-too-long, unused-argument +"""Caffe2 frontend""" +from __future__ import absolute_import as _abs +import tvm +from nnvm import symbol as _sym +from .common import get_nnvm_op, Renamer, AttrConverter as AttrCvt +from .onnx_caffe2_utils import dimension_picker, dimension_constraint, infer_channels, revert_caffe2_pad +from . import onnx + +__all__ = ['from_caffe2'] + + +def _clean_up_pool_args(args): + """ A helper function to clean up common arguments in conv and pooling ops. + """ + assert isinstance(args, dict) + + if 'stride_h' in args and 'stride_w' in args: + assert 'stride' not in args and 'strides' not in args + args['strides'] = [args['stride_h'], args['stride_w']] + args.pop('stride_h') + args.pop('stride_w') + elif 'stride' in args: + args['strides'] = [args['stride'], args['stride']] + args.pop('stride') + + # rename 'kernel', 'kernels', to 'kernel_shape' + if 'kernel_h' in args and 'kernel_w' in args: + assert 'kernel' not in args and 'kernels' not in args + args['kernel_shape'] = [args['kernel_h'], args['kernel_w']] + args.pop('kernel_h') + args.pop('kernel_w') + elif 'kernel' in args: + args['kernel_shape'] = [args['kernel'], args['kernel']] + args.pop('kernel') + elif 'kernels' in args: + args['kernel_shape'] = args['kernels'] + args.pop('kernels') + + if 'pad_t' in args and 'pad_l' in args and 'pad_b' in args and 'pad_r' in args: + assert 'pad' not in args and 'pads' not in args + args['pads'] = [ + args['pad_t'], args['pad_l'], args['pad_b'], args['pad_r'] + ] + for pad in ['pad_t', 'pad_l', 'pad_b', 'pad_r']: + args.pop(pad) + elif 'pad' in args: + args['pads'] = [args['pad'], args['pad']] + args.pop('pad') + + if 'dilation_h' in args and 'dilation_w' in args: + assert 'dilation' not in args and 'dilations' not in args + args['dilations'] = [args['dilation_h'], args['dilation_w']] + args.pop('dilation_h') + args.pop('dilation_w') + elif 'dilation' in args: + args['dilations'] = [args['dilation'], args['dilation']] + args.pop('dilation') + + return args + + +class Caffe2OpConverter(object): + """ A helper class for holding Caffe2 op converters. + """ + + @classmethod + def get_converter(cls): + """ Get converter. 
+ + :return: converter, which should be `_impl`. + """ + + if hasattr(cls, '_impl'): + return getattr(cls, '_impl') + raise tvm.error.OpNotImplemented( + 'Operator {} is not implemented in frontend Caffe2.'.format(cls.__name__)) + + +_caffe2_internal_args = { + # nnpack args + 'algo', + 'convolution_transform_strategy', + 'float16_compute', + 'shared_buffer', + + # training args + 'init_params', + 'cudnn_exhaustive_search', + 'exhaustive_search', + + # training args + 'adj', + 'hwgq', + + # args that we don't care + 'legacy_pad', +} + + +class Pool(Caffe2OpConverter): + """ A helper class for pool op converters. + """ + + name = '' + + @classmethod + def _impl(cls, inputs, args, params): + _clean_up_pool_args(args) + if 'global_pooling' in args and args['global_pooling'] == 1: + op_name = dimension_picker('global_' + cls.name) + return get_nnvm_op(op_name(args))(*inputs) + + return AttrCvt( + op_name=dimension_picker(cls.name), + transforms={ + 'kernel_shape': 'pool_size', + 'pads': ('padding', (0, 0), revert_caffe2_pad), + 'strides': 'strides', + }, + excludes={ + # TVM poolop does not support dilation + 'dilations', + }, + ignores=_caffe2_internal_args | {'global_pooling', 'order'}, + custom_check=dimension_constraint())(inputs, args, params) + + +class AveragePool(Pool): + name = 'avg_pool' + + +class MaxPool(Pool): + name = 'max_pool' + + +class Conv(Caffe2OpConverter): + """ Operator converter for Conv. + """ + + @classmethod + def _impl(cls, inputs, args, params): + # get number of channels + channels = infer_channels(inputs[1], params) + args['channels'] = channels + _clean_up_pool_args(args) + return AttrCvt( + op_name=dimension_picker('conv'), + transforms={ + 'group': ('groups', 1), + 'kernel_shape': + 'kernel_size', + 'pads': ('padding', (0, 0), revert_caffe2_pad), + 'strides': + 'strides', + 'dilations': ('dilation', (1, 1)), + 'order': + ('layout', ("NCHW"), + lambda x: x if isinstance(x, str) else x.decode('UTF-8')), + }, + excludes={}, + ignores=_caffe2_internal_args, + extras={'use_bias': len(inputs) == 3}, + custom_check=dimension_constraint())(inputs, args, params) + + +class Concat(Caffe2OpConverter): + """ Operator converter for Concat. + """ + + @classmethod + def _impl(cls, inputs, args, params): + def _get_axis_from_order_str(order): + order = order if isinstance(order, str) else order.decode('UTF-8') + if order == 'NCHW': + return 1 + if order == 'NHWC': + return 3 + raise tvm.error.OpAttributeInvalid('Value {} in attribute {} of operator {} is not valid.'.format(order, 'order', 'Concat')) + + return AttrCvt( + op_name='concatenate', + transforms={ + 'order': ('axis', (1), _get_axis_from_order_str), + }, + excludes={ + 'add_axis', + })(inputs, args, params) + + +class NormalizePlanarYUV(Caffe2OpConverter): + """ Operator converter for NormalizePlanarYUV. + caffe2 definition: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/norm_planar_yuv_op.cc + """ + + @classmethod + def _impl(cls, inputs, args, params): + assert len(inputs) == 3 + mean = _sym.expand_dims(inputs[1], axis=2, num_newaxis=2) + std = _sym.expand_dims(inputs[2], axis=2, num_newaxis=2) + + return _sym.broadcast_div(_sym.broadcast_sub(inputs[0], mean), std) + + +class ResizeNearest(Caffe2OpConverter): + """ Operator converter for Upsample (nearest mode). 
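+    Only equal ``width_scale`` and ``height_scale`` are supported, since both
+    are mapped onto the single ``upsampling`` scale below.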
+ """ + + @classmethod + def _impl(cls, inputs, args, params): + width_scale = args['width_scale'] if 'width_scale' in args else 1 + height_scale = args['height_scale'] if 'height_scale' in args else 1 + assert width_scale == height_scale + + return _sym.upsampling( + inputs[0], scale=int(width_scale), method="NEAREST_NEIGHBOR") + + +class FC(Caffe2OpConverter): + """ Operator converter for FC. + """ + + @classmethod + def _impl(cls, inputs, args, params): + inputs[0] = _sym.flatten(inputs[0]) + args['units'] = infer_channels(inputs[1], params) + return AttrCvt( + 'dense', + ignores=['axis', 'axis_w'], + extras={'use_bias': len(inputs) == 3}, + )(inputs, args, params) + + +class SpatialBN(Caffe2OpConverter): + """ Operator converter for SpatialBN. + """ + + @classmethod + def _impl(cls, inputs, args, params): + return AttrCvt( + op_name='batch_norm', + disables=['momentum'], + ignores=[ + 'order', 'spatial', 'is_test', 'consumed_inputs', 'num_batches' + ])(inputs, args, params) + + +# compatible operators that do NOT require any conversion. +_identity_list = [] + +# _convert_map defines maps of name to converter functor(callable) +# for 1 to 1 mapping, use Renamer if nothing but name is different +# use AttrCvt if attributes need to be converted +# for 1 to N mapping(composed), use custom callable functions +# for N to 1 mapping, currently not supported(?) + +# Minimal set of ops for squeezenet and resnet50 +def _get_convert_map(): + return { + # caffe2/onnx common operators + 'Add': onnx.Add.get_converter(opset=1), + 'Sum': onnx.Sum.get_converter(opset=1), + 'Softmax': onnx.Softmax.get_converter(opset=1), + + # nn + 'AveragePool': AveragePool.get_converter(), + 'MaxPool': MaxPool.get_converter(), + 'Conv': Conv.get_converter(), + 'Concat': Concat.get_converter(), + 'FC': FC.get_converter(), + 'SpatialBN': SpatialBN.get_converter(), + 'ResizeNearest': ResizeNearest.get_converter(), + 'Relu': AttrCvt('relu', {}, ignores=['order']), + 'Sigmoid': Renamer('sigmoid'), + 'Dropout': AttrCvt('dropout', {'ratio': 'rate'}, ignores=['is_test']), + + # c2 image preprocessing ops + 'NormalizePlanarYUV': NormalizePlanarYUV.get_converter(), + } + + +class Caffe2NetDef(object): + """A helper class for handling nnvm graph copying from pb2.GraphProto. + Definition: https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto + """ + + def __init__(self): + self._nodes = {} + self._params = {} + self._visited_nodes = set() + self._ops = {} + + def from_caffe2(self, init_net, predict_net): + """Construct nnvm nodes from caffe2 graph. 
+ + Parameters + ---------- + workspace : Caffe2 workspace + predict_net : protobuf object + + Returns + ------- + sym : nnvm.sym.Symbol + The returned nnvm symbol + params : dict + A dict of name: tvm.nd.array pairs, used as pretrained weights + """ + from caffe2.python import workspace + workspace.RunNetOnce(init_net) + + # Input + input_name = predict_net.op[0].input[0] + + # Params + self._params = {} + used_blobs = set() + for c2_op in predict_net.op: + for i in c2_op.input: + used_blobs.add(i) + for blob in workspace.Blobs(): + if blob in used_blobs and blob != input_name: + self._params[blob] = tvm.nd.array(workspace.FetchBlob(blob)) + + # Variables + self._nodes = {} + for blob in predict_net.external_input: + self._nodes[blob] = _sym.Variable(name=blob) + + # Ops + for c2_op in predict_net.op: + for blob in c2_op.output: + self._ops[blob] = c2_op + for c2_op in predict_net.op: + self._process_op(c2_op) + + # Outputs + out = [] + for blob in predict_net.external_output: + out.append(self._nodes[blob]) + + if len(out) > 1: + sym = _sym.Group(out) + else: + sym = out[0] + + return sym, self._params + + def _get_node(self, blob): + """Get the nnvm Symbol of blob and detect cyclic dependency in the graph.""" + if blob in self._nodes: + return self._nodes[blob] + + assert blob not in self._visited_nodes, 'Cyclic dependency in the graph (in {})'.format( + blob) + self._visited_nodes.add(blob) + + self._process_op(self._ops[blob]) + return self._nodes[blob] + + def _process_op(self, c2_op): + op_type = c2_op.type + args = self._parse_arg(c2_op.arg) + inputs = [self._get_node(i) for i in c2_op.input] + tvm_op = self._convert_operator(op_type, inputs, args) + # Ignore all outputs except the first one + self._nodes[c2_op.output[0]] = tvm_op[0] + + def _parse_arg(self, arg): + """Convert a list of Argument to a dict, with names as keys.""" + args = {} + for a in arg: + for f in ['f', 'i', 's']: + if a.HasField(f): + args[a.name] = getattr(a, f) + for f in ['floats', 'ints', 'strings']: + if list(getattr(a, f)): + assert a.name not in args, "Only one type of attr is allowed" + args[a.name] = tuple(getattr(a, f)) + for f in ['n']: + if a.HasField(f): + raise NotImplementedError( + "Field {} is not supported in nnvm.".format(f)) + for f in ['nets']: + if list(getattr(a, f)): + raise NotImplementedError( + "Field {} is not supported in nnvm.".format(f)) + if a.name not in args: + raise ValueError("Cannot parse attribute: \n{}\n.".format(a)) + return args + + def _convert_operator(self, + op_type, + inputs, + args, + identity_list=None, + convert_map=None): + """Convert from Caffe2 operator to nnvm operator. + The converter must specify conversions explicitly for incompatible name, and + apply handlers to operator attributes. + + Parameters + ---------- + op_type : str + Operator name, such as Convolution, FullyConnected + inputs : list of nnvm.Symbol + List of input symbols. 
+        args : dict
+            Dict of operator attributes
+        identity_list : list
+            List of operators that don't require conversion
+        convert_map : dict
+            Dict of name : callable, where name is the name of an op that
+            requires conversion to nnvm, and the callables are functions which
+            take args and return (new_op_type, new_args)
+
+        Returns
+        -------
+        sym : nnvm.Symbol
+            Converted nnvm Symbol
+        """
+        identity_list = identity_list if identity_list else _identity_list
+        convert_map = convert_map if convert_map else _get_convert_map()
+        if op_type in identity_list:
+            sym = get_nnvm_op(op_type)(*inputs, **args)
+        elif op_type in convert_map:
+            # Add a sanitizing step to convert all byte strings in args to strings
+            sym = convert_map[op_type](inputs, args, self._params)
+        else:
+            raise tvm.error.OpNotImplemented(
+                'Operator {} is not supported in frontend Caffe2.'.format(op_type))
+        return sym
+
+
+def from_caffe2(init_net, predict_net):
+    """Load caffe2 graph which contains init_net and predict_net into nnvm graph.
+
+    Parameters
+    ----------
+    init_net : protobuf object
+        Caffe2 NetDef containing the weights
+
+    predict_net : protobuf object
+        Caffe2 NetDef containing the graph
+
+    Returns
+    -------
+    sym : nnvm.Symbol
+        Compatible nnvm symbol
+
+    params : dict of str to tvm.ndarray
+        Dict of converted parameters stored in tvm.ndarray format
+    """
+
+    caffe2 = Caffe2NetDef()
+    return caffe2.from_caffe2(init_net, predict_net)
diff --git a/nnvm/python/nnvm/frontend/common.py b/nnvm/python/nnvm/frontend/common.py
new file mode 100644
index 000000000000..0e09a2c43323
--- /dev/null
+++ b/nnvm/python/nnvm/frontend/common.py
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Shared functions and classes for frontends."""
+from __future__ import absolute_import as _abs
+import logging
+from tvm.error import OpNotImplemented, OpAttributeRequired
+from nnvm import sym as _sym
+from .._base import string_types
+
+def get_nnvm_op(op_name):
+    op = getattr(_sym, op_name, None)
+    if not op:
+        raise OpNotImplemented(
+            'Operator {} is not supported.'.format(op_name))
+    return op
+
+def required_attr(attr, key, op_name):
+    assert isinstance(attr, dict)
+    if key not in attr:
+        raise OpAttributeRequired(
+            'Required attribute {} not found in operator {}'.format(key, op_name))
+    return attr[key]
+
+def parse_tshape(tshape):
+    """Parse tshape in string."""
+    return [int(x.strip()) for x in tshape.strip('()').split(',')]
+
+def parse_bool_str(attr, key, default='False'):
+    """Parse bool string to boolean."""
+    return attr.get(key, default).strip().lower() in ['true', '1', 't', 'y', 'yes']
+
+class Renamer(object):
+    """A simple renamer for operators.
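+
+    For example, ``Renamer('sigmoid')`` is used by the Caffe2 convert map to
+    map the ``Sigmoid`` op directly onto ``nnvm.sym.sigmoid``.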
+ + Parameters + ---------- + new_name : str + The new name for the operator + """ + def __init__(self, new_name): + self._new_name = new_name + + def __call__(self, inputs, attrs, *args): + return get_nnvm_op(self._new_name)(*inputs, **attrs) + + +class AttrConverter(object): + """Common attribute converter. An AttrConverter instance is a callable: + ``` + attr_converter = AttrConverter(op_name, transforms={'a':'b', 'c':('d', 1)}) + new_op_name, new_attr = attr_converter(attrs) + ``` + + Parameters + ---------- + op_name : str or callable + If set as str, returned operator name is the str. + If set as callable, returned operator is the str returned by calling: + `op_name = func(attr)` + transforms : dict of `new_name, or (new_name, default_value, transform function)` + If only a new_name is provided, it's like renaming the attribute name. + If default_value if provided, then the attribute is considered as optional. + If transform function is provided, the original attribute value is handled + by transform function. + excludes : list + A list of excluded attributes that should `NOT` appear. + Raise NotImplementedError if occurred. + disables : list + A list of attributes that is disabled in nnvm. Log warnings. + ignores : list + A list of attributes that is ignored in nnvm. Debug level logging. + extras : dict + A series of additional attributes should be added anyway to the returned + attribute dict. + custom_check : callable + A custom function takes attribute, and return True/False. + Raise RuntimeError if not bool(True) returned. + """ + def __init__(self, op_name, transforms=None, + excludes=None, disables=None, ignores=None, + extras=None, custom_check=None): + self._op_name = op_name + self._transforms = transforms if transforms else {} + self._excludes = excludes if excludes else [] + self._disables = disables if disables else [] + self._ignores = ignores if ignores else [] + self._extras = extras if extras else {} + self._custom_check = custom_check + + def __call__(self, inputs, attrs, *args): + # apply custom check + if self._custom_check: + func, msg = self._custom_check + if not func(attrs): + raise RuntimeError("Check failed: {}".format(msg)) + # get new op_name + if isinstance(self._op_name, string_types): + op_name = self._op_name + else: + assert callable(self._op_name), "op_name can either be string or callable" + op_name = self._op_name(attrs) + # convert attributes + new_attrs = {} + for k in attrs.keys(): + if k in self._excludes: + raise NotImplementedError("Attribute {} not supported yet.".format(k)) + elif k in self._disables: + logging.warning("Attribute %s is disabled in nnvm.sym.%s", k, op_name) + elif k in self._ignores: + logging.debug("Attribute %s is ignored in nnvm.sym.%s", k, op_name) + elif k in self._transforms: + new_name, defaults, transform = self._parse_default(self._transforms[k]) + if defaults is None: + new_attr = self._required_attr(attrs, k) + else: + new_attr = attrs.get(k, None) + if new_attr is None: + new_attrs[new_name] = defaults + else: + new_attrs[new_name] = transform(new_attr) + else: + # copy + new_attrs[k] = attrs[k] + # add extras + new_attrs.update(self._extras) + return get_nnvm_op(op_name)(*inputs, **new_attrs) + + def _parse_default(self, target): + """Helper function to parse default values.""" + if not isinstance(target, (list, tuple)): + k, v, t = target, None, lambda x: x + elif len(target) == 1: + k, v, t = target[0], None, lambda x: x + elif len(target) == 2: + k, v, t = target[0], target[1], lambda x: x + elif len(target) 
> 2: + k, v, t = target[0], target[1], target[2] + else: + k = None # should raise + if not isinstance(k, string_types): + msg = "{} is not a valid target, (name, default) expected.".format(target) + raise ValueError(msg) + return k, v, t + + def _parse_bool(self, value): + """Helper function to parse default boolean values.""" + if isinstance(value, string_types): + return value.strip().lower() in ['true', '1', 't', 'y', 'yes'] + return bool(value) + + def _required_attr(self, attr, key): + """Wrapper for getting required attributes.""" + assert isinstance(attr, dict) + if key not in attr: + raise AttributeError("Required attribute {} not found.".format(key)) + return attr[key] + + +class SymbolTable(object): + """Table storing symbols by names.""" + def __init__(self): + self.vars = {} + self.params = {} + self.const_ctr = 1 + self.in_padding = False + self.paddings = [0, 0] + + def new_const(self, value): + name = "_param_%d" % (self.const_ctr) + self.const_ctr += 1 + self.params[name] = value + self.vars[name] = _sym.Variable(name=name) + return self.vars[name] + + def get_var(self, name, must_contain=True): + if must_contain: + assert name in self.vars + if name not in self.vars: + self.vars[name] = _sym.Variable(name=name) + return self.vars[name] + + def set_var(self, name, sym): + assert isinstance(sym, _sym.Symbol) + self.vars[name] = sym + + def set_padding(self, paddings): + self.paddings = paddings + self.in_padding = True + + def clear_padding(self): + self.in_padding = False diff --git a/nnvm/python/nnvm/frontend/coreml.py b/nnvm/python/nnvm/frontend/coreml.py new file mode 100644 index 000000000000..c5b0c0a799ec --- /dev/null +++ b/nnvm/python/nnvm/frontend/coreml.py @@ -0,0 +1,431 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""CoreML frontend.""" +from __future__ import absolute_import as _abs +import numpy as np +import tvm +from .common import SymbolTable +from .. 
import symbol as _sym
+from .._base import string_types
+
+__all__ = ['from_coreml']
+
+
+def NeuralNetworkImageScaler(op, insym, symtab):
+    # this changes the symbol
+    biases = np.array([op.blueBias, op.greenBias, op.redBias]).reshape([3, 1, 1])
+    bias = symtab.new_const(biases)
+    ret = _sym.__mul_scalar__(insym, scalar=op.channelScale)
+    ret = _sym.broadcast_add(ret, bias)
+    return ret
+
+
+def NeuralNetworkMeanImage(op, insym, symtab):
+    # this changes the symbol
+    ret = _sym.elemwise_sub(insym, scalar=op.meanImage)
+    return ret
+
+
+def ConvolutionLayerParams(op, insym, symtab):
+    """Convolution layer params."""
+    weights = symtab.new_const(np.array(list(op.weights.floatValue)).reshape(
+        tuple([op.outputChannels, op.kernelChannels] + list(op.kernelSize))))
+    if op.hasBias:
+        biases = symtab.new_const(list(op.bias.floatValue))
+    dilation = list(op.dilationFactor)
+    if not dilation:
+        dilation = [1, 1]
+    params = {'channels':op.outputChannels,
+              'kernel_size':list(op.kernelSize),
+              'strides':list(op.stride),
+              'dilation': dilation,
+              'use_bias': op.hasBias,
+              'groups':op.nGroups}
+
+    if op.WhichOneof('ConvolutionPaddingType') == 'valid':
+        valid = op.valid
+        padding = [b.startEdgeSize for b in valid.paddingAmounts.borderAmounts]
+        padding2 = [b.endEdgeSize for b in valid.paddingAmounts.borderAmounts]
+        for i, j in zip(padding, padding2):
+            assert i == j, "Asymmetric padding is not supported"
+        if padding:
+            params['padding'] = padding
+    elif op.WhichOneof('ConvolutionPaddingType') == 'same':
+        kernel = params['kernel_size']
+        pad_h = kernel[0] - 1
+        pad_w = kernel[1] - 1
+        pad_t = pad_h // 2
+        pad_l = pad_w // 2
+        pad_b = pad_h - pad_t
+        pad_r = pad_w - pad_l
+        assert pad_t == pad_b and pad_l == pad_r, "Asymmetric padding is not supported"
+        params['padding'] = [pad_t, pad_l]
+    else:
+        raise NotImplementedError("Only 'valid' and 'same' convolution padding are implemented")
+
+    if op.hasBias:
+        pos = [insym, weights, biases]
+    else:
+        pos = [insym, weights]
+
+    # consume padding layer
+    if symtab.in_padding:
+        params['padding'] = [sum(x) for x in zip(params.get('padding', [0, 0]), symtab.paddings)]
+        symtab.clear_padding()
+
+    if op.isDeconvolution:
+        ret = _sym.conv2d_transpose(*pos, **params)
+    else:
+        ret = _sym.conv2d(*pos, **params)
+    return ret
+
+def BatchnormLayerParams(op, insym, symtab):
+    """Get batchnorm layer parameters."""
+    # this changes the symbol
+    if op.instanceNormalization:
+        msg = 'Operator "instance normalization" is not supported in frontend CoreML.'
+ raise tvm.error.OpNotImplemented(msg) + else: + params = {'gamma':symtab.new_const(list(op.gamma.floatValue)), + 'beta':symtab.new_const(list(op.beta.floatValue)), + 'moving_mean':symtab.new_const(list(op.mean.floatValue)), + 'moving_var': symtab.new_const(list(op.variance.floatValue)), + 'epsilon': op.epsilon} + return _sym.batch_norm(data=insym, **params) + +def ActivationParams(op, insym, symtab): + """Get activation parameters""" + whichActivation = op.WhichOneof('NonlinearityType') + par = getattr(op, whichActivation) + if whichActivation == 'linear': + return _sym.__add_scalar__(_sym.__mul_scalar__(insym, scalar=par.alpha), scalar=par.beta) + if whichActivation == 'ReLU': + return _sym.relu(insym) + if whichActivation == 'leakyReLU': + return _sym.leaky_relu(insym, alpha=par.alpha) + if whichActivation == 'thresholdedReLU': + alpha_tensor = _sym.full_like(insym, fill_value=float(par.alpha)) + return _sym.elemwise_mul(insym, _sym.greater(insym, alpha_tensor)) + if whichActivation == 'PReLU': + return _sym.prelu(insym, alpha=par.alpha) + if whichActivation == 'tanh': + return _sym.tanh(insym) + if whichActivation == 'scaledTanh': + return _sym.__mul_scalar__(_sym.tanh(_sym.__mul_scalar__( + insym, scalar=par.beta)), scalar=par.alpha) + if whichActivation == 'sigmoid': + return _sym.sigmoid(insym) + if whichActivation == 'sigmoidHard': + transformX = (par.alpha * insym) + par.beta + return _sym.clip(transformX, a_min=0, a_max=1) + if whichActivation == 'ELU': + return _sym.__mul_scalar__(_sym.__add_scalar__( + _sym.exp(insym), scalar=-1), scalar=par.alpha) + if whichActivation == 'softsign': + return insym / (1 + (_sym.relu(insym) + _sym.relu(_sym.negative(insym)))) + if whichActivation == 'softplus': + return _sym.log(_sym.__add_scalar__(_sym.exp(insym), scalar=1)) + if whichActivation == 'parametricSoftplus': + alpha = list(par.alpha.floatValue) + beta = list(par.alpha.floatValue) + if len(alpha) == 1: + return _sym.__mul_scalar__(_sym.log(_sym.__add_scalar__( + _sym.exp(insym), scalar=beta[0])), scalar=alpha[0]) + alpha = np.array(alpha).reshape((len(alpha), 1, 1)) + beta = np.array(beta).reshape((len(beta), 1, 1)) + alphasym = symtab.new_const(alpha) + betasym = symtab.new_const(beta) + return _sym.broadcast_mul(_sym.log(_sym.broadcast_add( + _sym.exp(insym), betasym)), alphasym) + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend CoreML.'.format(whichActivation)) + +def ScaleLayerParams(op, insym, symtab): + """Scale layer params.""" + scale = symtab.new_const(np.array(list(op.scale.floatValue)).reshape( + tuple(list(op.shapeScale) + [1, 1]))) + # scale = _sym.reshape(scale, shape=tuple(list(op.shapeScale) + [1,1])) + ret = _sym.broadcast_mul(insym, scale) + if op.hasBias: + bias = symtab.new_const(np.array(list(op.bias.floatValue)).reshape( + tuple(list(op.shapeBias) + [1, 1]))) + # bias = _sym.reshape(bias, shape=tuple(list(op.shapeBias) + [1,1])) + ret = _sym.broadcast_add(ret, bias) + return ret + +def PoolingLayerParams(op, insym, symtab): + """get pooling parameters""" + if op.globalPooling: + if op.type == 0: + return _sym.global_max_pool2d(insym) + if op.type == 1: + return _sym.global_avg_pool2d(insym) + raise tvm.error.OpNotImplemented( + 'Operator pooling (not max or average) is not supported in frontend CoreML.') + + else: + params = {'pool_size':list(op.kernelSize), + 'strides':list(op.stride)} + + if op.WhichOneof('PoolingPaddingType') == 'valid': + valid = op.valid + padding = [b.startEdgeSize for b in valid.paddingAmounts.borderAmounts] 
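+ # borderAmounts holds one (startEdgeSize, endEdgeSize) pair per spatial axis; + # e.g. a symmetric one-pixel border arrives as start=[1, 1], end=[1, 1] and + # becomes padding=[1, 1]. Asymmetric borders are rejected below.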
+ padding2 = [b.endEdgeSize for b in valid.paddingAmounts.borderAmounts] + for i, j in zip(padding, padding2): + assert i == j + params['padding'] = padding + elif op.WhichOneof('PoolingPaddingType') == 'includeLastPixel': + # I don't know if this is correct + valid = op.includeLastPixel + padding = list(valid.paddingAmounts) + params['padding'] = padding + params['ceil_mode'] = True + else: + msg = 'Value {} in attribute PoolingPaddingType of operator Pooling is not valid.' + raise tvm.error.OpAttributeInvalid(msg.format(op.WhichOneof('PoolingPaddingType'))) + + # consume padding layer + if symtab.in_padding: + params['padding'] = [sum(x) for x in zip( + params.get('padding', [0, 0]), symtab.paddings)] + symtab.clear_padding() + + if op.type == 0: + return _sym.max_pool2d(insym, **params) + if op.type == 1: + return _sym.avg_pool2d(insym, **params) + msg = 'Operator pooling (not max or average) is not supported in frontend CoreML.' + raise tvm.error.OpNotImplemented(msg) + +def SoftmaxLayerParams(op, insym, symtab): + return _sym.softmax(_sym.flatten(insym)) + +def InnerProductLayerParams(op, insym, symtab): + weights = symtab.new_const(np.array(op.weights.floatValue).reshape( + (op.outputChannels, op.inputChannels))) + par = {'weight':weights, 'use_bias':False, 'units':op.outputChannels} + if op.hasBias: + bias = symtab.new_const(np.array(op.bias.floatValue)) + par['bias'] = bias + par['use_bias'] = True + return _sym.dense(data=insym, **par) + +def AddLayerParams(op, insyms, symtab): + if not isinstance(insyms, list): + insyms = [insyms] + ret = insyms[0] + for i in range(1, len(insyms)): + ret = _sym.elemwise_add(ret, insyms[i]) + if op.alpha > 0: + ret = _sym.__add_scalar__(ret, scalar=op.alpha) + return ret + +def MultiplyLayerParams(op, insyms, symtab): + if not isinstance(insyms, list): + insyms = [insyms] + ret = insyms[0] + for i in range(1, len(insyms)): + ret = _sym.elemwise_mul(ret, insyms[i]) + if op.alpha != 1: + ret = _sym.__mul_scalar__(ret, scalar=op.alpha) + return ret + +def ConcatLayerParams(op, insyms, symtab): + if not isinstance(insyms, list): + insyms = [insyms] + if op.sequenceConcat: + raise tvm.error.OpNotImplemented( + 'Operator Sequence Concat is not supported in frontend CoreML.') + ret = _sym.concatenate(*insyms, axis=1) + return ret + +def FlattenLayerParams(op, insym, symtab): + if op.mode == 1: + insym = _sym.transpose(_sym.reshape(insym, shape=(0, 0, -1)), axes=(0, 2, 1)) + return _sym.flatten(insym) + +def PaddingLayerParams(op, insym, symtab): + """Hacking for padding layer params.""" + if op.WhichOneof('PaddingType') == 'constant': + constant = op.constant + if constant.value != 0: + msg = 'Value {} in attribute "padding value" of operator Padding is not valid.' 
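+ # Only zero-valued constant padding is representable here: the layer is not + # emitted as its own operator but recorded via symtab.set_padding() and + # folded into the padding of the following conv/pool layer.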
+ raise tvm.error.OpAttributeInvalid(msg.format(constant.value)) + padding = [b.startEdgeSize for b in op.paddingAmounts.borderAmounts] + padding2 = [b.endEdgeSize for b in op.paddingAmounts.borderAmounts] + for i, j in zip(padding, padding2): + assert i == j + symtab.set_padding(padding) + else: + raise tvm.error.OpNotImplemented( + 'Operator "non-constant padding" is not supported in frontend CoreML.') + return insym + +def PermuteLayerParams(op, insym, symtab): + axes = tuple(op.axis) + return _sym.transpose(insym, axes=axes) + +def UpsampleLayerParams(op, insym, symtab): + if op.scalingFactor[0] != op.scalingFactor[1]: + raise tvm.error.OpAttributeInvalid( + 'Height and width scaling factors of Upsample operator must be equal.') + interpolationMode = 'NEAREST_NEIGHBOR' if op.mode == 0 else 'BILINEAR' + return _sym.upsampling(insym, scale=op.scalingFactor[0], method=interpolationMode) + +def L2NormalizeLayerParams(op, insym, symtab): + return _sym.l2_normalize(insym, eps=op.epsilon, axis=1) + +def LRNLayerParams(op, insym, symtab): + par = {} + par['size'] = op.localSize + par['bias'] = op.k + par['alpha'] = op.alpha + par['beta'] = op.beta + par['axis'] = 1 #default layout is nchw + return _sym.lrn(data=insym, **par) + +def AverageLayerParams(op, insyms, symtab): + if not isinstance(insyms, list) or len(insyms) < 2: + raise ValueError("Expect minimum 2 inputs") + count = len(insyms) + _sum = insyms[0] + for i in range(1, count): + _sum = _sym.broadcast_add(_sum, insyms[i]) + return _sum / count + +def MaxLayerParams(op, insyms, symtab): + if not isinstance(insyms, list) or len(insyms) < 2: + raise ValueError("Expect minimum 2 inputs") + _max = insyms[0] + for i in range(1, len(insyms)): + _max = _sym.broadcast_max(_max, insyms[i]) + return _max + +def MinLayerParams(op, insyms, symtab): + if not isinstance(insyms, list) or len(insyms) < 2: + raise ValueError("Expect minimum 2 inputs") + _min = insyms[0] + for i in range(1, len(insyms)): + _min = _sym.broadcast_min(_min, insyms[i]) + return _min + +_convert_map = { + 'NeuralNetworkMeanImage': NeuralNetworkMeanImage, + 'NeuralNetworkImageScaler': NeuralNetworkImageScaler, + 'ConvolutionLayerParams':ConvolutionLayerParams, + 'BatchnormLayerParams':BatchnormLayerParams, + 'ActivationParams':ActivationParams, + 'ScaleLayerParams':ScaleLayerParams, + 'PoolingLayerParams':PoolingLayerParams, + 'SoftmaxLayerParams':SoftmaxLayerParams, + 'InnerProductLayerParams':InnerProductLayerParams, + 'AddLayerParams':AddLayerParams, + 'MultiplyLayerParams':MultiplyLayerParams, + 'FlattenLayerParams':FlattenLayerParams, + 'ConcatLayerParams':ConcatLayerParams, + 'PaddingLayerParams':PaddingLayerParams, + 'PermuteLayerParams':PermuteLayerParams, + 'UpsampleLayerParams':UpsampleLayerParams, + 'L2NormalizeLayerParams':L2NormalizeLayerParams, + 'LRNLayerParams':LRNLayerParams, + 'AverageLayerParams':AverageLayerParams, + 'MaxLayerParams':MaxLayerParams, + 'MinLayerParams':MinLayerParams, +} + +def coreml_op_to_nnvm(op, inname, outname, symtab): + """Convert coreml layer to nnvm layer. 
+ + Parameters + ---------- + op: a CoreML protobuf layer message + + inname: str or list of str + Name(s) of the input symbol(s) to read from the symbol table + + outname: str + Name to register the converted symbol under + + symtab: the shared SymbolTable + + Returns + ------- + None + The converted symbol is stored in symtab under outname. + """ + classname = type(op).__name__ + if classname not in _convert_map: + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend CoreML.'.format(classname)) + if isinstance(inname, string_types): + insym = symtab.get_var(inname) + else: + insym = [symtab.get_var(i) for i in inname] + ret = _convert_map[classname](op, insym, symtab) + if outname: + symtab.set_var(outname, ret) + if classname != 'PaddingLayerParams': + assert not symtab.in_padding, "Previous padding not consumed by conv/pool" + +def from_coreml(model): + """Convert from coreml model into NNVM format. + + Parameters + ---------- + model: + coremltools.models.MLModel of a NeuralNetworkClassifier + + Returns + ------- + sym : nnvm.Symbol + Compatible nnvm symbol + + params : dict of str to tvm.NDArray + The parameter dict to be used by nnvm + """ + try: + import coremltools as cm + except ImportError: + raise ImportError('The coremltools package must be installed') + + assert isinstance(model, cm.models.MLModel) + spec = model.get_spec() + modeltype = spec.WhichOneof('Type') + assert modeltype in ['neuralNetworkClassifier', 'neuralNetwork', 'neuralNetworkRegressor'] + cc = getattr(spec, modeltype) + + symtab = SymbolTable() + for i in spec.description.input: + symtab.get_var(i.name, must_contain=False) + + for pp in cc.preprocessing: + whichpp = pp.WhichOneof('preprocessor') + ppmethod = getattr(pp, whichpp) + # the NeuralNetworkImageScaler doesn't seem to have a featureName? + if whichpp == 'scaler': + for i in spec.description.input: + coreml_op_to_nnvm(ppmethod, i.name, i.name, symtab) + else: + coreml_op_to_nnvm(ppmethod, pp.featureName, pp.featureName, symtab) + + for l in cc.layers: + layertype = l.WhichOneof('layer') + layerop = getattr(l, layertype) + assert len(l.output) == 1 + if len(l.input) == 1: + coreml_op_to_nnvm(layerop, l.input[0], l.output[0], symtab) + else: + coreml_op_to_nnvm(layerop, list(l.input), l.output[0], symtab) + returns = [symtab.get_var(i.name, must_contain=False) for i in spec.description.output] + tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()} + # for now return first output + return returns[0], tvmparams diff --git a/nnvm/python/nnvm/frontend/darknet.py b/nnvm/python/nnvm/frontend/darknet.py new file mode 100644 index 000000000000..8c6020500b45 --- /dev/null +++ b/nnvm/python/nnvm/frontend/darknet.py @@ -0,0 +1,979 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DarkNet symbol frontend. +""" + +from __future__ import absolute_import as _abs +import numpy as np +import tvm +from ..
import symbol as _sym +from .common import get_nnvm_op, required_attr, parse_tshape, parse_bool_str + +class LAYERTYPE(object): + """Darknet LAYERTYPE Class constant.""" + CONVOLUTIONAL = 0 + DECONVOLUTIONAL = 1 + CONNECTED = 2 + MAXPOOL = 3 + SOFTMAX = 4 + DETECTION = 5 + DROPOUT = 6 + CROP = 7 + ROUTE = 8 + COST = 9 + NORMALIZATION = 10 + AVGPOOL = 11 + LOCAL = 12 + SHORTCUT = 13 + ACTIVE = 14 + RNN = 15 + GRU = 16 + LSTM = 17 + CRNN = 18 + BATCHNORM = 19 + NETWORK = 20 + XNOR = 21 + REGION = 22 + YOLO = 23 + REORG = 24 + UPSAMPLE = 25 + LOGXENT = 26 + L2NORM = 27 + BLANK = 28 + +class ACTIVATION(object): + """Darknet ACTIVATION Class constant.""" + LOGISTIC = 0 + RELU = 1 + RELIE = 2 + LINEAR = 3 + RAMP = 4 + TANH = 5 + PLSE = 6 + LEAKY = 7 + ELU = 8 + LOGGY = 9 + STAIR = 10 + HARDTAN = 11 + LHTAN = 12 + +__all__ = ['from_darknet'] + +def _darknet_maxpooling(inputs, attrs): + """Process the max pool 2d operation.""" + kernel = parse_tshape(required_attr(attrs, 'kernel', 'maxpool')) + if len(kernel) != 1: + raise tvm.error.OpAttributeUnImplemented( + 'Non-2D kernels for Max Pooling are not supported in frontend Darknet.') + + op_name, new_attrs = 'max_pool2d', {} + strides = int(attrs.get('stride', (1, 1))) + pads = int(attrs.get('pad', (0, 0))) + new_attrs['pool_size'] = [kernel[0], kernel[0]] + new_attrs['strides'] = str((strides, strides)) + new_attrs['padding'] = str((pads, pads)) + extra_pad_size = attrs.get('extra_pad_size', 0) + if extra_pad_size: + pad_width = ((0, 0), (0, 0), (0, extra_pad_size), (0, extra_pad_size)) + inputs = _sym.pad(*inputs, pad_width=pad_width, pad_value=np.finfo(np.float32).min) + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_avgpooling(inputs, attrs): + """Process the average pool 2d operation.""" + kernel = parse_tshape(required_attr(attrs, 'kernel', 'avgpool')) + if len(kernel) != 1: + raise tvm.error.OpAttributeUnimplemented( + 'Non-2D kernels for Average Pooling are not supported in frontend Darknet.') + + op_name, new_attrs = 'avg_pool2d', {} + strides = int(attrs.get('stride', (1, 1))) + pads = int(attrs.get('pad', (0, 0))) + new_attrs['pool_size'] = [kernel[0], kernel[0]] + new_attrs['strides'] = str((strides, strides)) + new_attrs['padding'] = str((pads, pads)) + + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_batch_norm(inputs, attrs): + """Process the batchnormalization operation.""" + op_name, new_attrs = 'darknet_batch_norm', {} + new_attrs['axis'] = attrs.get('axis', 1) + new_attrs['epsilon'] = attrs.get('eps', 0.000001) + new_attrs['center'] = True + new_attrs['scale'] = True + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_conv2d(inputs, attrs): + """Process the convolution 2d operation.""" + kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d')) + if len(kernel) != 1: + raise tvm.error.OpAttributeUnimplemented('Non-2D kernels for Conv2D are unsupported ' + 'in frontend Darknet.') + layout = attrs.get('layout', 'NCHW') + if layout not in ['NCHW', 'NHWC']: + raise tvm.error.OpAttributeInvalid( + 'Value {} in attribute "layout" of operator Conv2D is not valid.'.format(layout)) + strides = int(attrs.get('stride', (1, 1))) + pads = int(attrs.get('pad', (0, 0))) + + op_name, new_attrs = 'conv2d', {} + new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d') + new_attrs['kernel_size'] = [kernel[0], kernel[0]] + new_attrs['strides'] = (strides, strides) + new_attrs['padding'] = (pads, pads) + new_attrs['dilation'] = attrs.get('dilate', (1, 1)) + 
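# Illustrative mapping (example values): darknet's scalar attrs, e.g. + # {'kernel': '3', 'stride': '1', 'pad': '1'}, become kernel_size=[3, 3], + # strides=(1, 1), padding=(1, 1) in the call below. +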
new_attrs['groups'] = attrs.get('num_group', 1) + new_attrs['layout'] = layout + if attrs.get('use_batchNorm', False) is True: + new_attrs['use_bias'] = False + else: + new_attrs['use_bias'] = True + out_name = {} + sym = get_nnvm_op(op_name)(*inputs, **new_attrs) + out_name[0] = sym.list_output_names()[0].replace('_output', '') + + if attrs.get('use_batchNorm', False) is True: + op_name, new_attrs = 'batch_norm', {} + new_attrs['epsilon'] = 0.000001 + sym = get_nnvm_op(op_name)(*sym, **new_attrs) + out_name[1] = sym.list_output_names()[0].replace('_output', '') + if 'activation' in attrs: + new_attrs = {} + new_attrs['activation'] = attrs['activation'] + new_attrs['slope'] = 0.1 + sym, _ = _darknet_activations(sym, new_attrs) + return sym, out_name + + +def _darknet_conv2d_transpose(inputs, attrs): + """Process the convolution 2d transpose operation.""" + if 'target_shape' in attrs: + raise tvm.error.OpAttributeUnimplemented( + 'Attribute "target_shape" is not supported in operator Conv2D-transpose.') + kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d_transpose')) + if len(kernel) != 2: + raise tvm.error.OpAttributeUnimplemented( + 'Non-2D kernels are not supported in operator Conv2D-transpose.') + layout = attrs.get('layout', 'NCHW') + if layout not in ['NCHW', 'NHWC']: + msg = 'Value {} in attribute "layout" of operator Conv2D-transpose is not valid.' + raise tvm.error.OpAttributeInvalid(msg.format(layout)) + op_name, new_attrs = 'conv2d_transpose', {} + new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d_transpose') + new_attrs['kernel_size'] = kernel + new_attrs['strides'] = attrs.get('stride', (1, 1)) + new_attrs['output_padding'] = attrs.get('adj', (0, 0)) + new_attrs['padding'] = attrs.get('pad', (0, 0)) + new_attrs['dilation'] = attrs.get('dilate', (1, 1)) + new_attrs['groups'] = attrs.get('num_group', 1) + new_attrs['layout'] = layout + new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias') + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_shortcut(inputs, attrs): + """Process the shortcut operation.""" + op_name, new_attrs = 'elemwise_add', {} + input_0 = inputs[0] + input_1 = inputs[1] + input_0_channel = int(attrs['out_channel']) + input_1_channel = int(attrs['add_out_channel']) + input_0_size = int(attrs['out_size']) + input_1_size = int(attrs['add_out_size']) + + if input_0_size > input_1_size: + scale = int(input_0_size/input_1_size) + input_1 = _sym.upsampling(input_1, scale=scale, name="_upsampling") + elif input_0_size < input_1_size: + stride = int(input_1_size/input_0_size) + input_1 = _sym.avg_pool2d(input_1, pool_size=(1, 1), + strides=(stride, stride), padding=(0, 0), name="_downsampling") + + if input_0_channel != input_1_channel: + pad_channel = input_0_channel - input_1_channel + input_1 = _sym.pad(input_1, pad_width=((0, 0), (0, pad_channel), (0, 0), (0, 0)), + pad_value=0.) 
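+ # A shortcut may join maps differing in both spatial size and channels; e.g. + # merging 26x26x256 with 13x13x128 upsamples the smaller map by scale 2 and + # zero-pads 128 extra channels before the elemwise_add below.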
+ + new_inputs = _as_list([input_0, input_1]) + sym = get_nnvm_op(op_name)(*new_inputs, **new_attrs) + out_name = sym.list_output_names()[0].replace('_output', '') + if 'activation' in attrs: + new_attrs['activation'] = attrs['activation'] + sym, _ = _darknet_activations(sym, new_attrs) + return sym, out_name + +def _darknet_dense(inputs, attrs): + """Process the dense operation.""" + op_name, new_attrs = 'dense', {} + new_attrs['units'] = required_attr(attrs, 'num_hidden', 'dense') + out_name = {} + new_attrs['use_bias'] = attrs.get('use_bias', False) + if attrs.get('use_flatten', False) is True: + inputs[0] = _sym.flatten(inputs[0]) + sym = get_nnvm_op(op_name)(*inputs, **new_attrs) + out_name[0] = sym.list_output_names()[0].replace('_output', '') + if 'use_batchNorm' in attrs: + op_name, new_attrs = 'batch_norm', {} + new_attrs['epsilon'] = 0.000001 + sym = get_nnvm_op(op_name)(*sym, **new_attrs) + out_name[1] = sym.list_output_names()[0].replace('_output', '') + if 'activation' in attrs: + new_attrs = {} + new_attrs['activation'] = attrs['activation'] + sym, _ = _darknet_activations(sym, new_attrs) + return sym, out_name + +def _darknet_dropout(inputs, attrs): + """Process the dropout operation, its a blank operation.""" + op_name, new_attrs = 'dropout', {} + new_attrs['rate'] = attrs.get('p', 0.5) + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_reshape(inputs, attrs): + """Process the reshape operation.""" + if parse_bool_str(attrs, 'reverse'): + raise tvm.error.OpAttributeUnimplemented( + 'Attribute "reverse" is not supported in operator Reshape.') + op_name, new_attrs = 'reshape', {} + new_attrs['shape'] = required_attr(attrs, 'shape', 'reshape') + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_upsampling(inputs, attrs): + """Process the upsampling operation.""" + op_name, new_attrs = 'upsampling', {} + new_attrs['scale'] = attrs.get('scale', 1) + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_l2normalize(inputs, attrs): + """Process the l2 normalization operation.""" + op_name, new_attrs = 'l2_normalize', {} + new_attrs['eps'] = attrs.get('eps', 0) + new_attrs['axis'] = attrs.get('axis', 1) + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_softmax_output(inputs, attrs): + """Process the softmax operation.""" + temperature = attrs.get('temperature', 1) + if temperature != 1: + inputs[0] = inputs[0] / float(temperature) + op_name, new_attrs = 'softmax', {} + if parse_bool_str(attrs, 'multi_output'): + new_attrs['axis'] = 1 + + if attrs.get('use_flatten', False) is True: + inputs[0] = _sym.flatten(inputs[0]) + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_route(inputs, attrs): + """Process the route operation, which is equivalent to concat.""" + op_name = 'concatenate' + new_attrs = {'axis': attrs.get('dim', 1)} + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_reorg(inputs, attrs): + """Process the reorg operation.""" + op_name, new_attrs = 'yolo_reorg', {} + if 'stride' in attrs: + new_attrs = {'stride': attrs.get('stride', 1)} + return get_nnvm_op(op_name)(*inputs, **new_attrs), None + +def _darknet_region(inputs, attrs): + """Process the region operation.""" + num = attrs.get('n', 1) + classes = attrs.get('classes', 1) + coords = attrs.get('coords', 0) + background = attrs.get('background', 0) + softmax = attrs.get('softmax', True) + input_shape = attrs.get('shape') + + split_size = classes + coords + 1 + intermediate_shape = 
(input_shape[0], num, split_size, input_shape[2], input_shape[3]) + data_block = _sym.reshape(inputs[0], shape=intermediate_shape) + split_indices = (2, 4, 5) + split_res = _sym.split(data_block, indices_or_sections=split_indices, axis=2) + split_res0 = _sym.sigmoid(split_res[0]) + if not background: + split_res2 = _sym.sigmoid(split_res[2]) + else: + split_res2 = split_res[2] + if softmax: + split_res3 = _sym.softmax(split_res[3], axis=2) + concat_list = [split_res0, split_res[1], split_res2, split_res3] + out = _sym.concatenate(*concat_list, axis=2) + return _sym.reshape(out, shape=input_shape), None + + +def _darknet_yolo(inputs, attrs): + """Process the yolo operation.""" + num = attrs.get('n', 1) + classes = attrs.get('classes', 1) + input_shape = attrs.get('shape') + split_size = classes + 5 + intermediate_shape = (input_shape[0], num, split_size, input_shape[2], input_shape[3]) + data_block = _sym.reshape(inputs[0], shape=intermediate_shape) + split_indices = (2, 4) + split_res = _sym.split(data_block, indices_or_sections=split_indices, axis=2) + split_res0 = _sym.sigmoid(split_res[0]) + split_res2 = _sym.sigmoid(split_res[2]) + concat_list = [split_res0, split_res[1], split_res2] + out = _sym.concatenate(*concat_list, axis=2) + return _sym.reshape(out, shape=input_shape), None + +def _darknet_activations(inputs, attrs): + """Process the activation function.""" + act = required_attr(attrs, 'activation', 'activations') + if ACTIVATION.LOGISTIC == act: + act_type = 'sigmoid' + elif ACTIVATION.RELU == act: + act_type = 'relu' + elif ACTIVATION.TANH == act: + act_type = 'tanh' + elif ACTIVATION.LINEAR == act: + return inputs, None + elif ACTIVATION.LEAKY == act: + act_type = 'leaky_relu' + elif ACTIVATION.ELU == act: + act_type = 'elu' + else: + raise tvm.error.OpNotImplemented( + 'Operator act: {} is not supported in framework Darknet.'.format(act)) + + if act_type in ['relu', 'tanh']: + op_name, new_attrs = act_type, {} + sym = get_nnvm_op(op_name)(*inputs, **new_attrs) + elif act_type in ['leaky_relu']: + op_name, new_attrs = act_type, {} + new_attrs['alpha'] = attrs.get('slope', 0.1) + sym = get_nnvm_op(op_name)(*inputs, **new_attrs) + elif act_type in ['elu']: + sym = -1 * _sym.relu(1 - _sym.exp(*inputs)) + _sym.relu(*inputs) + elif act_type in ['sigmoid']: + op_name, new_attrs = act_type, {} + sym = get_nnvm_op(op_name)(*inputs, **new_attrs) + else: + raise tvm.error.OpNotImplemented( + 'Operator act: {} is not supported in framework Darknet.'.format(act)) + return sym, None + +def _darknet_op_not_support(inputs, attrs): + """Raise exception if the operation is not supported.""" + err = "{} is not supported in {}.".format(attrs, inputs) + raise NotImplementedError(err) + +_DARKNET_CONVERT_MAP = { + LAYERTYPE.CONVOLUTIONAL : _darknet_conv2d, + LAYERTYPE.DECONVOLUTIONAL : _darknet_conv2d_transpose, + LAYERTYPE.CONNECTED : _darknet_dense, + LAYERTYPE.MAXPOOL : _darknet_maxpooling, + LAYERTYPE.SOFTMAX : _darknet_softmax_output, + LAYERTYPE.DROPOUT : _darknet_dropout, + LAYERTYPE.AVGPOOL : _darknet_avgpooling, + LAYERTYPE.BATCHNORM : _darknet_batch_norm, + LAYERTYPE.ROUTE : _darknet_route, + LAYERTYPE.REORG : _darknet_reorg, + LAYERTYPE.REGION : _darknet_region, + LAYERTYPE.SHORTCUT : _darknet_shortcut, + LAYERTYPE.UPSAMPLE : _darknet_upsampling, + LAYERTYPE.L2NORM : _darknet_l2normalize, + LAYERTYPE.YOLO : _darknet_yolo, + LAYERTYPE.DETECTION : _darknet_op_not_support, + LAYERTYPE.CROP : _darknet_op_not_support, + LAYERTYPE.COST : _darknet_op_not_support, + LAYERTYPE.NORMALIZATION : 
_darknet_op_not_support, + LAYERTYPE.LOCAL : _darknet_op_not_support, + LAYERTYPE.ACTIVE : _darknet_op_not_support, + LAYERTYPE.RNN : _darknet_op_not_support, + LAYERTYPE.GRU : _darknet_op_not_support, + LAYERTYPE.LSTM : _darknet_op_not_support, + LAYERTYPE.CRNN : _darknet_op_not_support, + LAYERTYPE.NETWORK : _darknet_op_not_support, + LAYERTYPE.XNOR : _darknet_op_not_support, + LAYERTYPE.BLANK : _darknet_op_not_support, +} + +def _darknet_convert_symbol(op_name, inputs, attrs): + """Convert from darknet op to nnvm op. + The converter must specify some conversions explicitly to + support gluon format ops such as conv2d... + + Parameters + ---------- + op_name : str + Operator name, such as Convolution, Connected, etc + inputs : list of nnvm.Symbol + List of input symbols. + attrs : dict + Dict of operator attributes + + Returns + ------- + out_name : converted out name of operation + sym : nnvm.Symbol + Converted nnvm Symbol + """ + + if op_name in _DARKNET_CONVERT_MAP: + sym, out_name = _DARKNET_CONVERT_MAP[op_name](inputs, attrs) + else: + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend Darknet.'.format(op_name)) + if out_name is None: + out_name = sym.list_output_names()[0].replace('_output', '') + return out_name, sym + + +def _as_list(arr): + """Force being a list, ignore if already is.""" + if isinstance(arr, list): + return arr + return [arr] + + +class GraphProto(object): + """A helper class for handling nnvm graph copying from darknet model. + """ + + def __init__(self, net, dtype='float32'): + self.net = net + self.dtype = dtype + self._sym_array = {} + self._tvmparams = {} + self._outs = [] + self._state_ctr = {} + self._state_ctr['rnn'] = 0 + self._state_ctr['crnn'] = 0 + self._state_ctr['lstm'] = 0 + self._state_ctr['cell_state'] = 0 + self._state_ctr['gru'] = 0 + + def _read_memory_buffer(self, shape, data, dtype=None): + if dtype is None: + dtype = self.dtype + length = 1 + for x in shape: + length *= x + data_np = np.zeros(length, dtype=dtype) + for i in range(length): + data_np[i] = data[i] + return data_np.reshape(shape) + + def _get_convolution_weights(self, layer, opname): + """Get the convolution layer weights and biases.""" + if layer.nweights == 0: + return + + if layer.n * layer.c * layer.size * layer.size != layer.nweights: + msg = 'nweights ({}) != n * c * h * w ({}) in operator {}' + msg = msg.format(layer.nweights, layer.n * layer.c * layer.size ** 2, opname) + raise tvm.error.OpAttributeInvalid(msg) + + shape = (layer.n, layer.c, layer.size, layer.size) + weights = self._read_memory_buffer(shape, layer.weights) + + biases = self._read_memory_buffer((layer.n, ), layer.biases) + + k = self._get_tvm_params_name(opname[0], 'weight') + self._tvmparams[k] = tvm.nd.array(weights) + + if layer.batch_normalize == 1 and layer.dontloadscales != 1: + self._get_batchnorm_weights(layer, opname[1], layer.n) + k = self._get_tvm_params_name(opname[1], 'beta') + self._tvmparams[k] = tvm.nd.array(biases) + else: + k = self._get_tvm_params_name(opname[0], 'bias') + self._tvmparams[k] = tvm.nd.array(biases) + + def _get_connected_weights(self, layer, opname): + """Parse the weights and biases for fully connected or dense layer.""" + size = layer.outputs * layer.inputs + if size == 0: + return + + weights = self._read_memory_buffer((layer.outputs, layer.inputs), layer.weights) + biases = self._read_memory_buffer((layer.outputs, ), layer.biases) + + k = self._get_tvm_params_name(opname[0], 'weight') + self._tvmparams[k] = tvm.nd.array(weights) + + if 
layer.batch_normalize == 1 and layer.dontloadscales != 1: + self._get_batchnorm_weights(layer, opname[1], layer.outputs) + k = self._get_tvm_params_name(opname[1], 'beta') + self._tvmparams[k] = tvm.nd.array(biases) + else: + k = self._get_tvm_params_name(opname[0], 'bias') + self._tvmparams[k] = tvm.nd.array(biases) + + def _get_region_weights(self, layer, opname): + """Parse the biases for region layer.""" + biases = self._read_memory_buffer((layer.n*2, ), layer.biases) + attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w, + layer.classes, layer.coords, layer.background], + dtype=np.int32) + k = self._get_tvm_params_name(opname, 'bias') + self._tvmparams[k] = tvm.nd.array(biases) + k = self._get_tvm_params_name(opname, 'attr') + self._tvmparams[k] = tvm.nd.array(attributes) + + def _get_yolo_weights(self, layer, opname): + """Parse the biases and mask for yolo layer.""" + biases = self._read_memory_buffer((layer.total*2, ), layer.biases) + mask = self._read_memory_buffer((layer.n, ), layer.mask, dtype='int32') + attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w, + layer.classes, layer.total], + dtype=np.int32) + k = self._get_tvm_params_name(opname, 'bias') + self._tvmparams[k] = tvm.nd.array(biases) + k = self._get_tvm_params_name(opname, 'mask') + self._tvmparams[k] = tvm.nd.array(mask) + k = self._get_tvm_params_name(opname, 'attr') + self._tvmparams[k] = tvm.nd.array(attributes) + + def _get_batchnorm_weights(self, layer, opname, size): + """Parse the weights for batchnorm, which includes, scales, moving mean + and moving variances.""" + scales = self._read_memory_buffer((size, ), layer.scales) + rolling_mean = self._read_memory_buffer((size, ), layer.rolling_mean) + rolling_variance = self._read_memory_buffer((size, ), layer.rolling_variance) + + k = self._get_tvm_params_name(opname, 'moving_mean') + self._tvmparams[k] = tvm.nd.array(rolling_mean) + k = self._get_tvm_params_name(opname, 'moving_var') + self._tvmparams[k] = tvm.nd.array(rolling_variance) + k = self._get_tvm_params_name(opname, 'gamma') + self._tvmparams[k] = tvm.nd.array(scales) + + def _get_darknet_attrs(self, layer, layer_num): + """Parse attributes of each layer and return.""" + attr = {} + use_flatten = True + if LAYERTYPE.CONVOLUTIONAL == layer.type: + attr.update({'layout' : 'NCHW'}) + attr.update({'pad' : str(layer.pad)}) + attr.update({'num_group' : str(layer.groups)}) + attr.update({'num_filter' : str(layer.n)}) + attr.update({'stride' : str(layer.stride)}) + attr.update({'kernel' : str(layer.size)}) + attr.update({'activation' : (layer.activation)}) + + if layer.nbiases == 0: + attr.update({'use_bias' : False}) + else: + attr.update({'use_bias' : True}) + + if layer.batch_normalize == 1 and layer.dontloadscales != 1: + attr.update({'use_batchNorm' : True}) + attr.update({'use_scales' : True}) + + elif LAYERTYPE.CONNECTED == layer.type: + attr.update({'num_hidden' : str(layer.outputs)}) + attr.update({'activation' : (layer.activation)}) + if layer_num != 0: + layer_prev = self.net.layers[layer_num - 1] + if (layer_prev.out_h == layer.h and + layer_prev.out_w == layer.w and + layer_prev.out_c == layer.c): + use_flatten = False + attr.update({'use_flatten' : use_flatten}) + attr.update({'use_bias' : True}) + if layer.batch_normalize == 1 and layer.dontloadscales != 1: + attr.update({'use_batchNorm' : True}) + attr.update({'use_scales' : True}) + attr.update({'use_bias' : False}) + + elif LAYERTYPE.MAXPOOL == layer.type: + attr.update({'pad' : str(layer.pad)}) + 
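# Darknet computes the maxpool output as out = (w - size + 2*pad)/stride + 1; + # when its own rounding reports a larger out_w (checked below), the gap is + # closed with a one-sided extra_pad_size of (out_w - out)*stride pixels. +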
attr.update({'stride' : str(layer.stride)}) + attr.update({'kernel' : str(layer.size)}) + max_output = (layer.w - layer.size + 2 * layer.pad)/float(layer.stride) + 1 + if max_output < layer.out_w: + extra_pad = (layer.out_w - max_output)*layer.stride + attr.update({'extra_pad_size' : int(extra_pad)}) + elif LAYERTYPE.AVGPOOL == layer.type: + attr.update({'pad' : str(layer.pad)}) + if layer.stride == 0: + attr.update({'stride' : str(1)}) + else: + attr.update({'stride' : str(layer.stride)}) + if layer.size == 0 and layer.h == layer.w: + attr.update({'kernel' : str(layer.h)}) + else: + attr.update({'kernel' : str(layer.size)}) + + elif LAYERTYPE.DROPOUT == layer.type: + attr.update({'p' : str(layer.probability)}) + + elif LAYERTYPE.SOFTMAX == layer.type: + attr.update({'axis' : 1}) + attr.update({'use_flatten' : True}) + if layer.temperature: + attr.update({'temperature' : str(layer.temperature)}) + + elif LAYERTYPE.SHORTCUT == layer.type: + add_layer = self.net.layers[layer.index] + attr.update({'activation' : (layer.activation)}) + attr.update({'out_channel' : (layer.out_c)}) + attr.update({'out_size' : (layer.out_h)}) + attr.update({'add_out_channel' : (add_layer.out_c)}) + attr.update({'add_out_size' : (add_layer.out_h)}) + + elif LAYERTYPE.ROUTE == layer.type: + pass + + elif LAYERTYPE.COST == layer.type: + pass + + elif LAYERTYPE.REORG == layer.type: + attr.update({'stride' : layer.stride}) + + elif LAYERTYPE.REGION == layer.type: + attr.update({'n' : layer.n}) + attr.update({'classes' : layer.classes}) + attr.update({'coords' : layer.coords}) + attr.update({'background' : layer.background}) + attr.update({'softmax' : layer.softmax}) + attr.update({'shape' : (1, layer.c, layer.h, layer.w)}) + + elif LAYERTYPE.YOLO == layer.type: + attr.update({'n' : layer.n}) + attr.update({'classes' : layer.classes}) + attr.update({'shape' : (1, layer.c, layer.h, layer.w)}) + + elif LAYERTYPE.UPSAMPLE == layer.type: + attr.update({'scale' : layer.stride}) + + elif LAYERTYPE.L2NORM == layer.type: + pass + + else: + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend Darknet.'.format(layer.type)) + + return attr + + def _get_tvm_params_name(self, opname, arg_name): + """Makes the params name for the k,v pair.""" + return opname + '_'+ arg_name + + def _get_darknet_params(self, layer, opname): + """To parse and get the darknet params.""" + if LAYERTYPE.CONVOLUTIONAL == layer.type: + self._get_convolution_weights(layer, opname) + + elif LAYERTYPE.CONNECTED == layer.type: + self._get_connected_weights(layer, opname) + + elif LAYERTYPE.REGION == layer.type: + self._get_region_weights(layer, opname) + + elif LAYERTYPE.YOLO == layer.type: + self._get_yolo_weights(layer, opname) + def _preproc_layer(self, layer, layer_num): + """To preprocess each darknet layer, some layer doesnt need processing.""" + if layer_num == 0: + name = 'data' + attribute = {} + sym = [_sym.Variable(name, **attribute)] + else: + sym = self._sym_array[layer_num - 1] + skip_layer = False + + if LAYERTYPE.ROUTE == layer.type: + sym = [] + for j in range(layer.n): + sym.append(self._sym_array[layer.input_layers[j]]) + if layer.n == 1: + skip_layer = True + + elif LAYERTYPE.COST == layer.type: + skip_layer = True + + elif LAYERTYPE.SHORTCUT == layer.type: + sym = [sym, self._sym_array[layer.index]] + + elif LAYERTYPE.BLANK == layer.type: + skip_layer = True + + if skip_layer is True: + self._sym_array[layer_num] = sym + + return skip_layer, sym + + def _get_opname(self, layer): + """Returs the layer name.""" + 
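# layer.type is the integer LAYERTYPE constant; it doubles as the lookup + # key into _DARKNET_CONVERT_MAP inside _darknet_convert_symbol. +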
return layer.type + + def _new_rnn_state_sym(self, state=None, name='rnn'): + """Returs a symbol for state""" + sym_name = name + "%d_state" % self._state_ctr[name] + self._state_ctr[name] += 1 + return _sym.Variable(name=sym_name, init=state) + + def _get_rnn_state_buffer(self, layer, name): + """Get the state buffer for rnn.""" + buffer = np.zeros((1, layer.outputs), self.dtype) + return self._new_rnn_state_sym(buffer, name) + + def _get_darknet_rnn_attrs(self, layer, sym): + """Get the rnn converted symbol from attributes.""" + attr = self._get_darknet_attrs(layer, 0) + op_name = self._get_opname(layer) + layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr) + self._get_darknet_params(layer, layer_name) + return sym + + def _handle_darknet_rnn_layers(self, layer_num, sym): + """Parse attributes and handle the rnn layers.""" + attr = {} + layer = self.net.layers[layer_num] + processed = False + + if LAYERTYPE.RNN == layer.type: + attr.update({'n' : layer.n}) + attr.update({'batch' : layer.batch}) + attr.update({'num_hidden' : str(layer.outputs)}) + + state = self._get_rnn_state_buffer(layer, 'rnn') + + for _ in range(layer.steps): + input_layer = layer.input_layer + sym = self._get_darknet_rnn_attrs(input_layer, sym) + + self_layer = layer.self_layer + state = self._get_darknet_rnn_attrs(self_layer, state) + + op_name, new_attrs = 'elemwise_add', {} + new_inputs = _as_list([sym, state]) + state = get_nnvm_op(op_name)(*new_inputs, **new_attrs) + self._outs.append(state) + + output_layer = layer.output_layer + sym = self._get_darknet_rnn_attrs(output_layer, state) + + self._sym_array[layer_num] = sym + processed = True + + elif LAYERTYPE.CRNN == layer.type: + attr.update({'n' : layer.n}) + attr.update({'batch' : layer.batch}) + attr.update({'num_hidden' : str(layer.outputs)}) + + state = self._get_rnn_state_buffer(layer, 'crnn') + + for _ in range(layer.steps): + input_layer = layer.input_layer + sym = self._get_darknet_rnn_attrs(input_layer, sym) + + self_layer = layer.self_layer + state = self._get_darknet_rnn_attrs(self_layer, state) + + op_name, new_attrs = 'elemwise_add', {} + new_inputs = _as_list([sym, state]) + state = get_nnvm_op(op_name)(*new_inputs, **new_attrs) + self._outs.append(state) + + output_layer = layer.output_layer + sym = self._get_darknet_rnn_attrs(output_layer, state) + + self._sym_array[layer_num] = sym + processed = True + + elif LAYERTYPE.LSTM == layer.type: + if layer.steps > 1: + raise tvm.error.OpAttributeInvalid( + 'Number of steps {} of RNN is not valid.'.format(layer.steps)) + + op_name_add = 'elemwise_add' + op_name_mul = 'elemwise_mul' + attrs = {} + act_attr = {} + + h_state = self._get_rnn_state_buffer(layer, 'lstm') + c_state = self._get_rnn_state_buffer(layer, 'cell_state') + for _ in range(layer.steps): + sym_wf = self._get_darknet_rnn_attrs(layer.wf, h_state) + sym_wi = self._get_darknet_rnn_attrs(layer.wi, h_state) + sym_wg = self._get_darknet_rnn_attrs(layer.wg, h_state) + sym_wo = self._get_darknet_rnn_attrs(layer.wo, h_state) + + input_sym = sym + sym_uf = self._get_darknet_rnn_attrs(layer.uf, input_sym) + sym_ui = self._get_darknet_rnn_attrs(layer.ui, input_sym) + sym_ug = self._get_darknet_rnn_attrs(layer.ug, input_sym) + sym_uo = self._get_darknet_rnn_attrs(layer.uo, input_sym) + + new_inputs = _as_list([sym_wf, sym_uf]) + add_f = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + new_inputs = _as_list([sym_wi, sym_ui]) + add_i = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + new_inputs = _as_list([sym_wg, sym_ug]) + 
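# Standard LSTM cell assembled gate by gate: f, i, o = sigmoid(W*h + U*x) and + # g = tanh(W*h + U*x) with per-gate weights, then c' = f*c + i*g and + # h' = o * tanh(c'). +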
add_g = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + new_inputs = _as_list([sym_wo, sym_uo]) + add_o = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + act_attr['activation'] = ACTIVATION.LOGISTIC + act_f, _ = _darknet_activations(_as_list(add_f), act_attr) + + act_attr['activation'] = ACTIVATION.LOGISTIC + act_i, _ = _darknet_activations(_as_list(add_i), act_attr) + + act_attr['activation'] = ACTIVATION.TANH + act_g, _ = _darknet_activations(_as_list(add_g), act_attr) + + act_attr['activation'] = ACTIVATION.LOGISTIC + act_o, _ = _darknet_activations(_as_list(add_o), act_attr) + + new_inputs = _as_list([act_i, act_g]) + mul_t = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) + + new_inputs = _as_list([act_f, c_state]) + c_state = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) + + new_inputs = _as_list([mul_t, c_state]) + c_state = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + act_attr['activation'] = ACTIVATION.TANH + h_state, _ = _darknet_activations(_as_list(c_state), act_attr) + + new_inputs = _as_list([act_o, h_state]) + h_state = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) + self._outs = self._outs + [c_state, h_state] + sym = h_state + self._sym_array[layer_num] = sym + processed = True + + elif LAYERTYPE.GRU == layer.type: + if layer.steps > 1: + raise tvm.error.OpAttributeInvalid( + 'Number of steps {} is not valid in RNN.'.format(layer.steps)) + + op_name_add = 'elemwise_add' + op_name_mul = 'elemwise_mul' + attrs = {} + act_attr = {} + + state = self._get_rnn_state_buffer(layer, "gru") + for _ in range(layer.steps): + sym_wz = self._get_darknet_rnn_attrs(layer.wz, state) + sym_wr = self._get_darknet_rnn_attrs(layer.wr, state) + + input_sym = sym + sym_uz = self._get_darknet_rnn_attrs(layer.uz, input_sym) + sym_ur = self._get_darknet_rnn_attrs(layer.ur, input_sym) + sym_uh = self._get_darknet_rnn_attrs(layer.uh, input_sym) + + new_inputs = _as_list([sym_uz, sym_wz]) + add_z = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + new_inputs = _as_list([sym_ur, sym_wr]) + add_r = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + act_attr['activation'] = ACTIVATION.LOGISTIC + act_z, _ = _darknet_activations(_as_list(add_z), act_attr) + + act_attr['activation'] = ACTIVATION.LOGISTIC + act_r, _ = _darknet_activations(_as_list(add_r), act_attr) + + new_inputs = _as_list([act_r, state]) + forgot = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) + + sym_wh = self._get_darknet_rnn_attrs(layer.wh, forgot) + + new_inputs = _as_list([sym_uh, sym_wh]) + h_state = get_nnvm_op(op_name_add)(*new_inputs, **attrs) + + if layer.tanh == 1: + act_attr['activation'] = ACTIVATION.TANH + else: + act_attr['activation'] = ACTIVATION.LOGISTIC + h_state, _ = _darknet_activations(_as_list(h_state), act_attr) + + sym = act_z * state + (1 - act_z) * h_state + + self._outs = self._outs + [sym] + self._sym_array[layer_num] = sym + processed = True + + return processed, sym + + def _make_outlist(self, sym, op_name, layer, layer_num): + if layer.type == LAYERTYPE.REGION: + k = self._get_tvm_params_name(op_name, 'attr') + self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) + k = self._get_tvm_params_name(op_name, 'bias') + self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) + if layer_num != self.net.n-1: + self._outs.insert(0, sym) + + elif layer.type == LAYERTYPE.YOLO: + k = self._get_tvm_params_name(op_name, 'attr') + self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) + k = self._get_tvm_params_name(op_name, 'bias') + 
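# Region/YOLO parameters (attributes, biases, masks) are exposed as extra + # graph outputs so detection post-processing can read them back. +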
self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) + k = self._get_tvm_params_name(op_name, 'mask') + self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) + if layer_num != self.net.n-1: + self._outs.insert(0, sym) + + def from_darknet(self): + """To convert the darknet symbol to nnvm symbols.""" + for i in range(self.net.n): + layer = self.net.layers[i] + need_skip, sym = self._preproc_layer(layer, i) + if need_skip is True: + continue + + processed, sym = self._handle_darknet_rnn_layers(i, sym) + if processed is True: + continue + + attr = self._get_darknet_attrs(layer, i) + op_name = self._get_opname(layer) + layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr) + self._get_darknet_params(self.net.layers[i], layer_name) + self._sym_array[i] = sym + self._make_outlist(sym, layer_name, layer, i) + + self._outs = _as_list(sym) + self._outs + if isinstance(self._outs, list): + sym = _sym.Group(self._outs) + return sym, self._tvmparams + +def from_darknet(net, dtype='float32'): + """Convert from darknet's model into compatible NNVM format. + Reconstruct a nnvm symbol by traversing the darknet input. + + Parameters + ---------- + net : ctype Pointer to network + Darknet parsed symbols + + dtype : str + Datatype of the input net structure, default is float32 + + Returns + ------- + sym : nnvm.Symbol + Compatible nnvm symbol + + params : dict of str to tvm.NDArray + The parameter dict to be used by nnvm + """ + + return GraphProto(net, dtype).from_darknet() diff --git a/nnvm/python/nnvm/frontend/keras.py b/nnvm/python/nnvm/frontend/keras.py new file mode 100644 index 000000000000..f647a644bd2b --- /dev/null +++ b/nnvm/python/nnvm/frontend/keras.py @@ -0,0 +1,727 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, import-self +"""Keras frontend.""" +from __future__ import absolute_import as _abs +import sys +import numpy as np +import tvm +from .. import symbol as _sym +from .common import SymbolTable + +__all__ = ['from_keras'] + + +def _check_data_format(keras_layer): + if hasattr(keras_layer, ('data_format')): + if keras_layer.data_format != 'channels_last': + raise ValueError("Keras frontend currently supports data_format = channels_last only.") + + +def _get_pad_pair(input1d, kernel1d, stride1d): + out1d = (input1d + stride1d - 1) // stride1d + pad = np.maximum((out1d - 1) * stride1d + kernel1d - input1d, 0) + pad_before = pad // 2 + pad_after = pad - pad_before + return [pad_before, pad_after] + +def _get_elu(insym, alpha): + """ A helper method for elu. 
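+ Implements elu(x) = alpha*(exp(x) - 1) for x < 0 and x otherwise, using + the equivalent form -alpha * relu(1 - exp(x)) + relu(x).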
+ """ + return -alpha * _sym.relu(1 - _sym.exp(insym)) + _sym.relu(insym) + +def _convert_recurrent_activation(insym, keras_layer): + act_type = keras_layer.recurrent_activation.__name__ + return _convert_activation(insym, act_type, None) + +def _convert_activation(insym, keras_layer, _): + if isinstance(keras_layer, str): + act_type = keras_layer + else: + if sys.version_info.major < 3: + act_type = keras_layer.activation.func_name + else: + act_type = keras_layer.activation.__name__ + if act_type == 'linear': + if isinstance(keras_layer, str): + return insym + alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1 + beta = keras_layer.beta if hasattr(keras_layer, "beta") else 0 + return _sym.__add_scalar__(_sym.__mul_scalar__(insym, \ + scalar=alpha), scalar=beta) + if act_type == 'softmax': + return _sym.softmax(insym, axis=1) + if act_type == 'sigmoid': + return _sym.sigmoid(insym) + if act_type == 'tanh': + return _sym.tanh(insym) + if act_type == 'relu': + return _sym.relu(insym) + if act_type == 'softplus': + return _sym.log(_sym.__add_scalar__(_sym.exp(insym), scalar=1)) + if act_type == 'elu': + alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1 + return _get_elu(insym, alpha) + if act_type == 'selu': + # Alpha, Gamma values, obtained from https://arxiv.org/abs/1706.02515 + alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") \ + else 1.6732632423543772848170429916717 + gamma = keras_layer.gamma if hasattr(keras_layer, "gamma") \ + else 1.0507009873554804934193349852946 + return gamma * _get_elu(insym, alpha) + if act_type == 'relu6': + return _sym.clip(insym, a_min=0, a_max=6) + if act_type == 'softsign': + return insym / (1 + (_sym.relu(insym) + _sym.relu(_sym.negative(insym)))) + if act_type == 'hard_sigmoid': + transformX = (0.2 * insym) + 0.5 + return _sym.clip(transformX, a_min=0, a_max=1) + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend Keras.'.format(act_type)) + + +def _convert_advanced_activation(insym, keras_layer, symtab): + act_type = type(keras_layer).__name__ + if act_type == 'ReLU': + if keras_layer.max_value: + return _sym.clip(insym, a_min=0, a_max=keras_layer.max_value) + return _sym.relu(insym) + if act_type == 'LeakyReLU': + return _sym.leaky_relu(insym, alpha=keras_layer.alpha) + if act_type == 'ELU': + alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1 + return _get_elu(insym, alpha) + if act_type == 'PReLU': + assert hasattr(keras_layer, "alpha"), \ + "alpha required for PReLU." 
+ _check_data_format(keras_layer) + size = len(keras_layer.alpha.shape) + return -symtab.new_const(keras_layer.get_weights()[0] \ + .transpose(np.roll(range(size), 1))) \ + * _sym.relu(-insym) + _sym.relu(insym) + if act_type == 'ThresholdedReLU': + theta = keras_layer.theta if hasattr(keras_layer, "theta") else 1.0 + theta_tensor = _sym.full_like(insym[0], fill_value=float(theta)) + return _sym.elemwise_mul(insym[0], _sym.greater(insym[0], theta_tensor, out_type="float32")) + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend Keras.'.format(act_type)) + + +def _convert_merge(insym, keras_layer, _): + merge_type = type(keras_layer).__name__ + ret = insym[0] + for i in range(1, len(insym)): + if merge_type == 'Add': + ret = _sym.elemwise_add(ret, insym[i]) + elif merge_type == 'Subtract': + ret = _sym.elemwise_sub(ret, insym[i]) + elif merge_type == 'Multiply': + ret = _sym.elemwise_mul(ret, insym[i]) + else: + raise tvm.error.OpNotImplemented( + 'Operator {} Merge is not supported in frontend Keras.'.format(merge_type)) + return ret + + +def _convert_dense(insym, keras_layer, symtab): + weightList = keras_layer.get_weights() + weight = symtab.new_const(weightList[0].transpose([1, 0])) + params = {'weight':weight, 'use_bias':False, 'units':weightList[0].shape[1]} + if keras_layer.use_bias: + params['use_bias'] = True + params['bias'] = symtab.new_const(weightList[1]) + input_shape = keras_layer.input_shape + input_dim = len(input_shape) + # In case of RNN dense, input shape will be (1, 1, n) + if input_dim > 2: + input_shape = tuple(dim if dim else 1 for dim in _as_list(input_shape)[0]) + if input_dim != 3 or input_shape[0] != 1 or input_shape[1] != 1: + msg = 'Value {} in attribute "input_shape" of operator Dense is not valid.' 
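+ # Only the (1, 1, n) shape produced by the RNN cells can be squeezed down + # to the 2-D input that nnvm's dense expects; reject anything else.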
+ raise tvm.error.OpAttributeInvalid(msg.format(input_shape)) + insym = _sym.squeeze(insym, axis=0) + out = _sym.dense(data=insym, **params) + # defuse activation + if sys.version_info.major < 3: + act_type = keras_layer.activation.func_name + else: + act_type = keras_layer.activation.__name__ + if act_type != 'linear': + out = _convert_activation(out, act_type, symtab) + if input_dim > 2: + out = _sym.expand_dims(out, axis=0) + return out + + +def _convert_convolution(insym, keras_layer, symtab): + _check_data_format(keras_layer) + is_deconv = type(keras_layer).__name__ == 'Conv2DTranspose' + is_depthconv = type(keras_layer).__name__ == 'DepthwiseConv2D' + weightList = keras_layer.get_weights() + if is_deconv: + kernel_h, kernel_w, n_filters, in_channels = weightList[0].shape + weight = weightList[0].transpose([3, 2, 0, 1]) + elif is_depthconv: + kernel_h, kernel_w, in_channels, depth_mult = weightList[0].shape + weight = weightList[0].transpose([2, 3, 0, 1]) + else: + kernel_h, kernel_w, in_channels, n_filters = weightList[0].shape + weight = weightList[0].transpose([3, 2, 0, 1]) + if isinstance(keras_layer.dilation_rate, (list, tuple)): + dilation = [keras_layer.dilation_rate[0], keras_layer.dilation_rate[1]] + else: + dilation = [keras_layer.dilation_rate, keras_layer.dilation_rate] + dilated_kernel_h = (kernel_h - 1) * dilation[0] + 1 + dilated_kernel_w = (kernel_w - 1) * dilation[1] + 1 + stride_h, stride_w = keras_layer.strides + params = {'weight': symtab.new_const(weight), + 'kernel_size': [kernel_h, kernel_w], + 'strides': [stride_h, stride_w], + 'dilation': dilation, + 'padding': [0, 0], + 'use_bias': False} + if is_depthconv: + params['channels'] = in_channels * depth_mult + params['groups'] = in_channels + else: + params['channels'] = n_filters + if keras_layer.use_bias: + params['use_bias'] = True + params['bias'] = symtab.new_const(weightList[1]) + if keras_layer.padding == 'valid': + pass + # we insert a separate pad operator + elif keras_layer.padding == 'same': + in_h = keras_layer.input_shape[1] + in_w = keras_layer.input_shape[2] + pad_t, pad_b = _get_pad_pair(in_h, dilated_kernel_h, stride_h) + pad_l, pad_r = _get_pad_pair(in_w, dilated_kernel_w, stride_w) + if pad_t == pad_b and pad_l == pad_r: + params['padding'] = (pad_t, pad_l) + else: + insym = _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r))) + else: + msg = 'Value {} in attribute "padding" of operator Convolution is not valid.' 
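+ # For reference, 'same' padding above solves out = ceil(in / stride); e.g. + # in=224, dilated kernel 3, stride 2 gives out=112 and a total pad of + # max((112-1)*2 + 3 - 224, 0) = 1, i.e. the asymmetric pair (0, 1), which is + # why an explicit _sym.pad is emitted instead of a conv padding attribute.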
+ raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding)) + if is_deconv: + out = _sym.conv2d_transpose(data=insym, **params) + else: + out = _sym.conv2d(data=insym, **params) + # defuse activation + if sys.version_info.major < 3: + act_type = keras_layer.activation.func_name + else: + act_type = keras_layer.activation.__name__ + if act_type != 'linear': + out = _convert_activation(out, act_type, symtab) + return out + + +def _convert_separable_convolution(insym, keras_layer, symtab): + _check_data_format(keras_layer) + weightList = keras_layer.get_weights() + # depthwise conv + kernel_h, kernel_w, in_channels, depth_mult = weightList[0].shape + stride_h, stride_w = keras_layer.strides + weight0 = weightList[0].transpose([2, 3, 0, 1]) + params0 = {'weight': symtab.new_const(weight0), + 'channels': in_channels * depth_mult, + 'groups': in_channels, + 'kernel_size': [kernel_h, kernel_w], + 'strides': [stride_h, stride_w], + 'dilation': [1, 1], + 'padding': [0, 0], + 'use_bias': False} + if keras_layer.padding == 'valid': + pass + # we insert a separate pad operator + elif keras_layer.padding == 'same': + in_h = keras_layer.input_shape[1] + in_w = keras_layer.input_shape[2] + pad_t, pad_b = _get_pad_pair(in_h, kernel_h, stride_h) + pad_l, pad_r = _get_pad_pair(in_w, kernel_w, stride_w) + insym = _sym.pad(data=insym, pad_width=( + (0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r))) + else: + msg = 'Value {} in attribute "padding" of operator Separable Convolution is not valid.' + raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding)) + depthconv = _sym.conv2d(data=insym, **params0) + # pointwise conv + weight1 = weightList[1].transpose([3, 2, 0, 1]) + params1 = {'weight': symtab.new_const(weight1), + 'channels': weight1.shape[0], + 'groups': 1, + 'kernel_size': [1, 1], + 'strides': [1, 1], + 'dilation': [1, 1], + 'use_bias': False} + if keras_layer.use_bias: + params1['use_bias'] = True + params1['bias'] = symtab.new_const(weightList[2]) + out = _sym.conv2d(data=depthconv, **params1) + # defuse activation + if sys.version_info.major < 3: + act_type = keras_layer.activation.func_name + else: + act_type = keras_layer.activation.__name__ + if act_type != 'linear': + out = _convert_activation(out, act_type, symtab) + return out + + +def _convert_flatten(insym, keras_layer, _): + _check_data_format(keras_layer) + # NCHW -> NHWC so that dense can be correctly converted + insym = _sym.transpose(insym, axes=[0, 2, 3, 1]) + return _sym.flatten(insym) + + +def _convert_pooling(insym, keras_layer, symtab): + _check_data_format(keras_layer) + pool_type = type(keras_layer).__name__ + # global pool in keras = global pool + flatten in nnvm + if pool_type == 'GlobalMaxPooling2D': + return _convert_flatten(_sym.global_max_pool2d(insym), keras_layer, symtab) + if pool_type == 'GlobalAveragePooling2D': + return _convert_flatten(_sym.global_avg_pool2d(insym), keras_layer, symtab) + pool_h, pool_w = keras_layer.pool_size + stride_h, stride_w = keras_layer.strides + params = {'pool_size': [pool_h, pool_w], + 'strides': [stride_h, stride_w], + 'padding': [0, 0]} + if keras_layer.padding == 'valid': + pass + elif keras_layer.padding == 'same': + in_h = keras_layer.input_shape[1] + in_w = keras_layer.input_shape[2] + pad_t, pad_b = _get_pad_pair(in_h, pool_h, stride_h) + pad_l, pad_r = _get_pad_pair(in_w, pool_w, stride_w) + params['padding'] = [pad_t, pad_l, pad_b, pad_r] + else: + msg = 'Value {} in attribute "padding" of operator Pooling is not valid.' 
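+ # Unlike the conv path, pooling takes the 4-element padding + # [pad_t, pad_l, pad_b, pad_r] above, so asymmetric 'same' padding needs no + # separate pad operator here.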
+        raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding))
+    if pool_type == 'MaxPooling2D':
+        return _sym.max_pool2d(insym, **params)
+    if pool_type == 'AveragePooling2D':
+        # TODO: Keras excludes padded zeros from the average, which is not modeled here
+        return _sym.avg_pool2d(insym, **params)
+    msg = 'Operator {} is not supported in frontend Keras.'
+    raise tvm.error.OpNotImplemented(msg.format(pool_type))
+
+
+def _convert_upsample(insym, keras_layer, _):
+    _check_data_format(keras_layer)
+    upsample_type = type(keras_layer).__name__
+    if upsample_type == "UpSampling1D":
+        h = keras_layer.size
+        params = {'scale': h}
+    elif upsample_type == "UpSampling2D":
+        h, w = keras_layer.size
+        if h != w:
+            raise tvm.error.OpAttributeInvalid(
+                'Upsample height ({}) must equal width ({}).'.format(h, w))
+        params = {'scale': h}
+    elif upsample_type == "UpSampling3D":
+        h, w, d = keras_layer.size
+        if h != w or w != d:
+            raise tvm.error.OpAttributeInvalid(
+                'Upsample height ({}), width ({}), and depth ({}) must be equal.'.format(h, w, d))
+        params = {'scale': h}
+    else:
+        msg = 'Operator {} is not supported in frontend Keras.'
+        raise tvm.error.OpNotImplemented(msg.format(upsample_type))
+    return _sym.upsampling(insym, **params)
+
+
+def _convert_cropping(insym, keras_layer, _):
+    _check_data_format(keras_layer)
+    crop_type = type(keras_layer).__name__
+    if crop_type == "Cropping2D":
+        (_, in_h, in_w, _) = keras_layer.input_shape
+        ((crop_t, crop_b), (crop_l, crop_r)) = keras_layer.cropping
+    else:
+        raise tvm.error.OpNotImplemented(
+            'Operator {} is not supported in frontend Keras.'.format(crop_type))
+    int32_max = np.iinfo(np.int32).max
+    return _sym.strided_slice(insym, begin=[0, 0, crop_t, crop_l],
+                              end=[int32_max, int32_max, in_h-crop_b, in_w-crop_r])
+
+
+def _convert_batchnorm(insym, keras_layer, symtab):
+    params = {'scale': False,
+              'center': False,
+              'epsilon': keras_layer.epsilon}
+    idx = 0
+    if keras_layer.scale:
+        params['scale'] = True
+        gamma = keras_layer.get_weights()[idx]
+        params['gamma'] = symtab.new_const(gamma)
+        idx += 1
+    if keras_layer.center:
+        params['center'] = True
+        beta = keras_layer.get_weights()[idx]
+        params['beta'] = symtab.new_const(beta)
+        idx += 1
+    moving_mean = keras_layer.get_weights()[idx]
+    moving_var = keras_layer.get_weights()[idx + 1]
+    params['moving_mean'] = symtab.new_const(moving_mean)
+    params['moving_var'] = symtab.new_const(moving_var)
+    return _sym.batch_norm(data=insym, **params)
+
+
+def _convert_padding(insym, keras_layer, _):
+    _check_data_format(keras_layer)
+    padding_type = type(keras_layer).__name__
+    padding = keras_layer.padding
+    top = left = bottom = right = 0
+    if padding_type == 'ZeroPadding2D':
+        if isinstance(padding, int):
+            top = left = bottom = right = padding
+        elif isinstance(padding, tuple):
+            if isinstance(padding[0], int):
+                top, left = padding
+                bottom, right = padding
+            elif isinstance(padding[0], tuple):
+                top, bottom = padding[0]
+                left, right = padding[1]
+            else:
+                msg = 'Value {} in attribute "padding" of operator {} is not valid.'
+                raise tvm.error.OpAttributeInvalid(msg.format(str(padding), padding_type))
+        else:
+            msg = 'Value {} in attribute "padding" of operator {} is not valid.'
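+            # ZeroPadding2D padding must be an int, a (sym_h, sym_w) pair,
+            # or a ((top, bottom), (left, right)) pair of pairs.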
+            raise tvm.error.OpAttributeInvalid(msg.format(str(padding), padding_type))
+    else:
+        raise tvm.error.OpNotImplemented(
+            'Operator {} is not supported in frontend Keras.'.format(padding_type))
+    return _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (top, bottom), (left, right)))
+
+
+def _convert_concat(insym, keras_layer, _):
+    _check_data_format(keras_layer)
+    if not isinstance(insym, list):
+        insym = [insym]
+    return _sym.concatenate(*insym, axis=1)
+
+
+def _convert_reshape(insym, keras_layer, _):
+    _check_data_format(keras_layer)
+    ch = keras_layer.input_shape[-1]
+    assert ch == keras_layer.target_shape[-1], \
+        "Only target shapes whose last dimension equals the input " \
+        "tensor's channel count are supported."
+    shape = (-1, ch) + keras_layer.target_shape[:-1]
+    return _sym.reshape(insym, shape=shape)
+
+def _convert_lstm(insym, keras_layer, symtab):
+    _check_data_format(keras_layer)
+    if not isinstance(insym, list):
+        buffer = np.zeros((1, keras_layer.units), 'float32')
+        c_sym = symtab.new_const(buffer)
+        h_sym = symtab.new_const(buffer)
+        insym = [insym, h_sym, c_sym]
+
+    in_data = insym[0]
+    next_h = insym[1]
+    next_c = insym[2]
+
+    weightList = keras_layer.get_weights()
+    inp_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.input_shape)[0])
+
+    kernel_wt = symtab.new_const(weightList[0].transpose([1, 0]))
+    recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0]))
+    in_bias = symtab.new_const(weightList[2])
+
+    units = list(weightList[0].shape)[1]
+
+    time_steps = inp_shape[1]
+    in_data = _sym.squeeze(in_data, axis=0)
+    in_data = _sym.split(in_data, indices_or_sections=time_steps, axis=0)
+    # loop over the time steps
+    for data in in_data:
+        ixh1 = _sym.dense(data, kernel_wt, use_bias=False, units=units)
+        ixh2 = _sym.dense(next_h, recurrent_wt, in_bias, use_bias=True, units=units)
+        gate = ixh1 + ixh2
+        gates = _sym.split(gate, indices_or_sections=4, axis=1)
+        in_gate = _convert_recurrent_activation(gates[0], keras_layer)
+        in_transform = _convert_recurrent_activation(gates[1], keras_layer)
+        next_c = in_transform * next_c + in_gate * _convert_activation(gates[2], keras_layer, None)
+        out_gate = _convert_recurrent_activation(gates[3], keras_layer)
+        next_h = out_gate * _convert_activation(next_c, keras_layer, None)
+
+    out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
+    out = _sym.reshape(next_h, shape=out_shape)
+    return [out, next_h, next_c]
+
+def _convert_simple_rnn(insym, keras_layer, symtab):
+    _check_data_format(keras_layer)
+    if not isinstance(insym, list):
+        buffer = np.zeros((1, keras_layer.units), 'float32')
+        prev_sym = symtab.new_const(buffer)
+        insym = [insym, prev_sym]
+    in_data = insym[0]
+    prev_sym = insym[1]
+
+    weightList = keras_layer.get_weights()
+    kernel_wt = symtab.new_const(weightList[0].transpose([1, 0]))
+    recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0]))
+    in_bias = symtab.new_const(weightList[2])
+    units = list(weightList[0].shape)[1]
+
+    in_data = _sym.flatten(in_data)
+    ixh = _sym.dense(in_data, kernel_wt, in_bias, use_bias=True, units=units)
+    prev_sym = _sym.flatten(prev_sym)
+    ixh2 = _sym.dense(prev_sym, recurrent_wt, use_bias=False, units=units)
+    output = ixh + ixh2
+    output = _convert_activation(output, keras_layer, None)
+
+    out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
+    output = _sym.reshape(output, shape=out_shape)
+
+    return [output, output]
+
+def _convert_gru(insym, keras_layer, symtab):
+    _check_data_format(keras_layer)
+    if not
isinstance(insym, list): + buffer = np.zeros((1, keras_layer.units), 'float32') + h_tm1 = symtab.new_const(buffer) + insym = [insym, h_tm1] + in_data = insym[0] + h_tm1_sym = insym[1] + + weightList = keras_layer.get_weights() + kernel_wt = symtab.new_const(weightList[0].transpose([1, 0])) + recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0])) + in_bias = symtab.new_const(weightList[2]) + + units = list(weightList[0].shape)[1] + + in_data = _sym.flatten(in_data) + matrix_x = _sym.dense(in_data, kernel_wt, in_bias, use_bias=True, units=units) + + # inputs projected by all gate matrices at once + split_indices = [keras_layer.units, 2 * keras_layer.units] + gates = _sym.split(matrix_x, indices_or_sections=split_indices, axis=1) + x_z = gates[0] + x_r = gates[1] + x_h = gates[2] + + # hidden state projected separately for update/reset and new + units = 2 * keras_layer.units + split_indices = [units] + rec_wts = _sym.split(recurrent_wt, indices_or_sections=split_indices, axis=0) + + h_tm1_sym = _sym.flatten(h_tm1_sym) + matrix_inner = _sym.dense(h_tm1_sym, rec_wts[0], use_bias=False, units=units) + + split_indices = [keras_layer.units] + recurrent = _sym.split(matrix_inner, indices_or_sections=split_indices, axis=1) + recurrent_z = recurrent[0] + recurrent_r = recurrent[1] + + rec_act_z = _convert_recurrent_activation(x_z + recurrent_z, keras_layer) + rec_act_r = _convert_recurrent_activation(x_r + recurrent_r, keras_layer) + + units = keras_layer.units + recurrent_h = _sym.dense(rec_act_r * h_tm1_sym, rec_wts[1], use_bias=False, units=units) + act_hh = _convert_activation(x_h + recurrent_h, keras_layer, None) + + # previous and candidate state mixed by update gate + output = rec_act_z * h_tm1_sym + (1 - rec_act_z) * act_hh + + out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) + output = _sym.reshape(output, shape=out_shape) + return [output, output] + +def _default_skip(insym, keras_layer, _): # pylint: disable=unused-argument + """Layers that can be skipped because they are train time only.""" + return insym + + +_convert_map = { + 'Dense' : _convert_dense, + 'Activation' : _convert_activation, + 'ReLU' : _convert_advanced_activation, + 'LeakyReLU' : _convert_advanced_activation, + 'PReLU' : _convert_advanced_activation, + 'ELU' : _convert_advanced_activation, + 'ThresholdedReLU' : _convert_advanced_activation, + + 'AveragePooling2D' : _convert_pooling, + 'MaxPooling2D' : _convert_pooling, + 'GlobalAveragePooling2D' : _convert_pooling, + 'GlobalMaxPooling2D' : _convert_pooling, + 'Conv2D' : _convert_convolution, + 'Conv2DTranspose' : _convert_convolution, + 'DepthwiseConv2D' : _convert_convolution, + 'SeparableConv2D' : _convert_separable_convolution, + + 'Flatten' : _convert_flatten, + 'Reshape' : _convert_reshape, + 'Concatenate' : _convert_concat, + 'BatchNormalization' : _convert_batchnorm, + + 'Add' : _convert_merge, + 'Subtract' : _convert_merge, + 'Multiply' : _convert_merge, + 'ZeroPadding2D' : _convert_padding, + 'UpSampling2D' : _convert_upsample, + 'Cropping2D' : _convert_cropping, + + # 'ZeroPadding1D' : _convert_padding, + # 'AveragePooling1D' : _convert_pooling, + # 'MaxPooling1D' : _convert_pooling, + # 'GlobalAveragePooling1D' : _convert_pooling, + # 'GlobalMaxPooling1D' : _convert_pooling, + # 'Cropping1D' : _convert_cropping, + # 'UpSampling1D' : _convert_upsample, + # 'UpSampling3D' : _convert_upsample, + # 'Conv1D' : _convert_convolution1d, + + 'SimpleRNN' : _convert_simple_rnn, + 'LSTM' : _convert_lstm, + 'GRU' : 
_convert_gru, + # 'Bidirectional' : _convert_bidirectional, + # 'TimeDistributed' : _default_skip, + + # 'Average' : _convert_merge, + # 'Maximum' : _convert_merge, + # 'Dot' : _convert_merge, + # 'Permute' : _convert_permute, + # 'Embedding' : _convert_embedding, + # 'RepeatVector' : _convert_repeat_vector, + + 'InputLayer' : _default_skip, + 'Dropout' : _default_skip, + 'SpatialDropout2D' : _default_skip, + 'SpatialDropout1D' : _default_skip, +} + + +def _check_unsupported_layers(model): + for layer in model.layers: + op_name = type(layer).__name__ + if op_name not in _convert_map: + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend Keras.'.format(op_name)) + +def _as_list(arr): + """Force being a list, ignore if already is.""" + if isinstance(arr, list): + return arr + return [arr] + +def keras_op_to_nnvm(insym, keras_layer, outname, symtab): + """Convert keras layer to nnvm symbol, and update symtab. + + Parameters + ---------- + insym : nnvm.symbol.Symbol or a list of it + The input nnvm symbol(s) + + keras_layer : keras.layers + The keras layer to be converted + + outname : str + Name of the output nnvm symbol + + symtab : nnvm.frontend.common.SymbolTable + The global symbol table to be updated + """ + op_name = type(keras_layer).__name__ + if op_name not in _convert_map: + raise tvm.error.OpNotImplemented( + 'Operator {} is not supported in frontend Keras.'.format(op_name)) + outs = _convert_map[op_name](insym, keras_layer, symtab) + outs = _as_list(outs) + + for t_idx, out in enumerate(outs): + name = outname + ":" + str(t_idx) + symtab.set_var(name, out) + +def from_keras(model): + """Convert keras model to NNVM format. + + Parameters + ---------- + model : keras.engine.training.Model + The keras model to be converted + + Returns + ------- + sym : nnvm.Symbol + Compatible nnvm symbol + + params : dict of str to tvm.NDArray + The parameter dict to be used by nnvm + """ + try: + import keras + except ImportError: + raise ImportError('Keras must be installed') + + assert isinstance(model, keras.engine.training.Model) + if keras.backend.backend() != 'tensorflow': + raise ValueError("Keras frontend currently supports tensorflow backend only.") + if keras.backend.image_data_format() != 'channels_last': + raise ValueError("Keras frontend currently supports data_format = channels_last only.") + _check_unsupported_layers(model) + + symtab = SymbolTable() + for keras_layer in model.layers: + if isinstance(keras_layer, keras.engine.InputLayer): + symtab.get_var(keras_layer.name, must_contain=False) + else: + inbound_nodes = keras_layer.inbound_nodes if hasattr(keras_layer, 'inbound_nodes') \ + else keras_layer._inbound_nodes if hasattr(keras_layer, '_inbound_nodes') \ + else None + if inbound_nodes is None: + raise TypeError("Unknown layer type or unsupported Keras version : {}" + .format(keras_layer)) + for node_idx, node in enumerate(inbound_nodes): + # If some nodes in imported model is not relevant to the current model, + # skip such layers. model._network_nodes contains keys of all nodes relevant + # to the current model. + if not model._node_key(keras_layer, node_idx) in model._network_nodes: + continue + + insym = [] + + # Since Keras allows creating multiple layers from the same name instance, + # we append node index to the symbol name to make it unique. + # The one exception is InputLayer. Changing input variable names after conversion + # would confuse users, so we should keep them as far as possible. 
Fortunately, + # they are named uniquely to input_1, input_2, input_3 ... by default. + zip_node = zip(node.node_indices, node.tensor_indices, node.inbound_layers) + for n_idx, t_idx, layer in zip_node: + if isinstance(layer, keras.engine.InputLayer): + sym = symtab.get_var(layer.name, must_contain=True) + else: + sym_name = layer.name + ':' + str(n_idx) + ':' + str(t_idx) + sym = symtab.get_var(sym_name, must_contain=True) + insym.append(sym) + + if len(insym) == 1: + insym = insym[0] + keras_op_to_nnvm(insym, keras_layer, keras_layer.name + ':' + str(node_idx), symtab) + + #model._output_coordinates contains out_node(oc[0]), node_index(oc[1]) and tensor index(oc[2]) + #Get all output nodes in symtab using the name made from above values. The out symbols + #were added to symtab in keras_op_to_nnvm using this name. For multiple outputs, make a list + #with these output symbols and Group them. + outsym = [symtab.get_var(oc[0].name + ":" + str(oc[1]) + ":" + str(oc[2])) + for oc in model._output_coordinates] + + tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()} + return _sym.Group(outsym), tvmparams diff --git a/nnvm/python/nnvm/frontend/onnx.py b/nnvm/python/nnvm/frontend/onnx.py new file mode 100644 index 000000000000..8a92821476a5 --- /dev/null +++ b/nnvm/python/nnvm/frontend/onnx.py @@ -0,0 +1,1038 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines +"""ONNX: Open Neural Network Exchange frontend.""" +from __future__ import absolute_import as _abs +import numpy as np +import tvm +from .. import symbol as _sym +from .common import get_nnvm_op, Renamer, SymbolTable, AttrConverter as AttrCvt +from .onnx_caffe2_utils import dimension_picker, dimension_constraint, \ + infer_channels, revert_caffe2_pad + +__all__ = ['from_onnx'] + + +def onnx_storage_order2layout(storage_order): + if storage_order not in (0, 1): + raise tvm.error.OpAttributeInvalid('Mode of storage_order must be either 0 or 1') + + return 'NCHW' if storage_order == 0 else 'NHWC' + + +class OnnxOpConverter(object): + """ A helper class for holding onnx op converters. + """ + + @classmethod + def get_converter(cls, opset): + """ Get converter matches given opset. + + :param opset: opset from model. + :return: converter, which should be `_impl_vx`. Number x is the biggest + number smaller than or equal to opset belongs to all support versions. 
+ """ + versions = [ + int(d.replace('_impl_v', '')) for d in dir(cls) if '_impl_v' in d + ] + versions = sorted(versions + [opset]) + version = versions[ + max([i for i, v in enumerate(versions) if v == opset]) - 1] + if hasattr(cls, '_impl_v{}'.format(version)): + return getattr(cls, '_impl_v{}'.format(version)) + raise NotImplementedError( + 'opset version {} of {} not implemented'.format( + version, cls.__name__)) + + +class Elemwise(OnnxOpConverter): + """ A helper class for elemwise op converters. + """ + + name = '' + + @classmethod + def _math_name_picker(cls, suffix): + + def _impl(attr): + if attr.get('broadcast', 0): + return 'broadcast_' + suffix + return 'elemwise_' + suffix + + return _impl + + @classmethod + def _impl_v1(cls, inputs, attr, params): + assert len(inputs) == 2, "Math op take 2 inputs, {} given".format( + len(inputs)) + op_name = cls._math_name_picker(cls.name)(attr) + axis = int(attr.get('axis', 0)) + conv_ops = ["conv2d", "conv2d_transpose"] + if op_name == 'broadcast_add' and inputs[0].attr('op_name') in conv_ops: + # TODO(zhreshold): remove hard coded infershape + inputs[1] = _sym.expand_dims(inputs[1], axis=axis, num_newaxis=2) + return get_nnvm_op(op_name)(*inputs) + + +class Pool(OnnxOpConverter): + """ A helper class for pool op converters. + """ + + name = '' + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return AttrCvt( + op_name=dimension_picker(cls.name), + transforms={ + 'kernel_shape': 'pool_size', + 'pads': ('padding', (0, 0), revert_caffe2_pad) + }, + # very weird attributes here in onnx, force check + ignores=['dilations'], + # TODO(zhreshold): make sure ceil_mode in onnx, and layout? + extras={'ceil_mode': False}, + custom_check=dimension_constraint())(inputs, attr, params) + + +class Absolute(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return _sym.relu(inputs[0]) + _sym.relu(_sym.negative(inputs[0])) + + +class Add(Elemwise): + name = 'add' + + +class AveragePool(Pool): + name = 'avg_pool' + + +class BatchNorm(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + # TODO(zhreshold): 'spatial' is not properly handled here. 
+ return AttrCvt( + op_name='batch_norm', + disables=['momentum'], + ignores=['spatial', 'is_test', 'consumed_inputs'])(inputs, attr, + params) + + +class Conv(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + # get number of channels + channels = infer_channels(inputs[1], params) + attr['channels'] = channels + return AttrCvt( + op_name=dimension_picker('conv'), + transforms={ + 'kernel_shape': 'kernel_size', + 'dilations': ('dilation', (0, 0)), + 'pads': ('padding', (0, 0), revert_caffe2_pad), + 'group': ('groups', 1) + }, + extras={'use_bias': len(inputs) == 3}, + custom_check=dimension_constraint())(inputs, attr, params) + + +class ConvTranspose(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + # get number of channels + channels = infer_channels(inputs[1], params, True) + attr['channels'] = channels + groups = attr.pop('group') + attr['groups'] = groups + return AttrCvt( + op_name=dimension_picker('conv', '_transpose'), + transforms={ + 'kernel_shape': 'kernel_size', + 'dilations': ('dilation', (0, 0)), + 'pads': ('padding', (0, 0), revert_caffe2_pad) + }, + disables=['output_shape'], + extras={'use_bias': len(inputs) == 3}, + custom_check=dimension_constraint())(inputs, attr, params) + + +class Div(Elemwise): + name = 'div' + + +class Elu(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + alpha = float(attr.get('alpha', 1.0)) + return -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu( + inputs[0]) + + +class Gemm(OnnxOpConverter): + """ Operator converter for Gemm. + """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + assert len(inputs) == 3, "Gemm op take 3 inputs, {} given".format( + len(inputs)) + # Y = alpha * A * B + beta * C + alpha = float(attr.get('alpha', 1.0)) + beta = float(attr.get('beta', 1.0)) + transA = int(attr.get('transA', 0)) + transB = int(attr.get('transB', 0)) + # get number of channels + channels = infer_channels(inputs[1], params, not transB) + if transA: + inputs[0] = _sym.transpose(inputs[0], axes=(1, 0)) + if not transB: + inputs[1] = _sym.transpose(inputs[1], axes=(1, 0)) + inputs[0] = _sym.flatten(inputs[0]) + return _sym.dense( + alpha * inputs[0], inputs[1], beta * inputs[2], units=channels) + + +class MaxPool(Pool): + """ Operator converter for MaxPool + """ + name = 'max_pool' + + @classmethod + def _impl_v8(cls, inputs, attr, params): + return AttrCvt( + op_name=dimension_picker(cls.name), + transforms={ + 'kernel_shape': 'pool_size', + 'pads': ('padding', (0, 0), revert_caffe2_pad), + 'storage_order': ('layout', 'NCHW', onnx_storage_order2layout), + }, + # very weird attributes here in onnx, force check + ignores=['dilations', 'auto_pad'], + # TODO(higumachan): make sure ceil_mode in onnx, and layout? + extras={'ceil_mode': False}, + custom_check=dimension_constraint())(inputs, attr, params) + + @classmethod + def _impl_v10(cls, inputs, attr, params): + return AttrCvt( + op_name=dimension_picker(cls.name), + transforms={ + 'kernel_shape': 'pool_size', + 'pads': ('padding', (0, 0), revert_caffe2_pad), + 'storage_order': ('layout', 'NCHW', onnx_storage_order2layout), + 'ceil_mode': 'ceil_mode' + }, + # very weird attributes here in onnx, force check + ignores=['dilations', 'auto_pad'], + custom_check=dimension_constraint())(inputs, attr, params) + +class Mul(Elemwise): + name = 'mul' + + +class Pad(OnnxOpConverter): + """ Operator converter for Pad. 
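+
+    ONNX gives flattened pads as (x1_begin, x2_begin, ..., x1_end, x2_end, ...);
+    these are regrouped into per-axis (before, after) pairs for the nnvm pad
+    operator. Only constant-mode padding is supported.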
+ """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + pad_width = [] + pads = attr.pop('paddings') + dims = int(len(pads) / 2) + for i in range(dims): + pad_width.append((pads[i], pads[i+dims])) + attr['pad_width'] = pad_width + + return AttrCvt( + op_name='pad', + transforms={ + 'value': 'pad_value', + }, + ignores=['mode'], + custom_check=(lambda attrs: attrs.get('mode', 'constant').decode("utf-8") == 'constant', + 'split mode != constant'))(inputs, attr, params) + + @classmethod + def _impl_v2(cls, inputs, attr, params): + pad_width = [] + pads = attr.pop('pads') + dims = int(len(pads) / 2) + for i in range(dims): + pad_width.append((pads[i], pads[i+dims])) + attr['pad_width'] = pad_width + + return AttrCvt( + op_name='pad', + transforms={ + 'value': 'pad_value', + }, + ignores=['mode'], + custom_check=(lambda attrs: attrs.get('mode', 'constant').decode("utf-8") == 'constant', + 'split mode != constant'))(inputs, attr, params) + + +class ParametricSoftPlus(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + alpha = float(attr.get('alpha', 1.0)) + beta = float(attr.get('beta', 1.0)) + return _sym.log(_sym.exp(beta * inputs[0]) + 1) * alpha + + +class Prelu(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + assert len(inputs) == 2, "Prelu need 2 inputs, {} given".format( + len(inputs)) + return _sym.prelu(inputs[0], inputs[1]) + + +class Reciprocal(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return 1.0 / inputs[0] + + +class Reshape(OnnxOpConverter): + """ Operator converter for Reshape. + """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return _sym.reshape(inputs[0], shape=attr['shape']) + + @classmethod + def _impl_v5(cls, inputs, attr, params): + if inputs[1].list_output_names()[0] in params: + shape = tuple(params[inputs[1].list_output_names()[0]].asnumpy()) + out = _sym.reshape(inputs[0], shape=shape) + else: + out = _sym.reshape_like(inputs[0], inputs[1]) + + return out + +class Scale(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + scale = float(attr.get('scale', 1.0)) + return inputs[0] * scale + + +class Selu(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + alpha = float(attr.get('alpha', 1.6732)) + gamma = float(attr.get('gamma', 1.0507)) + return gamma * ( + -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0])) + + +class ScaledTanh(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + alpha = float(attr.get('alpha', 1.0)) + beta = float(attr.get('beta', 1.0)) + return _sym.tanh(beta * inputs[0]) * alpha + + +class SoftPlus(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return _sym.log(_sym.exp(inputs[0]) + 1) + + +class Softsign(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return inputs[0] / (1 + Absolute.get_converter(1)(inputs, attr, params)) + + +class Sub(Elemwise): + name = 'sub' + + +class Sum(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + # Onnx Sum Operator + for in_index in range(len(inputs) - 1): + inputs[in_index + 1] = _sym.broadcast_add(inputs[in_index], + inputs[in_index + 1]) + + return inputs[len(inputs) - 1] + + +class ThresholdedRelu(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + alpha = float(attr.get('alpha', 1.0)) + alpha_tensor = _sym.full_like(inputs[0], fill_value=float(alpha)) + return 
_sym.elemwise_mul(inputs[0], _sym.greater(inputs[0], alpha_tensor)) + +class ImageScaler(OnnxOpConverter): + + @classmethod + def _impl_v1(cls, inputs, attr, params): + channelScale = attr['scale'] + bias_attr = attr['bias'] + bias = SymbolTable().new_const(np.array(bias_attr).reshape([3, 1, 1])) + scaledChannel = _sym.__mul_scalar__(inputs[0], scalar=channelScale) + ret = _sym.broadcast_add(scaledChannel, bias) + return ret + + +def _broadcast_constraint(): + + def _broadcast_check(attrs): + if attrs.get('axis', None): + return False + return True + + return _broadcast_check, "Specifying broadcast axis not allowed." + + +def _fully_connected(opset): + + def _impl(inputs, attr, params): + # get number of channels + channels = infer_channels(inputs[1], params) + attr['units'] = channels + return AttrCvt('dense', ignores=['axis', 'axis_w'])(inputs, attr) + + return _impl + + +class Upsample(OnnxOpConverter): + """ Operator converter for Upsample (nearest mode). + """ + + @classmethod + def _impl_v9(cls, inputs, attr, params): + scales = attr.get('scales') + if not scales: + #Here we are going to higher OPSET version. + assert len(inputs) == 2, "Upsample op take 2 inputs, {} given".format(len(inputs)) + input_name = inputs[1].list_input_names()[0] + scales = params[input_name].asnumpy() + inputs = inputs[:1] + assert len(scales) == 4 and scales[0] == 1.0 and scales[1] == 1.0 and scales[2] == scales[3] + mode = attr.get('mode') + if mode == b'nearest': + method = "NEAREST_NEIGHBOR" + elif mode == b'linear': + method = "BILINEAR" + else: + raise tvm.error.OpAttributeInvalid( + 'Value {} in attribute "mode" of operator Upsample is not valid.'.format(mode)) + return _sym.upsampling(inputs[0], scale=int(scales[-1]), method=method, layout='NCHW') + + +class Shape(OnnxOpConverter): + """ Operator converter for Shape. + """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + # Result of this operator is prominently used by reshape operator. + # Just pass the input as it is so that reshape_like can be used there. + print("Shape: Differently implemented in NNVM as a bypass (dummy operator)") + return inputs[0] + +class Cast(OnnxOpConverter): + """ Operator converter for Cast. + """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + return AttrCvt(op_name='cast', transforms={'to': 'dtype'})(inputs, attr) + + @classmethod + def _impl_v5(cls, inputs, attr, params): + try: + from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE + attr['to'] = TENSOR_TYPE_TO_NP_TYPE[attr['to']] + except ImportError as e: + raise ImportError( + "Unable to import onnx.mapping which is required {}".format(e)) + return AttrCvt(op_name='cast', transforms={'to': 'dtype'})(inputs, attr) + + +class Unsqueeze(OnnxOpConverter): + """ Operator converter for Unsqueeze. + """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + for axes in attr['axes']: + inputs[0] = _sym.expand_dims(inputs[0], axis=axes, num_newaxis=1) + return inputs[0] + + +class Split(OnnxOpConverter): + """ Operator converter for Split. + """ + + @classmethod + def _impl_v1(cls, inputs, attr, params): + attr['indices_or_sections'] = [] + index = 0 + for i in attr['split'][:-1]: + index += i + attr['indices_or_sections'].append(index) + return AttrCvt( + op_name='split', + ignores=['split'])(inputs, attr, params) + + +class Slice(OnnxOpConverter): + """ Operator converter for Slice. 
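+
+    When 'axes' is sparse, starts/ends are densified over every axis up to
+    max(axes) (untouched axes get [0, INT32_MAX)) before lowering to
+    strided_slice.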
+ """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + if isinstance(attr['starts'], int): + attr['starts'] = (attr['starts'],) + attr['ends'] = (attr['ends'],) + + try: + # Update the starts and ends according to axes if required. + if isinstance(attr['axes'], int): + attr['axes'] = (attr['axes'],) + + if (max(attr['axes']) + 1) != len(attr['axes']): + new_axes = [] + new_starts = [] + new_ends = [] + pop_index = 0 + for i in range(max(attr['axes']) + 1): + if i in attr['axes']: + new_axes.append(i) + new_starts.append(attr['starts'][pop_index]) + new_ends.append(attr['ends'][pop_index]) + pop_index += 1 + else: + new_axes.append(i) + new_starts.append(0) + new_ends.append(np.iinfo(np.int32).max) + attr['axes'] = new_axes + attr['starts'] = new_starts + attr['ends'] = new_ends + except KeyError: + pass + + return AttrCvt(op_name='strided_slice', + transforms={'starts': 'begin', + 'ends': 'end'}, + ignores=['axes'])(inputs, attr) + +class Gather(OnnxOpConverter): + """ Operator converter for Gather. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + axis = attr.get('axis', 0) + return AttrCvt(op_name='take', + extras={'axis':axis})(inputs, attr) + +class LRN(OnnxOpConverter): + """ Operator converter for Local Response Normalization. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + """LRN support only NCHW format + https://github.com/onnx/onnx/blob/master/docs/Operators.md#LRN + """ + axis = 1 + alpha = attr.get('alpha', 0.0001) + beta = attr.get('beta', 0.75) + bias = attr.get('bias', 1.0) + nsize = attr.get('size') + return _sym.lrn(inputs[0], size=nsize, axis=axis, + alpha=alpha, beta=beta, bias=bias) + +class Maximum(OnnxOpConverter): + """ Operator converter for Maximum. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + if not isinstance(inputs, list) or len(inputs) < 2: + raise ValueError("Expect minimum 2 inputs") + _max = inputs[0] + for i in range(1, len(inputs)): + _max = AttrCvt(op_name='broadcast_max')([_max, inputs[i]], {}) + return _max + +class Minimum(OnnxOpConverter): + """ Operator converter for Minimum. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + if not isinstance(inputs, list) or len(inputs) < 2: + raise ValueError("Expect minimum 2 inputs") + _min = inputs[0] + for i in range(1, len(inputs)): + _min = AttrCvt(op_name='broadcast_min')([_min, inputs[i]], {}) + return _min + +class Mean(OnnxOpConverter): + """ Operator converter for Mean. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + if not isinstance(inputs, list) or len(inputs) < 2: + raise ValueError("Expect minimum 2 inputs") + count = len(inputs) + _sum = inputs[0] + for i in range(1, count): + _sum = AttrCvt(op_name='broadcast_add')([_sum, inputs[i]], {}) + return _sum / count + +class HardSigmoid(OnnxOpConverter): + """ Operator converter for HardSigmoid. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + alpha = attr.get('alpha', 0.2) + beta = attr.get('beta', 0.5) + transformX = (inputs[0] * alpha) + beta + attr = {'a_min':0, 'a_max':1} + return AttrCvt(op_name='clip')([transformX], attr) + +class ArgMax(OnnxOpConverter): + """ Operator converter for ArgMax. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + axis = attr.get('axis', 0) + keepdims = attr.get('keepdims', True) + attr = {'axis':axis, 'keepdims':keepdims} + return AttrCvt(op_name='argmax')(inputs, attr) + +class ArgMin(OnnxOpConverter): + """ Operator converter for ArgMin. 
+ """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + axis = attr.get('axis', 0) + keepdims = attr.get('keepdims', True) + attr = {'axis':axis, 'keepdims':keepdims} + return AttrCvt(op_name='argmin')(inputs, attr) + +class Softmax(OnnxOpConverter): + """ Operator converter for Softmax. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + # set default value when axis is not set in the model + if 'axis' not in attr: + attr['axis'] = 1 + return AttrCvt( + op_name='softmax', + transforms={ + 'axis': ('axis', 1), + })(inputs, attr, params) + +class ConstantFill(OnnxOpConverter): + """ Operator converter for ConstantFill. + """ + @classmethod + def _impl_v1(cls, inputs, attr, params): + is_full = True + num_inputs = len(inputs) + if 'shape' in attr: + if num_inputs > 0: + raise ImportError( + "Can't set shape and input tensor at a time") + shape = attr.pop('shape') + else: + if num_inputs == 0: + raise ImportError( + "Either shape attribute or input should be set") + if 'input_as_shape' in attr and attr['input_as_shape']: + shape = params[inputs[0].list_output_names()[0]].asnumpy() + else: + is_full = False + + if not is_full: + if 'extra_shape' in attr: + raise ImportError( + "Extra Shape not supported with fill_like") + + out = AttrCvt( + op_name='full_like', + transforms={'value': 'fill_value'}, + ignores=['dtype'])(inputs, attr) + return _sym.cast(out, dtype=attr['dtype'].decode("utf-8")) + if 'extra_shape' in attr: + shape = shape + attr.pop('extra_shape') + + return AttrCvt( + op_name='full', + transforms={'value': 'fill_value'}, + extras={'shape':shape})(inputs, attr) + +# compatible operators that do NOT require any conversion. +_identity_list = [] + + +# _convert_map defines maps of name to converter functor(callable) +# for 1 to 1 mapping, use Renamer if nothing but name is different +# use AttrCvt if attributes need to be converted +# for 1 to N mapping(composed), use custom callable functions +# for N to 1 mapping, currently not supported(?) 
+def _get_convert_map(opset): + return { + # defs/experimental + 'Identity': Renamer('copy'), + # 'Affine' + 'ThresholdedRelu': ThresholdedRelu.get_converter(opset), + 'ScaledTanh': ScaledTanh.get_converter(opset), + 'ParametricSoftplus': ParametricSoftPlus.get_converter(opset), + 'ConstantFill': ConstantFill.get_converter(opset), + # 'GivenTensorFill' + 'FC': AttrCvt('dense', ignores=['axis', 'axis_w']), + 'Scale': Scale.get_converter(opset), + # 'GRUUnit' + # 'ATen' + 'ImageScaler': ImageScaler.get_converter(opset), + # 'MeanVarianceNormalization' + # 'Crop' + # 'Embedding' + 'Upsample' : Upsample.get_converter(opset), + 'SpatialBN': BatchNorm.get_converter(opset), + + # defs/generator + # 'Constant' # Implemented + # 'RandomUniform' + # 'RandomNormal' + # 'RandomUniformLike' + # 'RandomNormalLike' + + # defs/logical + + # defs/math + 'Add': Add.get_converter(opset), + 'Sub': Sub.get_converter(opset), + 'Mul': Mul.get_converter(opset), + 'Div': Div.get_converter(opset), + 'Neg': Renamer('negative'), + 'Abs': Absolute.get_converter(opset), + 'Reciprocal': Reciprocal.get_converter(opset), + 'Floor': Renamer('floor'), + 'Ceil': Renamer('ceil'), + 'Sqrt': Renamer('sqrt'), + 'Relu': Renamer('relu'), + 'LeakyRelu': Renamer('leaky_relu'), + 'Selu': Selu.get_converter(opset), + 'Elu': Elu.get_converter(opset), + 'Exp': Renamer('exp'), + 'Log': Renamer('log'), + 'Tanh': Renamer('tanh'), + 'Pow': Renamer('broadcast_pow'), + 'PRelu': Prelu.get_converter(opset), + 'Sigmoid': Renamer('sigmoid'), + 'HardSigmoid': HardSigmoid.get_converter(opset), + 'Max': Maximum.get_converter(opset), + 'Min': Minimum.get_converter(opset), + 'Sum': Sum.get_converter(opset), + 'Mean': Mean.get_converter(opset), + 'Clip': AttrCvt('clip', transforms={'min': 'a_min', 'max': 'a_max'}), + # softmax default axis is different in onnx + 'Softmax': Softmax.get_converter(opset), + 'LogSoftmax': AttrCvt('log_softmax', {'axis': ('axis', 1)}), + # 'Hardmax' + 'Softsign': Softsign.get_converter(opset), + 'SoftPlus': SoftPlus.get_converter(opset), + 'Gemm': Gemm.get_converter(opset), + 'MatMul': Renamer('matmul'), + + # defs/nn + 'AveragePool': AveragePool.get_converter(opset), + 'MaxPool': MaxPool.get_converter(opset), + 'Conv': Conv.get_converter(opset), + 'ConvTranspose': ConvTranspose.get_converter(opset), + 'GlobalAveragePool': Renamer('global_avg_pool2d'), + 'GlobalMaxPool': Renamer('global_max_pool2d'), + 'BatchNormalization': BatchNorm.get_converter(opset), + # 'InstanceNormalization' + # 'LpNormalization' + 'Dropout': AttrCvt('dropout', {'ratio': 'rate'}, ignores=['is_test']), + 'Flatten': Renamer('flatten'), + 'LRN': LRN.get_converter(opset), + + # defs/reduction + 'ReduceMax': AttrCvt('max', {'axes': 'axis'}), + 'ReduceMin': AttrCvt('min', {'axes': 'axis'}), + 'ReduceSum': AttrCvt('sum', {'axes': 'axis'}), + 'ReduceMean': AttrCvt('mean', {'axes': 'axis'}), + # 'ReduceProd' + # 'ReduceLogSumExp' + 'ArgMax': ArgMax.get_converter(opset), + 'ArgMin': ArgMin.get_converter(opset), + + # defs/tensor + 'Cast': Cast.get_converter(opset), + 'Reshape': Reshape.get_converter(opset), + 'Concat': Renamer('concatenate'), + 'Split': Split.get_converter(opset), + 'Slice': Slice.get_converter(opset), + 'Transpose': AttrCvt('transpose', {'perm': 'axes'}), + 'Gather': Gather.get_converter(opset), + 'Squeeze': AttrCvt('squeeze', {'axes': 'axis'}), + 'Unsqueeze': Unsqueeze.get_converter(opset), + 'Pad': Pad.get_converter(opset), + 'Shape': Shape.get_converter(opset), + } + + +class GraphProto(object): + """A helper class for handling nnvm graph 
copying from pb2.GraphProto. + Definition: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto + """ + + def __init__(self): + self._nodes = {} + self._params = {} + self._renames = {} + self._num_input = 0 + self._num_param = 0 + + def from_onnx(self, graph, opset): + """Construct nnvm nodes from onnx graph. + The inputs from onnx graph is vague, only providing "1", "2"... + For convenience, we rename the `real` input names to "input_0", + "input_1"... And renaming parameters to "param_0", "param_1"... + + Parameters + ---------- + graph : onnx protobuf object + The loaded onnx graph + opset : opset version + + Returns + ------- + sym : nnvm.sym.Symbol + The returned nnvm symbol + params : dict + A dict of name: tvm.nd.array pairs, used as pretrained weights + """ + # parse network inputs to nnvm, aka parameters + for init_tensor in graph.initializer: + if not init_tensor.name.strip(): + raise ValueError("Tensor's name is required.") + self._params[init_tensor.name] = self._parse_array(init_tensor) + for i in graph.input: + # from onnx v0.2, GraphProto.input has type ValueInfoProto, + # and the name is 'i.name' + i_name = self._parse_value_proto(i) + if i_name in self._params: + # i is a param instead of input + self._num_param += 1 + self._params[i_name] = self._params.pop(i_name) + self._nodes[i_name] = _sym.Variable( + name=i_name, shape=self._params[i_name].shape) + else: + self._num_input += 1 + self._nodes[i_name] = _sym.Variable(name=i_name) + # get list of unsupported ops + convert_map = _get_convert_map(opset) + unsupported_ops = set() + for node in graph.node: + op_name = node.op_type + if op_name not in convert_map and \ + op_name != 'Constant' and \ + op_name not in _identity_list: + unsupported_ops.add(op_name) + if unsupported_ops: + msg = 'The following operators are not supported for frontend ONNX: ' + msg += ', '.join(unsupported_ops) + raise tvm.error.OpNotImplemented(msg) + # construct nodes, nodes are stored as directed acyclic graph + for node in graph.node: + op_name = node.op_type + attr = self._parse_attr(node.attribute) + inputs = [self._nodes[self._renames.get(i, i)] for i in node.input] + if op_name == "Constant": + t_proto = self._parse_attr(node.attribute)["value"] + self._num_param += 1 + self._params[node.output[0]] = self._parse_array(t_proto) + self._nodes[node.output[0]] = _sym.Variable(name=node.output[0], + shape=list(t_proto.dims)) + else: + op = self._convert_operator(op_name, inputs, attr, opset) + node_output = self._fix_outputs(op_name, node.output) + assert len(node_output) == len(op.list_output_names()), ( + "Number of output mismatch {} vs {} in {}.".format( + len(node_output), len(op.list_output_names()), op_name)) + for k, i in zip(list(node_output), range(len(node_output))): + self._nodes[k] = op[i] + # now return the outputs + out = [self._nodes[self._parse_value_proto(i)] for i in graph.output] + if len(out) > 1: + out = _sym.Group(out) + else: + out = out[0] + return out, self._params + + def _parse_value_proto(self, value_proto): + """Parse ValueProto or raw str.""" + try: + name = value_proto.name + except AttributeError: + name = value_proto + return name + + def _parse_array(self, tensor_proto): + """Grab data in TensorProto and convert to numpy array.""" + try: + from onnx.numpy_helper import to_array + except ImportError as e: + raise ImportError( + "Unable to import onnx which is required {}".format(e)) + np_array = to_array(tensor_proto).reshape(tuple(tensor_proto.dims)) + return tvm.nd.array(np_array) + + def 
_parse_attr(self, attr_proto):
+        """Convert a list of AttributeProto to a dict, with names as keys."""
+        attrs = {}
+        for a in attr_proto:
+            for f in ['f', 'i', 's']:
+                if a.HasField(f):
+                    attrs[a.name] = getattr(a, f)
+            for f in ['floats', 'ints', 'strings']:
+                if list(getattr(a, f)):
+                    assert a.name not in attrs, "Only one type of attr is allowed"
+                    attrs[a.name] = tuple(getattr(a, f))
+            for f in ['t']:
+                if a.HasField(f):
+                    attrs[a.name] = getattr(a, f)
+            for f in ['tensors']:
+                if list(getattr(a, f)):
+                    assert a.name not in attrs, "Only one type of attr is allowed"
+                    attrs[a.name] = tuple(getattr(a, f))
+            for f in ['g']:
+                if a.HasField(f):
+                    raise NotImplementedError(
+                        "Field {} is not supported in nnvm.".format(f))
+            for f in ['graphs']:
+                if list(getattr(a, f)):
+                    raise NotImplementedError(
+                        "Field {} is not supported in nnvm.".format(f))
+            if a.name not in attrs:
+                raise ValueError("Cannot parse attribute: \n{}\n.".format(a))
+        return attrs
+
+    def _convert_operator(self,
+                          op_name,
+                          inputs,
+                          attrs,
+                          opset,
+                          identity_list=None,
+                          convert_map=None):
+        """Convert from onnx operator to nnvm operator.
+        The converter must specify conversions explicitly for incompatible names,
+        and apply handlers to operator attributes.
+
+        Parameters
+        ----------
+        op_name : str
+            Operator name, such as Convolution, FullyConnected
+        inputs : list of nnvm.Symbol
+            List of input symbols.
+        attrs : dict
+            Dict of operator attributes
+        opset : int
+            Opset version
+        identity_list : list
+            List of operators that don't require conversion
+        convert_map : dict
+            Dict of name : callable, where name is the op's name that
+            requires conversion to nnvm, and callables are functions which
+            take attrs and return (new_op_name, new_attrs)
+
+        Returns
+        -------
+        sym : nnvm.Symbol
+            Converted nnvm Symbol
+        """
+        identity_list = identity_list if identity_list else _identity_list
+        convert_map = convert_map if convert_map else _get_convert_map(opset)
+        if op_name in identity_list:
+            sym = get_nnvm_op(op_name)(*inputs, **attrs)
+        elif op_name in convert_map:
+            sym = convert_map[op_name](inputs, attrs, self._params)
+        else:
+            raise tvm.error.OpNotImplemented(
+                'Operator {} is not supported in frontend ONNX.'.format(op_name))
+        return sym
+
+    def _fix_outputs(self, op_name, outputs):
+        """A hack to handle dropout or similar operators that have more than
+        one output in ONNX.
+        """
+        if op_name == 'Dropout':
+            if len(outputs) == 1:
+                return outputs
+            # TODO(zhreshold): support dropout mask?
+            outputs = outputs[:-1]
+        return outputs
+
+
+def from_onnx(model):
+    """Load onnx graph which is a python protobuf object into nnvm graph.
+    The companion parameters will be handled automatically.
+    The input names from the onnx graph are vague, only providing "1", "2"...
+    For convenience, we rename the `real` input names to "input_0",
+    "input_1"... and rename parameters to "param_0", "param_1"...
+ + Parameters + ---------- + model : protobuf object + ONNX ModelProto after ONNX v1.1.0 + + Returns + ------- + sym : nnvm.Symbol + Compatible nnvm symbol + + params : dict of str to tvm.ndarray + Dict of converted parameters stored in tvm.ndarray format + """ + g = GraphProto() + graph = model.graph + try: + opset = model.opset_import[0].version if model.opset_import else 1 + except AttributeError: + opset = 1 + sym, params = g.from_onnx(graph, opset) + return sym, params diff --git a/nnvm/python/nnvm/frontend/onnx_caffe2_utils.py b/nnvm/python/nnvm/frontend/onnx_caffe2_utils.py new file mode 100644 index 000000000000..18f9263ecc0b --- /dev/null +++ b/nnvm/python/nnvm/frontend/onnx_caffe2_utils.py @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Util functions shared by the ONNX and Caffe2 frontends.""" +from __future__ import absolute_import as _abs +from nnvm import graph as _graph +from nnvm.compiler import graph_util + + +def dimension_picker(prefix, surfix=''): + def _impl(attr): + kernel = attr['kernel_shape'] + if len(kernel) == 2: + return prefix + '2d' + surfix + raise NotImplementedError("Only 2d kernel supported.") + + return _impl + + +def dimension_constraint(): + def _dim_check(attrs): + if len(attrs['kernel_shape']) == 2: + return True + return False + + return _dim_check, "Only 2d kernel supported." + + +def infer_channels(inputs, params, transpose=False): + """A hack for getting 'channels' or 'units' since caffe2 don't provide + these attributes. We check the shape of weights provided to get the number. + """ + g = _graph.create(inputs) + shape_dict = {k: v.shape for k, v in params.items()} + _, out_shapes = graph_util.infer_shape(g, **shape_dict) + channels = out_shapes[0][0] if not transpose else out_shapes[0][1] + return channels + + +def revert_caffe2_pad(pads): + """Caffe2 require two times the normal padding.""" + if len(pads) == 4: + pads = pads[:2] + elif len(pads) == 2: + pass + else: + raise ValueError("Invalid caffe2 type padding: {}".format(pads)) + return pads diff --git a/nnvm/python/nnvm/graph.py b/nnvm/python/nnvm/graph.py new file mode 100644 index 000000000000..0d1e70f4e0f6 --- /dev/null +++ b/nnvm/python/nnvm/graph.py @@ -0,0 +1,288 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# coding: utf-8
+# pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines
+"""NNVM Graph IR API.
+
+This is a developer API that is used to manipulate and transform graphs.
+"""
+from __future__ import absolute_import as _abs
+
+import ctypes
+import json
+from ._base import _LIB
+from ._base import c_array, c_str, nn_uint, py_str, string_types
+from ._base import GraphHandle, SymbolHandle
+from ._base import check_call
+from .symbol import Variable, Symbol, Group as _Group
+
+class GraphIndex(object):
+    """Index for quickly accessing graph attributes.
+
+    Parameters
+    ----------
+    graph : Graph
+        The graph to build the index for.
+    """
+    def __init__(self, graph):
+        jgraph = json.loads(create(graph).apply("SaveJSON").json_attr("json"))
+        self.nodes = jgraph["nodes"]
+        self.entry_ptr = jgraph["node_row_ptr"]
+        self._name2nodeid = {n["name"]: i for i, n in enumerate(self.nodes)}
+        self.input_names = graph.symbol.list_input_names()
+        self.output_entries = jgraph["heads"]
+
+    @property
+    def num_nodes(self):
+        """Number of nodes in graph."""
+        return len(self.entry_ptr) - 1
+
+    @property
+    def num_node_entries(self):
+        """Number of node entries in graph."""
+        return self.entry_ptr[-1]
+
+    def node_id(self, key):
+        """Get the node index for a given key.
+
+        Parameters
+        ----------
+        key : str or int
+            The node key or index
+
+        Returns
+        -------
+        index : int
+            The node index
+        """
+        return self._name2nodeid[key]
+
+    def entry_id(self, key, value_index=0):
+        """Get the entry id of a node entry.
+
+        Parameters
+        ----------
+        key : str or int
+            The node key or index
+
+        value_index : int
+            The value index of output
+
+        Returns
+        -------
+        index : int
+            The entry index
+        """
+        if isinstance(key, (list, tuple)):
+            if len(key) != 3:
+                raise ValueError("Expect entry index to be tuple of 3 elems")
+            key, value_index, _ = key
+        idx = self.node_id(key) if isinstance(key, str) else key
+        assert value_index < self.entry_ptr[idx + 1]
+        return self.entry_ptr[idx] + value_index
+
+
+class Graph(object):
+    """Graph is the graph object that can be used to apply optimization passes.
+
+    It contains additional graph-wise attributes besides the internal symbol.
+    """
+    _tvm_tcode = 17
+
+    # pylint: disable=no-member
+    def __init__(self, handle):
+        """Initialize the function with handle
+
+        Parameters
+        ----------
+        handle : GraphHandle
+            the handle to the underlying C++ Graph
+        """
+        self.handle = handle
+        self._index = None
+
+    def __del__(self):
+        check_call(_LIB.NNGraphFree(self.handle))
+
+    def json_attr(self, key):
+        """Get attribute string from the graph.
+
+        Parameters
+        ----------
+        key : str
+            The key to get attribute from.
+
+        Returns
+        -------
+        value : str
+            The attribute value of the key; returns None if the attribute
+            does not exist.
+        """
+        ret = ctypes.c_char_p()
+        success = ctypes.c_int()
+        check_call(_LIB.NNGraphGetJSONAttr(
+            self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
+        if success.value != 0:
+            json_str = py_str(ret.value)
+            return json.loads(json_str)[1]
+        return None
+
+    def _set_symbol_list_attr(self, key, value):
+        """Set the attribute of the graph.
+
+        Parameters
+        ----------
+        key : string
+            The key of the attribute
+        value : Symbol or list of Symbol
+            The symbol (or symbols, grouped automatically) to attach
+        """
+        if isinstance(value, list):
+            value = _Group(value)
+        if not isinstance(value, Symbol):
+            raise ValueError("value needs to be a grouped symbol")
+        check_call(_LIB.NNGraphSetNodeEntryListAttr_(
+            self.handle, c_str(key), value.handle))
+
+    def _set_json_attr(self, key, value, type_name=None):
+        """Set the attribute of the graph.
+
+        Parameters
+        ----------
+        key : string
+            The key of the attribute
+        value : value
+            Any type that can be dumped to json
+        type_name : string
+            The typename registered on c++ side.
+        """
+        if isinstance(value, string_types):
+            type_name = 'str'
+        elif type_name is None:
+            raise ValueError("Need to specify type_name")
+        json_value = json.dumps([type_name, value])
+        check_call(_LIB.NNGraphSetJSONAttr(
+            self.handle, c_str(key), c_str(json_value)))
+
+    @property
+    def _tvm_handle(self):
+        return self.handle.value
+
+    @property
+    def symbol(self):
+        shandle = SymbolHandle()
+        check_call(_LIB.NNGraphGetSymbol(self.handle, ctypes.byref(shandle)))
+        return Symbol(shandle)
+
+    def json(self):
+        """Get JSON representation of the graph
+
+        Returns
+        -------
+        json : str
+            JSON representation of the graph
+        """
+        return self.apply("SaveJSON").json_attr("json")
+
+    def _tvm_graph_json(self):
+        """Get TVM graph json"""
+        return self.json()
+
+    @property
+    def index(self):
+        if not self._index:
+            self._index = GraphIndex(self)
+        return self._index
+
+    def ir(self, join_entry_attrs=None, join_node_attrs=None):
+        """Get text form of graph ir.
+
+        Parameters
+        ----------
+        join_entry_attrs : list of str
+            List of graph NodeEntry attributes to be
+            printed along each operator.
+
+        join_node_attrs : list of str
+            List of graph node attributes to be
+            printed along each operator.
+        """
+        if join_entry_attrs:
+            self._set_json_attr("join_entry_attrs", join_entry_attrs, "list_str")
+        if join_node_attrs:
+            self._set_json_attr("join_node_attrs", join_node_attrs, "list_str")
+        return self.apply("PrintGraphIR").json_attr("graphir")
+
+    def apply(self, passes):
+        """Apply passes to the graph
+
+        Parameters
+        ----------
+        passes : str or list of str
+            The passes to be applied
+
+        Returns
+        -------
+        g : Graph
+            The transformed graph.
+        """
+        if isinstance(passes, string_types):
+            passes = [passes]
+        cpass = c_array(ctypes.c_char_p, [c_str(key) for key in passes])
+        ghandle = GraphHandle()
+        npass = nn_uint(len(passes))
+        check_call(_LIB.NNGraphApplyPasses(self.handle, npass, cpass, ctypes.byref(ghandle)))
+        return Graph(ghandle)
+
+
+def load_json(json_str):
+    """Create a new graph by loading from json
+
+    Parameters
+    ----------
+    json_str : str
+        The json string
+
+    Returns
+    -------
+    graph : Graph
+        The loaded graph
+    """
+    ret = create(Variable("x"))
+    ret._set_json_attr("json", json_str)
+    return ret.apply("LoadJSON")
+
+
+def create(symbol):
+    """Create a new graph from symbol.
+
+    Parameters
+    ----------
+    symbol : Symbol
+        The symbolic graph used to create Graph object.
+
+    Returns
+    -------
+    graph : Graph
+        A generated new graph object.
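+
+    Examples
+    --------
+    A minimal usage sketch (assuming the nnvm shared library is loadable):
+
+    >>> x = Variable("x")
+    >>> g = create(x)
+    >>> ir_text = g.ir()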
+ """ + ghandle = GraphHandle() + check_call(_LIB.NNGraphCreate( + symbol.handle, ctypes.byref(ghandle))) + return Graph(ghandle) diff --git a/nnvm/python/nnvm/libinfo.py b/nnvm/python/nnvm/libinfo.py new file mode 100644 index 000000000000..b3bfc753b9c2 --- /dev/null +++ b/nnvm/python/nnvm/libinfo.py @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# coding: utf-8 +"""Information about nnvm.""" +from __future__ import absolute_import +import sys +import os +import platform + +if sys.version_info[0] == 3: + import builtins as __builtin__ +else: + import __builtin__ + +def find_lib_path(): + """Find NNNet dynamic library files. + + Returns + ------- + lib_path : list(string) + List of all found path to the libraries + """ + if hasattr(__builtin__, "NNVM_BASE_PATH"): + base_path = __builtin__.NNVM_BASE_PATH + else: + base_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) + + if hasattr(__builtin__, "NNVM_LIBRARY_NAME"): + lib_name = __builtin__.NNVM_LIBRARY_NAME + else: + lib_name = "nnvm_compiler" if sys.platform.startswith('win32') else "libnnvm_compiler" + + api_path = os.path.join(base_path, '..', '..', 'lib') + cmake_build_path_win = os.path.join(base_path, '..', '..', '..', 'build', 'Release') + cmake_build_path = os.path.join(base_path, '..', '..', '..', 'build') + install_path = os.path.join(base_path, '..', '..', '..') + dll_path = [base_path, api_path, cmake_build_path_win, cmake_build_path, + install_path] + + if sys.platform.startswith('linux') and os.environ.get('LD_LIBRARY_PATH', None): + dll_path.extend([p.strip() for p in os.environ['LD_LIBRARY_PATH'].split(":")]) + elif sys.platform.startswith('darwin') and os.environ.get('DYLD_LIBRARY_PATH', None): + dll_path.extend([p.strip() for p in os.environ['DYLD_LIBRARY_PATH'].split(":")]) + elif sys.platform.startswith('win32') and os.environ.get('PATH', None): + dll_path.extend([p.strip() for p in os.environ['PATH'].split(";")]) + + if sys.platform.startswith('win32'): + vs_configuration = 'Release' + if platform.architecture()[0] == '64bit': + dll_path.append(os.path.join(base_path, '..', '..', '..', 'build', vs_configuration)) + dll_path.append(os.path.join(base_path, '..', '..', '..', 'windows', 'x64', + vs_configuration)) + else: + dll_path.append(os.path.join(base_path, '..', '..', '..', 'build', vs_configuration)) + dll_path.append(os.path.join(base_path, '..', '..', '..', 'windows', vs_configuration)) + dll_path = [os.path.join(p, '%s.dll' % lib_name) for p in dll_path] + elif sys.platform.startswith('darwin'): + dll_path = [os.path.join(p, '%s.dylib' % lib_name) for p in dll_path] + else: + dll_path = [os.path.join(p, '%s.so' % lib_name) for p in dll_path] + + lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] + if not lib_path: + 
+        raise RuntimeError('Cannot find the NNVM library.\n' +
+                           'List of candidates:\n' + str('\n'.join(dll_path)))
+    return lib_path
+
+
+# current version
+__version__ = "0.8.0"
diff --git a/nnvm/python/nnvm/name.py b/nnvm/python/nnvm/name.py
new file mode 100644
index 000000000000..fe3d8311f1a6
--- /dev/null
+++ b/nnvm/python/nnvm/name.py
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# coding: utf-8
+"""Automatic naming support for symbolic API."""
+from __future__ import absolute_import as _abs
+
+class NameManager(object):
+    """NameManager to do automatic naming.
+
+    Users can also inherit this class to change the naming behavior.
+    """
+    current = None
+
+    def __init__(self):
+        self._counter = {}
+        self._old_manager = None
+
+    def get(self, name, hint):
+        """Get the canonical name for a symbol.
+
+        This is the default implementation.
+        When the user specifies a name, that name will be used.
+
+        Otherwise, a name is generated automatically
+        based on the hint string.
+
+        Parameters
+        ----------
+        name : str or None
+            The name the user specified.
+
+        hint : str
+            A hint string, which can be used to generate a name.
+
+        Returns
+        -------
+        full_name : str
+            A canonical name for the symbol.
+        """
+        if name:
+            return name
+        if hint not in self._counter:
+            self._counter[hint] = 0
+        name = '%s%d' % (hint, self._counter[hint])
+        self._counter[hint] += 1
+        return name
+
+    def __enter__(self):
+        self._old_manager = NameManager.current
+        NameManager.current = self
+        return self
+
+    def __exit__(self, ptype, value, trace):
+        assert self._old_manager
+        NameManager.current = self._old_manager
+
+
+class Prefix(NameManager):
+    """A name manager that always attaches a prefix to all names.
+
+    Examples
+    --------
+    >>> import nnvm as nn
+    >>> data = nn.symbol.Variable('data')
+    >>> with nn.name.Prefix('mynet_'):
+    ...     net = nn.symbol.FullyConnected(data, num_hidden=10, name='fc1')
+    >>> net.list_arguments()
+    ['data', 'mynet_fc1_weight', 'mynet_fc1_bias']
+    """
+    def __init__(self, prefix):
+        super(Prefix, self).__init__()
+        self._prefix = prefix
+
+    def get(self, name, hint):
+        name = super(Prefix, self).get(name, hint)
+        return self._prefix + name
+
+# initialize the default name manager
+NameManager.current = NameManager()
diff --git a/nnvm/python/nnvm/symbol.py b/nnvm/python/nnvm/symbol.py
new file mode 100644
index 000000000000..297d2ba7405a
--- /dev/null
+++ b/nnvm/python/nnvm/symbol.py
@@ -0,0 +1,405 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-import, protected-access
+"""Symbolic graph construction API.
+
+This namespace contains most of the registered operators.
+For a detailed list of operators, check out ``Core Tensor Operators``.
+"""
+from __future__ import absolute_import as _abs
+import sys as _sys
+import os as _os
+import ctypes as _ctypes
+from numbers import Number as _Number
+
+import numpy as np
+
+from . import _base
+from ._base import _LIB, check_call as _check_call, _FFI_MODE, _all_var_init
+from .attribute import AttrScope
+from . import _symbol_internal as _internal
+from . import contrib
+
+# Use a different version of SymbolBase.
+# When possible, use cython to speed up part of the computation.
+
+IMPORT_EXCEPT = RuntimeError if _FFI_MODE == "cython" else ImportError
+
+try:
+    if _FFI_MODE == "ctypes":
+        raise ImportError()
+    if _sys.version_info >= (3, 0):
+        from ._cy3.symbol import SymbolBase, _init_symbol_module
+    else:
+        from ._cy2.symbol import SymbolBase, _init_symbol_module
+except IMPORT_EXCEPT:
+    # pylint: disable=wrong-import-position
+    from ._ctypes.symbol import SymbolBase, _init_symbol_module
+
+
+class Symbol(SymbolBase):
+    """Symbol is the basic operation unit for symbolic graph composition."""
+    # Disable dictionary storage; a Symbol also has no parent type.
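+    # __slots__ keeps instances lightweight: all state lives in the underlying
+    # C handle, and _tvm_tcode is the type code used when a Symbol is passed
+    # through the TVM FFI.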
+    __slots__ = []
+
+    _tvm_tcode = 16
+
+    @property
+    def _tvm_handle(self):
+        return self.handle.value
+
+    def __add__(self, other):
+        """x.__add__(y) <=> x+y"""
+        if isinstance(other, Symbol):
+            return __add_symbol__(self, other)
+        if isinstance(other, _Number):
+            return __add_scalar__(self, scalar=other)
+        raise TypeError("type %s not supported" % str(type(other)))
+
+    def __radd__(self, other):
+        return self.__add__(other)
+
+    def __sub__(self, other):
+        """x.__sub__(y) <=> x-y"""
+        if isinstance(other, Symbol):
+            return __sub_symbol__(self, other)
+        if isinstance(other, _Number):
+            return __sub_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __rsub__(self, other):
+        if isinstance(other, _Number):
+            return __rsub_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __mul__(self, other):
+        """x.__mul__(y) <=> x*y"""
+        if isinstance(other, Symbol):
+            return __mul_symbol__(self, other)
+        if isinstance(other, _Number):
+            return __mul_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __div__(self, other):
+        """x.__div__(y) <=> x/y"""
+        if isinstance(other, Symbol):
+            return __div_symbol__(self, other)
+        if isinstance(other, _Number):
+            return __div_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __rdiv__(self, other):
+        if isinstance(other, _Number):
+            return __rdiv_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __lshift__(self, other):
+        """x.__lshift__(y) <=> x << y"""
+        if isinstance(other, _Number):
+            return __lshift_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __rshift__(self, other):
+        """x.__rshift__(y) <=> x >> y"""
+        if isinstance(other, _Number):
+            return __rshift_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __truediv__(self, other):
+        return self.__div__(other)
+
+    def __rtruediv__(self, other):
+        return self.__rdiv__(other)
+
+    def __pow__(self, other):
+        """x.__pow__(y) <=> x**y"""
+        if isinstance(other, Symbol):
+            return __pow_symbol__(self, other)
+        if isinstance(other, _Number):
+            return __pow_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __rpow__(self, other):
+        if isinstance(other, _Number):
+            return __rpow_scalar__(self, scalar=other)
+        raise TypeError('type %s not supported' % str(type(other)))
+
+    def __neg__(self):
+        """x.__neg__() <=> -x"""
+        return self.__mul__(-1.0)
+
+    def __copy__(self):
+        return self.__deepcopy__()
+
+    def __deepcopy__(self, _=None):
+        """Returns a deep copy of the input object."""
+        handle = _base.SymbolHandle()
+        _base.check_call(_LIB.NNSymbolCopy(self.handle,
+                                           _ctypes.byref(handle)))
+        return Symbol(handle)
+
+    def __getitem__(self, index):
+        if isinstance(index, _base.string_types):
+            idx = None
+            for i, name in enumerate(self.list_output_names()):
+                if name == index:
+                    if idx is not None:
+                        raise ValueError('There are multiple outputs with name \"%s\"' % index)
+                    idx = i
+            if idx is None:
+                raise ValueError('Cannot find output that matches name \"%s\"' % index)
+            index = idx
+        if not isinstance(index, int):
+            raise TypeError('Symbol only supports an integer index to fetch the i-th output')
+        handle = _base.SymbolHandle()
+        _check_call(_LIB.NNSymbolGetOutput(
+            self.handle, _base.nn_uint(index), _ctypes.byref(handle)))
+        return Symbol(handle=handle)
+
+    def __iter__(self):
+        return (self[i] for i in self.list_output_names())
+
+    def attr(self, key):
+        """Get an attribute string from the symbol; this function only works
+        for non-grouped symbols.
+
+        Parameters
+        ----------
+        key : str
+            The key to get the attribute from.
+
+        Returns
+        -------
+        value : str
+            The attribute value of the key; returns None if the attribute
+            does not exist.
+        """
+        ret = _ctypes.c_char_p()
+        success = _ctypes.c_int()
+        _check_call(_LIB.NNSymbolGetAttr(
+            self.handle, _base.c_str(key), _ctypes.byref(ret), _ctypes.byref(success)))
+        if success.value != 0:
+            return _base.py_str(ret.value)
+        return None
+
+    def list_attr(self, recursive=False):
+        """Get all attributes from the symbol.
+
+        Parameters
+        ----------
+        recursive : bool
+            Default `False`. When `recursive` is `True`, recursively list all
+            the attributes in the descendants. The attribute names are
+            prepended with the symbol names to avoid conflicts. If `False`,
+            then only attributes that belong to this symbol are returned, and
+            the attribute names will **not** be prepended with the symbol name.
+        """
+        size = _base.nn_uint()
+        pairs = _ctypes.POINTER(_ctypes.c_char_p)()
+        option = _ctypes.c_int(0) if recursive else _ctypes.c_int(1)
+        _check_call(_LIB.NNSymbolListAttrs(
+            self.handle, option, _ctypes.byref(size), _ctypes.byref(pairs)))
+        return {_base.py_str(pairs[i*2]): _base.py_str(pairs[i*2+1]) for i in range(size.value)}
+
+    def get_internals(self):
+        """Get a new grouped symbol whose output contains all the internal outputs of this symbol.
+
+        Returns
+        -------
+        sgroup : Symbol
+            The internals of the symbol.
+        """
+        handle = _base.SymbolHandle()
+        _check_call(_LIB.NNSymbolGetInternals(
+            self.handle, _ctypes.byref(handle)))
+        return Symbol(handle=handle)
+
+    def get_children(self):
+        """Gets a new grouped symbol whose output contains
+        inputs to output nodes of the original symbol."""
+        handle = _base.SymbolHandle()
+        _check_call(_LIB.NNSymbolGetChildren(
+            self.handle, _ctypes.byref(handle)))
+        ret = Symbol(handle=handle)
+        if not ret.list_output_names():
+            return None
+        return ret
+
+    def _get_list_copt(self, option):
+        """internal function to get list option"""
+        if option == 'all':
+            return _ctypes.c_int(0)
+        if option == 'read_only':
+            return _ctypes.c_int(1)
+        if option == 'aux_state':
+            return _ctypes.c_int(2)
+        raise ValueError("option needs to be in {'all', 'read_only', 'aux_state'}")
+
+    def list_input_variables(self, option='all'):
+        """List all the input variables in the symbol.
+
+        Parameters
+        ----------
+        option : {'all', 'read_only', 'aux_state'}, optional
+            The listing option
+            - 'all' will list all the arguments.
+            - 'read_only' lists arguments that are read by the graph.
+            - 'aux_state' lists arguments that are mutated by the graph as state.
+        Returns
+        -------
+        vars : list of symbol
+            List of all the variables
+        """
+        size = _ctypes.c_uint()
+        sarr = _ctypes.POINTER(_base.SymbolHandle)()
+        _check_call(_LIB.NNSymbolListInputVariables(
+            self.handle, self._get_list_copt(option),
+            _ctypes.byref(size), _ctypes.byref(sarr)))
+        return [Symbol(_base.SymbolHandle(sarr[i])) for i in range(size.value)]
+
+    def list_input_names(self, option='all'):
+        """List all the inputs in the symbol.
+
+        Parameters
+        ----------
+        option : {'all', 'read_only', 'aux_state'}, optional
+            The listing option
+            - 'all' will list all the arguments.
+            - 'read_only' lists arguments that are read by the graph.
+            - 'aux_state' lists arguments that are mutated by the graph as state.
+        Returns
+        -------
+        args : list of string
+            List of all the arguments.
+        """
+        size = _ctypes.c_uint()
+        sarr = _ctypes.POINTER(_ctypes.c_char_p)()
+        _check_call(_LIB.NNSymbolListInputNames(
+            self.handle, self._get_list_copt(option),
+            _ctypes.byref(size), _ctypes.byref(sarr)))
+        return [_base.py_str(sarr[i]) for i in range(size.value)]
+
+    def list_output_names(self):
+        """List all outputs in the symbol.
+
+        Returns
+        -------
+        returns : list of string
+            List of all the outputs.
+        """
+        size = _ctypes.c_uint()
+        sarr = _ctypes.POINTER(_ctypes.c_char_p)()
+        _check_call(_LIB.NNSymbolListOutputNames(
+            self.handle, _ctypes.byref(size), _ctypes.byref(sarr)))
+        return [_base.py_str(sarr[i]) for i in range(size.value)]
+
+    def debug_str(self):
+        """Get a debug string.
+
+        Returns
+        -------
+        debug_str : string
+            Debug string of the symbol.
+        """
+        debug_str = _ctypes.c_char_p()
+        _check_call(_LIB.NNSymbolPrint(
+            self.handle, _ctypes.byref(debug_str)))
+        return _base.py_str(debug_str.value)
+
+    def _add_control_deps(self, deps):
+        """Add control flow dependencies.
+        This makes the current op depend on the deps.
+        Only use when necessary; this function mutates the current symbol node.
+
+        Parameters
+        ----------
+        deps : Symbol or list of Symbol
+            The dependencies.
+        """
+        if isinstance(deps, list):
+            deps = Group(deps)
+        _check_call(_LIB.NNAddControlDeps(
+            self.handle, deps.handle))
+
+
+def Variable(name, init=None, **kwargs):
+    """Create a symbolic variable with the specified name.
+
+    Parameters
+    ----------
+    name : str
+        Name of the variable.
+    init : Symbol or numpy.ndarray
+        Symbol or numpy ndarray of the initial value for the variable.
+        Note that a symbolic initialization value must be definable through
+        InferShape, such as sym.zeros_like(v), in which v is an input or
+        parameter. Otherwise, pass a numpy ndarray instead.
+    kwargs : dict of string -> string
+        Additional attributes to set on the variable.
+
+    Returns
+    -------
+    variable : Symbol
+        The created variable symbol.
+    """
+    if not isinstance(name, _base.string_types):
+        raise TypeError('Expect a string for variable `name`')
+    handle = _base.SymbolHandle()
+    _base.check_call(_LIB.NNSymbolCreateVariable(
+        _base.c_str(name), _ctypes.byref(handle)))
+    ret = Symbol(handle)
+    attr = AttrScope.current.get(kwargs)
+    if attr:
+        ret._set_attr(**attr)
+    if init is not None:
+        if not isinstance(init, (Symbol, np.ndarray)):
+            raise TypeError('Expect a Symbol or numpy ndarray '
+                            'for variable `init`')
+        _all_var_init[name] = init
+    return ret
+
+
+def Group(symbols):
+    """Create a symbol that groups symbols together.
+
+    Parameters
+    ----------
+    symbols : list
+        List of symbols to be grouped.
+
+    Returns
+    -------
+    sym : Symbol
+        The created group symbol.
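+
+    Examples
+    --------
+    >>> import nnvm.symbol as sym
+    >>> a = sym.Variable('a')
+    >>> b = sym.Variable('b')
+    >>> c = sym.Group([a, b])
+    >>> len(c.list_output_names())
+    2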
+ """ + ihandles = [] + for sym in symbols: + if not isinstance(sym, Symbol): + raise TypeError('Expect Symbols in the list input') + ihandles.append(sym.handle) + handle = _base.SymbolHandle() + _check_call(_LIB.NNSymbolCreateGroup( + _base.nn_uint(len(ihandles)), + _base.c_array(_base.SymbolHandle, ihandles), + _ctypes.byref(handle))) + return Symbol(handle) + +# Set the real symbol class to Symbol +_init_symbol_module(Symbol, "nnvm") diff --git a/nnvm/python/nnvm/testing/__init__.py b/nnvm/python/nnvm/testing/__init__.py new file mode 100644 index 000000000000..506a9e9aa68b --- /dev/null +++ b/nnvm/python/nnvm/testing/__init__.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Utilities for testing and benchmarks""" +from __future__ import absolute_import as _abs + +from .config import ctx_list +from .utils import create_workload +from . import mobilenet +from . import mobilenet_v2 +from . import mlp +from . import resnet +from . import vgg +from . import densenet +from . import squeezenet +from . import inception_v3 +from . import dcgan +from . import dqn +from . import check_computation diff --git a/nnvm/python/nnvm/testing/check_computation.py b/nnvm/python/nnvm/testing/check_computation.py new file mode 100644 index 000000000000..63b3a17880a2 --- /dev/null +++ b/nnvm/python/nnvm/testing/check_computation.py @@ -0,0 +1,573 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
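+#
+# The helpers below back the NNVM operator tests: infer_shapes_dtypes runs
+# shape and dtype inference on a graph, graph_to_function compiles a graph
+# into a plain callable for a given target, and check_function compares a
+# symbol's outputs and gradients against reference NumPy implementations and
+# against numerically computed gradients.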
+# pylint: disable=cell-var-from-loop,no-else-return +"""Helper utilities to check functions and their gradients.""" +from __future__ import absolute_import as _abs + +import logging +import numpy as np + +import tvm +from tvm.contrib import graph_runtime +from tvm.testing import check_numerical_grads +from tvm import relay + +import nnvm +from nnvm.compiler import graph_util +from nnvm.compiler.graph_attr import TCODE_TO_DTYPE, DTYPE_TO_TCODE +from nnvm.to_relay import to_relay +from .config import ctx_list + +def infer_shapes_dtypes(graph, shape=None, dtype=None, fallback_dtype=None): + """Runs dtype and shape inference passes on a graph and returns the resulting graph + along with the inferred information. + + Parameters + ---------- + graph : nnvm.graph.Graph + A graph we want to run inference on. + + shape : Dict[str, Tuple[int]] or Tuple[int], optional + A dict mapping input variable names to shapes. + By default shapes will be inferred from variables' attributes. + Note that this parameter takes precedence over variables' attributes. + + dtype : Dict[str, str] or str, optional + A dict mapping input variable names to dtypes, or just a single dtype. + By default dtypes will be inferred from variables' attributes. + Note that this parameter takes precedence over variables' attributes. + + fallback_dtype : str, optional + A dtype that will be used for variables whose dtype can't be inferred from other + variables' dtypes. + + Returns + ------- + graph : nnvm.graph.Graph + The resulting graph with dtype and shape information on its nodes. + + input_shapes : Dict[str, Tuple[int]] + The inferred shapes of input variables merged with the `shape` dictionary. + + input_dtypes : Dict[str, str] + The inferred dtypes of input variables merged with the `dtype` dictionary. + + output_shapes : List[Tuple[int]] + The inferred shapes of outputs. + + output_dtypes : List[str] + The inferred dtypes of outputs. 
+    """
+    # Preprocess input parameters
+    if shape is None:
+        provided_shapes = {}
+    elif isinstance(shape, dict):
+        provided_shapes = shape
+    else:
+        provided_shapes = {x: shape for x in graph.symbol.list_input_variables()}
+
+    if dtype is None:
+        provided_dtypes = {}
+    elif isinstance(dtype, dict):
+        provided_dtypes = dtype
+    else:
+        provided_dtypes = {x: dtype for x in graph.symbol.list_input_variables()}
+
+    provided_shapes = _dict_var_to_dict_str(provided_shapes)
+    provided_dtypes = _dict_var_to_dict_str(provided_dtypes)
+
+    # The graph may already contain shape and dtype info, so extract it and merge with
+    # the user-specified shapes and dtypes (the user-specified ones win on conflict)
+    preexisting_shapes = graph.json_attr('shape')
+    preexisting_dtypes = graph.json_attr('dtype')
+
+    if preexisting_shapes:
+        for x in graph.index.input_names:
+            if x not in provided_shapes:
+                x_shape = tuple(preexisting_shapes[graph.index.entry_id(x)])
+                provided_shapes[x] = x_shape
+
+    if preexisting_dtypes:
+        for x in graph.index.input_names:
+            if x not in provided_dtypes:
+                x_dtype = TCODE_TO_DTYPE[preexisting_dtypes[graph.index.entry_id(x)]]
+                provided_dtypes[x] = x_dtype
+
+    # Perform inference
+    nnvm.compiler.graph_attr.set_shape_inputs(graph, provided_shapes)
+    nnvm.compiler.graph_attr.set_dtype_inputs(graph, provided_dtypes)
+
+    graph = graph.apply('InferShape').apply('InferType')
+
+    inferred_shapes = graph.json_attr('shape')
+    inferred_dtypes = graph.json_attr('dtype')
+
+    index = graph.index
+
+    output_shapes = [tuple(inferred_shapes[index.entry_id(entry)])
+                     for entry in index.output_entries]
+    output_dtypes = [TCODE_TO_DTYPE[inferred_dtypes[index.entry_id(entry)]]
+                     for entry in index.output_entries]
+
+    # Postprocess the results
+    input_shapes = provided_shapes.copy()
+    input_dtypes = provided_dtypes.copy()
+
+    for x in graph.symbol.list_input_variables():
+        x_name = x.attr('name')
+        x_entry_id = graph.index.entry_id(x_name)
+        input_shapes[x_name] = tuple(inferred_shapes[x_entry_id])
+        input_dtypes[x_name] = TCODE_TO_DTYPE[inferred_dtypes[x_entry_id]]
+
+    # Merge the original user-specified shapes in case some of them are specified for
+    # non-existing variables
+    for x_name, x_shape in provided_shapes.items():
+        x_shape = tuple(x_shape)
+        if input_shapes.get(x_name, x_shape) != x_shape:
+            raise RuntimeError("Inferred shape differs from the provided shape.\n"
+                               "Provided shapes: {}\nInferred shapes: {}"
+                               .format(provided_shapes, input_shapes))
+        else:
+            input_shapes[x_name] = x_shape
+
+    # Merge the original user-specified dtypes
+    for x_name, x_dtype in provided_dtypes.items():
+        if not isinstance(x_dtype, str):
+            x_dtype = TCODE_TO_DTYPE[x_dtype]
+        if input_dtypes.get(x_name, x_dtype) != x_dtype:
+            raise RuntimeError("Inferred dtype differs from the provided dtype.\n"
+                               "Provided dtypes: {}\nInferred dtypes: {}"
+                               .format(provided_dtypes, input_dtypes))
+        else:
+            input_dtypes[x_name] = x_dtype
+
+    # If some dtypes weren't inferred and there is a fallback dtype, assign it to those
+    # variables and repeat the inference
+    if fallback_dtype is not None and not all(input_dtypes.values()):
+        input_dtypes = {x: input_dtypes[x] if input_dtypes[x] else fallback_dtype
+                        for x in input_dtypes}
+        return infer_shapes_dtypes(graph, input_shapes, input_dtypes, fallback_dtype=None)
+
+    return graph, input_shapes, input_dtypes, output_shapes, output_dtypes
+
+def graph_to_function(graph, target, ctx, shape=None, dtype=None):
+    """Convert a graph to a function taking keyword args and returning a
list of results
+    (both args and results are numpy arrays).
+
+    Example::
+
+        fun = graph_to_function(graph, 'llvm', tvm.cpu(0))
+        [res1, res2] = fun(x=np.zeros((1,2)), y=np.zeros((1,)))
+
+    Parameters
+    ----------
+    graph : nnvm.graph.Graph
+        A graph we want to convert to a function.
+
+    target : str or :any:`tvm.target.Target`
+        The build target
+
+    ctx : TVMContext
+        The context to deploy the module.
+
+    shape : Dict[str, Tuple[int]], optional
+        A dict mapping input variable names to shapes.
+        By default shapes will be inferred from variables' attributes.
+        Note that this parameter takes precedence over variables' attributes.
+
+    dtype : Dict[str, str] or str, optional
+        A dict mapping input variable names to dtypes, or just a single dtype.
+        By default dtypes will be inferred from variables' attributes.
+        Note that this parameter takes precedence over variables' attributes.
+
+    Returns
+    -------
+    function : Callable[..., List[numpy.ndarray]]
+    """
+    # Infer missing shapes and dtypes
+    graph, shape, dtype, output_shapes, output_dtypes = \
+        infer_shapes_dtypes(graph, shape=shape, dtype=dtype)
+
+    if None in dtype.values():
+        raise ValueError("Input variables with no type: {}".format(dtype))
+
+    if not all(shape.values()):
+        raise ValueError("Input variables with no shape: {}".format(shape))
+
+    compute_graph, lib, params = nnvm.compiler.build(graph, target, shape=shape, dtype=dtype)
+    module = graph_runtime.create(compute_graph, lib, ctx)
+
+    if params:
+        module.set_input(**params)
+
+    def run(**kwargs):
+        module.run(**kwargs)
+        res = []
+        for i, (o_shape, o_dtype) in enumerate(zip(output_shapes, output_dtypes)):
+            res.append(module.get_output(i, tvm.nd.empty(o_shape, o_dtype)).asnumpy())
+        return res
+
+    return run
+
+def _dict_var_to_dict_str(dictionary):
+    """Convert a Dict[nnvm.Symbol, T] to Dict[str, T]"""
+    if isinstance(dictionary, dict):
+        return {s.attr('name') if isinstance(s, nnvm.symbol.Symbol) else s:
+                dictionary[s] for s in dictionary}
+    else:
+        return dictionary
+
+def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
+                   shape=None, dtype=None, in_range=None, values=None,
+                   exclude_targets=None, only_targets=None,
+                   additional_params=None,
+                   numerical_grads=None, numerical_grads_params=None,
+                   atol=1e-5, rtol=1e-5, quiet=False):
+    """Compute the function and/or its gradients on a random input and raise
+    an exception if the result doesn't match the reference implementation.
+
+    Parameters
+    ----------
+    symbol : nnvm.Symbol
+        A symbol representing the output.
+
+    forward : Callable[..., List[numpy.ndarray]], optional
+        A reference implementation to compare with.
+
+    backward : Callable[..., List[numpy.ndarray] or Dict[str, numpy.ndarray]], optional
+        A reference implementation of gradients. Besides the normal inputs, it
+        should also accept head_grads, which is a list of gradients of some
+        scalar wrt the outputs, or just a single gradient if there is only one
+        output. Should return either a dict mapping input variable names to
+        the respective gradients or a list of gradients wrt variables from
+        grad_input_vars in exactly the same order (in alphabetical order by
+        default).
+
+    grad_input_vars : List[nnvm.Symbol or str], optional
+        A list of variables with respect to which the gradients will be computed.
+        None (default) means that all input variables will be used in alphabetical order.
+
+    shape : Dict[nnvm.Symbol or str, Tuple[int]] or Tuple[int], optional
+        A dict mapping input variable names to shapes, or just a single shape.
+ By default shapes will be inferred from variables' attributes (see the Examples). + Note that this parameter takes precedence over variables' attributes. + + dtype : Dict[nnvm.Symbol or str, str] or str, optional + A dict mapping input variable names to dtypes, or just a single dtype. + By default dtypes will be inferred from variables' attributes (see the Examples). + If dtypes cannot be inferred for some variables then float32 will be used as a fallback. + Note that this parameter takes precedence over variables' attributes. + + in_range : Dict[nnvm.Symbol or str, (float, float)] or (float, float), optional + A dict mapping input variable names to ranges or just a single range + (the same for all variables). Input values will be generated from + uniform distributions on these ranges. `head_grads` can also be + assigned a range this way. + + values : Dict[nnvm.Symbol or str, numpy.ndarray], optional + A dict explicitly providing values for some variables instead of random generation. + + exclude_targets : Set[str], optional + Skip compiling and running anything for these targets. + + only_targets : Set[str], optional + Test only for those targets from `ctx_list()` that are also in this set. + + additional_params : dict, optional + A dict of additional parameters which will be passed to forward and backward. + + numerical_grads : bool or 'if_possible', optional + Whether to additionally check against numerically computed gradients. If 'if_possible' or + None is passed (which is the default) then it will try to create a gradient computation + graph and then check gradients numerically only if this graph can be created (i.e. if there + are some operations with unimplemented gradients, it will just issue a warning). + Checking against numerical gradients is done via the `check_numerical_grads` function. + + numerical_grads_params : dict, optional + Additional parameters for `check_numerical_grads`. + + atol : float, optional + Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients. + + rtol : float, optional + Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients. + + quiet : bool, optional + Don't dump additional information to stdout on failure. + + Examples + -------- + .. code-block:: python + + x = sym.Variable("x", shape=(1, 2)) + y = sym.Variable("y", shape=(1, 2)) + + # check the function and its gradients both numerically and using a reference function + check_function(x + 2*y, + lambda x, y: x + 2*y, + lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads}) + + # just check gradients numerically + check_function(x + 2*y, numerical_grads=True) + + # just check the forward computation + check_function(x + 2*y, lambda x, y: x + 2*y, numerical_grads=False) + + # specifying dtype + check_function(x + 2*y, lambda x, y: x + 2*y, dtype='float64') + + # dtypes can also be specified during variable creation with dtype codes + x = sym.Variable("x", dtype=0) + check_function(x + 1, shape=(2, 2), numerical_grads=True) + """ + # validate and preprocess the input params + if numerical_grads is None and forward is None and backward is None: + raise ValueError("No reference function was passed to check_function. 
If you only want to " + "check gradients numerically, pass numerical_grads=True explicitly.") + + if numerical_grads is None: + numerical_grads = 'if_possible' + + if numerical_grads not in [False, True, 'if_possible']: + raise ValueError("numerical_grads must be a bool or 'if_possible', not {}" + .format(numerical_grads)) + + if additional_params is None: + additional_params = {} + + input_vars = symbol.list_input_variables() + input_dict = {x.attr('name'): x for x in input_vars} + + if grad_input_vars is None: + grad_input_vars = sorted(input_vars, key=lambda x: x.attr('name')) + else: + grad_input_vars = [input_dict[x] if isinstance(x, str) else x for x in grad_input_vars] + + in_range = _dict_var_to_dict_str(in_range) + values = _dict_var_to_dict_str(values) + + out_len = len(symbol.list_output_names()) + + # Infer the output shapes and dtypes, and preprocess the shape and dtype params + forward_graph, shape, dtype, out_shapes, out_dtypes = \ + infer_shapes_dtypes(nnvm.graph.create(symbol), shape=shape, dtype=dtype, + fallback_dtype='float32') + + if not all(out_shapes) or not all(out_dtypes): + if not quiet: + print(forward_graph.ir(join_node_attrs=['shape', 'dtype'])) + raise ValueError("Could not infer shapes or dtypes for outputs.\n" + "out_shapes = {}\nout_dtypes = {}".format(out_shapes, out_dtypes)) + + backward_graph = None + + # If we want gradients, we have to recreate the graph, but now with gradient computations + # Note that here we need out_shapes for defining the shape of head grads, so we have to + # create the graph twice + if backward is not None or numerical_grads: + try: + head_grads_symbols = [nnvm.symbol.Variable("head_grads_" + str(i), + shape=out_shapes[i], + dtype=DTYPE_TO_TCODE[out_dtypes[i]]) + for i in range(out_len)] + grad_symbols = graph_util.gradients([symbol], grad_input_vars, + grad_ys=head_grads_symbols) + # Sometimes grads do not depend on head_grads, so head_grads does not appear + # in the variable list; adding it manually prevents this, making things a bit easier + backward_graph = \ + nnvm.graph.create(nnvm.symbol.Group([symbol] + grad_symbols + head_grads_symbols)) + + backward_graph, shape, dtype, out_shapes, out_dtypes = \ + infer_shapes_dtypes(backward_graph, shape=shape, dtype=dtype, + fallback_dtype='float32') + except nnvm._base.NNVMError as err: + if backward is None and numerical_grads == "if_possible": + logging.warning("Won't check gradients because: %s", str(err).split('\n', 1)[0]) + numerical_grads = False + backward_graph = None + else: + raise + + main_graph = backward_graph if backward_graph is not None else forward_graph + + # Generate random data for inputs (including head_grads) + + np_inputs = {} + + for x in main_graph.symbol.list_input_variables(): + x_name = x.attr('name') + x_shape = shape[x_name] + x_dtype = dtype[x_name] + + if values is not None and x_name in values: + np_inputs[x_name] = values[x_name].astype(x_dtype) + continue + + low = -1.0 + high = 1.0 + if in_range is not None: + if isinstance(in_range, dict): + if x_name in in_range: + low = in_range[x_name][0] + high = in_range[x_name][1] + else: + low = in_range[0] + high = in_range[1] + + np_inputs[x_name] = np.random.uniform(size=x_shape, low=low, high=high).astype(x_dtype) + + np_inputs_without_head_grads = {k: np_inputs[k] for k in np_inputs + if not k.startswith('head_grads_')} + + nothing_was_done = True + + # Compute and compare the results + for target, ctx in ctx_list(): + if exclude_targets is not None: + if target in exclude_targets or str(target) in 
exclude_targets:
+                logging.info("Skipping target = %s, ctx = %s", target, ctx)
+                continue
+        if only_targets is not None:
+            if target not in only_targets and str(target) not in only_targets:
+                logging.info("Skipping target = %s, ctx = %s", target, ctx)
+                continue
+
+        logging.info("Checking computation on target = %s, ctx = %s", target, ctx)
+
+        debug_stage = None
+
+        try:
+            nnvm_res = None
+
+            debug_stage = "compiling"
+            main_function = graph_to_function(main_graph, target, ctx)
+
+            # nnvm_res contains the output and gradients (if they are needed)
+            debug_stage = "running"
+            nnvm_res = main_function(**np_inputs)
+
+            try:
+                logging.debug("checking to_relay conversion")
+                inputs = np_inputs_without_head_grads.copy()
+                func, inputs = to_relay(main_graph, shape, dtype, params=inputs)
+                with relay.build_config(opt_level=3):
+                    graph, lib, params = relay.build(func, target=target)
+                m = graph_runtime.create(graph, lib, ctx)
+                m.set_input(**inputs)
+                m.set_input(**params)
+                m.run()
+                for i in range(out_len):
+                    relay_out = m.get_output(i).asnumpy()
+                    tvm.testing.assert_allclose(nnvm_res[i], relay_out, atol=atol, rtol=rtol)
+            except NotImplementedError as err:
+                # the NNVM operator is not supported yet
+                logging.warning(err)
+
+            if backward_graph is not None:
+                grad_var_names = [x.attr('name') for x in grad_input_vars]
+                nnvm_grads = {x: v for x, v in zip(grad_var_names, nnvm_res[out_len:])}
+
+            if forward is not None:
+                nothing_was_done = False
+                debug_stage = "checking forward computation"
+                logging.debug(debug_stage)
+
+                params = {}
+                params.update(np_inputs_without_head_grads)
+                params.update(additional_params)
+                numpy_res = forward(**params)
+
+                if isinstance(numpy_res, tuple):
+                    numpy_res = list(numpy_res)
+
+                if not isinstance(numpy_res, list):
+                    numpy_res = [numpy_res]
+
+                if len(numpy_res) != out_len:
+                    raise ValueError("Forward function returned {} values, but "
+                                     "the nnvm graph returns {} values"
+                                     .format(len(numpy_res), out_len))
+
+                for i in range(out_len):
+                    tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)
+
+            if backward is not None:
+                nothing_was_done = False
+                debug_stage = "checking gradients"
+                logging.debug(debug_stage)
+
+                np_head_grads = [np_inputs["head_grads_" + str(i)] for i in range(out_len)]
+
+                if out_len == 1:
+                    np_head_grads = np_head_grads[0]
+
+                params = {'head_grads': np_head_grads}
+                params.update(np_inputs_without_head_grads)
+                params.update(additional_params)
+                numpy_grads = backward(**params)
+
+                if not isinstance(numpy_grads, dict):
+                    if isinstance(numpy_grads, tuple):
+                        numpy_grads = list(numpy_grads)
+                    if not isinstance(numpy_grads, list):
+                        numpy_grads = [numpy_grads]
+                    numpy_grads = {x: v for x, v in zip(grad_var_names, numpy_grads)}
+                    if len(numpy_grads) != len(grad_var_names):
+                        raise ValueError("The backward function returns a list of gradients which "
+                                         "does not contain gradients for these variables: {}"
+                                         .format(set(grad_var_names) - set(numpy_grads)))
+
+                for x_name in numpy_grads:
+                    tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
+                                                atol=atol, rtol=rtol)
+
+            if numerical_grads:
+                nothing_was_done = False
+                debug_stage = "checking gradients numerically"
+                logging.debug(debug_stage)
+
+                forward_function = graph_to_function(forward_graph, target, ctx)
+
+                # Since the result may be non-scalar, we have to put another operation on top,
+                # so we just multiply by the randomly generated head_grads and then sum everything.
+                # This way we can reuse the gradient values which have already been computed.
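+                # Concretely, scalar_function computes s = sum_i dot(head_grads_i, f_i(x));
+                # the analytical gradient of s wrt each input is exactly the
+                # head_grads-weighted gradient already stored in nnvm_grads, so the
+                # numerical and symbolic results are directly comparable.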
+ def scalar_function(**kwargs): + res = forward_function(**kwargs) + return np.sum([np.dot(np_inputs['head_grads_' + str(i)].ravel(), res[i].ravel()) + for i in range(out_len)]) + + if numerical_grads_params is None: + numerical_grads_params = {} + + check_numerical_grads( + scalar_function, + input_values=np_inputs_without_head_grads, + grad_values=nnvm_grads, + **numerical_grads_params) + + except: + if not quiet: + print("\ncheck_function failed while {}, here is the main graph" + .format(debug_stage)) + print(main_graph.ir(join_node_attrs=['shape', 'dtype'])) + if nnvm_res is not None: + print("Generated inputs:") + print(np_inputs) + print() + raise + + if nothing_was_done: + logging.warning("Nothing was done in check_function. Check ctx_list().") diff --git a/nnvm/python/nnvm/testing/config.py b/nnvm/python/nnvm/testing/config.py new file mode 100644 index 000000000000..175478b6e14a --- /dev/null +++ b/nnvm/python/nnvm/testing/config.py @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Configuration about tests""" +from __future__ import absolute_import as _abs + +import os +import tvm + +def ctx_list(): + """Get context list for testcases""" + device_list = os.environ.get("NNVM_TEST_TARGETS", "") + device_list = (device_list.split(",") if device_list + else ["llvm", "cuda"]) + device_list = set(device_list) + res = [(device, tvm.context(device, 0)) for device in device_list] + return [x for x in res if x[1].exist] diff --git a/nnvm/python/nnvm/testing/dcgan.py b/nnvm/python/nnvm/testing/dcgan.py new file mode 100644 index 000000000000..714b3fbb1301 --- /dev/null +++ b/nnvm/python/nnvm/testing/dcgan.py @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=unused-argument +""" +Symbol of the generator of DCGAN + +Adopted from: +https://github.com/tqchen/mxnet-gan/blob/master/mxgan/generator.py + +Reference: +Radford, Alec, Luke Metz, and Soumith Chintala. +"Unsupervised representation learning with deep convolutional generative adversarial networks." 
+arXiv preprint arXiv:1511.06434 (2015). +""" +from .. import symbol as sym +from . utils import create_workload + +def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)): + """a deconv layer that enlarges the feature map""" + target_shape = (oshape[-2], oshape[-1]) + + pad_y = (kshape[0] - 1) // 2 + pad_x = (kshape[1] - 1) // 2 + adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0] + adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1] + + net = sym.conv2d_transpose(data, + kernel_size=kshape, + strides=stride, + channels=oshape[0], + padding=(pad_y, pad_x), + output_padding=(adj_y, adj_x), + use_bias=False, + name=name) + return net + +def deconv2d_bn_relu(data, prefix, **kwargs): + """a block of deconv + batch norm + relu""" + eps = 1e-5 + 1e-12 + net = deconv2d(data, name="%s_deconv" % prefix, **kwargs) + net = sym.batch_norm(net, epsilon=eps, name="%s_bn" % prefix) + net = sym.relu(net, name="%s_act" % prefix) + return net + +def get_symbol(oshape, ngf=128, code=None): + """get symbol of dcgan generator""" + assert oshape[-1] == 64, "Only support 64x64 image" + assert oshape[-2] == 64, "Only support 64x64 image" + + code = sym.Variable("data") if code is None else code + net = sym.dense(code, name="g1", units=4*4*ngf*8, use_bias=False) + net = sym.relu(net) + # 4 x 4 + net = sym.reshape(net, shape=(-1, ngf * 8, 4, 4)) + # 8 x 8 + net = deconv2d_bn_relu( + net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2") + # 16x16 + net = deconv2d_bn_relu( + net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3") + # 32x32 + net = deconv2d_bn_relu( + net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4") + # 64x64 + net = deconv2d( + net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv") + net = sym.tanh(net) + return net + + +def get_workload(batch_size, oshape=(3, 64, 64), ngf=128, random_len=100, dtype="float32"): + """Get benchmark workload for a DCGAN generator + + Parameters + ---------- + batch_size : int + The batch size used in the model + oshape : tuple, optional + The shape of output image, layout="CHW" + ngf: int, optional + The number of final feature maps in the generator + random_len : int, optional + The length of random input + dtype : str, optional + The data type + + Returns + ------- + net : nnvm.symbol + The computational graph + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(oshape=oshape, ngf=ngf) + return create_workload(net, batch_size, (random_len, ), dtype) diff --git a/nnvm/python/nnvm/testing/densenet.py b/nnvm/python/nnvm/testing/densenet.py new file mode 100644 index 000000000000..92ba2bf46a8f --- /dev/null +++ b/nnvm/python/nnvm/testing/densenet.py @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+DenseNet, load model from gluon model zoo
+
+Reference:
+Huang, Gao, et al. "Densely Connected Convolutional Networks." CVPR 2017
+"""
+
+from .utils import create_workload
+from ..frontend.mxnet import _from_mxnet_impl
+
+def get_workload(batch_size, num_classes=1000, num_layers=121, dtype="float32"):
+    """Get benchmark workload for DenseNet
+
+    Parameters
+    ----------
+    batch_size : int
+        The batch size used in the model
+
+    num_classes : int, optional
+        Number of classes
+
+    num_layers : int, optional
+        The number of layers
+
+    dtype : str, optional
+        The data type
+
+    Returns
+    -------
+    net : nnvm.Symbol
+        The computational graph
+
+    params : dict of str to NDArray
+        The parameters.
+    """
+    import mxnet as mx
+    from mxnet.gluon.model_zoo.vision import get_model
+
+    image_shape = (1, 3, 224, 224)
+
+    block = get_model('densenet%d' % num_layers, classes=num_classes, pretrained=False)
+
+    data = mx.sym.Variable('data')
+    sym = block(data)
+    sym = mx.sym.SoftmaxOutput(sym)
+
+    net = _from_mxnet_impl(sym, {})
+
+    return create_workload(net, batch_size, image_shape[1:], dtype)
diff --git a/nnvm/python/nnvm/testing/dqn.py b/nnvm/python/nnvm/testing/dqn.py
new file mode 100644
index 000000000000..b04475efa32a
--- /dev/null
+++ b/nnvm/python/nnvm/testing/dqn.py
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Symbol of Nature DQN
+
+Reference:
+Mnih, Volodymyr, et al. "Human-level control through deep reinforcement learning."
+Nature 518.7540 (2015): 529.
+"""
+
+from .. import symbol as sym
+from . 
utils import create_workload + +def get_symbol(num_actions=18): + """get symbol of nature dqn""" + data = sym.Variable(name='data') + net = sym.conv2d(data, kernel_size=(8, 8), strides=(4, 4), padding=(0, 0), + channels=32, name='conv1') + net = sym.relu(net, name='relu1') + net = sym.conv2d(net, kernel_size=(4, 4), strides=(2, 2), padding=(0, 0), + channels=64, name='conv2') + net = sym.relu(net, name='relu2') + net = sym.conv2d(net, kernel_size=(3, 3), strides=(1, 1), padding=(0, 0), + channels=64, name='conv3') + net = sym.relu(net, name='relu3') + net = sym.flatten(net, name='flatten') + net = sym.dense(net, units=512, name='fc4') + net = sym.relu(net, name='relu4') + net = sym.dense(net, units=num_actions, name='fc5') + + return net + + +def get_workload(batch_size, num_actions=18, image_shape=(4, 84, 84), dtype="float32"): + """Get benchmark workload for a Deep Q Network + + Parameters + ---------- + batch_size : int + The batch size used in the model + num_actions : int, optional + Number of actions + image_shape : tuple, optional + The input image shape + dtype : str, optional + The data type + + Returns + ------- + net : nnvm.symbol + The computational graph + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(num_actions=num_actions) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/inception_v3.py b/nnvm/python/nnvm/testing/inception_v3.py new file mode 100644 index 000000000000..e1614d7a9fed --- /dev/null +++ b/nnvm/python/nnvm/testing/inception_v3.py @@ -0,0 +1,270 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Inception V3, suitable for images with around 299 x 299 + +Reference: +Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." +arXiv preprint arXiv:1512.00567 (2015). + +Adopted from https://github.com/apache/incubator-mxnet/blob/ + master/example/image-classification/symbols/inception-v3.py +""" +# pylint: disable=invalid-name,missing-docstring,unused-argument +from .. 
import symbol as sym +from .utils import create_workload + +def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''): + conv = sym.conv2d(data=data, channels=num_filter, kernel_size=kernel, + strides=stride, padding=pad, use_bias=False, + name='%s%s_conv2d' % (name, suffix)) + bn = sym.batch_norm(data=conv, name='%s%s_batchnorm' % (name, suffix), epsilon=2e-5) + act = sym.relu(data=bn, name='%s%s_relu' % (name, suffix)) + return act + +def Pooling(data, kernel, stride, pad, pool_type, name): + if pool_type == 'max': + return sym.max_pool2d(data=data, pool_size=kernel, strides=stride, padding=pad, name=name) + if pool_type == 'avg': + return sym.avg_pool2d(data=data, pool_size=kernel, strides=stride, padding=pad, name=name, + count_include_pad=True) + raise ValueError("Invalid pooling type: " + pool_type) + +def Inception7A(data, + num_1x1, + num_3x3_red, num_3x3_1, num_3x3_2, + num_5x5_red, num_5x5, + pool, proj, + name): + tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name)) + tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv') + tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), + suffix='_conv_1') + tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv') + tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), + suffix='_conv_1') + tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), + suffix='_conv_2') + pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, + name=('%s_pool_%s_pool' % (pool, name))) + + cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv') + concat = sym.concatenate(*[tower_1x1, tower_5x5, tower_3x3, cproj], + name='ch_concat_%s_chconcat' % name) + return concat + +# First Downsample +def Inception7B(data, + num_3x3, + num_d3x3_red, num_d3x3_1, num_d3x3_2, + pool, + name): + tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), + name=('%s_conv' % name)) + tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv') + tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), + name=('%s_tower' % name), suffix='_conv_1') + tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), + name=('%s_tower' % name), suffix='_conv_2') + pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0, 0), pool_type="max", + name=('max_pool_%s_pool' % name)) + concat = sym.concatenate(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name) + return concat + +def Inception7C(data, + num_1x1, + num_d7_red, num_d7_1, num_d7_2, + num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4, + pool, proj, + name): + tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) + tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv') + tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), + name=('%s_tower' % name), suffix='_conv_1') + tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), + name=('%s_tower' % name), suffix='_conv_2') + tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), + name=('%s_tower_1' % name), suffix='_conv_1') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), 
pad=(0, 3), + name=('%s_tower_1' % name), suffix='_conv_2') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), + name=('%s_tower_1' % name), suffix='_conv_3') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), + name=('%s_tower_1' % name), suffix='_conv_4') + pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, + name=('%s_pool_%s_pool' % (pool, name))) + cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), + name=('%s_tower_2' % name), suffix='_conv') + # concat + concat = sym.concatenate(*[tower_1x1, tower_d7, tower_q7, cproj], + name='ch_concat_%s_chconcat' % name) + return concat + +def Inception7D(data, + num_3x3_red, num_3x3, + num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, + pool, + name): + tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), + suffix='_conv') + tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), + name=('%s_tower' % name), suffix='_conv_1') + tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), + suffix='_conv') + tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), + name=('%s_tower_1' % name), suffix='_conv_1') + tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), + name=('%s_tower_1' % name), suffix='_conv_2') + tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), + name=('%s_tower_1' % name), suffix='_conv_3') + pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, pad=(0, 0), + name=('%s_pool_%s_pool' % (pool, name))) + # concat + concat = sym.concatenate(*[tower_3x3, tower_d7_3x3, pooling], + name='ch_concat_%s_chconcat' % name) + return concat + +def Inception7E(data, + num_1x1, + num_d3_red, num_d3_1, num_d3_2, + num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, + pool, proj, + name): + tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) + tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv') + tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), + name=('%s_tower' % name), suffix='_mixed_conv') + tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), + name=('%s_tower' % name), suffix='_mixed_conv_1') + tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), + suffix='_conv') + tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), + name=('%s_tower_1' % name), suffix='_conv_1') + tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), + name=('%s_tower_1' % name), suffix='_mixed_conv') + tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), + name=('%s_tower_1' % name), suffix='_mixed_conv_1') + pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, + name=('%s_pool_%s_pool' % (pool, name))) + cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), + suffix='_conv') + # concat + concat = sym.concatenate( + *[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], + name='ch_concat_%s_chconcat' % name) + return concat + + +def get_symbol(num_classes=1000, **kwargs): + data = sym.Variable(name="data") + # stage 1 + conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv") + conv_1 
= Conv(conv, 32, kernel=(3, 3), name="conv_1") + conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2") + pool = Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", pad=(0, 0), + name="pool") + # stage 2 + conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3") + conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4") + pool1 = Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", pad=(0, 0), + name="pool1") + + # stage 3 + in3a = Inception7A(pool1, 64, + 64, 96, 96, + 48, 64, + "avg", 32, "mixed") + in3b = Inception7A(in3a, 64, + 64, 96, 96, + 48, 64, + "avg", 64, "mixed_1") + in3c = Inception7A(in3b, 64, + 64, 96, 96, + 48, 64, + "avg", 64, "mixed_2") + in3d = Inception7B(in3c, 384, + 64, 96, 96, + "max", "mixed_3") + # stage 4 + in4a = Inception7C(in3d, 192, + 128, 128, 192, + 128, 128, 128, 128, 192, + "avg", 192, "mixed_4") + in4b = Inception7C(in4a, 192, + 160, 160, 192, + 160, 160, 160, 160, 192, + "avg", 192, "mixed_5") + in4c = Inception7C(in4b, 192, + 160, 160, 192, + 160, 160, 160, 160, 192, + "avg", 192, "mixed_6") + in4d = Inception7C(in4c, 192, + 192, 192, 192, + 192, 192, 192, 192, 192, + "avg", 192, "mixed_7") + in4e = Inception7D(in4d, 192, 320, + 192, 192, 192, 192, + "max", "mixed_8") + # stage 5 + in5a = Inception7E(in4e, 320, + 384, 384, 384, + 448, 384, 384, 384, + "avg", 192, "mixed_9") + in5b = Inception7E(in5a, 320, + 384, 384, 384, + 448, 384, 384, 384, + "max", 192, "mixed_10") + # pool + pool = Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", pad=(0, 0), + name="global_pool") + flatten = sym.flatten(data=pool, name="flatten") + fc1 = sym.dense(data=flatten, units=num_classes, name='fc1') + softmax = sym.softmax(data=fc1, name='softmax') + return softmax + +def get_workload(batch_size=1, num_classes=1000, + image_shape=(3, 299, 299), dtype="float32", **kwargs): + """Get benchmark workload for InceptionV3 + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + kwargs : dict + Extra arguments + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(num_classes=num_classes, **kwargs) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/init.py b/nnvm/python/nnvm/testing/init.py new file mode 100644 index 000000000000..611c81e69483 --- /dev/null +++ b/nnvm/python/nnvm/testing/init.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
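Each of these testing modules exposes the same `get_workload` contract, returning a symbol plus an initialized parameter dict. A minimal sketch of how a benchmark script might consume the InceptionV3 workload above (assuming a built `nnvm_compiler` library and its `tvm` dependency are importable; the `llvm` target string is illustrative):

```python
import nnvm.compiler
import nnvm.testing

# get_workload returns the symbolic net and randomly initialized parameters.
net, params = nnvm.testing.inception_v3.get_workload(batch_size=1)

# Compile for a CPU target; the shape follows the (3, 299, 299) default image shape.
shape = {"data": (1, 3, 299, 299)}
graph, lib, params = nnvm.compiler.build(
    net, target="llvm", shape=shape, params=params)
```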
+"""Initializer of parameters.""" +import numpy as np + +class Initializer(object): + """The base class of an initializer.""" + def __init__(self, **kwargs): + self._kwargs = kwargs + + def __call__(self, desc, arr): + """Initialize an array + + Parameters + ---------- + desc : str + Initialization pattern descriptor. + + arr : NDArray + The array to be initialized. + """ + if desc.endswith('weight'): + self._init_weight(desc, arr) + elif desc.endswith('bias'): + self._init_bias(desc, arr) + elif desc.endswith('gamma'): + self._init_gamma(desc, arr) + elif desc.endswith('beta'): + self._init_beta(desc, arr) + elif desc.endswith('mean'): + self._init_mean(desc, arr) + elif desc.endswith('var'): + self._init_var(desc, arr) + else: + self._init_default(desc, arr) + + def _init_bias(self, _, arr): + arr[:] = 0.0 + + def _init_gamma(self, _, arr): + arr[:] = 1.0 + + def _init_beta(self, _, arr): + arr[:] = 0.0 + + def _init_mean(self, _, arr): + arr[:] = 0.0 + + def _init_var(self, _, arr): + arr[:] = 1.0 + + def _init_weight(self, name, arr): + """Abstract method to Initialize weight.""" + raise NotImplementedError("Must override it") + + def _init_default(self, name, _): + raise ValueError( + 'Unknown initialization pattern for %s. ' \ + 'Default initialization is now limited to '\ + '"weight", "bias", "gamma" (1.0), and "beta" (0.0).' \ + 'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name) + + +class Xavier(Initializer): + """ "Xavier" initialization for weights + + Parameters + ---------- + rnd_type: str, optional + Random generator type, can be ``'gaussian'`` or ``'uniform'``. + + factor_type: str, optional + Can be ``'avg'``, ``'in'``, or ``'out'``. + + magnitude: float, optional + Scale of random number. + """ + def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3): + super(Xavier, self).__init__(rnd_type=rnd_type, + factor_type=factor_type, + magnitude=magnitude) + self.rnd_type = rnd_type + self.factor_type = factor_type + self.magnitude = float(magnitude) + + def _init_weight(self, name, arr): + shape = arr.shape + hw_scale = 1. + if len(shape) < 2: + raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at' + ' least 2D.'.format(name)) + if len(shape) > 2: + hw_scale = np.prod(shape[2:]) + fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale + factor = 1. + if self.factor_type == "avg": + factor = (fan_in + fan_out) / 2.0 + elif self.factor_type == "in": + factor = fan_in + elif self.factor_type == "out": + factor = fan_out + else: + raise ValueError("Incorrect factor type") + # Hack for mobilenet, because there is less connectivity + if "depthwise" in name: + factor = 3 * 3 + scale = np.sqrt(self.magnitude / factor) + if self.rnd_type == "uniform": + arr[:] = np.random.uniform(-scale, scale, size=arr.shape) + else: + raise ValueError("Unknown random type") diff --git a/nnvm/python/nnvm/testing/mlp.py b/nnvm/python/nnvm/testing/mlp.py new file mode 100644 index 000000000000..1b6975661fe4 --- /dev/null +++ b/nnvm/python/nnvm/testing/mlp.py @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +A simple multilayer perceptron. +""" +from .. import symbol as sym +from . utils import create_workload + +def get_symbol(num_classes=1000): + data = sym.Variable('data') + data = sym.flatten(data=data) + fc1 = sym.dense(data=data, name='fc1', units=128) + act1 = sym.relu(data=fc1, name='relu1') + fc2 = sym.dense(data=act1, name='fc2', units=64) + act2 = sym.relu(data=fc2, name='relu2') + fc3 = sym.dense(data=act2, name='fc3', units=num_classes) + mlp = sym.softmax(data=fc3, name='softmax') + return mlp + +def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), dtype="float32"): + """Get benchmark workload for a simple multilayer perceptron + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + Returns + ------- + net : nnvm.symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(num_classes=num_classes) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/mobilenet.py b/nnvm/python/nnvm/testing/mobilenet.py new file mode 100644 index 000000000000..e505ff499a54 --- /dev/null +++ b/nnvm/python/nnvm/testing/mobilenet.py @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Helper utility to get mobilenet workload for testing.""" +# pylint: disable=invalid-name +from __future__ import absolute_import as _abs + +from .. import symbol as sym +from .
utils import create_workload + +def conv_block(data, name, channels, + kernel_size=(3, 3), strides=(1, 1), padding=(1, 1), + epsilon=1e-5): + """Helper function to construct conv-bn-relu""" + # convolution + bn + relu + conv = sym.conv2d(data=data, channels=channels, + kernel_size=kernel_size, strides=strides, + padding=padding, use_bias=False, + layout="NCHW", name=name + "_conv") + bn = sym.batch_norm(data=conv, epsilon=epsilon, name=name + "_bn") + act = sym.relu(data=bn, name=name + "_relu") + return act + +def separable_conv_block(data, name, depthwise_channels, + pointwise_channels, kernel_size=(3, 3), + downsample=False, padding=(1, 1), + epsilon=1e-5): + """Helper function to get a separable conv block""" + if downsample: + strides = (2, 2) + else: + strides = (1, 1) + # depthwise convolution + bn + relu + conv1 = sym.conv2d(data=data, channels=depthwise_channels, + groups=depthwise_channels, kernel_size=kernel_size, strides=strides, + padding=padding, use_bias=False, layout="NCHW", + name=name + "_depthwise_conv1") + bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1") + act1 = sym.relu(data=bn1, name=name + "_relu1") + # pointwise convolution + bn + relu + conv2 = sym.conv2d(data=act1, channels=pointwise_channels, kernel_size=(1, 1), strides=(1, 1), + padding=(0, 0), use_bias=False, layout="NCHW", name=name + "_conv2") + bn2 = sym.batch_norm(data=conv2, epsilon=epsilon, name=name + "_bn2") + act2 = sym.relu(data=bn2, name=name + "_relu2") + return act2 + +def mobile_net(num_classes=1000, alpha=1.0, is_shallow=False): + """Function to construct a MobileNet""" + data = sym.Variable("data") + body = conv_block(data, "conv_block_1", int(32*alpha), strides=(2, 2)) + body = separable_conv_block(body, "separable_conv_block_1", + int(32*alpha), int(64*alpha)) + body = separable_conv_block(body, "separable_conv_block_2", + int(64*alpha), int(128*alpha), downsample=True) + body = separable_conv_block(body, "separable_conv_block_3", + int(128*alpha), int(128*alpha)) + body = separable_conv_block(body, "separable_conv_block_4", + int(128*alpha), int(256*alpha), downsample=True) + body = separable_conv_block(body, "separable_conv_block_5", + int(256*alpha), int(256*alpha)) + body = separable_conv_block(body, "separable_conv_block_6", + int(256*alpha), int(512*alpha), downsample=True) + if is_shallow: + body = separable_conv_block(body, "separable_conv_block_7", + int(512*alpha), int(1024*alpha), downsample=True) + body = separable_conv_block(body, "separable_conv_block_8", + int(1024*alpha), int(1024*alpha)) + else: + for i in range(7, 12): + body = separable_conv_block(body, "separable_conv_block_%d" % i, + int(512*alpha), int(512*alpha)) + body = separable_conv_block(body, "separable_conv_block_12", + int(512*alpha), int(1024*alpha), downsample=True) + body = separable_conv_block(body, "separable_conv_block_13", + int(1024*alpha), int(1024*alpha)) + pool = sym.global_avg_pool2d(data=body, name="pool") + flatten = sym.flatten(data=pool, name="flatten") + fc = sym.dense(data=flatten, units=num_classes, use_bias=False, name="fc") + softmax = sym.softmax(data=fc, name="softmax") + return softmax + + +def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), dtype="float32"): + """Get benchmark workload for mobilenet + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + Returns + 
------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + net = mobile_net(num_classes=num_classes, alpha=1.0, is_shallow=False) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/mobilenet_v2.py b/nnvm/python/nnvm/testing/mobilenet_v2.py new file mode 100644 index 000000000000..87c4a2c7e9f5 --- /dev/null +++ b/nnvm/python/nnvm/testing/mobilenet_v2.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +MobileNetV2, load model from gluon model zoo + +Reference: +Inverted Residuals and Linear Bottlenecks: +Mobile Networks for Classification, Detection and Segmentation +https://arxiv.org/abs/1801.04381 +""" + +from .utils import create_workload +from ..frontend.mxnet import _from_mxnet_impl + +def get_workload(batch_size, num_classes=1000, multiplier=1.0, dtype="float32"): + """Get benchmark workload for MobileNetV2 + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + multiplier : float, optional + The channel width multiplier of the model + + dtype : str, optional + The data type + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + import mxnet as mx + from mxnet.gluon.model_zoo.vision.mobilenet import MobileNetV2 + + image_shape = (1, 3, 224, 224) + + block = MobileNetV2(multiplier=multiplier, classes=num_classes) + + data = mx.sym.Variable('data') + sym = block(data) + sym = mx.sym.SoftmaxOutput(sym) + + net = _from_mxnet_impl(sym, {}) + + return create_workload(net, batch_size, image_shape[1:], dtype) diff --git a/nnvm/python/nnvm/testing/resnet.py b/nnvm/python/nnvm/testing/resnet.py new file mode 100644 index 000000000000..e63ceff7c3f0 --- /dev/null +++ b/nnvm/python/nnvm/testing/resnet.py @@ -0,0 +1,224 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
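The Initializer base class added above dispatches on the parameter-name suffix: __call__ routes *_weight to _init_weight (which Xavier overrides), *_bias to a zero fill, and so on. A hedged sketch of driving it directly, using plain numpy arrays in place of NDArrays (the methods only rely on slice assignment and .shape, so this suffices for illustration):

import numpy as np
from nnvm.testing.init import Xavier  # module path as added by this patch

init = Xavier(rnd_type="uniform", factor_type="avg", magnitude=3)
weight = np.zeros((64, 3, 3, 3), dtype="float32")
init("conv0_weight", weight)  # 'weight' suffix -> Xavier._init_weight
bias = np.zeros((64,), dtype="float32")
init("conv0_bias", bias)      # 'bias' suffix -> zero fill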
+ +''' +Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py +Original author Wei Wu + +Implements the following paper: + +Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks" +''' +# pylint: disable=unused-argument +from .. import symbol as sym +from . utils import create_workload + +def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True): + """Return ResNet Unit symbol for building ResNet + Parameters + ---------- + data : Symbol + Input data + num_filter : int + Number of output channels + bnf : int + Bottle neck channels factor with regard to num_filter + stride : tuple + Stride used in convolution + dim_match : Boolean + True means channel number between input and output is the same; + otherwise they differ + name : str + Base name of the operators + """ + if bottle_neck: + bn1 = sym.batch_norm(data=data, epsilon=2e-5, name=name + '_bn1') + act1 = sym.relu(data=bn1, name=name + '_relu1') + conv1 = sym.conv2d( + data=act1, channels=int(num_filter*0.25), kernel_size=(1, 1), + strides=stride, padding=(0, 0), use_bias=False, name=name + '_conv1') + bn2 = sym.batch_norm(data=conv1, epsilon=2e-5, name=name + '_bn2') + act2 = sym.relu(data=bn2, name=name + '_relu2') + conv2 = sym.conv2d( + data=act2, channels=int(num_filter*0.25), kernel_size=(3, 3), + strides=(1, 1), padding=(1, 1), use_bias=False, name=name + '_conv2') + bn3 = sym.batch_norm(data=conv2, epsilon=2e-5, name=name + '_bn3') + act3 = sym.relu(data=bn3, name=name + '_relu3') + conv3 = sym.conv2d( + data=act3, channels=num_filter, kernel_size=(1, 1), + strides=(1, 1), padding=(0, 0), use_bias=False, name=name + '_conv3') + if dim_match: + shortcut = data + else: + shortcut = sym.conv2d( + data=act1, channels=num_filter, kernel_size=(1, 1), + strides=stride, use_bias=False, name=name+'_sc') + return sym.elemwise_add(conv3, shortcut) + else: + bn1 = sym.batch_norm(data=data, epsilon=2e-5, name=name + '_bn1') + act1 = sym.relu(data=bn1, name=name + '_relu1') + conv1 = sym.conv2d( + data=act1, channels=num_filter, kernel_size=(3, 3), + strides=stride, padding=(1, 1), use_bias=False, name=name + '_conv1') + bn2 = sym.batch_norm(data=conv1, epsilon=2e-5, name=name + '_bn2') + act2 = sym.relu(data=bn2, name=name + '_relu2') + conv2 = sym.conv2d( + data=act2, channels=num_filter, kernel_size=(3, 3), + strides=(1, 1), padding=(1, 1), use_bias=False, name=name + '_conv2') + if dim_match: + shortcut = data + else: + shortcut = sym.conv2d( + data=act1, channels=num_filter, kernel_size=(1, 1), + strides=stride, use_bias=False, name=name+'_sc') + return sym.elemwise_add(conv2, shortcut) + +def resnet(units, num_stages, filter_list, num_classes, image_shape, + bottle_neck=True): + """Return ResNet symbol + Parameters + ---------- + units : list + Number of units in each stage + num_stages : int + Number of stages + filter_list : list + Channel size of each stage + num_classes : int + Output size of symbol + dataset : str + Dataset type; only cifar10 and imagenet are supported + """ + num_unit = len(units) + assert num_unit == num_stages + data = sym.Variable(name='data') + data = sym.batch_norm(data=data, epsilon=2e-5, scale=False, name='bn_data') + (_, height, _) = image_shape + if height <= 32: # such as cifar10 + body = sym.conv2d( + data=data, channels=filter_list[0], kernel_size=(3, 3), + strides=(1, 1), padding=(1, 1), use_bias=False, name="conv0") + else: # often expected to be 224 such as imagenet + body = sym.conv2d( + data=data,
channels=filter_list[0], kernel_size=(7, 7), + strides=(2, 2), padding=(3, 3), use_bias=False, name="conv0") + body = sym.batch_norm(data=body, epsilon=2e-5, name='bn0') + body = sym.relu(data=body, name='relu0') + body = sym.max_pool2d(data=body, pool_size=(3, 3), strides=(2, 2), padding=(1, 1)) + + for i in range(num_stages): + body = residual_unit( + body, filter_list[i+1], (1 if i == 0 else 2, 1 if i == 0 else 2), + False, name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck) + for j in range(units[i]-1): + body = residual_unit( + body, filter_list[i+1], (1, 1), True, + name='stage%d_unit%d' % (i + 1, j + 2), bottle_neck=bottle_neck) + bn1 = sym.batch_norm(data=body, epsilon=2e-5, name='bn1') + relu1 = sym.relu(data=bn1, name='relu1') + # Although kernel is not used here when global_pool=True, we should put one + pool1 = sym.global_avg_pool2d(data=relu1, name='pool1') + flat = sym.flatten(data=pool1) + fc1 = sym.dense(data=flat, units=num_classes, name='fc1') + return sym.softmax(data=fc1, name='softmax') + +def get_symbol(num_classes, num_layers=50, image_shape=(3, 224, 224), **kwargs): + """ + Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py + Original author Wei Wu + """ + (_, height, _) = image_shape + if height <= 28: + num_stages = 3 + if (num_layers-2) % 9 == 0 and num_layers >= 164: + per_unit = [(num_layers-2)//9] + filter_list = [16, 64, 128, 256] + bottle_neck = True + elif (num_layers-2) % 6 == 0 and num_layers < 164: + per_unit = [(num_layers-2)//6] + filter_list = [16, 16, 32, 64] + bottle_neck = False + else: + raise ValueError("no experiments done on num_layers {}".format(num_layers)) + units = per_unit * num_stages + else: + if num_layers >= 50: + filter_list = [64, 256, 512, 1024, 2048] + bottle_neck = True + else: + filter_list = [64, 64, 128, 256, 512] + bottle_neck = False + num_stages = 4 + if num_layers == 18: + units = [2, 2, 2, 2] + elif num_layers == 34: + units = [3, 4, 6, 3] + elif num_layers == 50: + units = [3, 4, 6, 3] + elif num_layers == 101: + units = [3, 4, 23, 3] + elif num_layers == 152: + units = [3, 8, 36, 3] + elif num_layers == 200: + units = [3, 24, 36, 3] + elif num_layers == 269: + units = [3, 30, 48, 8] + else: + raise ValueError("no experiments done on num_layers {}".format(num_layers)) + + return resnet(units=units, + num_stages=num_stages, + filter_list=filter_list, + num_classes=num_classes, + image_shape=image_shape, + bottle_neck=bottle_neck) + +def get_workload(batch_size=1, num_classes=1000, num_layers=18, + image_shape=(3, 224, 224), dtype="float32", **kwargs): + """Get benchmark workload for resnet + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + num_layers : int, optional + Number of layers + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + kwargs : dict + Extra arguments + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. 
+ """ + net = get_symbol(num_classes=num_classes, num_layers=num_layers, + image_shape=image_shape, **kwargs) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/squeezenet.py b/nnvm/python/nnvm/testing/squeezenet.py new file mode 100644 index 000000000000..eab2cf06fee6 --- /dev/null +++ b/nnvm/python/nnvm/testing/squeezenet.py @@ -0,0 +1,132 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable=unused-argument + +""" +Symbol of SqueezeNet + +Reference: +Iandola, Forrest N., et al. +"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). +""" + +from .. import symbol as sym +from . utils import create_workload + +# Helpers +def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): + net = _make_fire_conv(net, squeeze_channels, 1, 0) + + left = _make_fire_conv(net, expand1x1_channels, 1, 0) + right = _make_fire_conv(net, expand3x3_channels, 3, 1) + # NOTE : Assume NCHW layout here + net = sym.concatenate(left, right, axis=1) + + return net + +def _make_fire_conv(net, channels, kernel_size, padding=0): + net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size), + padding=(padding, padding)) + net = sym.relu(net) + return net + +# Net +def get_symbol(num_classes, version, **kwargs): + """Get symbol of SqueezeNet + + Parameters + ---------- + num_classes: int + The number of classification results + + version : str, optional + "1.0" or "1.1" of SqueezeNet + """ + assert version in ['1.0', '1.1'], ("Unsupported SqueezeNet version {version}:" + "1.0 or 1.1 expected".format(version=version)) + net = sym.Variable("data") + if version == '1.0': + net = sym.conv2d(net, channels=96, kernel_size=(7, 7), strides=(2, 2), padding=(3, 3)) + net = sym.relu(net) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 32, 128, 128) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 32, 128, 128) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 64, 256, 256) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 64, 256, 256) + else: + net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1)) + net = sym.relu(net) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 16, 64, 64) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 32, 128, 128) + net = _make_fire(net, 32, 128, 128) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 48, 
192, 192) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 64, 256, 256) + net = _make_fire(net, 64, 256, 256) + net = sym.dropout(net, rate=0.5) + net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1)) + net = sym.relu(net) + net = sym.global_avg_pool2d(net) + net = sym.flatten(net) + return sym.softmax(net) + +def get_workload(batch_size=1, num_classes=1000, version='1.0', + image_shape=(3, 224, 224), dtype="float32", **kwargs): + """Get benchmark workload for SqueezeNet + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + version : str, optional + "1.0" or "1.1" of SqueezeNet + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + kwargs : dict + Extra arguments + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(num_classes=num_classes, version=version, **kwargs) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/utils.py b/nnvm/python/nnvm/testing/utils.py new file mode 100644 index 000000000000..0bffc81a0663 --- /dev/null +++ b/nnvm/python/nnvm/testing/utils.py @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Helper utility to create common workload for testing.""" +from __future__ import absolute_import as _abs + +import numpy as np +import tvm +from ..compiler import graph_util +from ..import graph +from . init import Xavier + +def create_workload(net, batch_size, image_shape=(3, 224, 224), + dtype="float32", initializer=None, seed=0): + """Helper function to create benchmark workload for input network + + Parameters + ---------- + net : nnvm.Symbol + The selected network symbol to use + + batch_size : int + The batch size used in the model + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + initializer : Initializer + The initializer used + + seed : int + The seed used in initialization. + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. 
+ """ + if image_shape is None: + image_shape = (3, 224, 224) + data_shape = (batch_size,) + image_shape + params = {} + g = graph.create(net) + input_shapes, _ = graph_util.infer_shape(g, data=data_shape) + shape_dict = dict(zip(g.index.input_names, input_shapes)) + np.random.seed(seed) + initializer = initializer if initializer else Xavier() + for k, v in shape_dict.items(): + if k == "data": + continue + init_value = np.zeros(v).astype(dtype) + initializer(k, init_value) + params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0)) + return net, params diff --git a/nnvm/python/nnvm/testing/vgg.py b/nnvm/python/nnvm/testing/vgg.py new file mode 100644 index 000000000000..2c290bdc3c68 --- /dev/null +++ b/nnvm/python/nnvm/testing/vgg.py @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""References: + +Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for +large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014). +""" +from .. import symbol as sym +from . utils import create_workload + +def get_feature(internel_layer, layers, filters, batch_norm=False): + """Get VGG feature body as stacks of convoltions.""" + for i, num in enumerate(layers): + for j in range(num): + internel_layer = sym.conv2d( + data=internel_layer, kernel_size=(3, 3), padding=(1, 1), + channels=filters[i], name="conv%s_%s"%(i + 1, j + 1)) + if batch_norm: + internel_layer = sym.batch_norm( + data=internel_layer, name="bn%s_%s" %(i + 1, j + 1)) + internel_layer = sym.relu(data=internel_layer, name="relu%s_%s" %(i + 1, j + 1)) + internel_layer = sym.max_pool2d( + data=internel_layer, pool_size=(2, 2), strides=(2, 2), name="pool%s"%(i + 1)) + return internel_layer + +def get_classifier(input_data, num_classes): + """Get VGG classifier layers as fc layers.""" + flatten = sym.flatten(data=input_data, name="flatten") + fc6 = sym.dense(data=flatten, units=4096, name="fc6") + relu6 = sym.relu(data=fc6, name="relu6") + drop6 = sym.dropout(data=relu6, rate=0.5, name="drop6") + fc7 = sym.dense(data=drop6, units=4096, name="fc7") + relu7 = sym.relu(data=fc7, name="relu7") + drop7 = sym.dropout(data=relu7, rate=0.5, name="drop7") + fc8 = sym.dense(data=drop7, units=num_classes, name="fc8") + return fc8 + +def get_symbol(num_classes, num_layers=11, batch_norm=False): + """ + Parameters + ---------- + num_classes : int, default 1000 + Number of classification classes. + num_layers : int + Number of layers for the variant of densenet. Options are 11, 13, 16, 19. + batch_norm : bool, default False + Use batch normalization. 
+ """ + vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]), + 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]), + 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), + 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])} + if num_layers not in vgg_spec: + raise ValueError("Invalide num_layers {}. Choices are 11,13,16,19.".format(num_layers)) + layers, filters = vgg_spec[num_layers] + data = sym.Variable(name="data") + feature = get_feature(data, layers, filters, batch_norm) + classifier = get_classifier(feature, num_classes) + symbol = sym.softmax(data=classifier, name='softmax') + return symbol + +def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), + dtype="float32", **kwargs): + """Get benchmark workload for VGG nets. + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of claseses + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + kwargs : dict + Extra arguments + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(num_classes=num_classes, **kwargs) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/to_relay.py b/nnvm/python/nnvm/to_relay.py new file mode 100644 index 000000000000..94a736dabe70 --- /dev/null +++ b/nnvm/python/nnvm/to_relay.py @@ -0,0 +1,507 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, unused-argument +"""Convert an NNVM graph to Relay.""" +import numpy + +from tvm import relay, nd +from tvm.relay import op, expr, var +from tvm.relay.frontend.common import StrAttrsDict +from tvm.relay.frontend.nnvm_common import _rename, _binop_scalar, _rbinop_scalar, \ + _elemwise_sum, _softmax_op, _compare, _reduce +from .symbol import Symbol +from .compiler import graph_attr +from .graph import create as graph_create + +def _nn_batch_flatten(children, attrs, odtype='float32'): + assert len(children) == 1 + return op.nn.batch_flatten(children[0]) + + +def _dense(children, attrs, odtype='float32'): + use_bias = attrs.get_bool('use_bias', True) + units = attrs.get_int('units') + dense = op.nn.dense(children[0], children[1], units=units) + if use_bias: + return op.nn.bias_add(dense, children[2]) + else: + return dense + +def _conv2d(children, attrs, odtype='float32'): + use_bias = attrs.get_bool('use_bias', True) + + if use_bias: + data, weight, bias = children + else: + data, weight = children + + kernel_size = attrs.get_int_tuple('kernel_size') + channels = attrs.get_int('channels') + strides = attrs.get_int_tuple('strides', (1, 1)) + padding = attrs.get_int_tuple('padding', (0, 0)) + dilation = attrs.get_int_tuple('dilation', (1, 1)) + groups = attrs.get_int('groups', 1) + data_layout = attrs.get_str('layout', 'NCHW') + kernel_layout = attrs.get_str('kernel_layout', 'OIHW') + out_layout = '' + out_dtype = attrs.get_str('out_dtype', '') + + conv_out = op.nn.conv2d( + data, + weight, + kernel_size=kernel_size, + channels=channels, + strides=strides, + padding=padding, + dilation=dilation, + groups=groups, + data_layout=data_layout, + kernel_layout=kernel_layout, + out_layout=out_layout, + out_dtype=out_dtype) + + if use_bias: + return op.nn.bias_add(conv_out, bias) + else: + return conv_out + + +def _conv2d_transpose(children, attrs, odtype='float32'): + use_bias = attrs.get_bool('use_bias', False) + + if use_bias: + data, weight, bias = children + else: + data, weight = children + + strides = attrs.get_int_tuple('strides', (1, 1)) + padding = attrs.get_int_tuple('padding', (0, 0)) + dilation = attrs.get_int_tuple('dilation', (1, 1)) + groups = attrs.get_int('groups', 1) + data_layout = attrs.get_str('layout', 'NCHW') + kernel_layout = attrs.get_str('kernel_layout', 'OIHW') + out_dtype = attrs.get_str('out_dtype', '') + + out_conv2d = op.nn.conv2d_transpose( + data, + weight, + strides=strides, + padding=padding, + dilation=dilation, + groups=groups, + data_layout=data_layout, + kernel_layout=kernel_layout, + out_dtype=out_dtype) + + if use_bias: + return op.nn.bias_add(out_conv2d, bias) + else: + return out_conv2d + + +def _batch_norm(children, attrs, odtype='float32'): + data, gamma, beta, moving_mean, moving_view = children + axis = attrs.get_int('axis', 1) + epsilon = attrs.get_float('epsilon', 1e-05) + center = attrs.get_bool('center', True) + scale = attrs.get_bool('scale', True) + + return op.nn.batch_norm( + data, + gamma, + beta, + moving_mean, + moving_view, + axis=axis, + epsilon=epsilon, + center=center, + scale=scale)[0] + + +def _max_pool2d(children, attrs, odtype='float32'): + assert len(children) == 1 + data = children[0] + pool_size = attrs.get_int_tuple('pool_size', (1, 1)) + strides = attrs.get_int_tuple('strides', (1, 1)) + padding = attrs.get_int_tuple('padding', (0, 0)) + layout = attrs.get_str('layout', 'NCHW') + ceil_mode = attrs.get_bool('ceil_mode', False) + + return op.nn.max_pool2d( + 
data, + pool_size=pool_size, + strides=strides, + padding=padding, + layout=layout, + ceil_mode=ceil_mode) + + +def _reshape(children, attrs, odtype='float32'): + data = children[0] + shape = attrs.get_int_list('shape') + return op.reshape(data, shape) + + +def _transpose(children, attrs, odtype='float32'): + axes = attrs.get_int_list('axes', None) + return op.transpose(children[0], axes=axes) + + +def _clip(children, attrs, odtype='float32'): + a_min = attrs.get_float('a_min') + a_max = attrs.get_float('a_max') + return op.clip(children[0], a_min, a_max) + + +def _cast(children, attrs, odtype='float32'): + data = children[0] + dtype = attrs.get_str('dtype') + return data.astype(dtype) + + +def _expand_dims(children, attrs, odtype='float32'): + data = children[0] + axis = attrs.get_int('axis') + num_newaxis = attrs.get_int('num_newaxis', 1) + return op.transform.expand_dims(data, axis, num_newaxis=num_newaxis) + + +def broadcast_to(children, attrs, odtype='float32'): + # TODO(@jroesch) export broadcast to? + data = children[0] + shape = attrs.get_int_tuple('shape') + array = numpy.zeros(shape).astype(odtype) + rconst = relay.Constant(nd.array(array)) + return op.broadcast_to_like(data, rconst) + + +def _global_avg_pool2d(children, attrs, odtype='float32'): + data = children[0] + layout = attrs.get_str('layout', "NCHW") + return op.nn.global_avg_pool2d(data, layout) + + +def _avg_pool2d(children, attrs, odtype='float32'): + data = children[0] + pool_size = attrs.get_int_tuple('pool_size', (1, 1)) + strides = attrs.get_int_tuple('strides', (1, 1)) + padding = attrs.get_int_tuple('padding', (0, 0)) + layout = attrs.get_str('layout', "NCHW") + ceil_mode = attrs.get_bool('ceil_mode', False) + count_include_pad = attrs.get_bool('count_include_pad', False) + return op.nn.avg_pool2d( + data, + pool_size=pool_size, + strides=strides, + padding=padding, + layout=layout, + ceil_mode=ceil_mode, + count_include_pad=count_include_pad) + + +def _upsampling(children, attrs, odtype='float32'): + scale = attrs.get_int('scale') + layout = attrs.get_str('layout', 'NCHW') + method = attrs.get_str('method', 'NEAREST_NEIGHBOR') + return op.nn.upsampling( + children[0], + scale_h=scale, + scale_w=scale, + layout=layout, + method=method) + + +def _pad(children, attrs, odtype='float32'): + pad_value = attrs.get_float('pad_value', 0.0) + pad_width = attrs.get_tuple_tuple_int('pad_width') + return op.nn.pad(children[0], pad_width, pad_value=pad_value) + +def _leaky_relu(children, attrs, odtype='float32'): + alpha = attrs.get_float('alpha') + return op.nn.leaky_relu(children[0], alpha) + + +def _full_like(children, attrs, odtype='float32'): + fill_value = relay.const(attrs.get_float('fill_value'), dtype='float32') + return op.full_like(children[0], fill_value) + + +def _strided_slice(children, attrs, odtype='float32'): + begin = attrs.get_int_list('begin') + end = attrs.get_int_list('end') + strides = attrs.get_int_list('stride', None) + return op.strided_slice(children[0], begin, end, strides=strides) + + +def _split(children, attrs, odtype='float32'): + indices_or_sections = None + try: + indices_or_sections = attrs.get_int('indices_or_sections', None) + except ValueError: + indices_or_sections = indices_or_sections or attrs.get_int_tuple( + 'indices_or_sections') + + axis = attrs.get_int('axis', 0) + + return op.split(children[0], indices_or_sections, axis) + +def _squeeze(children, attrs, odtype='float32'): + axis = attrs.get_int_tuple('axis', None) + axis = [axis] if isinstance(axis, int) else axis + + return
op.squeeze(children[0], axis) + +def _concatenate(children, attrs, odtype='float32'): + axis = attrs.get_int('axis', 1) + return op.concatenate(children, axis) + +def _dropout(children, attrs, odtype='float32'): + rate = attrs.get_float('rate', 0.5) + return op.nn.dropout(children[0], rate) + +def _mean(children, attrs, odtype='float32'): + axis = attrs.get_int_tuple('axis', None) + keepdims = attrs.get_bool('keepdims') + + return op.mean(children[0], axis, keepdims) + + +def _prelu(children, attrs, odtype='float32'): + axis = attrs.get_int('axis', 1) + return op.nn.prelu(children[0], children[1], axis) + + +def _lrn(children, attrs, odtype='float32'): + size = attrs.get_int("size", 5) + axis = attrs.get_int("axis", 1) + bias = attrs.get_float("bias", 2) + alpha = attrs.get_float("alpha", 1e-05) + beta = attrs.get_float("beta", 0.75) + return op.nn.lrn(children[0], size, axis, bias, alpha, beta) + + +def _l2_nomalize(children, attrs, odtype='float32'): + eps = attrs.get_float('eps') + axis = attrs.get_int_tuple('axis', None) + return op.nn.l2_normalize(children[0], eps, axis) + + +def _take(children, attrs, odtype='float32'): + axis = attrs.get_int('axis', None) + return op.take(children[0], children[1], axis) + + +def _matmul(children, attrs, odtype='float32'): + input_1_t = op.transpose(children[1], axes=(1, 0)) + return op.nn.dense(children[0], input_1_t) + + +def _collapse_sum(children, attrs, odtype='float32'): + for key in ["axis", "keepdims", "exclude"]: + if key in attrs.attrs: + raise NotImplementedError("Parameter '" + key + "' is not supported.") + return op.collapse_sum_like(children[0], children[1]) + + +def _not_implemented(new_op): + def _impl(children, attrs, odtype='float32'): + raise NotImplementedError(str(new_op) + " is not implemented.") + return _impl + + +NNVM_OP_2_RELAY_OP = { + 'flatten': _nn_batch_flatten, + 'dense': _dense, + 'softmax': _softmax_op(op.nn.softmax), + 'log_softmax': _softmax_op(op.nn.log_softmax), + 'conv2d': _conv2d, + 'batch_norm': _batch_norm, + 'max_pool2d': _max_pool2d, + 'reshape': _reshape, + 'transpose': _transpose, + 'dropout': _dropout, + 'mean': _mean, + # Addition + '__add_scalar__': _binop_scalar(op.add), + 'broadcast_add' : _rename(op.add), + 'elemwise_add' : _rename(op.add), + # Subtraction + '__sub_scalar__' : _binop_scalar(op.subtract), + '__rsub_scalar__': _rbinop_scalar(op.subtract), + 'broadcast_sub' : _rename(op.subtract), + 'elemwise_sub' : _rename(op.subtract), + # Multiply + '__mul_scalar__': _binop_scalar(op.multiply), + 'broadcast_mul' : _rename(op.multiply), + 'elemwise_mul' : _rename(op.multiply), + # Division + '__div_scalar__': _binop_scalar(op.divide), + 'broadcast_div' : _rename(op.divide), + 'elemwise_div' : _rename(op.divide), + 'broadcast_mod' : _rename(op.mod), + # Negative + 'negative': _rename("negative"), + # Power + '__pow_scalar__': _binop_scalar(op.power), + '__rpow_scalar__': _rbinop_scalar(op.power), + 'broadcast_pow': _rename(op.power), + # Sum + 'sum': _reduce(op.sum), + 'elemwise_sum': _elemwise_sum, + 'collapse_sum': _collapse_sum, + 'broadcast_max': _rename(op.maximum), + 'broadcast_min': _rename(op.minimum), + + # Comparsion + 'greater': _compare(op.greater), + 'broadcast_greater': _compare(op.greater), + 'greater_equal': _compare(op.greater_equal), + 'broadcast_greater_equal': _compare(op.greater_equal), + 'less': _compare(op.less), + 'broadcast_less': _compare(op.less), + 'less_equal': _compare(op.less_equal), + 'broadcast_less_equal': _compare(op.less_equal), + 'broadcast_equal': 
_compare(op.equal), + 'broadcast_not_equal': _compare(op.not_equal), + + # Activations + 'sigmoid': _rename('sigmoid'), + 'relu': _rename('nn.relu'), + 'exp': _rename('exp'), + 'log': _rename('log'), + 'tanh': _rename('tanh'), + 'leaky_relu': _leaky_relu, + 'prelu': _prelu, + 'clip': _clip, + 'round': _rename('round'), + 'cast': _cast, + 'expand_dims': _expand_dims, + 'broadcast_to': broadcast_to, + '__lshift_scalar__': _binop_scalar(op.left_shift), + '__rshift_scalar__': _binop_scalar(op.right_shift), + 'broadcast_left_shift': _rename(op.left_shift), + 'broadcast_right_shift': _rename(op.right_shift), + 'copy': _rename(op.copy), + 'global_avg_pool2d': _global_avg_pool2d, + 'avg_pool2d': _avg_pool2d, + 'conv2d_transpose': _conv2d_transpose, + 'upsampling': _upsampling, + 'pad': _pad, + 'full_like': _full_like, + 'strided_slice': _strided_slice, + 'split': _split, + 'squeeze': _squeeze, + 'concatenate': _concatenate, + 'abs': _rename(op.abs), + 'ceil': _rename(op.ceil), + 'floor': _rename(op.floor), + 'trunc': _rename(op.trunc), + 'take': _take, + 'lrn': _lrn, + 'l2_normalize': _l2_nomalize, + 'matmul': _matmul, + 'zeros_like': _rename(op.zeros_like), + 'reshape_like': _rename(op.reshape_like), + 'ones_like': _rename(op.ones_like), + + 'expand_like': _not_implemented("expand_like"), + 'gather_nd': _not_implemented("gather_nd"), + 'block_grad': _not_implemented("block_grad"), +} + + +def to_relay(graph, shape_dict, dtype_dict, params): + """Convert an NNVM graph into the corresponding Relay expression. + + Parameters + ---------- + graph : Graph + The input graph. + + shape_dict : dict of str to shape + The input shapes. + + dtype_dict : dict of str to str/dtype + The input dtypes. + + params : dict of str to array + The parameters. + + Returns + ------- + (expr, params) : Tuple[relay.Expr, dict of str to array] + The corresponding Relay expression and parameters.
+ """ + if isinstance(graph, Symbol): + graph = graph_create(graph) + + param_shapes = dict((k, params[k].shape) for k in params) + shape_dict = shape_dict.copy() + shape_dict.update(param_shapes) + graph = graph_attr.set_shape_inputs(graph, shape_dict) + graph = graph_attr.set_dtype_inputs(graph, dtype_dict) + graph = graph.apply(["InferShape", "InferType"]) + shape = graph.json_attr("shape") + dtype = [graph_attr.TCODE_TO_DTYPE[di] for di in graph.json_attr("dtype")] + + gidx = graph.index + relay_map = {} + fn_params = [] + + for nid, node in enumerate(gidx.nodes): + children = [] + for i in node['inputs']: + child = relay_map[i[0]] + if isinstance(child, expr.TupleWrapper): + children.append(child[i[1]]) + else: + children.append(child) + + oshape = shape[gidx.entry_id(nid, 0)] + odtype = dtype[gidx.entry_id(nid, 0)] + attrs = node.get("attrs", {}) + node_name = node["name"] + op_name = node["op"] + + if op_name == "null": + v = var(node_name, shape=oshape, dtype=odtype) + fn_params.append(v) + relay_map[nid] = v + else: + if op_name in NNVM_OP_2_RELAY_OP: + str_attrs = StrAttrsDict(attrs) + call = NNVM_OP_2_RELAY_OP[op_name](children, str_attrs, odtype) + relay_map[nid] = call + else: + raise Exception( + "nnvm.to_relay: unsupported operator: {0}".format(op_name)) + + outputs = [] + for nid, idx, _ in gidx.output_entries: + output = relay_map[nid] + if isinstance(output, expr.TupleWrapper): + outputs.append(output[idx]) + else: + outputs.append(output) + + if len(outputs) == 1: + body = outputs[0] + else: + body = expr.Tuple(outputs) + + func = relay.Function(fn_params, body) + return func, params diff --git a/cmake/modules/contrib/DNNL.cmake b/nnvm/python/nnvm/top/__init__.py similarity index 63% rename from cmake/modules/contrib/DNNL.cmake rename to nnvm/python/nnvm/top/__init__.py index 3fd3f7cbc887..db80df03e269 100644 --- a/cmake/modules/contrib/DNNL.cmake +++ b/nnvm/python/nnvm/top/__init__.py @@ -15,14 +15,17 @@ # specific language governing permissions and limitations # under the License. -if(USE_DNNL_CODEGEN STREQUAL "ON") - file(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/codegen.cc) - list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC}) +"""Tensor operator property registry - find_library(EXTERN_LIBRARY_DNNL dnnl) - list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL}) - file(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/*) - list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC}) - message(STATUS "Build with DNNL codegen: " ${EXTERN_LIBRARY_DNNL}) -endif() +Provide information to lower and schedule tensor operators. +""" +from .attr_dict import AttrDict +from . import tensor +from . import nn +from . import transform +from . import reduction +from . import vision +from . import image +from .registry import OpPattern +from .registry import register_compute, register_schedule, register_pattern diff --git a/nnvm/python/nnvm/top/attr_dict.py b/nnvm/python/nnvm/top/attr_dict.py new file mode 100644 index 000000000000..5082a587d5a0 --- /dev/null +++ b/nnvm/python/nnvm/top/attr_dict.py @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name +"""Attr dictionary object used by schedule functions""" +import tvm + +_dict_get = tvm.get_global_func("nnvm.compiler._dict_get") +_dict_size = tvm.get_global_func("nnvm.compiler._dict_size") +_dict_keys = tvm.get_global_func("nnvm.compiler._dict_keys") + +class AttrDict(object): + """Attribute dictionary in nnvm. + + Used by python registration of compute and schedule function. + AttrDict is passed as the first argument to schedule and compute function. + """ + _tvm_tcode = 18 + + def __init__(self, handle): + self.handle = handle + + def __del__(self): + tvm.nd.free_extension_handle(self.handle, 18) + + @property + def _tvm_handle(self): + return self.handle.value + + def __getitem__(self, key): + return _dict_get(self, key) + + def keys(self): + """Get list of keys in the dict. + + Returns + ------- + keys : list of str + List of keys + """ + return [x.value for x in _dict_keys(self)] + + def get_int_tuple(self, key): + """Get tuple of integer from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + tuple : tuple of int + The result tuple + """ + return tuple(int(x) for x in self[key][1:-1].split(",") if x) + + def get_int_pair_tuple(self, key): + """Get tuple of integer pairs from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + tuple : tuple of int pairs + The result tuple + """ + flat = [int(x.strip(' [] ')) for x in self[key][1:-1].split(",")] + return tuple((flat[i], flat[i+1]) for i in range(0, len(flat), 2)) + + def get_int(self, key): + """Get integer from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + value : int + The result value + """ + return int(self[key]) + + def get_float_tuple(self, key): + """Get tuple of float from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + tuple : tuple of float + The result tuple + """ + return tuple(float(x) for x in self[key][1:-1].split(",") if x) + + def get_float(self, key): + """Get float from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + value : float + The result value + """ + return float(self[key]) + + def get_bool(self, key): + """Get bool from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + value : bool + The result value + """ + lowercase = self[key].lower() + if lowercase == "1": + return True + if lowercase == "0": + return False + if lowercase == "true": + return True + if lowercase == "false": + return False + raise ValueError("Wrong bool format for key %s" % key) + + def get_str(self, key): + """Get string from attr dict + + Parameters + ---------- + key : str + The attr key + + Returns + ------- + value : str + The result value + """ + return self[key] + + def __repr__(self): + return str({k : self[k] for k in self.keys()}) + + +tvm.register_extension(AttrDict, AttrDict) diff --git a/nnvm/python/nnvm/top/image.py b/nnvm/python/nnvm/top/image.py new file mode 100644 index 000000000000..4367d982985c --- /dev/null +++ b/nnvm/python/nnvm/top/image.py 
@@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""Definition of image ops""" +from __future__ import absolute_import + +import tvm +import topi +from . import registry as reg +from .registry import OpPattern + +# resize +@reg.register_schedule("resize") +def schedule_resize(_, outs, target): + """Schedule definition of resize""" + with tvm.target.create(target): + return topi.generic.schedule_injective(outs) + +reg.register_pattern("resize", OpPattern.INJECTIVE) diff --git a/nnvm/python/nnvm/top/reduction.py b/nnvm/python/nnvm/top/reduction.py new file mode 100644 index 000000000000..ce14d0d28831 --- /dev/null +++ b/nnvm/python/nnvm/top/reduction.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""Reduction ops""" +from __future__ import absolute_import + +import tvm +import topi +import topi.cuda +from . 
import registry as reg +from .registry import OpPattern + +def _schedule_reduce(_, outs, target): + """Generic schedule for reduce""" + with tvm.target.create(target): + return topi.generic.schedule_reduce(outs) + + +_fschedule_reduce = tvm.convert(_schedule_reduce) + +def _compute_reduce(f): + """auxiliary function""" + def _compute(attrs, inputs, out_info): + axis = attrs.get_int_tuple("axis") + keepdims = attrs.get_bool("keepdims") + if axis: + return f(inputs[0], axis=axis, keepdims=keepdims) + return f(inputs[0], keepdims=keepdims) + return _compute + +# sum +reg.register_pattern("sum", OpPattern.COMM_REDUCE) +reg.register_schedule("sum", _fschedule_reduce) + +# max +reg.register_pattern("max", OpPattern.COMM_REDUCE) +reg.register_schedule("max", _fschedule_reduce) + +# min +reg.register_pattern("min", OpPattern.COMM_REDUCE) +reg.register_schedule("min", _fschedule_reduce) + +# collapse sum +reg.register_pattern("collapse_sum", OpPattern.COMM_REDUCE) +reg.register_schedule("collapse_sum", _fschedule_reduce) + +# argmax +reg.register_pattern("argmax", OpPattern.COMM_REDUCE) +reg.register_schedule("argmax", _fschedule_reduce) + +# argmin +reg.register_pattern("argmin", OpPattern.COMM_REDUCE) +reg.register_schedule("argmin", _fschedule_reduce) + +# mean +reg.register_pattern("mean", OpPattern.COMM_REDUCE) +reg.register_schedule("mean", _fschedule_reduce) + +# product +reg.register_pattern("prod", OpPattern.COMM_REDUCE) +reg.register_schedule("prod", _fschedule_reduce) diff --git a/nnvm/python/nnvm/top/registry.py b/nnvm/python/nnvm/top/registry.py new file mode 100644 index 000000000000..7ad10620f304 --- /dev/null +++ b/nnvm/python/nnvm/top/registry.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name +"""Information registry to register operator information for compiler""" +import tvm + +class OpPattern(object): + """Operator generic patterns + + See Also + -------- + top.tag : Contains explanation of the tag type. 
+ """ + # Elementwise operator + ELEMWISE = 0 + # Broadcast operator + BROADCAST = 1 + # Injective mapping + INJECTIVE = 2 + # Comunication + COMM_REDUCE = 3 + # Complex op, can still fuse ewise into it + OUT_ELEMWISE_FUSABLE = 4 + # Not fusable opaque op + OPAQUE = 8 + +_register_compute = tvm.get_global_func("nnvm._register_compute") +_register_schedule = tvm.get_global_func("nnvm._register_schedule") +_register_pattern = tvm.get_global_func("nnvm._register_pattern") +_register_alter_op_layout = tvm.get_global_func("nnvm.compiler._register_alter_op_layout") + +def register_compute(op_name, f=None, level=10): + """Register compute function for operator + + Parameters + ---------- + op_name : str + The name of operator + + f : function + The schedule function + + level : int + The priority level + + Returns + ------- + fregister : function + Register function if f is not specified. + """ + def register(myf): + """internal register function""" + _register_compute(op_name, myf, level) + return myf + return register(f) if f else register + + +def register_schedule(op_name, f=None, level=10): + """Register schedule function for operator + + Parameters + ---------- + op_name : str + The name of operator + + f : function + The schedule function + + level : int + The priority level + + Returns + ------- + fregister : function + Register function if f is not specified. + """ + def register(myf): + """internal register function""" + _register_schedule(op_name, myf, level) + return myf + return register(f) if f else register + + +def register_pattern(op_name, pattern, level=10): + """Register pattern code for operator + + Parameters + ---------- + op_name : str + The name of operator + + pattern : int + The pattern code. + + level : int + The priority level + """ + _register_pattern(op_name, pattern, level) + + +def register_alter_op_layout(op_name, f=None, level=10): + """Register alter layout function for operator + + Parameters + ---------- + op_name : str + The name of operator + + f : function + The schedule function + + level : int + The priority level + + Returns + ------- + fregister : function + Register function if f is not specified. + """ + def register(myf): + """internal register function""" + _register_alter_op_layout(op_name, myf, level) + return myf + return register(f) if f else register diff --git a/nnvm/python/nnvm/top/tensor.py b/nnvm/python/nnvm/top/tensor.py new file mode 100644 index 000000000000..9f12e3245e3a --- /dev/null +++ b/nnvm/python/nnvm/top/tensor.py @@ -0,0 +1,306 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""Tensor ops""" +from __future__ import absolute_import + +import tvm +import topi +import topi.cuda +from . 
import registry as reg +from .registry import OpPattern + +def _schedule_injective(_, outs, target): + """Generic schedule for binary bcast""" + with tvm.target.create(target): + return topi.generic.schedule_injective(outs) + +def _compute_binary_scalar(f): + """auxiliary function""" + @tvm.tag_scope(topi.tag.ELEMWISE) + def _compute(attrs, x, _): + x = x[0] + scalar = attrs.get_float("scalar") + scalar = tvm.const(scalar, x.dtype) + return tvm.compute(x.shape, lambda *i: f(x(*i), scalar)) + return _compute + + +def _compute_unary(f): + """auxiliary function""" + def _compute(attrs, x, _): + return f(x[0]) + return _compute + + +def _compute_binary(f): + """auxiliary function""" + def _compute(attrs, x, _): + return f(x[0], x[1]) + return _compute + + +_fschedule_injective = tvm.convert(_schedule_injective) +_fschedule_broadcast = _fschedule_injective +_fschedule_elemwise = _fschedule_injective + +# Assign requires special treatment in the compiler +# The compute and schedule are designed as +# copy from rhs to output +reg.register_pattern("_assign", OpPattern.OPAQUE) +reg.register_schedule("_assign", _fschedule_broadcast) + +# copy +reg.register_pattern("copy", OpPattern.ELEMWISE) +reg.register_schedule("copy", _fschedule_broadcast) + +# cast +reg.register_pattern("cast", OpPattern.ELEMWISE) +reg.register_schedule("cast", _fschedule_broadcast) + +# floor +reg.register_pattern("floor", OpPattern.ELEMWISE) +reg.register_schedule("floor", _fschedule_broadcast) + +# ceil +reg.register_pattern("ceil", OpPattern.ELEMWISE) +reg.register_schedule("ceil", _fschedule_broadcast) + +# round +reg.register_pattern("round", OpPattern.ELEMWISE) +reg.register_schedule("round", _fschedule_broadcast) + +# abs +reg.register_pattern("abs", OpPattern.ELEMWISE) +reg.register_schedule("abs", _fschedule_broadcast) + +# trunc +reg.register_pattern("trunc", OpPattern.ELEMWISE) +reg.register_schedule("trunc", _fschedule_broadcast) + +# exp +reg.register_pattern("exp", OpPattern.ELEMWISE) +reg.register_schedule("exp", _fschedule_broadcast) + +# sqrt +reg.register_pattern("sqrt", OpPattern.ELEMWISE) +reg.register_schedule("sqrt", _fschedule_broadcast) + +# log +reg.register_pattern("log", OpPattern.ELEMWISE) +reg.register_schedule("log", _fschedule_broadcast) + +# tanh +reg.register_pattern("tanh", OpPattern.ELEMWISE) +reg.register_schedule("tanh", _fschedule_broadcast) + +# negative +reg.register_pattern("negative", OpPattern.ELEMWISE) +reg.register_schedule("negative", _fschedule_broadcast) + +# sigmoid +reg.register_pattern("sigmoid", OpPattern.ELEMWISE) +reg.register_schedule("sigmoid", _fschedule_broadcast) + +# add_scalar +reg.register_pattern("__add_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__add_scalar__", _fschedule_broadcast) + +# sub_calar +reg.register_pattern("__sub_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__sub_scalar__", _fschedule_broadcast) + +# rsub_scalar +reg.register_pattern("__rsub_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__rsub_scalar__", _fschedule_broadcast) + +# mul_scalar +reg.register_pattern("__mul_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__mul_scalar__", _fschedule_broadcast) + +# div_scalar +reg.register_pattern("__div_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__div_scalar__", _fschedule_broadcast) + +# rdiv_scalar +reg.register_pattern("__rdiv_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__rdiv_scalar__", _fschedule_broadcast) + +# pow_scalar +reg.register_pattern("__pow_scalar__", OpPattern.ELEMWISE) 
+reg.register_schedule("__pow_scalar__", _fschedule_broadcast) + +# rpow_scalar +reg.register_pattern("__rpow_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__rpow_scalar__", _fschedule_broadcast) + +# lshift_scalar +reg.register_pattern("__lshift_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__lshift_scalar__", _fschedule_broadcast) + +# rshift_scalar +reg.register_pattern("__rshift_scalar__", OpPattern.ELEMWISE) +reg.register_schedule("__rshift_scalar__", _fschedule_broadcast) + +# logical_and +reg.register_pattern("logical_and", OpPattern.ELEMWISE) +reg.register_schedule("logical_and", _fschedule_broadcast) + +# logical_or +reg.register_pattern("logical_or", OpPattern.ELEMWISE) +reg.register_schedule("logical_or", _fschedule_broadcast) + +# logical_not +reg.register_pattern("logical_not", OpPattern.ELEMWISE) +reg.register_schedule("logical_not", _fschedule_broadcast) + +# elemwise_add +reg.register_pattern("elemwise_add", OpPattern.BROADCAST) +reg.register_schedule("elemwise_add", _fschedule_broadcast) + +# elemwise_sub +reg.register_pattern("elemwise_sub", OpPattern.BROADCAST) +reg.register_schedule("elemwise_sub", _fschedule_broadcast) + +# elemwise_mul +reg.register_pattern("elemwise_mul", OpPattern.BROADCAST) +reg.register_schedule("elemwise_mul", _fschedule_broadcast) + +# elemwise_div +reg.register_pattern("elemwise_div", OpPattern.BROADCAST) +reg.register_schedule("elemwise_div", _fschedule_broadcast) + +# elemwise_mod +reg.register_pattern("elemwise_mod", OpPattern.BROADCAST) +reg.register_schedule("elemwise_mod", _fschedule_broadcast) + +# elemwise_pow +reg.register_pattern("elemwise_pow", OpPattern.BROADCAST) +reg.register_schedule("elemwise_pow", _fschedule_broadcast) + +# broadcast_add +reg.register_pattern("broadcast_add", OpPattern.BROADCAST) +reg.register_schedule("broadcast_add", _fschedule_broadcast) + +# broadcast_sub +reg.register_pattern("broadcast_sub", OpPattern.BROADCAST) +reg.register_schedule("broadcast_sub", _fschedule_broadcast) + +# broadcast_mul +reg.register_pattern("broadcast_mul", OpPattern.BROADCAST) +reg.register_schedule("broadcast_mul", _fschedule_broadcast) + +# broadcast_div +reg.register_pattern("broadcast_div", OpPattern.BROADCAST) +reg.register_schedule("broadcast_div", _fschedule_broadcast) + +# broadcast mod +reg.register_pattern("broadcast_mod", OpPattern.BROADCAST) +reg.register_schedule("broadcast_mod", _fschedule_broadcast) + +# broadcast max +reg.register_pattern("broadcast_max", OpPattern.BROADCAST) +reg.register_schedule("broadcast_max", _fschedule_broadcast) + +# broadcast min +reg.register_pattern("broadcast_min", OpPattern.BROADCAST) +reg.register_schedule("broadcast_min", _fschedule_broadcast) + +# broadcast pow +reg.register_pattern("broadcast_pow", OpPattern.BROADCAST) +reg.register_schedule("broadcast_pow", _fschedule_broadcast) + +# broadcast left_shift +reg.register_pattern("broadcast_left_shift", OpPattern.BROADCAST) +reg.register_schedule("broadcast_left_shift", _fschedule_broadcast) + +# broadcast right_shift +reg.register_pattern("broadcast_right_shift", OpPattern.BROADCAST) +reg.register_schedule("broadcast_right_shift", _fschedule_broadcast) + +# broadcast greater +reg.register_pattern("broadcast_greater", OpPattern.BROADCAST) +reg.register_schedule("broadcast_greater", _fschedule_broadcast) + +# broadcast less +reg.register_pattern("broadcast_less", OpPattern.BROADCAST) +reg.register_schedule("broadcast_less", _fschedule_broadcast) + +# broadcast equal +reg.register_pattern("broadcast_equal", 
OpPattern.BROADCAST) +reg.register_schedule("broadcast_equal", _fschedule_broadcast) + +# broadcast not_equal +reg.register_pattern("broadcast_not_equal", OpPattern.BROADCAST) +reg.register_schedule("broadcast_not_equal", _fschedule_broadcast) + +# broadcast greater_equal +reg.register_pattern("broadcast_greater_equal", OpPattern.BROADCAST) +reg.register_schedule("broadcast_greater_equal", _fschedule_broadcast) + +# broadcast less_equal +reg.register_pattern("broadcast_less_equal", OpPattern.BROADCAST) +reg.register_schedule("broadcast_less_equal", _fschedule_broadcast) + +# broadcast_to +reg.register_pattern("broadcast_to", OpPattern.BROADCAST) +reg.register_schedule("broadcast_to", _fschedule_broadcast) + +# clip +reg.register_pattern("clip", OpPattern.ELEMWISE) +reg.register_schedule("clip", _fschedule_elemwise) + +# elemwise sum +reg.register_pattern("elemwise_sum", OpPattern.ELEMWISE) +reg.register_schedule("elemwise_sum", _fschedule_elemwise) + +# full +reg.register_pattern("full", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_schedule("full", _fschedule_elemwise) + +# full_like +reg.register_pattern("full_like", OpPattern.ELEMWISE) +reg.register_schedule("full_like", _fschedule_elemwise) + +# zeros +reg.register_pattern("zeros", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_schedule("zeros", _fschedule_elemwise) + +# zeros_like +reg.register_pattern("zeros_like", OpPattern.ELEMWISE) +reg.register_schedule("zeros_like", _fschedule_elemwise) + +# ones +reg.register_pattern("ones", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_schedule("ones", _fschedule_elemwise) + +# ones_like +reg.register_pattern("ones_like", OpPattern.ELEMWISE) +reg.register_schedule("ones_like", _fschedule_elemwise) + +# greater +reg.register_pattern("greater", OpPattern.ELEMWISE) +reg.register_schedule("greater", _fschedule_elemwise) + +# less +reg.register_pattern("less", OpPattern.ELEMWISE) +reg.register_schedule("less", _fschedule_elemwise) + +# block_grad +reg.register_compute("block_grad", _compute_unary(topi.identity)) +reg.register_pattern("block_grad", OpPattern.ELEMWISE) +reg.register_schedule("block_grad", _fschedule_elemwise) diff --git a/nnvm/python/nnvm/top/transform.py b/nnvm/python/nnvm/top/transform.py new file mode 100644 index 000000000000..e9051309734a --- /dev/null +++ b/nnvm/python/nnvm/top/transform.py @@ -0,0 +1,108 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""Tensor transformation ops""" +from __future__ import absolute_import + +import tvm +import topi +from .tensor import _fschedule_broadcast, _fschedule_injective +from . 
import registry as reg +from .registry import OpPattern + +# expand_dims +reg.register_pattern("expand_dims", OpPattern.BROADCAST) +reg.register_schedule("expand_dims", _fschedule_broadcast) + +# expand_like +@reg.register_compute("expand_like") +def compute_expand_like(attrs, inputs, _): + """Compute definition of expand_like""" + if len(inputs[0].shape) == len(inputs[1].shape): + # If the number of dimensions is not changed then it is just a broadcasting + return topi.broadcast_to(inputs[0], inputs[1].shape) + + exclude = attrs.get_bool("exclude") + axis = attrs.get_int_tuple("axis") + if exclude: + exclude_axis = (axis,) if isinstance(axis, int) else axis + axis = [] + for item in range(len(inputs[1].shape)): + if item not in exclude_axis: + axis.append(item) + axis = tuple(axis) + + return topi.transform.expand_like(inputs[0], inputs[1], axis) +reg.register_pattern("expand_like", OpPattern.BROADCAST) +reg.register_schedule("expand_like", _fschedule_broadcast) + +# reshape_like +@reg.register_compute("reshape_like") +def compute_reshape_like(attrs, inputs, out_info): + """Compute definition of reshape_like""" + return topi.reshape(inputs[0], inputs[1].shape) +reg.register_pattern("reshape_like", OpPattern.INJECTIVE) +reg.register_schedule("reshape_like", _fschedule_injective) + +# transpose +reg.register_pattern("transpose", OpPattern.INJECTIVE) +reg.register_schedule("transpose", _fschedule_injective) + +# flip +reg.register_pattern("flip", OpPattern.INJECTIVE) +reg.register_schedule("flip", _fschedule_injective) + +# reshape +reg.register_pattern("reshape", OpPattern.INJECTIVE) +reg.register_schedule("reshape", _fschedule_injective) + +# squeeze +reg.register_pattern("squeeze", OpPattern.INJECTIVE) +reg.register_schedule("squeeze", _fschedule_injective) + +# concatenate +@reg.register_schedule("concatenate") +def schedule_concatenate(_, outs, target): + """Schedule definition of concatenate""" + with tvm.target.create(target): + return topi.generic.schedule_concatenate(outs) + +reg.register_pattern("concatenate", OpPattern.INJECTIVE) + +# split +reg.register_pattern("split", OpPattern.INJECTIVE) +reg.register_schedule("split", _fschedule_injective) + +# take +reg.register_pattern("take", OpPattern.INJECTIVE) +reg.register_schedule("take", _fschedule_injective) + +# strided_slice +reg.register_pattern("strided_slice", OpPattern.INJECTIVE) +reg.register_schedule("strided_slice", _fschedule_injective) + +# slice_like +reg.register_pattern("slice_like", OpPattern.INJECTIVE) +reg.register_schedule("slice_like", _fschedule_injective) + +# where +reg.register_pattern("where", OpPattern.INJECTIVE) +reg.register_schedule("where", _fschedule_injective) + +# gather_nd +reg.register_pattern("gather_nd", OpPattern.INJECTIVE) +reg.register_schedule("gather_nd", _fschedule_injective) diff --git a/nnvm/python/setup.py b/nnvm/python/setup.py new file mode 100644 index 000000000000..f89ac33a2e39 --- /dev/null +++ b/nnvm/python/setup.py @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import sys
+from setuptools import find_packages
+from distutils.core import setup
+
+def config_cython():
+    # Temporarily disable Cython for now,
+    # as NNVM uses a local DLL build.
+    return []
+    try:
+        from Cython.Build import cythonize
+        from distutils.extension import Extension
+        if sys.version_info >= (3, 0):
+            subdir = "_cy3"
+        else:
+            subdir = "_cy2"
+        ret = []
+        path = "nnvm/cython"
+
+        for fn in os.listdir(path):
+            if not fn.endswith(".pyx"):
+                continue
+            ret.append(Extension(
+                "nnvm/%s/%s" % (subdir, fn[:-4]),
+                ["nnvm/cython/%s" % fn],
+                include_dirs=["../include/"],
+                language="c++"))
+        return cythonize(ret)
+    except ImportError:
+        print("Cython is not installed, will compile without cython module")
+        return []
+
+# We cannot import `libinfo.py` in setup.py directly, since __init__.py
+# will be invoked, which introduces dependencies.
+CURRENT_DIR = os.path.dirname(__file__)
+libinfo_py = os.path.join(CURRENT_DIR, './nnvm/libinfo.py')
+libinfo = {'__file__': libinfo_py}
+exec(compile(open(libinfo_py, "rb").read(), libinfo_py, 'exec'), libinfo, libinfo)
+
+__version__ = libinfo['__version__']
+if not os.getenv('CONDA_BUILD'):
+    LIB_PATH = libinfo['find_lib_path']()
+    _, LIB_NAME = os.path.split(LIB_PATH[0])
+    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+    rpath = os.path.relpath(LIB_PATH[0], curr_path)
+    setup_kwargs = dict(
+        include_package_data=True,
+        data_files=[('nnvm', [rpath])]
+    )
+else:
+    setup_kwargs = {}
+
+setup(name='nnvm',
+      version=__version__,
+      description="NNVM: Open Compiler for AI Frameworks",
+      zip_safe=False,
+      install_requires=[
+          'numpy'
+      ],
+      packages=find_packages(),
+      url='https://github.com/dmlc/nnvm',
+      **setup_kwargs)
diff --git a/nnvm/src/README.md b/nnvm/src/README.md
index 64fd1371719a..c1b66260625e 100644
--- a/nnvm/src/README.md
+++ b/nnvm/src/README.md
@@ -23,3 +23,8 @@ The following components are operator invariant.
 - c_api: NNVM C API
 - core: NNVM core data structure
 - pass: NNVM pass
+
+The following components implement the generic NNVM compiler and define the tensor operator set:
+
+- top: NNVM core tensor operators
+- compiler: NNVM compiler toolchain
diff --git a/nnvm/src/compiler/alter_op_layout.cc b/nnvm/src/compiler/alter_op_layout.cc
new file mode 100644
index 000000000000..abc0022c2a79
--- /dev/null
+++ b/nnvm/src/compiler/alter_op_layout.cc
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file alter_op_layout.cc + * \brief Alter the operator layouts. Keep inferred layouts (if any) from previous stages. + * e.g., convolution may calculates faster with NCHW16c layout. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "compile_engine.h" +#include "graph_transform.h" + +namespace nnvm { +namespace compiler { +namespace { + +tvm::Array GetTensorInfo(const IndexedGraph& idx_graph, + const uint32_t nid, + const ShapeVector& shape_vec, + const DTypeVector& dtype_vec) { + tvm::Array vec; + for (uint32_t i = 0; i < idx_graph[nid].source->num_outputs(); ++i) { + tvm::Array shape; + for (int64_t x : shape_vec[idx_graph.entry_id(nid, i)]) { + CHECK_LE(x, static_cast(std::numeric_limits::max())); + shape.push_back(tvm::make_const(tvm::Int(32), x)); + } + vec.push_back(tvm::placeholder( + shape, GetTVMType(dtype_vec[idx_graph.entry_id(nid, i)]))); + } + return vec; +} + +Graph AlterOpLayout(const Graph& src) { + static auto& falter_op_layout = + Op::GetAttr("FTVMAlterOpLayout"); + + const ShapeVector& shape_vec = src.GetAttr("shape"); + const DTypeVector& dtype_vec = src.GetAttr("dtype"); + const IndexedGraph& idx_graph = src.indexed_graph(); + + std::vector > in_layouts_of_node(idx_graph.num_nodes()); + std::vector > out_layouts_of_node(idx_graph.num_nodes()); + std::unordered_map unchanged_nodes; + + if (src.HasAttr("layout")) { + // record layouts so that LayoutTransform pass can fix layouts correctly, + // e.g., conv2d can be replaced by some contrib implement + // whose layout is different from the original one + // (which was imported from a model file). + const auto& layouts = src.GetAttr >("layout"); + for (uint32_t nid = 0; nid < idx_graph.num_nodes(); ++nid) { + const auto &inode = idx_graph[nid]; + // record input layouts for all nodes, + // while replaced nodes will ignore the records here and have undefined input layouts. + std::vector in_layout; + for (const auto& e : inode.inputs) { + in_layout.emplace_back(layouts[idx_graph.entry_id(e)]); + } + in_layouts_of_node[nid] = in_layout; + + std::vector out_layout; + for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { + out_layout.emplace_back(layouts[idx_graph.entry_id(nid, i)]); + } + out_layouts_of_node[nid] = out_layout; + } + } + + auto transform = [&](uint32_t nid, + const NodePtr& n, + std::vector* ret) { + nnvm::compiler::FTVMAlterOpLayout fn_alter_op_layout = + falter_op_layout.get(n->op(), nullptr); + if (fn_alter_op_layout == nullptr) { + // will restore the original input layouts later. + unchanged_nodes[n.get()] = nid; + return false; + } + + // construct parameters for registered function + std::vector op_inputs; + tvm::Array tensor_infos; + CHECK_EQ(n->num_inputs(), idx_graph[nid].inputs.size()); + for (uint32_t i = 0; i < n->num_inputs(); ++i) { + const nnvm::NodeEntry& input = n->inputs[i]; + // input operator + Symbol op_input; + op_input.outputs.push_back(input); + op_inputs.push_back(op_input); + + // input tinfo, extract from the original graph + // because it was where infer_shape & infer_type applied. + tvm::Array op_output_tinfos = + GetTensorInfo(idx_graph, idx_graph[nid].inputs[i].node_id, + shape_vec, dtype_vec); + tensor_infos.push_back(op_output_tinfos[input.index]); + } + // callback registered function to get a new operator. 
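+    // Note: the registered callback receives the node attributes, the
+    // grouped input symbols and their input tensor placeholders, and
+    // writes a replacement expression into `op`; it returns true only
+    // when it actually rewrote the operator.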
+ Symbol op; + bool do_alter = + fn_alter_op_layout(n->attrs, Symbol::CreateGroup(op_inputs), tensor_infos, &op); + + if (do_alter) { + *ret = op.outputs; + } else { + // will restore the original input layouts later. + unchanged_nodes[n.get()] = nid; + } + return do_alter; + }; + + Graph ret = nnvm::compiler::GraphTransform(src, transform); + + if (src.HasAttr("layout")) { + // restore the layouts to return graph + const auto& ret_idx = ret.indexed_graph(); + std::vector ret_layouts(ret_idx.num_node_entries(), Layout::Undef()); + for (uint32_t nid = 0; nid < ret_idx.num_nodes(); ++nid) { + const auto& inode = ret_idx[nid]; + if (unchanged_nodes.count(inode.source)) { + const std::vector& in_layouts = + in_layouts_of_node[unchanged_nodes[inode.source]]; + for (uint32_t i = 0; i < inode.inputs.size(); ++i) { + const auto& e = inode.inputs[i]; + ret_layouts[ret_idx.entry_id(e)] = in_layouts[i]; + } + const std::vector& out_layouts = + out_layouts_of_node[unchanged_nodes[inode.source]]; + for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { + ret_layouts[ret_idx.entry_id(nid, i)] = out_layouts[i]; + } + } + } + + // cannot call indexed_graph() before return the origin Graph, + // thus create a new one. + nnvm::Graph new_ret; + new_ret.outputs = ret.outputs; + new_ret.attrs["layout"] = std::make_shared(std::move(ret_layouts)); + return new_ret; + } + + return ret; +} + +// register pass +NNVM_REGISTER_PASS(AlterOpLayout) +.set_body(AlterOpLayout) +.set_change_graph(true); + +} // namespace +} // namespace compiler +} // namespace nnvm diff --git a/nnvm/src/compiler/compile_engine.cc b/nnvm/src/compiler/compile_engine.cc index 3fe10b53c4a2..c2c0aa82b902 100644 --- a/nnvm/src/compiler/compile_engine.cc +++ b/nnvm/src/compiler/compile_engine.cc @@ -47,52 +47,52 @@ using namespace tvm; * \param type the tvm type. * \return corresponding DLDataType */ -int GetTypeFlag(tvm::DataType type) { - if (type == tvm::DataType::Float(32)) return 0; - if (type == tvm::DataType::Float(64)) return 1; - if (type == tvm::DataType::Float(16)) return 2; - if (type == tvm::DataType::UInt(8)) return 3; - if (type == tvm::DataType::Int(32)) return 4; - if (type == tvm::DataType::Int(8)) return 5; - if (type == tvm::DataType::Int(64)) return 6; - if (type == tvm::DataType::Int(16)) return 7; - if (type == tvm::DataType::UInt(16)) return 8; - if (type == tvm::DataType::UInt(32)) return 9; - if (type == tvm::DataType::UInt(64)) return 10; - if (type == tvm::DataType::UInt(1)) return 11; +int GetTypeFlag(tvm::Type type) { + if (type == tvm::Float(32)) return 0; + if (type == tvm::Float(64)) return 1; + if (type == tvm::Float(16)) return 2; + if (type == tvm::UInt(8)) return 3; + if (type == tvm::Int(32)) return 4; + if (type == tvm::Int(8)) return 5; + if (type == tvm::Int(64)) return 6; + if (type == tvm::Int(16)) return 7; + if (type == tvm::UInt(16)) return 8; + if (type == tvm::UInt(32)) return 9; + if (type == tvm::UInt(64)) return 10; + if (type == tvm::UInt(1)) return 11; LOG(FATAL) << "cannot convert " << type; return 0; } // convert from type flag to tvm type. 
-DataType GetTVMType(int type_flag) { +Type GetTVMType(int type_flag) { switch (type_flag) { case 0: - return tvm::DataType::Float(32); + return tvm::Float(32); case 1: - return tvm::DataType::Float(64); + return tvm::Float(64); case 2: - return tvm::DataType::Float(16); + return tvm::Float(16); case 3: - return tvm::DataType::UInt(8); + return tvm::UInt(8); case 4: - return tvm::DataType::Int(32); + return tvm::Int(32); case 5: - return tvm::DataType::Int(8); + return tvm::Int(8); case 6: - return tvm::DataType::Int(64); + return tvm::Int(64); case 7: - return tvm::DataType::Int(16); + return tvm::Int(16); case 8: - return tvm::DataType::UInt(16); + return tvm::UInt(16); case 9: - return tvm::DataType::UInt(32); + return tvm::UInt(32); case 10: - return tvm::DataType::UInt(64); + return tvm::UInt(64); case 11: - return tvm::DataType::UInt(1); + return tvm::UInt(1); default: LOG(FATAL) << "unknown type_flag=" << type_flag; - return DataType::Float(32); + return Float(32); } } @@ -218,7 +218,7 @@ class CompileEngine { Array shape; for (int64_t x : shape_vec[idx.entry_id(nid, i)]) { CHECK_LE(x, static_cast(std::numeric_limits::max())); - shape.push_back(make_const(DataType::Int(32), x)); + shape.push_back(make_const(Int(32), x)); } out_info.push_back( placeholder(shape, diff --git a/nnvm/src/compiler/compile_engine.h b/nnvm/src/compiler/compile_engine.h new file mode 100644 index 000000000000..8151f6ced478 --- /dev/null +++ b/nnvm/src/compiler/compile_engine.h @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file compile_engine.h + * \brief Internal engine to compile a subgraph fragment and cache compilation. + */ +#ifndef NNVM_COMPILER_COMPILE_ENGINE_H_ +#define NNVM_COMPILER_COMPILE_ENGINE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "graph_hash.h" + +namespace nnvm { +namespace compiler { + +/*! \brief A TVM Node to represent compiled graph function */ +struct GraphFuncNode : public tvm::Node { + /* \brief compiled target */ + std::string target; + /*! \brief Function name */ + std::string func_name; + /* \brief The inputs to the function */ + tvm::Array inputs; + /* \brief The outputs to the function */ + tvm::Array outputs; + /*! \brief The lowered functions */ + tvm::Array funcs; + + void VisitAttrs(tvm::AttrVisitor* v) { + v->Visit("target", &target); + v->Visit("func_name", &func_name); + v->Visit("inputs", &inputs); + v->Visit("outputs", &outputs); + v->Visit("funcs", &funcs); + } + + static constexpr const char* _type_key = "GraphFunc"; + TVM_DECLARE_NODE_TYPE_INFO(GraphFuncNode, tvm::Node); +}; + +TVM_DEFINE_NODE_REF(GraphFunc, GraphFuncNode); + +/*! 
\brief Cache Entry in the graph */
+struct GraphCacheEntryNode : public tvm::Node {
+  /*! \brief The graph function */
+  GraphFunc graph_func;
+  /*! \brief Usage statistics */
+  int use_count{0};
+  /*! \brief Index of the master node for calling schedule */
+  int master_idx;
+
+  void VisitAttrs(tvm::AttrVisitor* v) {
+    v->Visit("graph_func", &graph_func);
+    v->Visit("use_count", &use_count);
+    v->Visit("master_idx", &master_idx);
+  }
+  static constexpr const char* _type_key = "GraphCacheEntry";
+  TVM_DECLARE_NODE_TYPE_INFO(GraphCacheEntryNode, tvm::Node);
+};
+
+class GraphCacheEntry : public ::tvm::NodeRef {
+ public:
+  GraphCacheEntry() {}
+  explicit GraphCacheEntry(::tvm::NodePtr<::tvm::Node> n) : NodeRef(n) {}
+  GraphCacheEntryNode* operator->() {
+    return static_cast<GraphCacheEntryNode*>(get_mutable());
+  }
+  using ContainerType = GraphCacheEntryNode;
+};
+
+/*!
+ * \brief Call compile engine to lower a graph with given inputs.
+ *
+ * \param graph The graph to be compiled
+ * \param inputs The input specification.
+ * \param target The build target
+ * \param master_idx The index of master node for calling schedule
+ *
+ * \return func A lowered tvm function.
+ */
+GraphFunc GraphLower(Graph graph,
+                     const Array<tvm::Tensor>& inputs,
+                     const std::string& target,
+                     int master_idx);
+
+/*!
+ * \brief Get type flag from TVM Type
+ *
+ * \param type the tvm type
+ * \return the corresponding type flag
+ */
+int GetTypeFlag(tvm::Type type);
+
+/*!
+ * \brief Get TVM Type from type flag
+ *
+ * \param type_flag the type flag
+ * \return corresponding TVM type
+ */
+tvm::Type GetTVMType(int type_flag);
+
+}  // namespace compiler
+}  // namespace nnvm
+
+#endif  // NNVM_COMPILER_COMPILE_ENGINE_H_
diff --git a/nnvm/src/compiler/fold_scale_axis.cc b/nnvm/src/compiler/fold_scale_axis.cc
new file mode 100644
index 000000000000..6e5e73788c4c
--- /dev/null
+++ b/nnvm/src/compiler/fold_scale_axis.cc
@@ -0,0 +1,602 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file fold_scale_axis.cc
+ * \brief Fold scaling parameter of axis into weight of conv/dense
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include "pattern_util.h"
+#include "graph_transform.h"
+
+namespace nnvm {
+namespace compiler {
+
+enum FoldScaleKind {
+  // No folding is applied
+  kNone,
+  // The folding decision is pending, we can fold on a state.
+  kPending,
+  // The original operator that contains the scale.
+  kProvider,
+  // The final consumer of axis scale using multiply.
+  // Likely a conv or dense operator.
+  kMulConsumer,
+  // The final consumer of axis scale using division
+  kDivConsumer
+};
+
+struct FoldChainInfo {
+  // Entry kind
+  FoldScaleKind kind{kNone};
+  // The output axis to be folded
+  int axis{0};
+  // Source node in the fold chain
+  int source{0};
+};
+
+// The entry of a folding chain on which
+// we should perform folding
+struct FoldChainEntry {
+  // Fold information
+  FoldChainInfo info;
+  // Number of outgoing forks
+  // in forward propagation.
+  int fork_count{0};
+  // Following field only used by provider.
+  // The input index
+  int fold_input_index{1};
+  // The scale entry
+  NodeEntry scale_entry;
+};
+
+// Try to propagate axis scaling backward,
+// given that we know the status of the current fold axis.
+// Returns whether the signal is consumed.
+using FScaleAxisBackward = std::function<
+  bool(const NodeAttrs& attrs,
+       const std::vector<TShape>& in_shape,
+       const std::vector<TShape>& out_shape,
+       const FoldChainInfo& out_info,
+       std::vector<FoldChainInfo>* in_info)>;
+
+
+// Try to propagate axis scaling forward,
+// given that we know the status of one of its inputs to be pending;
+// also update the other input info.
+// Returns whether the signal is consumed.
+using FScaleAxisForward = std::function<
+  bool(const NodeAttrs& attrs,
+       const std::vector<TShape>& in_shape,
+       const std::vector<TShape>& out_shape,
+       std::vector<FoldChainInfo>* in_info,
+       FoldChainInfo* out_info)>;
+
+
+// Detect whether an axis scaling is happening
+bool DetectScaleAxis(const IndexedGraph& idx,
+                     uint32_t nid,
+                     const ShapeVector& shape_vec,
+                     const std::vector<uint32_t>& ref_count,
+                     bool is_forward,
+                     std::vector<FoldChainEntry>* chain) {
+  const IndexedGraph::Node& inode = idx[nid];
+  static const Op* bcast_mul = Op::Get("broadcast_mul");
+  static const Op* expand_dims = Op::Get("expand_dims");
+  if (inode.source->op() != bcast_mul) return false;
+  const TShape& oshape = shape_vec[idx.entry_id(nid, 0)];
+  CHECK_NE(oshape.ndim(), 0);
+  if (oshape.ndim() <= 1) return false;
+  for (int i = 0; i < 2; ++i) {
+    const IndexedGraph::NodeEntry& a = inode.inputs[i];
+    const IndexedGraph::NodeEntry& b = inode.inputs[1 - i];
+    std::pair<int, int> axis =
+        MatchBroadcast1DAxis(oshape, shape_vec[idx.entry_id(a)]);
+    if (axis.first != -1 &&
+        shape_vec[idx.entry_id(b)] == oshape) {
+      if (ref_count[a.node_id] != 1) return false;
+      if (is_forward && ref_count[nid] != 1) return false;
+      if (!is_forward && ref_count[b.node_id] != 1) return false;
+      const IndexedGraph::Node& anode = idx[a.node_id];
+      // mark the current entry.
+      FoldChainEntry& e = (*chain)[nid];
+      if (anode.source->is_variable()) {
+        e.fold_input_index = 1 - i;
+        e.scale_entry = inode.source->inputs[1 - i];
+      } else if (anode.source->op() == expand_dims &&
+                 shape_vec[idx.entry_id(anode.source->inputs[0])].ndim() == 1) {
+        e.fold_input_index = 1 - i;
+        e.scale_entry = anode.source->inputs[0];
+      } else {
+        return false;
+      }
+      e.info.axis = axis.first;
+      e.info.kind = kPending;
+      e.info.source = nid;
+      e.fork_count = 1;
+      // In the backward message passing
+      // we need to eagerly pass it to the input.
+      // In the forward message passing
+      // we will "pull" the message from input.
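+      // Example: for broadcast_mul(x, expand_dims(scale)), the scale
+      // entry and its broadcast axis are recorded on this multiply node
+      // so that a consumer such as conv2d/dense found later along the
+      // chain can absorb the multiplication.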
+ if (!is_forward) { + FoldChainEntry& enext = (*chain)[b.node_id]; + enext.info.axis = e.info.axis; + enext.info.kind = kPending; + enext.info.source = nid; + } + return true; + } + } + return false; +} + +Graph FoldScaleAxis(Graph src) { + // Operator pattern + static auto& fbackward = + nnvm::Op::GetAttr("FScaleAxisBackward"); + static auto& fforward = + nnvm::Op::GetAttr("FScaleAxisForward"); + const IndexedGraph& idx = src.indexed_graph(); + const ShapeVector& shape_vec = src.GetAttr("shape"); + std::vector ref_count = GetNodeRefCounts(idx); + std::vector bwd_chain(idx.num_nodes()); + std::vector fwd_chain(idx.num_nodes()); + // shape hint for the inference. + std::vector in_shape, out_shape; + + // perform backward folding. + for (uint32_t i = idx.num_nodes(); i != 0; --i) { + uint32_t nid = i - 1; + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + if (DetectScaleAxis(idx, nid, shape_vec, + ref_count, false, &bwd_chain)) continue; + if (bwd_chain[nid].info.kind != kPending) continue; + // if referred by multiple node, cannot do propagation + if (ref_count[nid] != 1 || !fbackward.count(inode.source->op())) { + bwd_chain[nid].info.kind = kNone; continue; + } + // get input shape and output shape. + in_shape.clear(); out_shape.clear(); + for (const IndexedGraph::NodeEntry& e : inode.inputs) { + in_shape.push_back(shape_vec[idx.entry_id(e)]); + } + for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { + out_shape.push_back(shape_vec[idx.entry_id(nid, i)]); + } + std::vector in_info(in_shape.size(), FoldChainInfo()); + bool consumed = fbackward[inode.source->op()]( + inode.source->attrs, + in_shape, + out_shape, + bwd_chain[nid].info, + &in_info); + CHECK_EQ(in_info.size(), in_shape.size()); + // propagate back. + bool can_prop = true; + for (size_t i = 0; i < in_info.size(); ++i) { + const IndexedGraph::NodeEntry& e = inode.inputs[i]; + if (ref_count[e.node_id] != 1 || + idx[e.node_id].source->num_outputs() != 1) { + can_prop = false; break; + } + } + if (!can_prop) continue; + for (size_t i = 0; i < in_info.size(); ++i) { + const IndexedGraph::NodeEntry& e = inode.inputs[i]; + bwd_chain[e.node_id].info = in_info[i]; + } + // mark consumed by making the source as provider. + if (consumed) { + bwd_chain[bwd_chain[nid].info.source].info.kind = kProvider; + } + } + + + // perform forward folding. + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + // skip scales that are already folded in backward. + if (bwd_chain[nid].info.kind == kProvider) continue; + if (DetectScaleAxis(idx, nid, shape_vec, + ref_count, true, &fwd_chain)) continue; + if (inode.source->num_outputs() != 1) continue; + // Do state update + // get input shape and output shape. 
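+    // A scale is only pulled forward when exactly one input entry
+    // carries a pending fold; otherwise the pending signal is dropped
+    // by the check below.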
+ std::vector in_info; + FoldChainInfo out_info; + int num_inpending = 0; + in_shape.clear(); out_shape.clear(); + for (const IndexedGraph::NodeEntry& e : inode.inputs) { + in_shape.push_back(shape_vec[idx.entry_id(e)]); + // input information + in_info.push_back(fwd_chain[e.node_id].info); + if (fwd_chain[e.node_id].info.kind == kPending) { + ++num_inpending; + } + } + for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { + out_shape.push_back(shape_vec[idx.entry_id(nid, i)]); + } + if (num_inpending != 1 || + !fforward.count(inode.source->op())) continue; + bool consumed = fforward[inode.source->op()]( + inode.source->attrs, + in_shape, + out_shape, + &in_info, + &out_info); + // update input info + for (size_t i = 0; i < in_info.size(); ++i) { + fwd_chain[inode.inputs[i].node_id].info = in_info[i]; + } + if (consumed) { + fwd_chain[nid].info = out_info; + for (size_t i = 0; i < in_info.size(); ++i) { + if (in_info[i].kind == kPending) { + if (--fwd_chain[in_info[i].source].fork_count == 0) { + fwd_chain[in_info[i].source].info.kind = kProvider; + } + } + } + } else { + // can propagate condition + if (inode.source->num_outputs() == 1) { + fwd_chain[nid].info = out_info; + if (out_info.kind == kPending) { + // When there is multiple reference to input + // every path have to be consumed + fwd_chain[out_info.source].fork_count += ref_count[nid] - 1; + } + } + } + } + + auto transform = [&](uint32_t nid, const NodePtr& n, std::vector* ret) { + NodeEntry rvalue = NodeEntry{n, 0, 0}; + { + // Backward chain + const FoldChainEntry& e = bwd_chain[nid]; + if (e.info.kind == kMulConsumer && + bwd_chain[e.info.source].info.kind == kProvider) { + const FoldChainEntry& se = bwd_chain[e.info.source]; + CHECK_EQ(n->num_outputs(), 1); + NodeEntry scale = ExpandBiasToMatchAxis( + se.scale_entry, + shape_vec[idx.entry_id(nid, 0)].ndim(), + shape_vec[idx.entry_id(se.scale_entry)].ndim(), + e.info.axis); + rvalue = MakeNode("broadcast_mul", n->attrs.name + "_sc", + {rvalue, scale}); + } else if (e.info.kind == kProvider) { + rvalue = n->inputs[e.fold_input_index]; + } + } + // Note that the value might get transformed twice if it + // folds value from both fwd and backward chain. + { + // forward chain + const FoldChainEntry& e = fwd_chain[nid]; + if (e.info.kind == kMulConsumer && + fwd_chain[e.info.source].info.kind == kProvider) { + const FoldChainEntry& se = fwd_chain[e.info.source]; + CHECK_EQ(n->num_outputs(), 1); + NodeEntry scale = ExpandBiasToMatchAxis( + se.scale_entry, + shape_vec[idx.entry_id(nid, 0)].ndim(), + shape_vec[idx.entry_id(se.scale_entry)].ndim(), + e.info.axis); + rvalue = MakeNode("broadcast_mul", n->attrs.name + "_sc", + {rvalue, scale}); + } else if (e.info.kind == kDivConsumer && + fwd_chain[e.info.source].info.kind == kProvider) { + const FoldChainEntry& se = fwd_chain[e.info.source]; + CHECK_EQ(n->num_outputs(), 1); + NodeEntry scale = ExpandBiasToMatchAxis( + se.scale_entry, + shape_vec[idx.entry_id(nid, 0)].ndim(), + shape_vec[idx.entry_id(se.scale_entry)].ndim(), + e.info.axis); + rvalue = MakeNode("broadcast_div", n->attrs.name + "_sc", + {rvalue, scale}); + } else if (e.info.kind == kProvider) { + rvalue = n->inputs[e.fold_input_index]; + } + } + if (rvalue.node == n) { + return false; + } else { + *ret = {rvalue}; + return true; + } + }; + return GraphTransform(src, transform); +} + +NNVM_REGISTER_PASS(FoldScaleAxis) +.set_body(FoldScaleAxis); + +// property registration. 
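+// Each registration below tells the pass how a pending scale moves
+// through one operator: FScaleAxisBackward pushes the fold state from
+// the output entry to the inputs, while FScaleAxisForward pulls it
+// from a pending input entry to the output.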
+bool ReluScaleAxisBackward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + const FoldChainInfo& out_info, + std::vector* in_axis) { + (*in_axis)[0] = out_info; + return false; +} + +bool ReluScaleAxisForward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + std::vector* in_info, + FoldChainInfo* out_info) { + *out_info = (*in_info)[0]; + return false; +} + +NNVM_REGISTER_OP(relu) +.set_attr("FScaleAxisBackward", ReluScaleAxisBackward); + +NNVM_REGISTER_OP(leaky_relu) +.set_attr("FScaleAxisBackward", ReluScaleAxisBackward); + +NNVM_REGISTER_OP(relu) +.set_attr("FScaleAxisForward", ReluScaleAxisForward); + +NNVM_REGISTER_OP(leaky_relu) +.set_attr("FScaleAxisForward", ReluScaleAxisForward); + +// property registration. +template +bool Pool2DBackward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + const FoldChainInfo& out_info, + std::vector* in_axis) { + const T& param = nnvm::get(attrs.parsed); + if (out_info.axis == 1 && param.layout == "NCHW") { + (*in_axis)[0] = out_info; + } + return false; +} + +template +bool Pool2DForward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + std::vector* in_info, + FoldChainInfo* out_info) { + const T& param = nnvm::get(attrs.parsed); + if ((*in_info)[0].axis == 1 && param.layout == "NCHW") { + *out_info = (*in_info)[0]; + } + return false; +} + +NNVM_REGISTER_OP(max_pool2d) +.set_attr("FScaleAxisBackward", Pool2DBackward); + +NNVM_REGISTER_OP(avg_pool2d) +.set_attr("FScaleAxisBackward", Pool2DBackward); + +NNVM_REGISTER_OP(max_pool2d) +.set_attr("FScaleAxisForward", Pool2DForward); + +NNVM_REGISTER_OP(avg_pool2d) +.set_attr("FScaleAxisForward", Pool2DForward); + + + +bool BroadcastAddSubScaleAxisBackward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + const FoldChainInfo& out_info, + std::vector* in_axis) { + if (out_info.kind != kPending) return false; + for (int i = 0; i < 2; ++i) { + std::pair m = MatchBroadcast1DAxis(out_shape[0], in_shape[1 - i]); + if (m.second != -1 && + in_shape[i] == out_shape[0] && + m.first == out_info.axis) { + (*in_axis)[i].kind = kPending; + (*in_axis)[i].axis = out_info.axis; + (*in_axis)[i].source = out_info.source; + (*in_axis)[1 - i].kind = kMulConsumer; + (*in_axis)[1 - i].axis = m.second; + (*in_axis)[1 - i].source = out_info.source; + return false; + } + } + return false; +} + +bool BroadcastAddSubScaleAxisForward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + std::vector* in_info, + FoldChainInfo* out_info) { + for (int i = 0; i < 2; ++i) { + if ((*in_info)[i].kind == kPending) { + std::pair m = MatchBroadcast1DAxis(out_shape[0], in_shape[1 - i]); + if (m.second != -1 && + in_shape[i] == out_shape[0] && + m.first == (*in_info)[i].axis) { + out_info->kind = kPending; + out_info->axis = m.first; + out_info->source = (*in_info)[i].source; + (*in_info)[1 - i].kind = kDivConsumer; + (*in_info)[1 - i].axis = m.second; + (*in_info)[1 - i].source = (*in_info)[i].source; + return false; + } + } + } + return false; +} + +NNVM_REGISTER_OP(broadcast_add) +.set_attr("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward); + +NNVM_REGISTER_OP(broadcast_sub) +.set_attr("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward); + +NNVM_REGISTER_OP(broadcast_add) +.set_attr("FScaleAxisForward", BroadcastAddSubScaleAxisForward); + +NNVM_REGISTER_OP(broadcast_sub) 
+.set_attr("FScaleAxisForward", BroadcastAddSubScaleAxisForward); + +bool Conv2DScaleAxisBackward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + const FoldChainInfo& out_info, + std::vector* in_axis) { + using top::Conv2DParam; + const Conv2DParam& param = nnvm::get(attrs.parsed); + if (out_info.kind != kPending) return false; + // only optimize for kernel layout OIHW for now + if (param.kernel_layout == "OIHW" && out_info.axis == 1) { + (*in_axis)[1].kind = kMulConsumer; + (*in_axis)[1].axis = 0; + (*in_axis)[1].source = out_info.source; + if (param.use_bias) { + (*in_axis)[2].kind = kMulConsumer; + (*in_axis)[2].axis = 0; + (*in_axis)[2].source = out_info.source; + } + return true; + } else { + return false; + } +} + +bool Conv2DScaleAxisForward( + const NodeAttrs& attrs, + const std::vector& in_shape, + const std::vector& out_shape, + std::vector* in_info, + FoldChainInfo* out_info) { + using top::Conv2DParam; + const Conv2DParam& param = nnvm::get(attrs.parsed); + if ((*in_info)[0].kind != kPending) return false; + // only optimize for nchw for now + if (param.kernel_layout == "OIHW" && (*in_info)[0].axis == 1) { + // Check whether it is depthwise conv2d + if (param.use_bias) { + CHECK_EQ(in_shape.size(), 3U) << "Input:[data, weight, bias]"; + } else { + CHECK_EQ(in_shape.size(), 2U) << "Input:[data, weight]"; + } + + auto dshape = in_shape.at(0); + CHECK_EQ(dshape.ndim(), 4U) << "Input data shape should be 4D"; + + // TODO(FrozenGene): Currently, we don't support conv2d's groups != in channels. + if (param.groups > 1 && dshape[1] != param.groups) { + LOG(WARNING) << "FoldScaleAxis optimization doesn't support conv2d " + << "with groups != in channels. We will skip FoldScaleAxis " + << "optimization for this op."; + return false; + } + + + // input channel equals to groups, which means depthwise conv2d + bool is_depthwise_conv2d = (dshape[1] == param.groups); + + // if it is depthwise convolution, the weight fold axis should along to axis 0. + // For example: + // data shape [1,54,63,127] weights shape [54,1,3,3], scale shape [54] + // depthwise convolution's weights shape means we have divided the data shape's channel + // to groups parties. Here, we divide 54 channels into 54 parties. Every part size is 1. + // weights shape's first dimision means how many parties we have divided (mapping to + // input shape's channel). So, in the depthwise convolution, we shouldn't do like + // traditional convolution(i.e. OIHW) + + // Backgroud of this algorithm: + + // Original Graph: + // Graph(%x, + // %in_scale, + // %weight, + // %bias, + // %out_scale) { + // %1 = __add_scalar__(%x, scalar='1') + // %3 = expand_dims(%in_scale, num_newaxis='2', axis='1') + // %4 = broadcast_mul(%1, %3) + // %7 = conv2d(%4, %weight, %bias, padding='(1, 1)', kernel_size='(3, 3)', channels='2') + // %8 = relu(%7) + // %10 = expand_dims(%out_scale, num_newaxis='2', axis='1') + // %11 = broadcast_mul(%8, %10) + // ret %11 + // } + + // Optimized Graph: + // Graph(%x, + // %weight, + // %out_scale, + // %in_scale, + // %bias) { + // %1 = __add_scalar__(%x, scalar='1') + // %4 = expand_dims(%out_scale, num_newaxis='3', axis='1') + // %5 = broadcast_mul(%weight, %4) + // %7 = expand_dims(%in_scale, num_newaxis='2', axis='1') + // %8 = broadcast_mul(%5, %7) + // %10 = broadcast_mul(%bias, %out_scale) + // %11 = conv2d(%1, %8, %10, padding='(1, 1)', kernel_size='(3, 3)', channels='2') + // %12 = relu(%11) + // ret %12 + // } + + // Conv2DScaleAxisForward will need in_scale. 
Conv2DScaleAxisBackward will need out_scale. + // in_scale will apply into input data's channel (in_channel). out_scale will apply in + // conv2d's result, which will apply in weight's output channel. + // So, default Conv2DScaleAxisForward will fold axis 1 (weights' input channel). + // Conv2DScaleAxisBackward will fold axis 0 (weights' output channel). + // But depthwise convolution is another story as said previously. + (*in_info)[1].kind = kMulConsumer; + (*in_info)[1].axis = is_depthwise_conv2d ? 0 : 1; + (*in_info)[1].source = (*in_info)[0].source; + return true; + } else { + return false; + } +} + +NNVM_REGISTER_OP(conv2d) +.set_attr("FScaleAxisBackward", Conv2DScaleAxisBackward); + +NNVM_REGISTER_OP(conv2d) +.set_attr("FScaleAxisForward", Conv2DScaleAxisForward); + +} // namespace compiler +} // namespace nnvm diff --git a/nnvm/src/compiler/graph_fuse.cc b/nnvm/src/compiler/graph_fuse.cc new file mode 100644 index 000000000000..1b4a8e117555 --- /dev/null +++ b/nnvm/src/compiler/graph_fuse.cc @@ -0,0 +1,424 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file graph_fuse.cc + * \brief Fuse the operators together. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "graph_fuse.h" +#include "graph_runtime.h" +#include "pattern_util.h" + +namespace nnvm { +namespace compiler { +using namespace tvm; + +// Partition the graph into segments +// Each segment will be compiled into one operator. +// Also mark the property of the segment. +nnvm::Graph GraphFindFusibleGroups(nnvm::Graph g) { + const IndexedGraph& idx = g.indexed_graph(); + int opt_level = 2; + if (g.attrs.count("opt_level") != 0) { + opt_level = g.MoveCopyAttr("opt_level"); + } + + // Get attributes from the graph + const ShapeVector& shape_vec = g.GetAttr("shape"); + + // Reference counter of each op node + // For now, always store result when an op is referred more than once. + std::vector ref_count = GetNodeRefCounts(idx); + for (const auto& e : idx.outputs()) { + // this line will realize all the outputs + ref_count[e.node_id] += 1; + } + // Pattern for the subgraph + PatternVec pattern_vec(idx.num_nodes(), kOpaque); + // Whether node can be fused to parent. + std::vector fuse_vec(idx.num_nodes(), FuseRule::kUknown); + // Master node id of fusion segment. 
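+  // The master of a fused segment is the node whose schedule drives
+  // code generation for the whole segment (e.g. the conv2d in a
+  // conv2d + relu group); -1 means no master has been chosen yet.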
+ std::vector master_vec(idx.num_nodes(), -1); + // Operator pattern + static auto& op_pattern = nnvm::Op::GetAttr("TOpPattern"); + + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) { + fuse_vec[nid] = FuseRule::kRealize; continue; + } + TOpPattern pt = op_pattern.get(inode.source->op(), kOpaque); + + if (pt <= kBroadcast) { + // Check if we can fuse to the master. + int chosen_master = -1; + bool ewise = inode.source->num_outputs() == 1; + bool mark_as_injective = false; + for (const auto& e : inode.inputs) { + if (fuse_vec[e.node_id] == FuseRule::kUknown) { + TOpPattern ipt = pattern_vec[e.node_id]; + if (ipt != kElemWise) ewise = false; + if (ipt <= kBroadcast) { + fuse_vec[e.node_id] = FuseRule::kFuseToMaster; + } else if (ipt == kInjective) { + fuse_vec[e.node_id] = FuseRule::kFuseToMaster; + mark_as_injective = true; + } else if (ipt == kOutEWiseFusable && + chosen_master == -1 && + shape_vec[idx.entry_id(nid, 0)] == shape_vec[idx.entry_id(e)]) { + chosen_master = master_vec[e.node_id]; + fuse_vec[e.node_id] = FuseRule::kFuseToMaster; + } else { + fuse_vec[e.node_id] = FuseRule::kRealize; + } + } + if (ewise) { + if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) { + ewise = false; + } + } + } + master_vec[nid] = chosen_master; + if (chosen_master != -1) { + pt = kOutEWiseFusable; + } else if (mark_as_injective) { + pt = kInjective; + } else { + pt = ewise ? kElemWise : kBroadcast; + } + } else if (pt == kInjective || pt == kCommReduce) { + // Fuse to the comm reduce or injective + for (const auto& e : inode.inputs) { + if (fuse_vec[e.node_id] == FuseRule::kUknown) { + TOpPattern ipt = pattern_vec[e.node_id]; + if (ipt <= kInjective) { + fuse_vec[e.node_id] = FuseRule::kFuseToMaster; + } else { + fuse_vec[e.node_id] = FuseRule::kRealize; + } + } + } + if (pt == kCommReduce) { + master_vec[nid] = nid; + } + } else { + // Realize + master_vec[nid] = nid; + for (const auto& e : inode.inputs) { + if (fuse_vec[e.node_id] == FuseRule::kUknown) { + fuse_vec[e.node_id] = FuseRule::kRealize; + if (master_vec[e.node_id] == -1) { + master_vec[e.node_id] = e.node_id; + } + } + } + } + + pattern_vec[nid] = pt; + if (ref_count[nid] > 1 || opt_level < 1) { + fuse_vec[nid] = FuseRule::kRealize; + if (master_vec[nid] == -1) { + master_vec[nid] = nid; + } + } + } + + // Point to the group root id of each node. + GroupVec group_vec(idx.num_nodes(), -1); + std::vector > node_ids_per_group(idx.num_nodes()); + for (uint32_t i = idx.num_nodes(); i != 0; --i) { + uint32_t nid = i - 1; + const auto& inode = idx[nid]; + bool is_root = false; + if (group_vec[nid] == -1) { + group_vec[nid] = nid; + node_ids_per_group[nid].push_back(nid); + is_root = true; + } + + // Check if injective op and out_ewise_fusable op (e.g. conv2d) are in the same group. + bool parent_out_ewise = false; + bool parent_injective = false; + for (const auto& e : inode.inputs) { + if (fuse_vec[e.node_id] != FuseRule::kFuseToMaster) continue; + TOpPattern pt = pattern_vec[e.node_id]; + if (pt == kOutEWiseFusable) { + parent_out_ewise = true; + } else if (pt == kInjective) { + parent_injective = true; + } + } + // Change the master node from out_ewise_fusable op to itself + if (parent_injective && parent_out_ewise) { + master_vec[nid] = nid; + if (!is_root) { + // Children nodes in the same group might be pointing to a master node in a different group. 
+ for (uint32_t j : node_ids_per_group[group_vec[nid]]) { + master_vec[j] = nid; + } + } + } + + // Propagate the group id. + for (const auto& e : inode.inputs) { + TOpPattern pt = pattern_vec[e.node_id]; + if (parent_out_ewise && parent_injective) { + if (pt == kOutEWiseFusable) { + continue; // Do not fuse out_ewise_fusable op + } else if (pt == kInjective) { + master_vec[e.node_id] = nid; + } + } + if (fuse_vec[e.node_id] == FuseRule::kFuseToMaster) { + CHECK(group_vec[e.node_id] == -1|| + group_vec[e.node_id] == group_vec[nid]); + group_vec[e.node_id] = group_vec[nid]; + node_ids_per_group[group_vec[nid]].push_back(e.node_id); + } + } + } + + /* + Above algorithm will not fuse a node whose output is fed to more than one + child node. This is because in general, it does not make sense to fuse multiple + children branches with their parent, as in the following example. + + conv2d + / | \ + / | \ + op op op + | | | + | | | + + However, when all children branches meet at a certain node, there is a possibility for + further operator fusion. For example, all nodes in the following subgraph can be fused + into a single node, if three 'in-between' nodes and the bottom node are all element wise + operation. + + conv2d + / | \ + / | \ + op op op + \ | / + \ | / + elemwise add + | + + This pattern is not uncommon. For example, it arises when conv2d op is followed by exponential + linear unit. If bias add and batch normalization are also present, they can be fused as well. + + In fact, above fusion algorithm already fuses three in-between nodes and the element wise + add node in the figure above. The following code fuses the conv2d node with the already + fused children nodes. The following patterns are supported. + + * Any number of child nodes from the top node + * The path from the top node to bottom node can contain any number of element wise ops. + + The only restriction is that in-between nodes cannot have more than one child. + + The overview of the algorithm below is as follows: + + 1. Check if all children nodes are fused into a single op by the existing fusion algorithm + 2. Fuse the parent node to children nodes, and update its group id to be the children's group id + 3. If the parent node originally belongs to another group (for example, conv + batch norm), + propagate the new group id to a grand parent and upward + */ + if (opt_level >= 1) { + std::vector > children_group_ids(idx.num_nodes()); + for (uint32_t nid = idx.num_nodes() - 1; nid != 0; --nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + CHECK_NE(group_vec[nid], -1); + if (inode.inputs.size() != 1) continue; + const uint32_t parent_nid = inode.inputs[0].node_id; + // if parent node has more than one child, record each child's group id. 
+ if (ref_count[parent_nid] > 1) children_group_ids[parent_nid].push_back(group_vec[nid]); + } + + std::vector new_group_id(idx.num_nodes(), -1); + for (uint32_t nid = idx.num_nodes() - 1; nid != 0; --nid) { + if (new_group_id[group_vec[nid]] != -1) { + // propagate new group id from child + group_vec[nid] = new_group_id[group_vec[nid]]; + } + TOpPattern pt = op_pattern.get(idx[nid].source->op(), kOpaque); + if (pt == kOpaque) continue; + const auto& group_ids = children_group_ids[nid]; + if (group_ids.size() <= 1) continue; + const uint32_t child_group_id = group_ids[0]; + const auto& children_node_ids = node_ids_per_group[child_group_id]; + + auto is_same_group_id = [child_group_id](uint32_t id) { + return id == child_group_id; + }; + auto is_fusible_pattern = [&idx](uint32_t child_nid) { + TOpPattern child_pt = op_pattern.get(idx[child_nid].source->op(), kOpaque); + return child_pt <= kBroadcast; + }; + // fuse this node with children if + // all children belong to the same group and + // all nodes in the group are element wise or broadcast op. + const bool can_be_fused = std::all_of(group_ids.begin(), group_ids.end(), is_same_group_id) && + std::all_of(children_node_ids.begin(), children_node_ids.end(), is_fusible_pattern); + + if (can_be_fused) { + new_group_id[group_vec[nid]] = child_group_id; + group_vec[nid] = child_group_id; + for (uint32_t nid2 : node_ids_per_group[child_group_id]) { + pattern_vec[nid2] = pattern_vec[nid]; + master_vec[nid2] = master_vec[nid]; + } + } + } + } + + g.attrs["group_root"] = std::make_shared(std::move(group_vec)); + g.attrs["group_master"] = std::make_shared(std::move(master_vec)); + g.attrs["pattern"] = std::make_shared(std::move(pattern_vec)); + return g; +} + +NNVM_REGISTER_PASS(GraphFindFusibleGroups) +.set_body(GraphFindFusibleGroups) +.depend_graph_attr("shape") +.depend_graph_attr("dtype"); + +// Fuse the partitioned graph into segments. +// Create a new graph with fused nodes. +// Also inherit attribute shape, dltype from the previous graph. +nnvm::Graph GraphFuse(nnvm::Graph g) { + CHECK(g.HasAttr("group_root") && g.HasAttr("pattern")) + << "GraphFindFusibleGroups pass hasn't been applied yet."; + + const IndexedGraph& idx = g.indexed_graph(); + // Get attributes from the graph + const ShapeVector& shape_vec = g.GetAttr("shape"); + const DTypeVector& dtype_vec = g.GetAttr("dtype"); + const GroupVec& group_vec = g.GetAttr("group_root"); + const PatternVec& pattern_vec = g.GetAttr("pattern"); + + // Specially handle assign op. + const nnvm::Op* assign_op = nnvm::Op::Get("_assign"); + + FuseEntryVec fuse_entries(idx.num_nodes()); + // Setup inputs and placeholder. 
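+  // Every entry that crosses a group boundary becomes a placeholder
+  // input of the fused function; purely elementwise groups flatten
+  // such inputs to 1-D (see flatten_data below).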
+ for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + CHECK_GE(group_vec[nid], 0); + int root_id = group_vec[nid]; + FuseEntry& fe = fuse_entries[root_id]; + fe.flatten_data = (pattern_vec[root_id] == kElemWise || + inode.source->op() == assign_op); + for (const auto& e : inode.inputs) { + if (group_vec[e.node_id] != root_id && fe.imap.count(e) == 0) { + Array shape; + if (fe.flatten_data) { + // Elementwise support flatten + int64_t prod = 1; + for (int64_t x : shape_vec[idx.entry_id(e)]) { + prod *= x; + } + CHECK_LE(prod, static_cast(std::numeric_limits::max())); + shape.push_back(make_const(Int(32), prod)); + } else { + for (int64_t x : shape_vec[idx.entry_id(e)]) { + CHECK_LE(x, static_cast(std::numeric_limits::max())); + shape.push_back(make_const(Int(32), x)); + } + } + std::ostringstream os_name; + os_name << "input" << fe.imap.size(); + Tensor data = placeholder( + shape, TVMType2Type(GetDLType(dtype_vec[idx.entry_id(e)])), + os_name.str()); + NodeEntry garg = Symbol::CreateVariable(os_name.str()).outputs[0]; + fe.imap[e] = garg; + fe.reverse_imap[garg.node.get()] = e; + fe.input_info[garg.node.get()] = std::move(data); + } + } + } + + // Setup the Subgraph + std::vector subgraph_vec(idx.num_node_entries()); + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + int root_id = group_vec[nid]; + FuseEntry& fe = fuse_entries[root_id]; + // Create a subgraph node. + NodePtr gnode = Node::Create(); + gnode->attrs = inode.source->attrs; + // Set input entries for the subgraph node. + for (const auto& e : inode.inputs) { + if (group_vec[e.node_id] != root_id) { + auto it = fe.imap.find(e); + CHECK(it != fe.imap.end()); + gnode->inputs.push_back(it->second); + } else { + const NodeEntry& ne = subgraph_vec[idx.entry_id(e)]; + CHECK(!idx[e.node_id].source->is_variable()); + CHECK(ne.node != nullptr); + gnode->inputs.push_back(ne); + } + } + // Schedule on the root node and use the master's schedule + if (static_cast(nid) != root_id) { + for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { + uint32_t eid = idx.entry_id(nid, index); + subgraph_vec[eid] = NodeEntry{gnode, index, 0}; + } + } else { + for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { + fe.subgraph.outputs.push_back(NodeEntry{gnode, index, 0}); + } + } + } + g.attrs["fused_entry"] = std::make_shared(std::move(fuse_entries)); + return g; +} + +NNVM_REGISTER_PASS(GraphFuse) + .set_body(GraphFuse) + .set_change_graph(true) + .provide_graph_attr("fused_entry") + .depend_graph_attr("shape") + .depend_graph_attr("dtype") + .depend_graph_attr("group_root") + .depend_graph_attr("group_master"); + +} // namespace compiler +} // namespace nnvm diff --git a/nnvm/src/compiler/graph_fuse.h b/nnvm/src/compiler/graph_fuse.h index bde9a486669b..e5e51189dd94 100644 --- a/nnvm/src/compiler/graph_fuse.h +++ b/nnvm/src/compiler/graph_fuse.h @@ -48,7 +48,7 @@ enum class FuseRule { * \return corresponding DLDataType */ inline DLDataType GetDLType(int type_flag) { - return GetTVMType(type_flag); + return tvm::Type2TVMType(GetTVMType(type_flag)); } struct INodeEntryHash { diff --git a/nnvm/src/compiler/graph_hash.cc b/nnvm/src/compiler/graph_hash.cc new file mode 100644 index 000000000000..236a27375225 --- /dev/null +++ b/nnvm/src/compiler/graph_hash.cc @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file graph_hash.cc
+ * \brief Graph hashing and deep comparison of two graph structures.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "node_attr.h"
+#include "graph_hash.h"
+
+namespace nnvm {
+namespace compiler {
+
+using namespace tvm;
+using tvm::ir::IntImm;
+
+size_t HashPlaceHolder(const Tensor& t) {
+  size_t key = t->shape.size();
+  key = dmlc::HashCombine(key, (t->dtype.code() << 8) | t->dtype.bits());
+  for (Expr s : t->shape) {
+    if (const IntImm* op = s.as<IntImm>()) {
+      key = dmlc::HashCombine(key, op->value);
+    }
+  }
+  return key;
+}
+
+bool PlaceHolderEqual(const Tensor& a, const Tensor& b) {
+  if (a->shape.size() != b->shape.size()) return false;
+  if (a->dtype != b->dtype) return false;
+  for (size_t i = 0; i < a->shape.size(); ++i) {
+    const IntImm* a_value = a->shape[i].as<IntImm>();
+    const IntImm* b_value = b->shape[i].as<IntImm>();
+    if (a_value && b_value == nullptr) return false;
+    if (b_value && a_value == nullptr) return false;
+    if (a_value == nullptr && b_value == nullptr) {
+      continue;
+    }
+    if (a_value->value != b_value->value) return false;
+  }
+  return true;
+}
+
+size_t GraphKeyHash::Hash(const GraphKey& gkey) {
+  if (gkey->cache_hash_key_ != 0) return gkey->cache_hash_key_;
+  size_t key = dmlc::HashCombine(GraphHash(gkey->graph), gkey->target);
+  key = dmlc::HashCombine(key, gkey->inputs.size());
+  for (size_t i = 0; i < gkey->inputs.size(); ++i) {
+    key = dmlc::HashCombine(key, HashPlaceHolder(gkey->inputs[i]));
+  }
+  if (key == 0) key = 1;
+  gkey->cache_hash_key_ = key;
+  return key;
+}
+
+bool GraphKeyEqual::Equal(const GraphKey& a,
+                          const GraphKey& b) {
+  if (a->target != b->target) return false;
+  if (a->inputs.size() != b->inputs.size()) return false;
+  for (size_t i = 0; i < a->inputs.size(); ++i) {
+    if (!PlaceHolderEqual(a->inputs[i], b->inputs[i])) return false;
+  }
+  if (GraphDeepCompare(a->graph, b->graph, false).length() != 0) return false;
+  return true;
+}
+
+GraphKey GraphKeyNode::make(Graph graph,
+                            tvm::Array<Tensor> inputs,
+                            std::string target) {
+  auto n = tvm::make_node<GraphKeyNode>();
+  n->graph = std::move(graph);
+  n->inputs = inputs;
+  n->target = std::move(target);
+  return GraphKey(n);
+}
+
+TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable)
+.set_dispatch<GraphKeyNode>([](const ObjectRef& ref, IRPrinter* p) {
+    auto* op = static_cast<const GraphKeyNode*>(ref.get());
+    p->stream << "GraphKeyNode(" << op << ")";
+});
+
+
+// Run graph hash
+size_t GraphHash(const Graph& graph) {
+  const IndexedGraph& idx = graph.indexed_graph();
+  size_t key = 0;
+  // Combine a linearized sequence of ops in subgraph
+  key = dmlc::HashCombine(key, idx.num_nodes());
+  std::hash<std::string> str_hash;
+  std::vector<size_t> hash_temp;
+  for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
+    const IndexedGraph::Node& inode = idx[nid];
+    // Use name instead of op address so it is deterministic across runs
+    if (inode.source->is_variable()) continue;
+    key = dmlc::HashCombine(key, inode.source->op()->name);
+    hash_temp.clear();
+    for (const auto& kv : GetAttrDict(inode.source->attrs)) {
+      hash_temp.push_back(dmlc::HashCombine(str_hash(kv.first), kv.second));
+    }
+    // Sort to make the hash deterministic,
+    // since unordered_map iteration order is not.
+    std::sort(hash_temp.begin(), hash_temp.end());
+    for (size_t value : hash_temp) {
+      key = dmlc::HashCombine(key, value);
+    }
+  }
+  return key;
+}
+
+// Deep-compare two graph structures, ignoring graph attributes.
+// Returns a non-empty error message if the graphs mismatch.
+// The comparator does not match names of intermediate nodes.
+// Variable attributes are compared only when compare_variable_attr is set.
+std::string GraphDeepCompare(const Graph& a,
+                             const Graph& b,
+                             bool compare_variable_attr) {
+  const IndexedGraph& idxa = a.indexed_graph();
+  const IndexedGraph& idxb = b.indexed_graph();
+  std::ostringstream err;
+  if (idxa.num_nodes() != idxb.num_nodes()) {
+    err << "Number of nodes mismatch (" << idxa.num_nodes() << " vs. " << idxb.num_nodes() << ")";
+    return err.str();
+  }
+  if (idxa.num_node_entries() != idxb.num_node_entries()) {
+    err << "Number of node entries mismatch";
+    return err.str();
+  }
+  if (idxa.outputs().size() != idxb.outputs().size()) {
+    err << "Number of outputs mismatch";
+    return err.str();
+  }
+  for (size_t i = 0; i < idxa.outputs().size(); ++i) {
+    if (idxa.outputs()[i].node_id != idxb.outputs()[i].node_id ||
+        idxa.outputs()[i].index != idxb.outputs()[i].index) {
+      err << "Output entry mismatch";
+      return err.str();
+    }
+  }
+  if (idxa.input_nodes().size() != idxb.input_nodes().size()) {
+    err << "Number of inputs mismatch";
+    return err.str();
+  }
+
+  for (uint32_t nid = 0; nid < idxa.num_nodes(); ++nid) {
+    const IndexedGraph::Node& anode = idxa[nid];
+    const IndexedGraph::Node& bnode = idxb[nid];
+    if (anode.source->op() != bnode.source->op()) {
+      err << "Node mismatch ";
+      return err.str();
+    }
+    if (anode.source->is_variable()) {
+      CHECK(bnode.source->is_variable());
+      if (!compare_variable_attr) continue;
+    }
+    AttrDict adict = GetAttrDict(anode.source->attrs);
+    AttrDict bdict = GetAttrDict(bnode.source->attrs);
+
+    auto fmatch = [&err, &anode](const AttrDict& adict, const AttrDict& bdict) {
+      for (const auto& kv : adict) {
+        auto it = bdict.find(kv.first);
+        if (it != bdict.end()) {
+          if (it->second != kv.second) {
+            err << "Node attr mismatch, op=" << anode.source->attrs.name
+                << " attr_key=" << kv.first << " " << it->second
+                << " vs. " << kv.second;
+            return false;
+          }
+        } else {
+          err << "One attr_key=" << kv.first << " is missing in another "
+              << "op=" << anode.source->attrs.name;
+          return false;
+        }
+      }
+      return true;
+    };
+    if (!fmatch(adict, bdict)) return err.str();
+    if (adict.size() != bdict.size()) {
+      CHECK(!fmatch(bdict, adict));
+      return err.str();
+    }
+    if (anode.inputs.size() != bnode.inputs.size()) {
+      err << "Node input mismatch, op=" << anode.source->attrs.name;
+      return err.str();
+    }
+    if (anode.control_deps.size() != bnode.control_deps.size()) {
+      err << "Node control_deps mismatch, op=" << anode.source->attrs.name;
+      return err.str();
+    }
+    for (size_t i = 0; i < anode.inputs.size(); ++i) {
+      const IndexedGraph::NodeEntry& ae = anode.inputs[i];
+      const IndexedGraph::NodeEntry& be = bnode.inputs[i];
+      if (ae.node_id != be.node_id ||
+          ae.index != be.index ||
+          ae.version != be.version) {
+        err << "Node input entry mismatch, op=" << anode.source->attrs.name;
+        return err.str();
+      }
+    }
+    for (size_t i = 0; i < anode.control_deps.size(); ++i) {
+      if (anode.control_deps[i] != bnode.control_deps[i]) {
+        err << "Node control_dep mismatch, op=" << anode.source->attrs.name;
+        return err.str();
+      }
+    }
+  }
+  return "";
+}
+
+TVM_REGISTER_GLOBAL("nnvm.graph.DeepCompare")
+.set_body_typed(GraphDeepCompare);
+}  // namespace compiler
+}  // namespace nnvm
diff --git a/nnvm/src/compiler/graph_hash.h b/nnvm/src/compiler/graph_hash.h
new file mode 100644
index 000000000000..42c069b280c9
--- /dev/null
+++ b/nnvm/src/compiler/graph_hash.h
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file graph_hash.h
+ * \brief The graph hashing function.
+ */
+#ifndef NNVM_COMPILER_GRAPH_HASH_H_
+#define NNVM_COMPILER_GRAPH_HASH_H_
+
+#include
+#include
+#include
+#include
+#include
+
+namespace nnvm {
+namespace compiler {
+
+class GraphKey;
+
+/*! \brief Key to a graph compiler cache */
+struct GraphKeyNode : public tvm::Node {
+  /*! \brief The graph structure */
+  Graph graph;
+  /*! \brief The inputs to the function */
+  tvm::Array<Tensor> inputs;
+  /*! \brief The target */
+  std::string target;
+  // Cached internal hash key, invisible to the user.
+  // The graph hash key is ensured always not to be 0
+  mutable size_t cache_hash_key_{0};
+
+  void VisitAttrs(tvm::AttrVisitor* v) {
+    v->Visit("inputs", &inputs);
+    v->Visit("target", &target);
+  }
+
+  static GraphKey make(Graph graph,
+                       tvm::Array<Tensor> inputs,
+                       std::string target);
+  static constexpr const char* _type_key = "GraphKey";
+  TVM_DECLARE_NODE_TYPE_INFO(GraphKeyNode, tvm::Node);
+};
+
+TVM_DEFINE_NODE_REF(GraphKey, GraphKeyNode);
+
+/*!
\brief Hashing function for graph key */ +struct GraphKeyHash { + size_t operator()(const GraphKey& gkey) const { + return Hash(gkey); + } + static size_t Hash(const GraphKey& gkey); +}; + +/*! \brief function for graph key */ +struct GraphKeyEqual { + bool operator()(const GraphKey& a, + const GraphKey& b) const { + return Equal(a, b); + } + static bool Equal(const GraphKey& a, const GraphKey& b); +}; + +/*! + * \brief Create a hash code for a given graph. + * \return The hash code of the graph. + */ +size_t GraphHash(const Graph& graph); + +/*! + * \brief Compare two graphs + * return empty string if they are equal + * otherwise return error message + * \param a The first graph. + * \param b The second graph. + * \return empty string if they are equal, otherwise return error message. + */ +std::string GraphDeepCompare(const Graph& a, + const Graph& b, + bool compare_variable_attr); +} // namespace compiler +} // namespace nnvm + +#endif // NNVM_COMPILER_GRAPH_HASH_H_ diff --git a/nnvm/src/compiler/graph_runtime.cc b/nnvm/src/compiler/graph_runtime.cc new file mode 100644 index 000000000000..a4b398cd41ea --- /dev/null +++ b/nnvm/src/compiler/graph_runtime.cc @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file graph_runtime.cc + * \brief Interface code with TVM graph runtime. 
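+ *
+ *  This file registers the tvm_op operator that fused subgraphs are lowered
+ *  to, together with the _save_param_dict / _load_param_dict helpers that
+ *  (de)serialize named NDArray parameters in the kTVMNDArrayListMagic
+ *  binary format.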
+*/ +#include +#include + +#include +#include "graph_runtime.h" + +namespace nnvm { +namespace compiler { + +using tvm::Object; +using tvm::ObjectPtr; +using tvm::runtime::TVMArgs; +using tvm::runtime::TVMRetValue; +using tvm::runtime::PackedFunc; + +DMLC_REGISTER_PARAMETER(TVMOpParam); + +// parser +inline void TVMOpParamParser(nnvm::NodeAttrs* attrs) { + TVMOpParam param; + param.Init(attrs->dict); + attrs->parsed = std::move(param); +} + +NNVM_REGISTER_OP(tvm_op) +.set_attr_parser(TVMOpParamParser) +.set_num_inputs([](const NodeAttrs& attrs) { + const TVMOpParam& param = nnvm::get(attrs.parsed); + return param.num_inputs; + }) +.set_num_outputs([](const NodeAttrs& attrs) { + const TVMOpParam& param = nnvm::get(attrs.parsed); + return param.num_outputs; + }); + + +TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict") +.set_body([](TVMArgs args, TVMRetValue *rv) { + CHECK_EQ(args.size() % 2, 0u); + size_t num_params = args.size() / 2; + std::vector names; + names.reserve(num_params); + std::vector arrays; + arrays.reserve(num_params); + for (size_t i = 0; i < num_params * 2; i += 2) { + names.emplace_back(args[i].operator std::string()); + arrays.emplace_back(args[i + 1].operator DLTensor*()); + } + std::string bytes; + dmlc::MemoryStringStream strm(&bytes); + dmlc::Stream* fo = &strm; + uint64_t header = kTVMNDArrayListMagic, reserved = 0; + fo->Write(header); + fo->Write(reserved); + fo->Write(names); + { + uint64_t sz = static_cast(arrays.size()); + fo->Write(sz); + for (size_t i = 0; i < sz; ++i) { + tvm::runtime::SaveDLTensor(fo, arrays[i]); + } + } + TVMByteArray arr; + arr.data = bytes.c_str(); + arr.size = bytes.length(); + *rv = arr; + }); + + +TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict") +.set_body([](TVMArgs args, TVMRetValue *rv) { + std::string bytes = args[0]; + std::vector names; + dmlc::MemoryStringStream memstrm(&bytes); + dmlc::Stream* strm = &memstrm; + uint64_t header, reserved; + CHECK(strm->Read(&header)) + << "Invalid parameters file format"; + CHECK(header == kTVMNDArrayListMagic) + << "Invalid parameters file format"; + CHECK(strm->Read(&reserved)) + << "Invalid parameters file format"; + CHECK(strm->Read(&names)) + << "Invalid parameters file format"; + uint64_t sz; + strm->Read(&sz, sizeof(sz)); + size_t size = static_cast(sz); + CHECK(size == names.size()) + << "Invalid parameters file format"; + tvm::Array ret; + for (size_t i = 0; i < size; ++i) { + tvm::runtime::NDArray temp; + temp.Load(strm); + auto n = tvm::make_node(); + n->name = std::move(names[i]); + n->array = temp; + ret.push_back(NDArrayWrapper(n)); + } + *rv = ret; + }); + +TVM_REGISTER_NODE_TYPE(NDArrayWrapperNode); +} // namespace compiler +} // namespace nnvm diff --git a/nnvm/src/compiler/graph_runtime.h b/nnvm/src/compiler/graph_runtime.h new file mode 100644 index 000000000000..252a6b243c3d --- /dev/null +++ b/nnvm/src/compiler/graph_runtime.h @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file graph_runtime.h + * \brief Interface code with TVM graph runtime. +*/ +#ifndef NNVM_COMPILER_GRAPH_RUNTIME_H_ +#define NNVM_COMPILER_GRAPH_RUNTIME_H_ + +#include +#include +#include +#include +#include +#include +#include + +namespace nnvm { +namespace compiler { + +/*! \brief Magic number for NDArray list file */ +constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7; + +struct TVMOpParam : public dmlc::Parameter { + std::string func_name; + uint32_t num_inputs; + uint32_t num_outputs; + uint32_t flatten_data; + + DMLC_DECLARE_PARAMETER(TVMOpParam) { + DMLC_DECLARE_FIELD(func_name); + DMLC_DECLARE_FIELD(num_inputs).set_default(1); + DMLC_DECLARE_FIELD(num_outputs).set_default(1); + DMLC_DECLARE_FIELD(flatten_data).set_default(0); + } +}; + + +/*! + * \brief wrapper node container for exchange. + */ +struct NDArrayWrapperNode : public ::tvm::Node { + std::string name; + tvm::runtime::NDArray array; + + void VisitAttrs(tvm::AttrVisitor* v) { + v->Visit("name", &name); + v->Visit("array", &array); + } + + static constexpr const char* _type_key = "NDArrayWrapper"; + TVM_DECLARE_NODE_TYPE_INFO(NDArrayWrapperNode, tvm::Node); +}; + +TVM_DEFINE_NODE_REF(NDArrayWrapper, NDArrayWrapperNode); + +} // namespace compiler +} // namespace nnvm + +#endif // NNVM_COMPILER_GRAPH_RUNTIME_H_ diff --git a/nnvm/src/compiler/graph_transform.h b/nnvm/src/compiler/graph_transform.h new file mode 100644 index 000000000000..4b183bf2dd6c --- /dev/null +++ b/nnvm/src/compiler/graph_transform.h @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file graph_transform.h + * \brief A mutator class that does local pattern matching and mutates a node. +*/ +#ifndef NNVM_COMPILER_GRAPH_TRANSFORM_H_ +#define NNVM_COMPILER_GRAPH_TRANSFORM_H_ + +#include +#include +#include +#include + +namespace nnvm { +namespace compiler { + +/*! + * \brief Transform the graph to build a new Graph, in post DFS order. + * + * Automatically copies node when some of its children or control_deps changed. + * This function won't be called in Variable. + * + * \param graph The original graph + * + * \param ftransform Function of (int nid, const NodePtr& node, std::vector* out) -> bool + * + * If empty vector is returned, it means original entries should be kept. + * + * \tparam FTransform The transformation function. 
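+ *
+ * A minimal usage sketch (the op name "my_copy" is hypothetical and for
+ * illustration only):
+ *
+ * \code
+ * // Bypass every "my_copy" node, assuming the op has exactly one input
+ * // and one output; all other nodes are kept unchanged.
+ * Graph BypassMyCopy(Graph src) {
+ *   auto ftransform = [](uint32_t nid, const NodePtr& n,
+ *                        std::vector<NodeEntry>* ret) {
+ *     if (n->is_variable() || n->op()->name != "my_copy") return false;
+ *     *ret = {n->inputs[0]};  // forward the single input entry
+ *     return true;
+ *   };
+ *   return GraphTransform(src, ftransform);
+ * }
+ * \endcode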
+ */ +template +Graph GraphTransform(Graph graph, FTransform ftransform) { + const IndexedGraph& idx = graph.indexed_graph(); + // new nodes + std::vector new_entry_map(idx.num_node_entries()); + std::vector updated(idx.num_node_entries(), false); + + // setup inputs and placeholder. + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + bool need_copy = false; + for (const IndexedGraph::NodeEntry& e : inode.inputs) { + if (updated[idx.entry_id(e)]) { + need_copy = true; break; + } + } + if (!need_copy) { + for (const uint32_t cid : inode.control_deps) { + const auto& cnode = idx[cid]; + for (uint32_t i = 0 ; i < cnode.source->num_outputs(); ++i) { + if (updated[idx.entry_id(cid, i)]) { + need_copy = true; + } + } + if (need_copy) break; + } + } + + if (!need_copy) { + std::vector ret; + if (ftransform(nid, inode.weak_ref.lock(), &ret)) { + CHECK_EQ(ret.size(), static_cast(inode.source->num_outputs())); + for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) { + updated[idx.entry_id(nid, i)] = true; + new_entry_map[idx.entry_id(nid, i)] = ret[i]; + } + } + } else { + NodePtr node = Node::Create(); + node->attrs = inode.source->attrs; + for (size_t i = 0; i < inode.inputs.size(); ++i) { + const IndexedGraph::NodeEntry& e = inode.inputs[i]; + if (updated[idx.entry_id(e)]) { + node->inputs.push_back(new_entry_map[idx.entry_id(e)]); + } else { + node->inputs.push_back(inode.source->inputs[i]); + } + } + for (size_t i = 0; i < inode.control_deps.size(); ++i) { + const uint32_t cid = inode.control_deps[i]; + const auto& cnode = idx[cid]; + CHECK_NE(cnode.source->num_outputs(), 0U); + NodePtr selected_ptr; + for (uint32_t j = 0 ; j < cnode.source->num_outputs(); ++j) { + NodePtr cptr = updated[idx.entry_id(cid, j)] ? + new_entry_map[idx.entry_id(cid, j)].node : inode.source->control_deps[i]; + if (selected_ptr == nullptr) { + selected_ptr = std::move(cptr); + } else { + CHECK(selected_ptr.get() == cptr.get()) + << "Control dependency node changed to more than one node"; + } + } + node->control_deps.push_back(selected_ptr); + } + std::vector ret; + if (ftransform(nid, node, &ret)) { + CHECK_EQ(ret.size(), static_cast(inode.source->num_outputs())); + for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) { + updated[idx.entry_id(nid, i)] = true; + new_entry_map[idx.entry_id(nid, i)] = ret[i]; + } + } else { + for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) { + updated[idx.entry_id(nid, i)] = true; + new_entry_map[idx.entry_id(nid, i)] = NodeEntry{node, i, 0}; + } + } + } + } + Graph ret; + for (size_t i = 0; i < idx.outputs().size(); ++i) { + const IndexedGraph::NodeEntry& e = idx.outputs()[i]; + if (updated[idx.entry_id(e)]) { + ret.outputs.push_back(new_entry_map[idx.entry_id(e)]); + } else { + ret.outputs.push_back(graph.outputs[i]); + } + } + return ret; +} + +} // namespace compiler +} // namespace nnvm + +#endif // NNVM_COMPILER_GRAPH_TRANSFORM_H_ diff --git a/nnvm/src/compiler/node_attr.h b/nnvm/src/compiler/node_attr.h new file mode 100644 index 000000000000..cd11981bffec --- /dev/null +++ b/nnvm/src/compiler/node_attr.h @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file node_attr.h
+ * \brief Utility to access node attributes
+ */
+#ifndef NNVM_COMPILER_NODE_ATTR_H_
+#define NNVM_COMPILER_NODE_ATTR_H_
+
+#include
+#include
+#include
+#include
+
+namespace nnvm {
+namespace compiler {
+
+using AttrDict = std::unordered_map<std::string, std::string>;
+/*!
+ * \brief Get canonicalized attr dict from node
+ * \param attrs The node attrs
+ * \return The attribute dict
+ */
+inline AttrDict GetAttrDict(const NodeAttrs& attrs) {
+  static auto& fgetdict = nnvm::Op::GetAttr<FGetAttrDict>("FGetAttrDict");
+  if (fgetdict.count(attrs.op)) {
+    return fgetdict[attrs.op](attrs);
+  } else {
+    return attrs.dict;
+  }
+}
+
+}  // namespace compiler
+}  // namespace nnvm
+#endif  // NNVM_COMPILER_NODE_ATTR_H_
diff --git a/nnvm/src/compiler/packed_func_ext.cc b/nnvm/src/compiler/packed_func_ext.cc
new file mode 100644
index 000000000000..5680af1b2550
--- /dev/null
+++ b/nnvm/src/compiler/packed_func_ext.cc
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file packed_func_ext.cc
+ * \brief Registration of extension types.
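+ *
+ *  Exposes nnvm::Graph, nnvm::Symbol and AttrDict as TVM extension types,
+ *  and registers the FFI hooks used by the Python frontend
+ *  (_register_compute, _register_schedule, _register_pattern,
+ *  _register_alter_op_layout, and the attribute-dict accessors).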
+ */ +#include +#include +#include +#include +#include +#include +#include "node_attr.h" +#include "compile_engine.h" + +namespace tvm { +namespace runtime { + +TVM_REGISTER_EXT_TYPE(nnvm::Graph); +TVM_REGISTER_EXT_TYPE(nnvm::Symbol); +TVM_REGISTER_EXT_TYPE(nnvm::compiler::AttrDict); + +} // namespace runtime +} // namespace tvm + +namespace nnvm { +DMLC_JSON_ENABLE_ANY(int, int); +} // namespace nnvm + +namespace nnvm { +namespace compiler { + +using tvm::Tensor; +using tvm::Array; +using tvm::Node; +using tvm::runtime::TVMArgs; +using tvm::runtime::TVMRetValue; + +TVM_REGISTER_GLOBAL("nnvm.compiler._dict_get") +.set_body([](TVMArgs args, TVMRetValue *rv) { + const AttrDict& dict = args[0].AsExtension(); + std::string key = args[1]; + auto it = dict.find(key); + if (it != dict.end()) { + *rv = it->second; + } else { + *rv = nullptr; + } + }); + +TVM_REGISTER_GLOBAL("nnvm.compiler._dict_size") +.set_body([](TVMArgs args, TVMRetValue *rv) { + const AttrDict& dict = args[0].AsExtension(); + *rv = static_cast(dict.size()); + }); + +TVM_REGISTER_GLOBAL("nnvm.compiler._dict_keys") +.set_body([](TVMArgs args, TVMRetValue *rv) { + const AttrDict& dict = args[0].AsExtension(); + tvm::Array keys; + for (const auto& kv : dict) { + keys.push_back(kv.first); + } + *rv = keys; + }); + +TVM_REGISTER_GLOBAL("nnvm.compiler._register_alter_op_layout") +.set_body([](TVMArgs args, TVMRetValue *rv) { + // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown + PackedFunc* f = new PackedFunc(args[1].operator PackedFunc()); + Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); + auto fpack = [f](const NodeAttrs& attrs, + const Symbol& inputs, + const Array& tinfos, + Symbol* ret_symbol) { + TVMRetValue ret = (*f)(GetAttrDict(attrs), inputs, tinfos); + if (ret.type_code() == TVMTypeCode::kNull) { + return false; + } + CHECK_EQ(ret.type_code(), tvm::runtime::extension_type_info::code) + << " expected " << "Symbol (code = " << tvm::runtime::extension_type_info::code + << ") but get code = " << ret.type_code(); + *ret_symbol = *(static_cast(ret.value().v_handle)); + return true; + }; + op.set_attr("FTVMAlterOpLayout", fpack, args[2]); +}); + +// custom version of TVM compute +TVM_REGISTER_GLOBAL("nnvm._register_compute") +.set_body([](TVMArgs args, TVMRetValue *rv) { + // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown + PackedFunc* f = new PackedFunc(args[1].operator PackedFunc()); + Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); + auto fcompute = [f](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) + -> Array { + TVMRetValue ret = (*f)(GetAttrDict(attrs), inputs, out_info); + if (ret.IsObjectRef()) { + return {ret.operator Tensor()}; + } else { + return ret; + } + }; + op.set_attr("FTVMCompute", fcompute, args[2]); + }); + +TVM_REGISTER_GLOBAL("nnvm._register_schedule") +.set_body([](TVMArgs args, TVMRetValue *rv) { + // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown + PackedFunc* f = new PackedFunc(args[1].operator PackedFunc()); + Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); + auto fschedule = [f](const NodeAttrs& attrs, + const Array& outs, + const std::string& target) { + return (*f)(GetAttrDict(attrs), outs, target).operator Schedule(); + }; + op.set_attr("FTVMSchedule", fschedule, args[2]); + }); + +TVM_REGISTER_GLOBAL("nnvm._register_pattern") +.set_body([](TVMArgs args, TVMRetValue *rv) { + Op& op = 
::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); + op.set_attr("TOpPattern", args[1].operator int(), args[2]); + }); + +TVM_REGISTER_GLOBAL("nnvm.graph._move_module") +.set_body([](TVMArgs args, TVMRetValue *rv) { + const nnvm::Graph& g = args[0].AsExtension(); + *rv = const_cast(&g)-> + MoveCopyAttr(args[1]); + }); + +TVM_REGISTER_GLOBAL("nnvm.graph._move_graph") +.set_body([](TVMArgs args, TVMRetValue *rv) { + const nnvm::Graph& g = args[0].AsExtension(); + std::string key = args[1]; + if (g.attrs.count(key)) { + *rv = const_cast(&g)-> + MoveCopyAttr(key); + } else { + *rv = nullptr; + } + }); +} // namespace compiler +} // namespace nnvm diff --git a/nnvm/src/compiler/pattern_util.h b/nnvm/src/compiler/pattern_util.h new file mode 100644 index 000000000000..d3f9725caefa --- /dev/null +++ b/nnvm/src/compiler/pattern_util.h @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file pattern_util.h + * \brief Utilities for doing various pattern matching in graph. +*/ +#ifndef NNVM_COMPILER_PATTERN_UTIL_H_ +#define NNVM_COMPILER_PATTERN_UTIL_H_ + +#include +#include +#include +#include +#include + +namespace nnvm { +namespace compiler { + +/*! + * \brief find axis in oshape, such that: + * bias_shape = [1,1, ... oshape[axis], 1,1,] + * + * This is used to detect bias or scaling factor on channel dimension. + * \param oshape The output shape + * \param bias_shape The shape of bias or scaling factor. + * \return Pair of matched axis in o shape and bias_shape if found. + */ +inline std::pair MatchBroadcast1DAxis( + const TShape& oshape, const TShape& bias_shape) { + dim_t axis_dim = bias_shape.ndim(); + for (dim_t i = bias_shape.ndim(); i != 0; --i, --axis_dim) { + if (bias_shape[i - 1] != 1) break; + } + // everything is 1 + if (axis_dim == 0) { + return {oshape.ndim() - bias_shape.ndim(), 0}; + } + axis_dim = axis_dim - 1; + // The bias shape is not 1D + for (dim_t i = 0; i < axis_dim; ++i) { + if (bias_shape[i] != 1) return {-1, -1}; + } + int axis = static_cast( + oshape.ndim() - bias_shape.ndim() + axis_dim); + if (oshape[axis] != bias_shape[axis_dim]) return {-1, -1}; + return {axis, axis_dim}; +} + +/*! + * \brief Expand bias dimension to match needed axis. + * + * \param bias The bias NodeEntry + * \param out_dim output dimension. + * \param bias_dim The current bias dimension. + * \param axis The axis we want to match on. 
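+ *
+ * Illustrative example (values assumed for this sketch): for an NCHW output
+ * with out_dim = 4, a 1-D channel bias (bias_dim = 1) and axis = 1,
+ * num_pad_axis = 4 - 1 - 1 = 2, so expand_dims(axis=1, num_newaxis=2)
+ * reshapes the bias from [C] to [C, 1, 1], which broadcasts against
+ * [N, C, H, W].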
+ */ +inline NodeEntry +ExpandBiasToMatchAxis(NodeEntry bias, + int out_dim, + int bias_dim, + int axis) { + if (bias_dim != 1) { + bias = MakeNode("squeeze", bias.node->attrs.name + "_sqz", {bias}); + } + int num_pad_axis = out_dim - axis - 1; + if (num_pad_axis > 0) { + std::unordered_map kwargs{ + {"axis", "1"}, + {"num_newaxis", std::to_string(num_pad_axis)}}; + return MakeNode("expand_dims", bias.node->attrs.name + "_expand", + {bias}, kwargs); + + } else { + return bias; + } +} + +/*! + * \brief Get the reference count of each node. + * \param idx The IndexedGraph + * \return ref_count vector of length number nodes. + */ +inline std::vector +GetNodeRefCounts(const IndexedGraph& idx) { + std::vector ref_count(idx.num_nodes(), 0); + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + for (const auto& e : inode.inputs) { + ++ref_count[e.node_id]; + } + } + for (const auto& e : idx.outputs()) { + // this line will realize all the outputs + ref_count[e.node_id] += 1; + } + return ref_count; +} +} // namespace compiler +} // namespace nnvm +#endif // NNVM_COMPILER_PATTERN_UTIL_H_ diff --git a/nnvm/src/compiler/simplify_inference.cc b/nnvm/src/compiler/simplify_inference.cc new file mode 100644 index 000000000000..0e33a2260986 --- /dev/null +++ b/nnvm/src/compiler/simplify_inference.cc @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file simplify_inference.cc + * \author Ziheng Jiang +*/ +#include +#include +#include +#include +#include +#include +#include "graph_transform.h" +#include "pattern_util.h" + +namespace nnvm { +namespace compiler { + +std::vector +BatchNormToInferUnpack(const nnvm::NodeAttrs& attrs, + nnvm::NodeEntry data, + nnvm::NodeEntry gamma, + nnvm::NodeEntry beta, + nnvm::NodeEntry moving_mean, + nnvm::NodeEntry moving_var, + TShape dshape, + TShape bshape) { + CHECK_NE(dshape.ndim(), 0); + CHECK(attrs.op); + static const Op* bn_op = Op::Get("batch_norm"); + CHECK(attrs.op == bn_op); + const auto& param = nnvm::get(attrs.parsed); + std::string bn_name = attrs.name; + + // transform batch_norm(data) to scale * data + shift + NodeEntry var_add_eps = MakeNode( + "__add_scalar__", bn_name + "_add_eps", + {moving_var}, {{"scalar", std::to_string(param.epsilon)}}); + + NodeEntry sqrt = MakeNode( + "sqrt", bn_name + "_sqrt", {var_add_eps}); + + NodeEntry scale = MakeNode( + "__rdiv_scalar__", bn_name + "_div", + {sqrt}, {{"scalar", "1"}}); + + if (param.scale) { + scale = MakeNode( + "elemwise_mul", bn_name + "_gamma_mul_div", + {scale, gamma}); + } + + NodeEntry neg_mean = MakeNode( + "negative", bn_name + "_neg_mean", {moving_mean}); + + NodeEntry shift = MakeNode( + "elemwise_mul", bn_name + "_neg_mean_mul_a", + {neg_mean, scale}); + + if (param.center) { + shift = MakeNode( + "elemwise_add", bn_name + "_add_beta", {shift, beta}); + } + int axis = param.axis; + scale = ExpandBiasToMatchAxis(scale, dshape.ndim()-bshape.ndim()+1, 1, axis); + shift = ExpandBiasToMatchAxis(shift, dshape.ndim()-bshape.ndim()+1, 1, axis); + + NodeEntry out = MakeNode("broadcast_mul", bn_name + "_a_mul_data", + {data, scale}); + out = MakeNode("broadcast_add", bn_name + "_out", + {out, shift}); + // It is invalid to ref the other values of BN after inference transform. + NodeEntry undef = MakeNode("__undef__", "undef", {}); + return {out, undef, undef}; +} + +Graph SimplifyInference(nnvm::Graph src) { + // Get attributes from the graph + const IndexedGraph& idx = src.indexed_graph(); + const ShapeVector& shape_vec = src.GetAttr("shape"); + auto transform = [&](uint32_t nid, const NodePtr& n, std::vector* ret) { + if (n->is_variable()) return false; + static const Op* bn_op = Op::Get("batch_norm"); + static const Op* dropout_op = Op::Get("dropout"); + if (n->op() == bn_op) { + *ret = BatchNormToInferUnpack( + n->attrs, + n->inputs[0], + n->inputs[1], + n->inputs[2], + n->inputs[3], + n->inputs[4], + shape_vec[idx.entry_id(nid, 0)], + shape_vec[idx.entry_id(nid, 1)]); + return true; + } else if (n->op() == dropout_op) { + NodeEntry undef = MakeNode("__undef__", "undef", {}); + *ret = {n->inputs[0], undef}; + return true; + } else { + return false; + } + }; + return GraphTransform(src, transform); +} + +NNVM_REGISTER_PASS(SimplifyInference) +.set_body(SimplifyInference) +.set_change_graph(true); + +} // namespace compiler +} // namespace nnvm diff --git a/nnvm/src/pass/plan_memory.cc b/nnvm/src/pass/plan_memory.cc index abd18eda5edd..f59a3006cf4c 100644 --- a/nnvm/src/pass/plan_memory.cc +++ b/nnvm/src/pass/plan_memory.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -25,13 +25,14 @@ #include #include #include +#include #include #include "graph_algorithm.h" namespace nnvm { namespace pass { namespace { - + using namespace nnvm::top; // Return bytes of data flag. static int GetDTypeSize(int type_flag) { switch (type_flag) { @@ -39,7 +40,6 @@ static int GetDTypeSize(int type_flag) { case kInt8: return 1; case kFloat16: - case kBfloat16: case kInt16: case kUint16: return 2; diff --git a/nnvm/src/top/elemwise_op_common.h b/nnvm/src/top/elemwise_op_common.h new file mode 100644 index 000000000000..1864850eb436 --- /dev/null +++ b/nnvm/src/top/elemwise_op_common.h @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file elemwise_op_common.h + * \brief Common operator utilities + */ +#ifndef NNVM_TOP_ELEMWISE_OP_COMMON_H_ +#define NNVM_TOP_ELEMWISE_OP_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include "op_common.h" + +namespace nnvm { +namespace top { + +template +inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs, + const AttrType& none) { + AttrType dattr = none; + size_t in_size = in_attrs->size(); + size_t out_size = out_attrs->size(); + if (n_in != -1) + in_size = static_cast(n_in); + if (n_out != -1) + out_size = static_cast(n_out); + + auto deduce = [&](std::vector *vec, size_t size, const char *name) { + for (size_t i = 0; i < size; ++i) { + CHECK(assign(&dattr, (*vec)[i])) + << "Incompatible attr in node " << attrs.name << " at " << i << "-th " + << name << ": " << "expected " << attr_string(dattr) + << ", got " << attr_string((*vec)[i]); + } + }; + deduce(in_attrs, in_size, "input"); + if (reverse_infer) deduce(out_attrs, out_size, "output"); + + auto write = [&](std::vector *vec, size_t size, const char *name) { + for (size_t i = 0; i < size; ++i) { + CHECK(assign(&(*vec)[i], dattr)) + << "Incompatible attr in node " << attrs.name << " at " << i << "-th " + << name << ": " << "expected " << attr_string(dattr) + << ", got " << attr_string((*vec)[i]); + } + }; + write(in_attrs, in_size, "input"); + write(out_attrs, out_size, "output"); + + if (is_none(dattr)) return false; + return true; +} + +template +inline bool ElemwiseShape(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + if (n_in != -1) { + CHECK_EQ(in_attrs->size(), static_cast(n_in)) << " in operator " << attrs.name; + } + if (n_out != -1) { + CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << 
attrs.name; + } + return ElemwiseAttr( + attrs, in_attrs, out_attrs, TShape()); +} + +template +inline bool ElemwiseType(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + if (n_in != -1) { + CHECK_EQ(in_attrs->size(), static_cast(n_in)) << " in operator " << attrs.name; + } + if (n_out != -1) { + CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << attrs.name; + } + return ElemwiseAttr( + attrs, in_attrs, out_attrs, -1); +} + +inline bool ElementWiseReduceShape(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(out_attrs->size(), 1); + return ElemwiseAttr( + attrs, in_attrs, out_attrs, TShape()); +} + +inline bool ElementWiseReduceType(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(out_attrs->size(), 1); + return ElemwiseAttr( + attrs, in_attrs, out_attrs, -1); +} + +template +inline bool ElemwiseFixedLayout(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts, + const std::function& finfer) { + const size_t in_size = (n_in == -1) ? in_layouts->size() : static_cast(n_in); + const size_t out_size = (n_out == -1) ? out_layouts->size() : static_cast(n_out); + + auto deduce = [&](Layout *target, const std::vector *vec, + size_t size, const char *name) { + for (size_t i = 0; i < size; ++i) { + if (vec->at(i).defined()) { + if (!target->defined()) { + *target = vec->at(i); + } + CHECK_EQ(*target, vec->at(i)) + << "Incompatible attr in node " << attrs.name << " at " << i << "-th " + << name << ": " << "expected " << *target + << ", got " << vec->at(i); + } + } + }; + + Layout in, last_in, out; + deduce(&in, in_layouts, in_size, "input"); + deduce(&last_in, last_in_layouts, in_size, "input (last infer pass)"); + deduce(&out, out_layouts, out_size, "output"); + + if (!last_in.defined()) { + last_in = in; + } else { + // else we copy in_layout produced by last infer pass to in_layout, + // and let LayoutTransform pass + // to insert an layout_transform node to fix the input layout. + in = last_in; + } + + out = finfer(in); + + auto write = [](std::vector *vec, Layout& value, size_t size) { + for (size_t i = 0; i < size; ++i) { + vec->at(i) = value; + } + }; + if (in.defined()) write(in_layouts, in, in_size); + if (out.defined()) write(out_layouts, out, out_size); + + return true; +} + +/*! \brief Fix the input layout as the previous inferred (if any) and copy to output */ +template +inline bool ElemwiseFixedLayoutCopyToOut(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + return ElemwiseFixedLayout( + attrs, in_layouts, last_in_layouts, out_layouts, [](const Layout& in) { + return in; + }); +} + +/*! \brief Fix the input layout as the previous inferred (if any) and do not define output */ +template +inline bool ElemwiseFixedLayoutUnknownOut(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + return ElemwiseFixedLayout( + attrs, in_layouts, last_in_layouts, out_layouts, [](const Layout& in) { + return Layout::Undef(); + }); +} + +/*! \brief take arbitrary input layout and copy to output */ +template +inline bool ElemwiseArbitraryLayout(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + const size_t in_size = (n_in == -1) ? in_layouts->size() : static_cast(n_in); + const size_t out_size = (n_out == -1) ? 
out_layouts->size() : static_cast(n_out); + + Layout in; + for (size_t i = 0; i < in_size; ++i) { + if (!in.defined()) in = in_layouts->at(i); + CHECK_EQ(in, in_layouts->at(i)) + << "Incompatible attr in node " << attrs.name << " at " << i + << "-th input: expected " << in + << ", got " << in_layouts->at(i); + } + + if (in.defined()) { + for (size_t i = 0; i < out_size; ++i) { + out_layouts->at(i) = in; + } + } + + return true; +} + +/*! + * \brief try to convert right layout to left layout if they are different. + * if the converting fails, it will use the last inferred layouts. + */ +inline bool ElemwiseBinaryKeepLeftLayout(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + CHECK_EQ(in_layouts->size(), 2U); + CHECK_EQ(last_in_layouts->size(), 2U); + CHECK_EQ(out_layouts->size(), 1U); + + const Layout& lhs_last = (*last_in_layouts)[0]; + const Layout& rhs_last = (*last_in_layouts)[1]; + CHECK((lhs_last.defined() && rhs_last.defined()) || + (!lhs_last.defined() && !rhs_last.defined())); + + const Layout& lhs = (*in_layouts)[0]; + const Layout& rhs = (*in_layouts)[1]; + + if (!lhs.defined() && !rhs.defined()) { + CHECK(!lhs_last.defined() && !rhs_last.defined()) + << "Lost input layouts in node " << attrs.name + << ": last inferred lhs=" << lhs_last << ", rhs=" << rhs_last; + return true; + } else if (!lhs.defined()) { + CHECK(!lhs_last.defined() && !rhs_last.defined()); + in_layouts->at(0) = rhs; + out_layouts->at(0) = rhs; + return true; + } else if (!rhs.defined()) { + CHECK(!lhs_last.defined() && !rhs_last.defined()); + in_layouts->at(1) = lhs; + out_layouts->at(0) = lhs; + return true; + } + + if (lhs == rhs) { + // for same layout, we can always do binary calculation + // and pass the layout to next layer + out_layouts->at(0) = lhs; + return true; + } + + if (rhs.convertible(lhs)) { + in_layouts->at(1) = lhs; + out_layouts->at(0) = lhs; + } else { + CHECK(lhs_last.defined() && rhs_last.defined()) + << "Incompatible input layouts in node " << attrs.name + << ". 
lhs: " << lhs << ", rhs: " << rhs; + CHECK(lhs_last == rhs_last); + in_layouts->at(0) = lhs_last; + in_layouts->at(1) = rhs_last; + out_layouts->at(0) = lhs_last; + } + + return true; +} + +#define NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \ + NNVM_REGISTER_OP(name) \ + .set_num_inputs(1) \ + .set_num_outputs(1) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferType", ElemwiseType<1, 1>) \ + .set_attr("FCorrectLayout", \ + ElemwiseArbitraryLayout<1, 1>) \ + .set_attr("FInplaceOption", \ + [](const NodeAttrs& attrs){ \ + return std::vector >{{0, 0}}; \ + }) \ + .add_argument("data", "Tensor", "The input tensor.") + + +#define NNVM_REGISTER_INIT_OP(name) \ + NNVM_REGISTER_OP(name) \ + .set_num_inputs(0) \ + .set_num_outputs(1) + + +#define NNVM_REGISTER_INIT_LIKE_OP(name) \ + NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \ + .set_attr("FGradient", MakeZeroGradNodes) \ + .add_argument("data", "Symbol", "The input") + + +#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \ + NNVM_REGISTER_OP(name) \ + .set_num_inputs(2) \ + .set_num_outputs(1) \ + .set_attr("FInferShape", ElemwiseShape<2, 1>) \ + .set_attr("FInferType", ElemwiseType<2, 1>) \ + .set_attr("FCorrectLayout", \ + ElemwiseBinaryKeepLeftLayout) \ + .set_attr("FInplaceOption", \ + [](const NodeAttrs& attrs) { \ + return std::vector >{{0, 0}, {1, 0}}; \ + }) \ + .add_argument("lhs", "Tensor", "first input") \ + .add_argument("rhs", "Tensor", "second input") + + +#define NNVM_REGISTER_ELEMWISE_REDUCE_OP(name) \ + NNVM_REGISTER_OP(name) \ + .set_num_inputs([](const NodeAttrs& attrs) { \ + return static_cast( \ + dmlc::get(attrs.parsed).num_args); \ + }) \ + .set_attr_parser(ParamParser) \ + .set_attr("FGetAttrDict", \ + ParamGetAttrDict) \ + .set_attr("FInferShape", \ + ElementWiseReduceShape) \ + .set_attr("FCorrectLayout", \ + ElemwiseFixedLayoutCopyToOut<-1, 1>) \ + .set_attr("FInferType", ElementWiseReduceType) \ + .add_argument("args", "Symbol[]", "Positional input arguments") + + +#define NNVM_REGISTER_INDICATOR_OP(name) \ + NNVM_REGISTER_OP(name) \ + .set_num_outputs(1) \ + .set_attr( \ + "FInferType", [](const NodeAttrs& attrs, \ + std::vector* in_attrs, \ + std::vector* out_attrs) { \ + CHECK_EQ(out_attrs->size(), 1U); \ + NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, \ + static_cast(kFloat32)); \ + return true; \ + }) \ + .set_attr("FCorrectLayout", \ + ElemwiseFixedLayoutUnknownOut<1, 1>) \ + .set_attr( \ + "FGradient", [](const NodePtr& n, \ + const std::vector& ograds) { \ + return MakeZeroGradNodes(n, ograds); \ + }) + + +} // namespace top +} // namespace nnvm +#endif // NNVM_TOP_ELEMWISE_OP_COMMON_H_ diff --git a/nnvm/src/top/image/resize.cc b/nnvm/src/top/image/resize.cc new file mode 100644 index 000000000000..a50b4ac961ea --- /dev/null +++ b/nnvm/src/top/image/resize.cc @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file resize.cc + * \brief Property def of resize operators. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "../nn/nn_common.h" +#include "../op_common.h" +#include "../elemwise_op_common.h" +#include "topi/elemwise.h" +#include "topi/transform.h" +#include "topi/image/resize.h" +#include "resize.h" + +namespace nnvm { +namespace top { +using tvm::Expr; +using tvm::Array; +using tvm::Tensor; +using nnvm::compiler::FTVMCompute; + +DMLC_REGISTER_PARAMETER(ResizeParam); + +inline bool ResizeInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + static const Layout kNCHW("NCHW"); + const ResizeParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_shape->size(), 1U); + CHECK_EQ(out_shape->size(), 1U); + TShape dshape = (*in_shape)[0]; + if (dshape.ndim() == 0) return false; + dshape = ConvertLayout(dshape, param.layout, kNCHW); + + TShape oshape = dshape; + oshape[2] = param.size[0]; + oshape[3] = param.size[1]; + + oshape = ConvertLayout(oshape, kNCHW, param.layout); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + + return true; +} + +inline bool ResizeLayout(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + const ResizeParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_layouts->size(), 1U); + CHECK_EQ(out_layouts->size(), 1U); + const Layout layout(param.layout); + NNVM_ASSIGN_LAYOUT(*in_layouts, 0, layout); + NNVM_ASSIGN_LAYOUT(*out_layouts, 0, layout); + return true; +} + +NNVM_REGISTER_OP(resize) +.describe(R"(Perform resize to input array with nearest neighbour or bilinear interpolation. + +- **data**: data is 4D array of shape + (batch_size, channels, in_height, in_width) for NCHW + (batch_size, in_height, in_width, channels) for NHWC + +- **out**: Output is 4D array of shape + for layout NCHW + (batch_size, channels, size[0], size[1]) + + for layout NHWC + (batch_size, size[0], size[1], channels) + +)" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_arguments(ResizeParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", ResizeInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", ResizeLayout) +.set_num_outputs(1) +.set_num_inputs(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ResizeParam& param = nnvm::get(attrs.parsed); + Array oshape; + if (param.layout == "NCHW") { + oshape.push_back(out_info[0]->shape[2]); + oshape.push_back(out_info[0]->shape[3]); + } else { + oshape.push_back(out_info[0]->shape[1]); + oshape.push_back(out_info[0]->shape[2]); + } + + return Array{ topi::image::resize(inputs[0], oshape, param.layout, + param.align_corners, param.method)}; +}) +.set_support_level(2); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/image/resize.h b/nnvm/src/top/image/resize.h new file mode 100644 index 000000000000..8c894140fabc --- /dev/null +++ b/nnvm/src/top/image/resize.h @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file resize.h + */ +#ifndef NNVM_TOP_IMAGE_RESIZE_H_ +#define NNVM_TOP_IMAGE_RESIZE_H_ + +#include +#include +#include +#include +#include + +namespace nnvm { +namespace top { + +struct ResizeParam : public dmlc::Parameter { + TShape size; + std::string layout; + std::string method; + bool align_corners; + + DMLC_DECLARE_PARAMETER(ResizeParam) { + DMLC_DECLARE_FIELD(size) + .describe("Output size"); + DMLC_DECLARE_FIELD(layout) + .set_default("NCHW") + .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Resize is applied on the 'H' and" + "'W' dimensions."); + DMLC_DECLARE_FIELD(method) + .set_default("BILINEAR") + .describe("Specify the mode to use for scaling." + "NEAREST_NEIGHBOR - Nearest Neighbor" + "BILINEAR - Bilinear Interpolation"); + DMLC_DECLARE_FIELD(align_corners) + .set_default(false) + .describe("Should be true to preserve the values at the corner pixels"); + } +}; + +} // namespace top +} // namespace nnvm +#endif // NNVM_TOP_IMAGE_RESIZE_H_ diff --git a/nnvm/src/top/nn/convolution.cc b/nnvm/src/top/nn/convolution.cc new file mode 100644 index 000000000000..5c3b2d35991d --- /dev/null +++ b/nnvm/src/top/nn/convolution.cc @@ -0,0 +1,660 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file convolution.cc + * \brief Convolution operators + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nn_common.h" +#include "../op_common.h" +#include "../elemwise_op_common.h" +#include "topi/nn.h" + + +using tvm::Tensor; +using tvm::Array; +using nnvm::compiler::FTVMCompute; + +namespace nnvm { +namespace top { + +// conv2d +DMLC_REGISTER_PARAMETER(Conv2DParam); + +inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + static const Layout kNCHW("NCHW"); + static const Layout kOIHW("OIHW"); + + const Conv2DParam& param = nnvm::get(attrs.parsed); + + const Layout in_layout(param.layout); + const Layout kernel_layout(param.kernel_layout); + CHECK(in_layout.convertible(kNCHW)) + << "Conv only support input layouts that are convertible from NCHW." + << " But got " << in_layout; + CHECK(kernel_layout.convertible(kOIHW)) + << "Conv only support kernel layouts that are convertible from OIHW." + << " But got "<< kernel_layout; + + Layout out_layout(param.out_layout); + if (!out_layout.defined()) out_layout = in_layout; + CHECK(out_layout.convertible(kNCHW)) + << "Conv only support output layouts that are convertible from NCHW." + << " But got " << out_layout; + + if (param.use_bias) { + CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; + } else { + CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; + } + CHECK_EQ(out_shape->size(), 1U); + + TShape dshape = in_shape->at(0); + if (dshape.ndim() == 0) return false; + dshape = ConvertLayout(dshape, in_layout, kNCHW); + + CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D"; + CHECK_EQ(param.kernel_size.ndim(), 2U); + CHECK_EQ(param.strides.ndim(), 2U) + << "incorrect stride size: " << param.strides; + CHECK_EQ(param.dilation.ndim(), 2U) + << "incorrect dilate size: " << param.dilation; + CHECK_EQ(dshape[1] % param.groups, 0U) + << "input channels must divide group size"; + CHECK_EQ(param.channels % param.groups, 0U) + << "output channels must divide group size"; + + TShape wshape({param.channels, + dshape[1] / param.groups, + param.kernel_size[0], + param.kernel_size[1]}); + + wshape = ConvertLayout(wshape, kOIHW, kernel_layout); + + if (in_shape->at(Conv2DParam::kWeight).ndim() == 0) { + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kWeight, wshape); + } + if (param.use_bias) { + static const Layout default_bias_layout("C"); + TShape bias_shape({param.channels}); + auto oc_block = out_layout.subsizeof('C'); + if (oc_block > 0) { + size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 1 : 0; + bias_shape = ConvertLayout(bias_shape, default_bias_layout, + default_bias_layout.split('C', split_axis, oc_block)); + } + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kBias, bias_shape); + } + // dilation + dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0]; + dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1]; + TShape oshape({dshape[0], param.channels, 0, 0}); + if (dshape[2] != 0) { + oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1; + } + if (dshape[3] != 0) { + oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1; + } + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, ConvertLayout(oshape, kNCHW, out_layout)); + // Perform incomplete shape inference. Fill in the missing values in data shape. + // 1) We can always fill in the batch_size. 
+ // 2) We can back-calculate the input height/width if the corresponding stride is 1. + oshape = ConvertLayout((*out_shape)[0], out_layout, kNCHW); + dshape[0] = oshape[0]; + if (oshape[2] && param.strides[0] == 1) { + dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0]; + } + if (oshape[3] && param.strides[1] == 1) { + dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1]; + } + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kData, + ConvertLayout(dshape, kNCHW, in_layout)); + // Check whether the kernel sizes are valid + if (dshape[2] != 0) { + CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0]) + << "kernel size exceed input"; + } + if (dshape[3] != 0) { + CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1]) + << "kernel size exceed input"; + } + return true; +} + +template +inline bool WinogradConv2DInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + static const Layout kNCHW("NCHW"); + static const Layout kOIHW("OIHW"); + + const Param& param = nnvm::get(attrs.parsed); + + const Layout in_layout(param.layout); + const Layout kernel_layout(param.kernel_layout); + CHECK(in_layout.convertible(kNCHW)) + << "Conv only support input layouts that are convertible from NCHW." + << " But got " << in_layout; + CHECK(kernel_layout.convertible(kOIHW)) + << "Conv only support kernel layouts that are convertible from OIHW." + << " But got "<< kernel_layout; + + Layout out_layout(param.out_layout); + if (!out_layout.defined()) out_layout = in_layout; + CHECK(out_layout.convertible(kNCHW)) + << "Conv only support output layouts that are convertible from NCHW." + << " But got " << out_layout; + + if (param.use_bias) { + CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; + } else { + CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; + } + CHECK_EQ(out_shape->size(), 1U); + + TShape dshape = in_shape->at(0); + if (dshape.ndim() == 0) return false; + dshape = ConvertLayout(dshape, in_layout, kNCHW); + + CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D"; + CHECK_EQ(param.kernel_size.ndim(), 2U); + CHECK_EQ(param.strides.ndim(), 2U) + << "incorrect stride size: " << param.strides; + CHECK_EQ(param.dilation.ndim(), 2U) + << "incorrect dilate size: " << param.dilation; + CHECK_EQ(dshape[1] % param.groups, 0U) + << "input channels must divide group size"; + CHECK_EQ(param.channels % param.groups, 0U) + << "output channels must divide group size"; + + // NOTE: Do not check weight shape here! + // Different backend requires different layout to compute + // the batch gemm stage in winograd efficiently, but we want to + // make this NNVM symbol work for all backends. + // So we accept all weight shapes, and assume the TOPI developers + // can handle this correctly in alter_op_layout. + + if (param.use_bias) { + static const Layout default_bias_layout("C"); + TShape bias_shape({param.channels}); + auto oc_block = out_layout.subsizeof('C'); + if (oc_block > 0) { + size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 
1 : 0; + bias_shape = ConvertLayout(bias_shape, default_bias_layout, + default_bias_layout.split('C', split_axis, oc_block)); + } + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, WinogradConv2DParam::kBias, bias_shape); + } + // dilation + dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0]; + dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1]; + TShape oshape({dshape[0], param.channels, 0, 0}); + if (dshape[2] != 0) { + oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1; + } + if (dshape[3] != 0) { + oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1; + } + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, ConvertLayout(oshape, kNCHW, out_layout)); + // Perform incomplete shape inference. Fill in the missing values in data shape. + // 1) We can always fill in the batch_size. + // 2) We can back-calculate the input height/width if the corresponding stride is 1. + oshape = ConvertLayout((*out_shape)[0], out_layout, kNCHW); + dshape[0] = oshape[0]; + if (oshape[2] && param.strides[0] == 1) { + dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0]; + } + if (oshape[3] && param.strides[1] == 1) { + dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1]; + } + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, WinogradConv2DParam::kData, + ConvertLayout(dshape, kNCHW, in_layout)); + // Check whether the kernel sizes are valid + if (dshape[2] != 0) { + CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0]) + << "kernel size exceed input"; + } + if (dshape[3] != 0) { + CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1]) + << "kernel size exceed input"; + } + return true; +} + +template +inline bool Conv2DInferType(const nnvm::NodeAttrs& attrs, + std::vector* in_type, + std::vector* out_type) { + const PARAM& param = nnvm::get(attrs.parsed); + if (param.use_bias) { + CHECK_EQ(in_type->size(), 3U) << "Input:[data, weight, bias]"; + } else { + CHECK_EQ(in_type->size(), 2U) << "Input:[data, weight]"; + } + CHECK_EQ(out_type->size(), 1U); + if (param.out_dtype != -1) { + CHECK(!type_is_none((*in_type)[0])); + for (size_t i = 1; i < in_type->size(); ++i) { + NNVM_ASSIGN_INPUT_TYPE(attrs, *in_type, i, (*in_type)[0]); + } + NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_type, 0, param.out_dtype); + } else { + ElemwiseType<-1, 1>(attrs, in_type, out_type); + } + return true; +} + + +template +inline bool Conv2DCorrectLayout(const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + const PARAM& param = nnvm::get(attrs.parsed); + + const Layout in_layout(param.layout); + Layout out_layout(param.out_layout); + if (!out_layout.defined()) out_layout = in_layout; + + const Layout kernel_layout(param.kernel_layout); + if (param.use_bias) { + CHECK_EQ(ilayouts->size(), 3U) << "Input:[data, weight, bias]"; + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout); + NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout); + // automatically decide bias layout + Layout bias_layout("C"); + auto oc_block = out_layout.subsizeof('C'); + if (oc_block > 0) { + size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 
1 : 0; + bias_layout = bias_layout.split('C', split_axis, oc_block); + } + NNVM_ASSIGN_LAYOUT(*ilayouts, 2, bias_layout); + } else { + CHECK_EQ(ilayouts->size(), 2U) << "Input:[data, weight]"; + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout); + NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout); + } + + CHECK_EQ(olayouts->size(), 1U); + NNVM_ASSIGN_LAYOUT(*olayouts, 0, out_layout); + + return true; +} + +NNVM_REGISTER_OP(conv2d) +.describe(R"code(2D convolution layer (e.g. spatial convolution over images). + +This layer creates a convolution kernel that is convolved +with the layer input to produce a tensor of +outputs. If `use_bias` is True, +a bias vector is created and added to the outputs. + +- **data**: This depends on the `layout` parameter. Input is 4D array of shape + (batch_size, in_channels, height, width) if `layout` is `NCHW`. +- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) +- **bias**: (channels,) +- **out**: This depends on the `layout` parameter. Output is 4D array of shape + (batch_size, channels, out_height, out_width) if `layout` is `NCHW`. + +)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_argument("weight", "4D Tensor", "Weight matrix.") +.add_argument("bias", "1D Tensor", "Bias parameter.") +.add_arguments(Conv2DParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FListInputNames", UseBiasListInputNames) +.set_attr("FInferShape", Conv2DInferShape) +.set_attr("FInferType", Conv2DInferType) +.set_attr("FCorrectLayout", Conv2DCorrectLayout) +.set_num_outputs(1) +.set_num_inputs(UseBiasNumInputs) +.set_support_level(2) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + return MakeGradNode("_conv2d_grad", n, + {ograds[0], n->inputs[Conv2DParam::kData], + n->inputs[Conv2DParam::kWeight]}, + n->attrs.dict); +}); + +NNVM_REGISTER_OP(_contrib_conv2d_NCHWc) +.describe(R"code(2D convolution layer (e.g. spatial convolution over images). +)code" NNVM_ADD_FILELINE) +.add_argument("data", "5D Tensor", "Packed input data.") +.add_argument("weight", "6D Tensor", "Packed weight matrix.") +.add_argument("bias", "1D Tensor", "Bias parameter.") +.add_arguments(Conv2DParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FListInputNames", UseBiasListInputNames) +.set_attr("FInferShape", Conv2DInferShape) +.set_attr("FInferType", Conv2DInferType) +.set_attr("FCorrectLayout", Conv2DCorrectLayout) +.set_num_outputs(1) +.set_num_inputs(UseBiasNumInputs) +.set_support_level(2); + +NNVM_REGISTER_OP(_contrib_conv2d_winograd_weight_transform) +.describe(R"code(Weight transformation of winograd fast convolution algorithm. +Separate this into another nnvm symbol in order to enable Precompute Pass to compute the +weight transformation in advance. 
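+
+The output is a 4D tensor of shape
+(tile_size + kernel_size[0] - 1, tile_size + kernel_size[1] - 1, channels, in_channels).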
+
+- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
+)code" NNVM_ADD_FILELINE)
+.add_argument("weight", "4D Tensor", "Weight tensor.")
+.add_arguments(WinogradWeightTransformParam::__FIELDS__())
+.set_attr_parser(ParamParser<WinogradWeightTransformParam>)
+.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<WinogradWeightTransformParam>)
+.set_attr<FInferShape>("FInferShape", [](const nnvm::NodeAttrs& attrs,
+                                         std::vector<TShape> *in_shape,
+                                         std::vector<TShape> *out_shape) {
+      const auto& param = nnvm::get<WinogradWeightTransformParam>(attrs.parsed);
+      const TShape &wshape = (*in_shape)[0];
+
+      CHECK_EQ(wshape.ndim(), 4) << "Weight should be a 4 dimensional tensor";
+
+      TShape oshape({param.tile_size + wshape[2] - 1,
+                     param.tile_size + wshape[3] - 1,
+                     wshape[0],
+                     wshape[1]});
+      NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
+      return true;
+  })
+.set_attr<FCorrectLayout>("FCorrectLayout", [](const NodeAttrs& attrs,
+                                               std::vector<Layout> *ilayouts,
+                                               const std::vector<Layout> *last_ilayouts,
+                                               std::vector<Layout> *olayouts) {
+  Layout layout("OIHW");
+  NNVM_ASSIGN_LAYOUT(*ilayouts, 0, layout);
+  NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout);
+  return true;
+})
+.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_num_outputs(1)
+.set_num_inputs(1)
+.set_support_level(5);
+
+DMLC_REGISTER_PARAMETER(WinogradWeightTransformParam);
+
+NNVM_REGISTER_OP(_contrib_conv2d_winograd_without_weight_transform)
+.describe(R"code(Compute conv2d with winograd algorithm.
+
+- **data**: Input is 4D array of shape (batch_size, in_channels, height, width)
+- **weight**: Any shape
+              We do not check shape for this input tensor.
+
+- **bias**: (channels,)
+- **out**: Output is 4D array of shape (batch_size, channels, out_height, out_width)
+)code" NNVM_ADD_FILELINE)
+.add_argument("data", "4D Tensor", "Input data.")
+.add_argument("weight", "Tensor", "Transformed weight tensor.")
+.add_argument("bias", "1D Tensor", "Bias parameter.")
+.add_arguments(WinogradConv2DParam::__FIELDS__())
+.set_attr_parser(ParamParser<WinogradConv2DParam>)
+.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<WinogradConv2DParam>)
+.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<WinogradConv2DParam>)
+.set_attr<FInferShape>("FInferShape", WinogradConv2DInferShape<WinogradConv2DParam>)
+.set_attr<FInferType>("FInferType", Conv2DInferType<WinogradConv2DParam>)
+.set_attr<FCorrectLayout>("FCorrectLayout", Conv2DCorrectLayout<WinogradConv2DParam>)
+.set_num_outputs(1)
+.set_num_inputs(UseBiasNumInputs<WinogradConv2DParam>)
+.set_support_level(5);
+
+DMLC_REGISTER_PARAMETER(WinogradConv2DParam);
+
+
+inline bool Conv2DWinogradNNPACKWTInferType(const nnvm::NodeAttrs& attrs,
+                                            std::vector<int>* in_type,
+                                            std::vector<int>* out_type) {
+  const WinogradNNPACKWeightTransformParam& param =
+      nnvm::get<WinogradNNPACKWeightTransformParam>(attrs.parsed);
+
+  CHECK_EQ(in_type->size(), 1U) << "Input:[weight]";
+  CHECK_EQ(out_type->size(), 1U);
+
+  if (param.out_dtype != -1) {
+    NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_type, 0, param.out_dtype);
+  } else {
+    ElemwiseType<1, 1>(attrs, in_type, out_type);
+  }
+  return true;
+}
+
+NNVM_REGISTER_OP(_contrib_conv2d_winograd_nnpack_weight_transform)
+.describe(R"code(Weight transformation of winograd fast convolution algorithm.
+Separate this into another nnvm symbol in order to enable Precompute Pass to compute the
+weight transformation in advance.
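+
+The output is a 4D tensor of shape (channels, in_channels, 8, 8),
+the fixed 8x8 tile size used by NNPACK.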
+- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) +)code" NNVM_ADD_FILELINE) +.add_argument("weight", "4D Tensor", "Weight tensor.") +.add_arguments(WinogradNNPACKWeightTransformParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, + std::vector *in_shape, + std::vector *out_shape) { + const TShape &wshape = (*in_shape)[0]; + CHECK_EQ(wshape.ndim(), 4) << "Weight should be a 4 dimensional tensor"; + TShape oshape({wshape[0], wshape[1], 8, 8}); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + return true; +}) +.set_attr("FCorrectLayout", [](const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + Layout layout("OIHW"); + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, layout); + NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout); + return true; +}) +.set_attr("FInferType", Conv2DWinogradNNPACKWTInferType) +.set_num_outputs(1) +.set_num_inputs(1) +.set_support_level(5); + +DMLC_REGISTER_PARAMETER(WinogradNNPACKWeightTransformParam); + +NNVM_REGISTER_OP(_contrib_conv2d_winograd_nnpack_without_weight_transform) +.describe(R"code(Compute conv2d with winograd nnpack. +- **data**: Input is 4D array of shape (batch_size, in_channels, height, width) +- **weight**: Any shape + We do not check shape for this input tensor. +- **bias**: (channels,) +- **out**: Output is 4D array of shape (batch_size, channels, out_height, out_width) +)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_argument("weight", "4D Tensor", "Transformed weight tensor.") +.add_argument("bias", "1D Tensor", "Bias parameter.") +.add_arguments(Conv2DParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FListInputNames", UseBiasListInputNames) +.set_attr("FInferShape", WinogradConv2DInferShape) +.set_attr("FInferType", Conv2DInferType) +.set_attr("FCorrectLayout", Conv2DCorrectLayout) +.set_num_outputs(1) +.set_num_inputs(UseBiasNumInputs) +.set_support_level(5); + + +NNVM_REGISTER_OP(_conv2d_grad) + .describe(R"code(2D convolution grad. 
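+
+Takes the output gradient, the input data and the weight of the forward
+conv2d and produces the gradients w.r.t. data, weight and, when `use_bias`
+is set, bias.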
+ +)code" NNVM_ADD_FILELINE) +.add_argument("ograd", "4D Tensor", "Output grad.") +.add_argument("data", "4D Tensor", "Input data of conv2d.") +.add_argument("weight", "4D Tensor", "Input weight.") +.set_num_inputs(3) +.set_num_outputs(UseBiasNumInputs) +.set_attr("FListOutputNames", UseBiasListInputNames) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr( + "FInferShape", [](const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + const Conv2DParam& param = nnvm::get(attrs.parsed); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kData, in_attrs->at(1)); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kWeight, in_attrs->at(2)); + if (param.use_bias) { + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kBias, TShape({param.channels})); + } + return true; +}) +.set_attr("FInferType", ElemwiseType<3, -1>) +.set_attr("TIsBackward", true); + + +DMLC_REGISTER_PARAMETER(Conv2DTransposeParam); + +inline bool Conv2DTransposeInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + static const Layout kNCHW("NCHW"); + static const Layout kOIHW("OIHW"); + const Conv2DTransposeParam& param = nnvm::get(attrs.parsed); + const Layout layout(param.layout); + const Layout kernel_layout(param.kernel_layout); + if (param.use_bias) { + CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; + } else { + CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; + } + CHECK_EQ(out_shape->size(), 1U); + + const TShape& dshape = (*in_shape)[Conv2DTransposeParam::kData]; + if (dshape.ndim() == 0) return false; + TShape dshape_nchw = ConvertLayout(dshape, layout, kNCHW); + + CHECK_EQ(dshape_nchw[1] % param.groups, 0U) + << "input num_filter must divide group size"; + CHECK_EQ(param.channels % param.groups, 0U) + << "output num_filter must divide group size"; + CHECK_EQ(param.kernel_size.ndim(), 2U) + << "incorrect kernel size: " << param.kernel_size; + CHECK_EQ(param.strides.ndim(), 2U) + << "incorrect stride size: " << param.strides; + CHECK_EQ(param.dilation.ndim(), 2U) + << "incorrect dilate size: " << param.dilation; + + TShape wshape({dshape_nchw[1], + param.channels / param.groups, + param.kernel_size[0], + param.kernel_size[1]}); + wshape = ConvertLayout(wshape, kOIHW, kernel_layout); + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DTransposeParam::kWeight, wshape); + + if (param.use_bias) { + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, + Conv2DTransposeParam::kBias, + TShape({param.channels})); + } + // dilation + dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0]; + dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1]; + // output shape. 
+  TShape oshape({dshape_nchw[0], param.channels, 0, 0});
+  oshape[2] = (param.strides[0] * (dshape_nchw[2] - 1) + dilated_ksize_y -
+               2 * param.padding[0] + param.output_padding[0]);
+
+  oshape[3] = (param.strides[1] * (dshape_nchw[3] - 1) + dilated_ksize_x -
+               2 * param.padding[1] + param.output_padding[1]);
+  NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0,
+                           ConvertLayout(oshape, kNCHW, layout));
+  return true;
+}
+
+inline bool Conv2DTransposeCorrectLayout(const NodeAttrs& attrs,
+                                         std::vector<Layout> *ilayouts,
+                                         const std::vector<Layout> *last_ilayouts,
+                                         std::vector<Layout> *olayouts) {
+  const Conv2DTransposeParam& param = nnvm::get<Conv2DTransposeParam>(attrs.parsed);
+
+  const Layout in_layout(param.layout);
+
+  const Layout kernel_layout(param.kernel_layout);
+  if (param.use_bias) {
+    CHECK_EQ(ilayouts->size(), 3U) << "Input:[data, weight, bias]";
+    NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout);
+    NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout);
+    NNVM_ASSIGN_LAYOUT(*ilayouts, 2, Layout("C"));
+  } else {
+    CHECK_EQ(ilayouts->size(), 2U) << "Input:[data, weight]";
+    NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout);
+    NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout);
+  }
+
+  CHECK_EQ(olayouts->size(), 1U);
+  NNVM_ASSIGN_LAYOUT(*olayouts, 0, in_layout);
+
+  return true;
+}
+
+NNVM_REGISTER_OP(conv2d_transpose)
+.describe(R"code(Transposed 2D convolution layer (sometimes called Deconvolution).
+
+The need for transposed convolutions generally arises
+from the desire to use a transformation going in the opposite direction
+of a normal convolution, i.e., from something that has the shape of the
+output of some convolution to something that has the shape of its input
+while maintaining a connectivity pattern that is compatible with
+said convolution.
+
+- **data**: This depends on the `layout` parameter. Input is 4D array of shape
+            (batch_size, in_channels, height, width) if `layout` is `NCHW`.
+- **weight**: (in_channels, channels, kernel_size[0], kernel_size[1])
+- **bias**: (channels,)
+- **out**: This depends on the `layout` parameter. Output is 4D array of shape
+           (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
+ + out_height and out_width are calculated as:: + out_height = (height-1)*strides[0]-2*padding[0]+kernel_size[0]+output_padding[0] + out_width = (width-1)*strides[1]-2*padding[1]+kernel_size[1]+output_padding[1] + +)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_argument("weight", "4D Tensor", "Weight matrix.") +.add_argument("bias", "1D Tensor", "Bias parameter.") +.add_arguments(Conv2DTransposeParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FListInputNames", UseBiasListInputNames) +.set_attr("FInferShape", Conv2DTransposeInferShape) +.set_attr("FInferType", Conv2DInferType) +.set_attr("FCorrectLayout", Conv2DTransposeCorrectLayout) +.set_num_outputs(1) +.set_num_inputs(UseBiasNumInputs) +.set_support_level(2); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index 91b568187026..ec3643bef306 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -683,11 +683,11 @@ NNVM_REGISTER_OP(pad) << "Illegal pad_width"; Array pad_before; for (size_t i = 0; i < pad_width.ndim(); ++i) { - pad_before.push_back(tvm::make_const(tvm::DataType::Int(32), pad_width[i][0])); + pad_before.push_back(tvm::make_const(tvm::Int(32), pad_width[i][0])); } Array pad_after; for (size_t i = 0; i < pad_width.ndim(); ++i) { - pad_after.push_back(tvm::make_const(tvm::DataType::Int(32), pad_width[i][1])); + pad_after.push_back(tvm::make_const(tvm::Int(32), pad_width[i][1])); } return Array{ topi::pad(inputs[0], pad_before, pad_after, tvm::make_const(inputs[0]->dtype, param.pad_value)) }; diff --git a/nnvm/src/top/nn/nn_common.h b/nnvm/src/top/nn/nn_common.h new file mode 100644 index 000000000000..d7ce420b6d94 --- /dev/null +++ b/nnvm/src/top/nn/nn_common.h @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file nn_common.h + * \brief Common utilities for nn ops. + */ +#ifndef NNVM_TOP_NN_NN_COMMON_H_ +#define NNVM_TOP_NN_NN_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nnvm { +namespace top { + +template +inline uint32_t UseBiasNumInputs(const NodeAttrs& attrs) { + const ParamType& param = get(attrs.parsed); + return param.use_bias ? 3 : 2; +} + +template +inline std::vector UseBiasListInputNames(const NodeAttrs& attrs) { + const ParamType& param = nnvm::get(attrs.parsed); + if (param.use_bias) { + return {"data", "weight", "bias"}; + } else { + return {"data", "weight"}; + } +} + +/*! 
+ * \brief Convert shape in src_layout to shape in dst_layout + * \param src original shape + * \param src_layout layout of original shape + * \param dst_layout target layout + * \return shape in target layout + */ +inline TShape ConvertLayout(TShape src, const Layout& src_layout, const Layout& dst_layout) { + if (src_layout == dst_layout) { + return src; + } else if (!src_layout.defined()) { + LOG(FATAL) << "cannot convert undefined layout to " << dst_layout; + } else if (!dst_layout.defined()) { + LOG(FATAL) << "cannot convert " << src_layout << " to undefined layout"; + } + + CHECK(src_layout.convertible(dst_layout)) << "cannot convert from " + << src_layout << " to " << dst_layout; + + TShape dst(dst_layout.ndim()); + for (size_t i = 0; i < src_layout.ndim(); ++i) { + Layout::LayoutDim src_dim = src_layout[i]; + if (Layout::is_superdim(src_dim)) { + int dst_major_pos = dst_layout.indexof(Layout::to_superdim(src_dim)); + int dst_minor_pos = dst_layout.indexof(Layout::to_subdim(src_dim)); + int src_minor_pos = src_layout.indexof(Layout::to_subdim(src_dim)); + int src_factor = src_layout.subsizeof(src_dim); + int dst_factor = dst_layout.subsizeof(src_dim); + + uint32_t src_dim_size = src[i]; + if (src_minor_pos >= 0) { + CHECK_EQ(src_factor, src[src_minor_pos]) << "src shape " << src + << " does not agree with layout " << src_layout; + src_dim_size *= src_factor; + } + + dst[dst_major_pos] = src_dim_size; + if (dst_minor_pos >= 0) { + CHECK_GT(dst_factor, 0); + CHECK_LE(dst_factor, src_dim_size) << "Converting " << src + << " from " << src_layout + << " to " << dst_layout + << ": cannot split dimension size of " + << src_dim_size << " by " << dst_factor; + dst[dst_major_pos] /= dst_factor; + dst[dst_minor_pos] = dst_factor; + } + } + } + return dst; +} + +} // namespace top +} // namespace nnvm + +#endif // NNVM_TOP_NN_NN_COMMON_H_ diff --git a/nnvm/src/top/nn/pooling.cc b/nnvm/src/top/nn/pooling.cc new file mode 100644 index 000000000000..11ca637d3b06 --- /dev/null +++ b/nnvm/src/top/nn/pooling.cc @@ -0,0 +1,435 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +/*! + * \file pooling.cc + * \brief Property def of pooling operators. 
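+ *
+ * Defines max_pool2d, avg_pool2d, global_max_pool2d and global_avg_pool2d.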
+ */
+#include <nnvm/op.h>
+#include <nnvm/node.h>
+#include <nnvm/op_attr_types.h>
+#include <nnvm/compiler/op_attr_types.h>
+#include <nnvm/compiler/util.h>
+#include <nnvm/top/nn.h>
+#include "nn_common.h"
+#include "../op_common.h"
+#include "../elemwise_op_common.h"
+#include "topi/nn/pooling.h"
+
+namespace nnvm {
+namespace top {
+using namespace tvm;
+using namespace nnvm::compiler;
+
+DMLC_REGISTER_PARAMETER(MaxPool2DParam);
+
+template <typename T>
+inline bool Pool2DInferShape(const nnvm::NodeAttrs& attrs,
+                             std::vector<TShape>* in_shape,
+                             std::vector<TShape>* out_shape) {
+  const T& param = nnvm::get<T>(attrs.parsed);
+  CHECK_EQ(in_shape->size(), 1U);
+  CHECK_EQ(out_shape->size(), 1U);
+
+  TShape dshape = (*in_shape)[0];
+  if (dshape.ndim() == 0) return false;
+
+  CHECK_GE(dshape.ndim(), 2U)
+    << "Pool2D only support input >= 2-D: input must have height and width";
+
+  Layout layout(param.layout);
+  CHECK(layout.contains('H') && layout.contains('W') &&
+        !layout.contains('h') && !layout.contains('w'))
+    << "Invalid layout " << layout
+    << ". Pool2D layout must have H and W, which cannot be split";
+
+  const auto hidx = layout.indexof('H');
+  const auto widx = layout.indexof('W');
+
+  dim_t pad_h, pad_w;
+  if (param.padding.ndim() == 1) {
+    pad_h = param.padding[0] * 2;
+    pad_w = param.padding[0] * 2;
+  } else if (param.padding.ndim() == 2) {
+    // (top, left)
+    pad_h = param.padding[0] * 2;
+    pad_w = param.padding[1] * 2;
+  } else if (param.padding.ndim() == 4) {
+    // (top, left, bottom, right)
+    pad_h = param.padding[0] + param.padding[2];
+    pad_w = param.padding[1] + param.padding[3];
+  } else {
+    return false;
+  }
+
+  TShape oshape = dshape;
+  CHECK(param.pool_size[0] <= dshape[hidx] + pad_h)
+    << "pool size (" << param.pool_size[0] << ") exceeds input (" << dshape[hidx]
+    << " padded to " << (dshape[hidx] + pad_h) << ")";
+  CHECK(param.pool_size[1] <= dshape[widx] + pad_w)
+    << "pool size (" << param.pool_size[1] << ") exceeds input (" << dshape[widx]
+    << " padded to " << (dshape[widx] + pad_w) << ")";
+
+  if (!param.ceil_mode) {
+    oshape[hidx] = ((dshape[hidx] + pad_h - param.pool_size[0]) /
+                    param.strides[0]) + 1;
+    oshape[widx] = ((dshape[widx] + pad_w - param.pool_size[1]) /
+                    param.strides[1]) + 1;
+  } else {
+    oshape[hidx] = ((dshape[hidx] + pad_h - param.pool_size[0] +
+                     param.strides[0] - 1) / param.strides[0]) + 1;
+    oshape[widx] = ((dshape[widx] + pad_w - param.pool_size[1] +
+                     param.strides[1] - 1) / param.strides[1]) + 1;
+  }
+  NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
+  return true;
+}
+
+template <typename T>
+inline bool Pool2DCorrectLayout(const NodeAttrs& attrs,
+                                std::vector<Layout> *ilayouts,
+                                const std::vector<Layout> *last_ilayouts,
+                                std::vector<Layout> *olayouts) {
+  const T &param = nnvm::get<T>(attrs.parsed);
+  CHECK_EQ(ilayouts->size(), 1);
+  CHECK_EQ(last_ilayouts->size(), 1);
+  CHECK_EQ(olayouts->size(), 1);
+
+  Layout input = (*ilayouts)[0];
+  const Layout layout(param.layout);
+
+  if (input.defined()) {
+    CHECK(input.convertible(layout)) << "Invalid input layout " << input;
+    if (input.indexof('W') != layout.indexof('W') ||
+        input.indexof('H') != layout.indexof('H') ||
+        input.contains('w') || input.contains('h')) {
+      // as long as the index doesn't change for width and height
+      // pool2d can keep the input layout.
+      input = layout;
+    }
+  } else {
+    input = layout;
+  }
+
+  NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input);
+  NNVM_ASSIGN_LAYOUT(*olayouts, 0, input);
+
+  return true;
+}
+
+NNVM_REGISTER_OP(max_pool2d)
+.describe(R"code(Max pooling operation for two dimensional data.
+
+- **data**: This depends on the `layout` parameter. Input is 4D array of shape
+            (batch_size, channels, height, width) if `layout` is `NCHW`.
+- **out**: This depends on the `layout` parameter. Output is 4D array of shape
+           (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
+           out_height and out_width are calculated as::
+
+               out_height = floor((height+padding[0]+padding[2]-pool_size[0])/strides[0])+1
+               out_width = floor((width+padding[1]+padding[3]-pool_size[1])/strides[1])+1
+
+           where padding will be an expanded array based on number of values passed as::
+               one int : all sides same padding used.
+               two int : bottom, right use same as top and left.
+               four int: padding width in the order of (top, left, bottom, right).
+
+           When `ceil_mode` is `True`, ceil will be used instead of floor in this
+           equation.
+
+)code" NNVM_ADD_FILELINE)
+.add_argument("data", "4D Tensor", "Input data.")
+.add_arguments(MaxPool2DParam::__FIELDS__())
+.set_attr_parser(ParamParser<MaxPool2DParam>)
+.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MaxPool2DParam>)
+.set_num_outputs(1)
+.set_num_inputs(1)
+.set_attr<FInferShape>("FInferShape", Pool2DInferShape<MaxPool2DParam>)
+.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCorrectLayout>("FCorrectLayout", Pool2DCorrectLayout<MaxPool2DParam>)
+.set_attr<FTVMCompute>("FTVMCompute", [](const NodeAttrs& attrs,
+                                         const Array<Tensor>& inputs,
+                                         const Array<Tensor>& out_info) {
+  const MaxPool2DParam& param = nnvm::get<MaxPool2DParam>(attrs.parsed);
+  auto pool_size = ShapeToArray(param.pool_size);
+  auto strides = ShapeToArray(param.strides);
+  auto padding = ShapeToArray(param.padding);
+  auto ceil_mode = param.ceil_mode;
+
+  Layout layout(param.layout);
+  CHECK(layout.convertible(Layout("NCHW")))
+    << "max_pool2d currently only supports layouts that are convertible from NCHW";
+  CHECK_EQ(layout.indexof('h'), -1) << "max_pool2d does not support input split on height";
+  CHECK_EQ(layout.indexof('w'), -1) << "max_pool2d does not support input split on width";
+
+  CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
+    << "Pool2D only support 4-D input (e.g., NCHW)"
+    << " or 5-D input (last dimension is a split of channel)";
+
+  if (param.padding.ndim() == 1) {
+    padding.push_back(padding[0]);
+    padding.push_back(padding[0]);
+    padding.push_back(padding[0]);
+  } else if (param.padding.ndim() == 2) {
+    padding.push_back(padding[0]);
+    padding.push_back(padding[1]);
+  }
+
+  return Array<Tensor>{
+    topi::nn::pool(inputs[0], pool_size, strides, padding,
+                   topi::nn::kMaxPool, ceil_mode, layout.name())};
+})
+.set_attr<FGradient>(
+  "FGradient", [](const NodePtr& n,
+                  const std::vector<NodeEntry>& ograds) {
+    return MakeGradNode("_max_pool2d_grad", n,
+                        {ograds[0], n->inputs[0], NodeEntry{n, 0, 0}},
+                        n->attrs.dict);
+})
+.set_support_level(2);
+
+NNVM_REGISTER_OP(_max_pool2d_grad)
+  .describe(R"code(Max pooling 2D grad.
+
+)code" NNVM_ADD_FILELINE)
+.add_argument("ograd", "4D Tensor", "Output grad.")
+.add_argument("input", "4D Tensor", "Input data of max_pool2d grad.")
+.add_argument("output", "4D Tensor", "Output data of max_pool2d grad.")
+.set_num_inputs(3)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<MaxPool2DParam>)
+.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MaxPool2DParam>)
+.set_attr<FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
+.set_attr<FInferType>("FInferType", ElemwiseType<3, 1>)
+.set_attr<TIsBackward>("TIsBackward", true);
+
+DMLC_REGISTER_PARAMETER(AvgPool2DParam);
+
+NNVM_REGISTER_OP(avg_pool2d)
+.describe(R"code(Average pooling operation for two dimensional data.
+
+- **data**: This depends on the `layout` parameter. Input is 4D array of shape
+            (batch_size, channels, height, width) if `layout` is `NCHW`.
+- **out**: This depends on the `layout` parameter. Output is 4D array of shape
+           (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
+ out_height and out_width are calculated as:: + + out_height = floor((height+padding[0]+padding[2]-pool_size[0])/strides[0])+1 + out_width = floor((width+padding[1]+padding[3]-pool_size[1])/strides[1])+1 + + where padding will be an expanded array based on number of values passed as:: + one int : all sides same padding used. + two int : bottom, right use same as top and left. + four int: padding width in the order of (top, left, bottom, right). + + When `ceil_mode` is `True`, ceil will be used instead of floor in this + equation. + +)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_arguments(AvgPool2DParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", Pool2DInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", Pool2DCorrectLayout) +.set_attr("FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const AvgPool2DParam& param = nnvm::get(attrs.parsed); + auto pool_size = ShapeToArray(param.pool_size); + auto strides = ShapeToArray(param.strides); + auto padding = ShapeToArray(param.padding); + auto ceil_mode = param.ceil_mode; + auto count_include_pad = param.count_include_pad; + + Layout layout(param.layout); + CHECK(layout.convertible(Layout("NCHW"))) + << "avg_pool2d currently only supports layouts that are convertible from NCHW"; + CHECK_EQ(layout.indexof('h'), -1) << "avg_pool2d does not support input split on height"; + CHECK_EQ(layout.indexof('w'), -1) << "avg_pool2d does not support input split on width"; + + CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U) + << "Pool2D only support 4-D input (e.g., NCHW)" + << " or 5-D input (last dimension is a split of channel)"; + + if (param.padding.ndim() == 1) { + padding.push_back(padding[0]); + padding.push_back(padding[0]); + padding.push_back(padding[0]); + } else if (param.padding.ndim() == 2) { + padding.push_back(padding[0]); + padding.push_back(padding[1]); + } + + return Array{ + topi::nn::pool(inputs[0], pool_size, strides, padding, + topi::nn::kAvgPool, ceil_mode, layout.name(), count_include_pad)}; +}) +.set_num_outputs(1) +.set_num_inputs(1) +.set_support_level(2); + + +DMLC_REGISTER_PARAMETER(GlobalPool2DParam); + +inline bool GlobalPool2DInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + static const Layout kNCHW("NCHW"); + const GlobalPool2DParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_shape->size(), 1U); + CHECK_EQ(out_shape->size(), 1U); + + TShape dshape = (*in_shape)[0]; + if (dshape.ndim() == 0) return false; + + CHECK_GE(dshape.ndim(), 2U) + << "Pool2D only support input >= 2-D: input must have height and width"; + + Layout layout(param.layout); + CHECK(layout.contains('H') && layout.contains('W') && + !layout.contains('h') && !layout.contains('w')) + << "Invalid layout " << layout + << ". 
Pool2D layout must have H and W, which cannot be split";
+
+  const auto hidx = layout.indexof('H');
+  const auto widx = layout.indexof('W');
+
+  TShape oshape = dshape;
+  oshape[hidx] = oshape[widx] = 1;
+  NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
+  return true;
+}
+
+inline bool GlobalPool2DCorrectLayout(const NodeAttrs& attrs,
+                                      std::vector<Layout> *ilayouts,
+                                      const std::vector<Layout> *last_ilayouts,
+                                      std::vector<Layout> *olayouts) {
+  const GlobalPool2DParam &param = nnvm::get<GlobalPool2DParam>(attrs.parsed);
+  CHECK_EQ(ilayouts->size(), 1);
+  CHECK_EQ(last_ilayouts->size(), 1);
+  CHECK_EQ(olayouts->size(), 1);
+
+  Layout input = (*ilayouts)[0];
+  const Layout layout(param.layout);
+
+  if (input.defined()) {
+    CHECK(input.convertible(layout)) << "Invalid input layout " << input;
+    if (input.indexof('W') != layout.indexof('W') ||
+        input.indexof('H') != layout.indexof('H') ||
+        input.contains('w') || input.contains('h')) {
+      // as long as the index doesn't change for width and height
+      // pool2d can keep the input layout.
+      input = layout;
+    }
+  } else {
+    input = layout;
+  }
+
+  NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input);
+  NNVM_ASSIGN_LAYOUT(*olayouts, 0, input);
+
+  return true;
+}
+
+NNVM_REGISTER_OP(global_max_pool2d)
+.describe(R"code(Global max pooling operation for 2D data.
+
+- **data**: This depends on the `layout` parameter. Input is 4D array of shape
+            (batch_size, channels, height, width) if `layout` is `NCHW`.
+- **out**: This depends on the `layout` parameter. Output is 4D array of shape
+           (batch_size, channels, 1, 1) if `layout` is `NCHW`.
+
+)code" NNVM_ADD_FILELINE)
+.add_argument("data", "4D Tensor", "Input data.")
+.add_arguments(GlobalPool2DParam::__FIELDS__())
+.set_attr_parser(ParamParser<GlobalPool2DParam>)
+.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<GlobalPool2DParam>)
+.set_attr<FInferShape>("FInferShape", GlobalPool2DInferShape)
+.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCorrectLayout>("FCorrectLayout", GlobalPool2DCorrectLayout)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    const GlobalPool2DParam& param = nnvm::get<GlobalPool2DParam>(attrs.parsed);
+    Layout layout(param.layout);
+    CHECK(layout.convertible(Layout("NCHW")))
+      << "global_max_pool2d currently only supports layouts that are convertible from NCHW";
+    CHECK_EQ(layout.indexof('h'), -1)
+      << "global_max_pool2d does not support input split on height";
+    CHECK_EQ(layout.indexof('w'), -1)
+      << "global_max_pool2d does not support input split on width";
+
+    CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
+      << "Pool2D only support 4-D input (e.g., NCHW)"
+      << " or 5-D input (last dimension is a split of channel)";
+
+    return Array<Tensor>{
+      topi::nn::global_pool(inputs[0], topi::nn::kMaxPool, layout.name()) };
+})
+.set_num_outputs(1)
+.set_num_inputs(1)
+.set_support_level(2);
+
+
+NNVM_REGISTER_OP(global_avg_pool2d)
+.describe(R"code(Global average pooling operation for 2D data.
+
+- **data**: This depends on the `layout` parameter. Input is 4D array of shape
+            (batch_size, channels, height, width) if `layout` is `NCHW`.
+- **out**: This depends on the `layout` parameter. Output is 4D array of shape
+           (batch_size, channels, 1, 1) if `layout` is `NCHW`.
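+
+           For example, a (1, 16, 7, 7) NCHW input produces a (1, 16, 1, 1) output.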
+ +)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_arguments(GlobalPool2DParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", GlobalPool2DInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", GlobalPool2DCorrectLayout) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const GlobalPool2DParam& param = nnvm::get(attrs.parsed); + Layout layout(param.layout); + CHECK(layout.convertible(Layout("NCHW"))) + << "global_avg_pool2d currently only supports layouts that are convertible from NCHW"; + CHECK_EQ(layout.indexof('h'), -1) + << "global_avg_pool2d does not support input split on height"; + CHECK_EQ(layout.indexof('w'), -1) + << "global_avg_pool2d does not support input split on width"; + + CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U) + << "Pool2D only support 4-D input (e.g., NCHW)" + << " or 5-D input (last dimension is a split of channel)"; + + return Array{ + topi::nn::global_pool(inputs[0], topi::nn::kAvgPool, layout.name()) }; +}) +.set_num_outputs(1) +.set_num_inputs(1) +.set_support_level(2); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/nn/upsampling.cc b/nnvm/src/top/nn/upsampling.cc new file mode 100644 index 000000000000..68583ae616f2 --- /dev/null +++ b/nnvm/src/top/nn/upsampling.cc @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file upsampling.cc + * \brief Property def of upsampling operators. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "nn_common.h" +#include "../op_common.h" +#include "../elemwise_op_common.h" +#include "topi/elemwise.h" +#include "topi/transform.h" +#include "topi/nn/upsampling.h" + +namespace nnvm { +namespace top { +using tvm::Expr; +using tvm::Array; +using tvm::Tensor; +using nnvm::compiler::FTVMCompute; + +DMLC_REGISTER_PARAMETER(UpSamplingParam); + +inline bool UpSamplingInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + static const Layout kNCHW("NCHW"); + const UpSamplingParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_shape->size(), 1U); + CHECK_EQ(out_shape->size(), 1U); + TShape dshape = (*in_shape)[0]; + if (dshape.ndim() == 0) return false; + + dshape = ConvertLayout(dshape, param.layout, kNCHW); + TShape oshape = dshape; + oshape[2] = oshape[2] * param.scale; + oshape[3] = oshape[3] * param.scale; + oshape = ConvertLayout(oshape, kNCHW, param.layout); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + + return true; +} + +inline bool UpsamplingLayout(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + const UpSamplingParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_layouts->size(), 1U); + CHECK_EQ(out_layouts->size(), 1U); + const Layout layout(param.layout); + NNVM_ASSIGN_LAYOUT(*in_layouts, 0, layout); + NNVM_ASSIGN_LAYOUT(*out_layouts, 0, layout); + return true; +} + +NNVM_REGISTER_OP(upsampling) +.describe(R"(Perform upsampling to input array with nearest neighbour or bilinear interpolation. + +- **data**: data is 4D array of shape + (batch_size, channels, in_height, in_width) for NCHW + (batch_size, in_height, in_width, channels) for NHWC + +- **out**: Output is 4D array of shape + for layout NCHW + (batch_size, channels, in_height*scale, in_width*scale) + + for layout NHWC + (batch_size, in_height*scale, in_width*scale, channels) + +)" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tensor", "Input data.") +.add_arguments(UpSamplingParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", UpSamplingInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", UpsamplingLayout) +.set_num_outputs(1) +.set_num_inputs(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const UpSamplingParam& param = nnvm::get(attrs.parsed); + Array oshape; + if (param.layout == "NCHW") { + oshape.push_back(out_info[0]->shape[2]); + oshape.push_back(out_info[0]->shape[3]); + } else { + oshape.push_back(out_info[0]->shape[1]); + oshape.push_back(out_info[0]->shape[2]); + } + + return Array{ topi::nn::upsampling(inputs[0], oshape, param.layout, param.method)}; +}) +.set_support_level(2); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/op_common.h b/nnvm/src/top/op_common.h new file mode 100644 index 000000000000..7213e1c9c116 --- /dev/null +++ b/nnvm/src/top/op_common.h @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file op_common.h
+ * \brief Common operator utilities
+ */
+#ifndef NNVM_TOP_OP_COMMON_H_
+#define NNVM_TOP_OP_COMMON_H_
+
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <nnvm/op.h>
+#include <nnvm/node.h>
+#include <nnvm/op_attr_types.h>
+#include <string>
+#include <vector>
+#include <unordered_map>
+
+namespace nnvm {
+namespace top {
+/*!
+ * \brief Parse keyword arguments as PType arguments and save to parsed
+ * \tparam PType the parameter type.
+ * \param attrs The attributes.
+ */
+template<typename PType>
+inline void ParamParser(nnvm::NodeAttrs* attrs) {
+  PType param;
+  try {
+    param.Init(attrs->dict);
+  } catch (const dmlc::ParamError& e) {
+    std::ostringstream os;
+    os << e.what();
+    os << ", in operator " << attrs->op->name << "("
+       << "name=\"" << attrs->name << "\"";
+    for (const auto& k : attrs->dict) {
+      os << ", " << k.first << "=\"" << k.second << "\"";
+    }
+    os << ")";
+    throw dmlc::ParamError(os.str());
+  }
+  attrs->parsed = std::move(param);
+}
+
+/*!
+ * \brief Build the attribute dict from the parsed PType parameters.
+ * \tparam PType the parameter type.
+ * \param attrs The attributes.
+ */
+template<typename PType>
+inline std::unordered_map<std::string, std::string>
+ParamGetAttrDict(const nnvm::NodeAttrs& attrs) {
+  std::unordered_map<std::string, std::string> dict = attrs.dict;
+  nnvm::get<PType>(attrs.parsed).UpdateDict(&dict);
+  return dict;
+}
+
+/*! \brief check if shape is empty or contains unknown (0) dim. */
+inline bool shape_is_none(const TShape& x) {
+  return x.ndim() == 0 || x.Size() == 0;
+}
+
+/*! \brief check if type is none (-1) */
+inline bool type_is_none(const int& x) {
+  return x == -1;
+}
+
+/*! \brief check if shape is scalar({1}). */
+inline bool shape_is_scalar(const TShape& x) {
+  return x.ndim() == 1 && x.Size() == 1;
+}
+
+/*! \brief get string representation of shape */
+inline std::string shape_string(const TShape& x) {
+  std::ostringstream os;
+  os << x;
+  return os.str();
+}
+
+/*! \brief get string representation of type */
+inline std::string type_string(const int& x) {
+  return std::to_string(x);
+}
+
+/*!
+ * \brief Assign x to y. Checks for compatibility when y is not empty.
+ *  Allow missing dim in both x and y (as 0).
+ * \param y target shape.
+ * \param x source shape.
+ * \return whether x and y are compatible.
+ */
+inline bool shape_assign(TShape *y, const TShape& x) {
+  if (y->ndim() == 0) {
+    *y = x;
+    return true;
+  } else if (y->ndim() != x.ndim()) {
+    return x.ndim() == 0;
+  } else {
+    for (size_t i = 0; i < y->ndim(); ++i) {
+      if ((*y)[i] == 0) {
+        (*y)[i] = x[i];
+      } else if ((*y)[i] != x[i] && x[i] != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
+
+/*!
+ * \brief Assign x to y. Checks for compatibility when y is not -1.
+ * \param y target type.
+ * \param x source type.
+ * \return whether x and y are compatible.
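+ *
+ * For example, if *y is -1 (unknown), *y simply takes the value of x;
+ * if *y is already a known type and x is a different known type, the
+ * call returns false to signal an inference conflict.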
+ */
+inline bool type_assign(int *y, const int& x) {
+  if (*y == -1) {
+    *y = x;
+    return true;
+  } else if (*y != x && x != -1) {
+    return false;
+  }
+  return true;
+}
+
+template<typename AttrType>
+inline std::string attr_assign_error_msg(const NodeAttrs& attrs,
+                                         int index, bool is_input,
+                                         const AttrType& expected,
+                                         const AttrType& actual,
+                                         const char* attr_name) {
+  static const auto& flist_inputs = Op::GetAttr<FListInputNames>("FListInputNames");
+  static const auto& flist_outputs = Op::GetAttr<FListOutputNames>("FListOutputNames");
+  const auto& flist = is_input ? flist_inputs : flist_outputs;
+  std::string name;
+  if (flist.count(attrs.op)) {
+    name = flist[attrs.op](attrs)[index];
+  } else {
+    name = (is_input ? "data" : "output") + std::to_string(index);
+  }
+  std::ostringstream msg;
+  msg << "Operator " << attrs.op->name << "(";
+  for (const auto& kv : attrs.dict) msg << kv.first << "=" << kv.second << ", ";
+  msg << "name=" << attrs.name << ") expects " << name << "\'s " << attr_name
+      << " to be " << expected << ", but got " << actual << ".";
+  return msg.str();
+}
+
+/*!
+ * \brief macro assign shape to input if input is unknown, otherwise check consistency
+ *  Use macro so we can see the error file more clearly
+ * \param inputs the shape array to store the result
+ * \param index the index in the array
+ * \param shape the inferred shape
+ */
+#define NNVM_ASSIGN_INPUT_SHAPE(attrs, inputs, index, shape)           \
+  {                                                                    \
+    if (!shape_assign(&(inputs)[index], TShape(shape))) {              \
+      LOG(FATAL) << attr_assign_error_msg(attrs, index, true, shape,   \
+                                          (inputs)[index], "shape");   \
+    }                                                                  \
+  }
+
+/*!
+ * \brief macro assign shape to out if out is unknown, otherwise check consistency
+ *  Use macro so we can see the error file more clearly
+ * \param outputs the shape array to store the result
+ * \param index the index in the array
+ * \param shape the inferred shape
+ */
+#define NNVM_ASSIGN_OUTPUT_SHAPE(attrs, outputs, index, shape)         \
+  {                                                                    \
+    if (!shape_assign(&(outputs)[index], TShape(shape))) {             \
+      LOG(FATAL) << attr_assign_error_msg(attrs, index, false, shape,  \
+                                          (outputs)[index], "shape");  \
+    }                                                                  \
+  }
+
+/*!
+ * \brief macro assign type to input if input is unknown (-1), otherwise check consistency
+ *  Use macro so we can see the error file more clearly
+ * \param inputs the type array to store the result
+ * \param index the index in the array
+ * \param type the inferred type
+ */
+#define NNVM_ASSIGN_INPUT_TYPE(attrs, inputs, index, type)             \
+  {                                                                    \
+    if (!type_assign(&(inputs)[index], type)) {                        \
+      LOG(FATAL) << attr_assign_error_msg(attrs, index, true, type,    \
+                                          (inputs)[index], "type");    \
+    }                                                                  \
+  }
+
+/*!
+ * \brief macro assign type to out if out is unknown (-1), otherwise check consistency
+ *  Use macro so we can see the error file more clearly
+ * \param outputs the type array to store the result
+ * \param index the index in the array
+ * \param type the inferred type
+ */
+#define NNVM_ASSIGN_OUTPUT_TYPE(attrs, outputs, index, type)           \
+  {                                                                    \
+    if (!type_assign(&(outputs)[index], type)) {                       \
+      LOG(FATAL) << attr_assign_error_msg(attrs, index, false, type,   \
+                                          (outputs)[index], "type");   \
+    }                                                                  \
+  }
+
+#define NNVM_ASSIGN_LAYOUT(outputs, index, layout)                     \
+  {                                                                    \
+    if (layout.defined()) {                                            \
+      (outputs)[index] = layout;                                       \
+    }                                                                  \
+  }
+
+/*!
+ * \brief macro assign rhs shape to lhs
+ *  Use macro so we can see the error file more clearly
+ * \param lhs lhs shape
+ * \param rhs rhs shape
+ */
+#define SHAPE_ASSIGN(lhs, rhs)                                        \
+  if ((lhs).ndim() == 0) (lhs) = (rhs);                               \
+  else                                                                \
+    CHECK_EQ(lhs, rhs) << "shape inference inconsistent";             \
+
+/*!
+ * \brief macro assign rhs type to lhs + * Use macro so we can see the error file more clearly + * \param lhs lhs type + * \param rhs rhs type + */ +#define DTYPE_ASSIGN(lhs, rhs) \ + if ((lhs) == -1) (lhs) = (rhs); \ + else \ + CHECK_EQ(lhs, rhs) << "type inference inconsistent"; \ + +// simply return the shape as same +inline bool SameShape(const NodeAttrs& attrs, + std::vector *ishape, + std::vector *oshape) { + if (ishape->size() == 0 || (*ishape)[0].ndim() == 0) return false; + for (TShape& pshape : *oshape) { + pshape = (*ishape)[0]; + } + for (TShape& pshape : *ishape) { + pshape = (*ishape)[0]; + } + return true; +} + +// return shape from node attrs +template +inline bool ZeroShape(const NodeAttrs& attrs, + std::vector *ishape, + std::vector *oshape) { + const TShape& ts = dmlc::get(attrs.parsed).shape; + if (ts.ndim() != 0) { + SHAPE_ASSIGN(oshape->at(0), ts); + return true; + } else { + return false; + } +} + +// do not infer layout +inline bool ZeroLayout(const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + return true; +} + +// simply assign output shape or type from input +template +inline bool AssignOutputAttr(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_LT(in_index, in_attrs->size()); + CHECK_LT(out_index, out_attrs->size()); + const TShape &dshape = in_attrs->at(in_index); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, out_index, dshape); + return true; +} + +// return type from node attrs +template +inline bool ZeroType(const NodeAttrs& attrs, + std::vector *iattr, + std::vector *oattr) { + int dtype = dmlc::get(attrs.parsed).dtype; + DTYPE_ASSIGN(oattr->at(0), dtype); + return true; +} + +// Make zero grad node +inline std::vector MakeZeroGradNodes( + const NodePtr& n, + const std::vector& ograds) { + std::vector ret; + for (uint32_t i = 0; i < n->num_inputs(); ++i) { + std::ostringstream os; + ret.push_back(MakeNode("zeros_like", n->attrs.name + "_zero_grad", + {n->inputs[i]})); + } + return ret; +} + +// Helper to make gradient node +inline std::vector MakeGradNode( + const char* op_name, + const NodePtr& n, + std::vector inputs, + std::unordered_map attr = {{}}) { + NodePtr p = Node::Create(); + p->attrs.op = nnvm::Op::Get(op_name); + p->attrs.name = n->attrs.name + "_grad"; + p->inputs = std::move(inputs); + p->attrs.dict = std::move(attr); + if (p->attrs.op->attr_parser) { + p->attrs.op->attr_parser(&p->attrs); + } + std::vector ret; + for (uint32_t i = 0; i < p->num_outputs(); ++i) { + ret.emplace_back(NodeEntry{p, i, 0}); + } + return ret; +} + + +} // namespace top +} // namespace nnvm + +#endif // NNVM_TOP_OP_COMMON_H_ diff --git a/nnvm/src/top/tensor/elemwise.cc b/nnvm/src/top/tensor/elemwise.cc new file mode 100644 index 000000000000..7a79db041755 --- /dev/null +++ b/nnvm/src/top/tensor/elemwise.cc @@ -0,0 +1,998 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file elemwise.cc
+ * \brief Elementwise operators
+ */
+#include <nnvm/op.h>
+#include <nnvm/node.h>
+#include <nnvm/op_attr_types.h>
+#include <nnvm/compiler/op_attr_types.h>
+#include <nnvm/compiler/util.h>
+#include <nnvm/top/tensor.h>
+#include <cmath>
+#include "../op_common.h"
+#include "../elemwise_op_common.h"
+#include "topi/broadcast.h"
+#include "topi/elemwise.h"
+#include "topi/tags.h"
+#include "../../compiler/compile_engine.h"
+
+namespace nnvm {
+namespace top {
+
+using namespace tvm;
+using namespace nnvm::compiler;
+
+// undefined op
+NNVM_REGISTER_ELEMWISE_UNARY_OP(__undef__)
+.describe(R"code(undefined op.
+
+Used to produce an invalid node during optimization.
+
+)code" NNVM_ADD_FILELINE)
+.set_num_outputs(1)
+.set_num_inputs(0);
+
+// floor
+NNVM_REGISTER_ELEMWISE_UNARY_OP(floor)
+.describe(R"code(Take the floor of the input array, computed element-wise.
+)code" NNVM_ADD_FILELINE)
+.set_support_level(3)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::floor(inputs[0]) };
+});
+
+// ceil
+NNVM_REGISTER_ELEMWISE_UNARY_OP(ceil)
+.describe(R"code(Take the ceiling of the input array, computed element-wise.
+)code" NNVM_ADD_FILELINE)
+.set_support_level(3)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::ceil(inputs[0]) };
+});
+
+// trunc
+NNVM_REGISTER_ELEMWISE_UNARY_OP(trunc)
+.describe(R"code(Take the truncated value of the input, element-wise.
+)code" NNVM_ADD_FILELINE)
+.set_support_level(3)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::trunc(inputs[0]) };
+});
+
+// round
+NNVM_REGISTER_ELEMWISE_UNARY_OP(round)
+.describe(R"code(Round elements of the input to the nearest integer.
+)code" NNVM_ADD_FILELINE)
+.set_support_level(3)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::round(inputs[0]) };
+});
+
+// abs
+NNVM_REGISTER_ELEMWISE_UNARY_OP(abs)
+.describe(R"code(Take the absolute value of elements of the input.
+)code" NNVM_ADD_FILELINE)
+.set_support_level(3)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::abs(inputs[0]) };
+});
+
+// sigmoid
+NNVM_REGISTER_ELEMWISE_UNARY_OP(sigmoid)
+.describe(R"code(Computes sigmoid.
+
+..
math:: + Y = 1 / (1 + exp(-X)) + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::sigmoid(inputs[0]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + // y = 1 / (1 + exp(-n0)) + // grad_0 = grad_y * y * (1 - y) + NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0", + {ograds[0], NodeEntry{n, 0, 0}}); + NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1", + {NodeEntry{n, 0, 0}}, {{"scalar", "1"}}); + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad_0", + {sub0, sub1}) + }; +}); + +// tanh +NNVM_REGISTER_ELEMWISE_UNARY_OP(tanh) +.describe(R"code(Computes hyperbolic tangent. + +.. math:: + Y = sinh(X) / cosh(X) + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::tanh(inputs[0]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + // y = sinh(n0) / cosh(n0) + // grad_0 = grad_y * (1 - y^2) + NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0", + {NodeEntry{n, 0, 0}, NodeEntry{n, 0, 0}}); + NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1", + {sub0}, {{"scalar", "1"}}); + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad_0", + {ograds[0], sub1}) + }; +}); + +// exp +NNVM_REGISTER_ELEMWISE_UNARY_OP(exp) +.describe(R"code(Returns the exp input array, computed element-wise. + +.. math:: + exp(x) + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::exp(inputs[0]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + // y = exp(n0) + // grad_0 = grad_y * y + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad_0", + {ograds[0], NodeEntry{n, 0, 0}}) + }; +}); + +// log +NNVM_REGISTER_ELEMWISE_UNARY_OP(log) +.describe(R"code(Returns the log input array, computed element-wise. + +.. math:: + log(x) + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::log(inputs[0]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + // y = log(n0) + // grad_0 = grad_y / n0 + return std::vector{ + MakeNode("elemwise_div", n->attrs.name + "_grad_0", + {ograds[0], n->inputs[0]}) + }; +}); + +// sqrt +NNVM_REGISTER_ELEMWISE_UNARY_OP(sqrt) +.describe(R"code(Returns the sqrt input array, computed element-wise. + +.. 
math:: + \sqrt(x) + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::sqrt(inputs[0]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + // y = sqrt(n0) + // grad_0 = grad_y / (2 * y) + NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0", + {NodeEntry{n, 0, 0}}, {{"scalar", "2"}}); + return std::vector{ + MakeNode("elemwise_div", n->attrs.name + "_grad_0", + {ograds[0], sub0}) + }; +}); + +// binary ops + +NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_add) +.describe(R"code(Element-wise add + +)code") +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::add(inputs[0], inputs[1]) }; + }) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0 + n1 + // grad_0 = grad_y + // grad_1 = grad_y + return std::vector{ MakeNode("copy", n->attrs.name + "_grad_0", + {ograds[0]}), + MakeNode("copy", n->attrs.name + "_grad_0", + {ograds[0]}) }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_sub) +.describe(R"code(Element-wise substraction + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::subtract(inputs[0], inputs[1]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0 - n1 + // grad_0 = grad_y + // grad_1 = - grad_y + return std::vector{ + ograds[0], + MakeNode("negative", n->attrs.name + "_grad_1", {ograds[0]}), + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mul) +.describe(R"code(Element-wise multiplication + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::multiply(inputs[0], inputs[1]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0 * n1 + // grad_0 = grad_y * n1 + // grad_1 = grad_y * n0 + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad_0", + {ograds[0], n->inputs[1]}), + MakeNode("elemwise_mul", n->attrs.name + "_grad_1", + {ograds[0], n->inputs[0]}) + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_div) +.describe(R"code(Element-wise division + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::divide(inputs[0], inputs[1]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0 / n1 + // grad_0 = grad_y / n1 + // grad_1 = - grad_y * n0 / n1^2 + NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0", + {ograds[0], n->inputs[0]}); + NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1", + {sub0}); + NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2", + {n->inputs[1], n->inputs[1]}); + return std::vector{ + MakeNode("elemwise_div", n->attrs.name + "_grad_0", + {ograds[0], n->inputs[1]}), + MakeNode("elemwise_div", n->attrs.name + "_grad_1", + {sub1, sub2}) + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mod) + .describe(R"code(Element-wise modulo + +)code" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const 
Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::mod(inputs[0], inputs[1]) };
+});
+
+NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_pow)
+  .describe(R"code(Element-wise power
+
+)code" NNVM_ADD_FILELINE)
+.set_support_level(1)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::power(inputs[0], inputs[1]) };
+});
+
+// logical
+NNVM_REGISTER_ELEMWISE_BINARY_OP(logical_and)
+.describe(R"code(Element-wise logical AND
+
+)code")
+.set_support_level(4)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::logical_and(inputs[0], inputs[1]) };
+});
+
+NNVM_REGISTER_ELEMWISE_BINARY_OP(logical_or)
+.describe(R"code(Element-wise logical OR
+
+)code")
+.set_support_level(4)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::logical_or(inputs[0], inputs[1]) };
+});
+
+// negative
+NNVM_REGISTER_ELEMWISE_UNARY_OP(negative)
+.describe(R"code(Element-wise numeric negative
+
+)code" NNVM_ADD_FILELINE)
+.set_support_level(3)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::negative(inputs[0]) };
+})
+.set_attr<FGradient>(
+  "FGradient", [](const NodePtr& n,
+                  const std::vector<NodeEntry>& ograds){
+    // y = - n0
+    // grad_0 = - grad_y
+    return std::vector<NodeEntry>{
+      MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]}),
+    };
+});
+
+// logical NOT
+NNVM_REGISTER_ELEMWISE_UNARY_OP(logical_not)
+.describe(R"code(Element-wise logical NOT
+
+)code" NNVM_ADD_FILELINE)
+.set_support_level(4)
+.set_attr<FTVMCompute>(
+  "FTVMCompute", [](const NodeAttrs& attrs,
+                    const Array<Tensor>& inputs,
+                    const Array<Tensor>& out_info) {
+    return Array<Tensor>{ topi::logical_not(inputs[0]) };
+});
+
+// copy
+NNVM_REGISTER_ELEMWISE_UNARY_OP(copy)
+.describe(R"code(Copy tensor to another one.
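The FGradient registrations above encode textbook derivative rules as graph rewrites: each gradient is built out of other registered ops (elemwise_mul, negative, and friends). As an editor's aside, here is a minimal standalone sketch (plain C++, independent of nnvm; all names are local to the example) that checks the elemwise_div rules grad_0 = grad_y / n1 and grad_1 = -grad_y * n0 / n1^2 against central finite differences:

```cpp
// Standalone check of the elemwise_div gradient rules used above.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double a = 1.7, b = -2.3, gy = 0.9;  // arbitrary point and out-grad
  const double eps = 1e-6;

  // Analytic gradients, as encoded by the FGradient registration.
  const double ga = gy / b;
  const double gb = -gy * a / (b * b);

  // Central finite differences of y = a / b, scaled by gy (chain rule).
  const double fd_a = gy * ((a + eps) / b - (a - eps) / b) / (2 * eps);
  const double fd_b = gy * (a / (b + eps) - a / (b - eps)) / (2 * eps);

  assert(std::fabs(ga - fd_a) < 1e-5);
  assert(std::fabs(gb - fd_b) < 1e-5);
  std::printf("grad_0 = %.6f (fd %.6f), grad_1 = %.6f (fd %.6f)\n",
              ga, fd_a, gb, fd_b);
  return 0;
}
```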
+ +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::identity(inputs[0]) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = copy(n0) + // grad_0 = grad_y + return std::vector{ MakeNode("copy", n->attrs.name + "_grad_0", + {ograds[0]}) }; +}); + +DMLC_REGISTER_PARAMETER(InitOpParam); +DMLC_REGISTER_PARAMETER(InitOpWithScalarParam); +DMLC_REGISTER_PARAMETER(FillValueParam); + +// full +NNVM_REGISTER_INIT_OP(full) +.describe(R"code(Fill array with scalar value + +)code" NNVM_ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_attr( + "FGetAttrDict", ParamGetAttrDict) +.add_arguments(InitOpWithScalarParam::__FIELDS__()) +.set_attr("FInferShape", ZeroShape) +.set_attr("FInferType", ZeroType) +.set_attr("FCorrectLayout", ZeroLayout) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const InitOpWithScalarParam& param = nnvm::get(attrs.parsed); + Array shape = ShapeToArray(param.shape); + Type dtype = GetTVMType(param.dtype); + Expr fill_value = tvm::make_const(dtype, param.fill_value); + return Array{ topi::full(shape, dtype, fill_value) }; +}) +.set_support_level(4); + +NNVM_REGISTER_INIT_OP(zeros) +.describe(R"code(Fill target with zeros + +)code" NNVM_ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_attr( + "FGetAttrDict", ParamGetAttrDict) +.add_arguments(InitOpParam::__FIELDS__()) +.set_attr("FInferShape", ZeroShape) +.set_attr("FInferType", ZeroType) +.set_attr("FCorrectLayout", ZeroLayout) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const InitOpParam& param = nnvm::get(attrs.parsed); + Array shape = ShapeToArray(param.shape); + Type dtype = GetTVMType(param.dtype); + Expr fill_value = tvm::make_const(dtype, 0); + return Array{ topi::full(shape, dtype, fill_value) }; +}) +.set_support_level(4); + +NNVM_REGISTER_INIT_OP(ones) +.describe(R"code(Fill target with ones + +)code" NNVM_ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_attr( + "FGetAttrDict", ParamGetAttrDict) +.add_arguments(InitOpParam::__FIELDS__()) +.set_attr("FInferShape", ZeroShape) +.set_attr("FInferType", ZeroType) +.set_attr("FCorrectLayout", ZeroLayout) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const InitOpParam& param = nnvm::get(attrs.parsed); + Array shape = ShapeToArray(param.shape); + Type dtype = GetTVMType(param.dtype); + Expr fill_value = tvm::make_const(dtype, 1); + return Array{ topi::full(shape, dtype, fill_value) }; +}) +.set_support_level(4); + +// full_like +NNVM_REGISTER_INIT_LIKE_OP(full_like) +.describe(R"code(Return an scalar value array with the same shape and type +as the input array + +)code" NNVM_ADD_FILELINE) +.add_arguments(FillValueParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const FillValueParam& param = nnvm::get(attrs.parsed); + const Expr fill_value = tvm::make_const(out_info[0]->dtype, param.fill_value); + return Array { topi::full_like(inputs[0], fill_value) }; +}) +.set_support_level(4); + +NNVM_REGISTER_INIT_LIKE_OP(zeros_like) +.describe(R"code(Return an array of zeros with the same shape and type +as the input array. 
+ +)code") +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array { topi::full_like(inputs[0], + tvm::make_const(out_info[0]->dtype, 0)) }; +}) +.set_support_level(4); + +NNVM_REGISTER_INIT_LIKE_OP(ones_like) +.describe(R"code(Return an array of ones with the same shape and type +as the input array. + +)code") +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array { topi::full_like(inputs[0], + tvm::make_const(out_info[0]->dtype, 1)) }; +}) +.set_support_level(4); + +// unary scalar op +DMLC_REGISTER_PARAMETER(ScalarParam); + +#define NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(op) \ + NNVM_REGISTER_ELEMWISE_UNARY_OP(op) \ + .add_arguments(ScalarParam::__FIELDS__()) \ + .set_attr_parser(ParamParser) \ + .set_attr("FGetAttrDict", ParamGetAttrDict) + +inline Tensor binary_scalar_op(const NodeAttrs& attrs, + const Tensor& x, + std::function f) { + const ScalarParam& param = nnvm::get(attrs.parsed); + auto scalar_val = static_cast(param.scalar); + return compute(x->shape, [&](const Array& i) { + auto scalar_const = make_const(x->dtype, scalar_val); + return f(x(i), scalar_const); + }, "tensor", topi::kElementWise); +} + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__add_scalar__) +.describe(R"code(Tensor add scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x + y; }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + return std::vector{ MakeNode("copy", n->attrs.name + "_grad_0", + {ograds[0]}) }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__sub_scalar__) +.describe(R"code(Tensor substract scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x - y; }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + return std::vector{ograds[0]}; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rsub_scalar__) +.describe(R"code(scalar substract Tensor + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return y - x; }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + return std::vector{ + MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]}) + }; +}); + + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__lshift_scalar__) +.describe(R"code(Tensor left shift by scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ScalarParam& param = nnvm::get(attrs.parsed); + int scalar_val = static_cast(param.scalar); + return Array{ + topi::left_shift(inputs[0], + make_const(inputs[0]->dtype, scalar_val))}; + }); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rshift_scalar__) +.describe(R"code(Tensor right shift by scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ScalarParam& param = nnvm::get(attrs.parsed); + 
int scalar_val = static_cast(param.scalar); + return Array{ + topi::right_shift(inputs[0], + make_const(inputs[0]->dtype, scalar_val))}; + }); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__mul_scalar__) +.describe(R"code(Tensor multiplies scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x * y; }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0 * scalar + // grad_0 = grad_y * scalar + return std::vector{ + MakeNode("__mul_scalar__", n->attrs.name + "_grad_0", + {ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}}) + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__div_scalar__) +.describe(R"code(Tensor divides scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x / y; }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0 / scalar + // grad_0 = grad_y / scalar + return std::vector{ + MakeNode("__div_scalar__", n->attrs.name + "_grad_0", + {ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}}) + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rdiv_scalar__) +.describe(R"code(scalar divides Tensor + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return y / x; }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = scalar / n0 + // grad_0 = - grad_y * scalar / n0^2 + NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0", + {ograds[0]}, + {{"scalar", n->attrs.dict["scalar"]}}); + NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1", + {sub0}); + NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2", + {n->inputs[0], n->inputs[0]}); + return std::vector{ + MakeNode("elemwise_div", n->attrs.name + "_grad_0", + {sub1, sub2}) + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__pow_scalar__) +.describe(R"code(Tensor power scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return tvm::pow(x, y); }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = n0^scalar + // grad_0 = grad_y * scalar * n0^(scalar - 1) + double scalar = std::stod(n->attrs.dict["scalar"]); + NodeEntry sub0 = MakeNode("__pow_scalar__", n->attrs.name + "_grad_sub_0", + {n->inputs[0]}, + {{"scalar", std::to_string(scalar - 1)}}); + NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1", + {ograds[0]}, + {{"scalar", std::to_string(scalar)}}); + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad_0", + {sub0, sub1}) + }; +}); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__) +.describe(R"code(scalar power Tensor + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ binary_scalar_op(attrs, inputs[0], + 
[](Expr x, Expr y) { return tvm::pow(y, x); }) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = scalar^n0 + // grad_0 = grad_y * scalar^n0 * log(scalar) + double num = std::stod(n->attrs.dict["scalar"]); + NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0", + {NodeEntry{n, 0, 0}}, + {{"scalar", std::to_string(std::log(num))}}); + return std::vector{ + MakeNode("__mul_symbol__", n->attrs.name + "_grad_0", + {ograds[0], sub0}) + }; +}); + +DMLC_REGISTER_PARAMETER(ElementWiseReduceParam); + +NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum) +.describe(R"code(Adds all input arguments element-wise. + +)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ElementWiseReduceParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(param.num_args, inputs.size()) << """Compute definition of elemwise sum"""; + return Array{ topi::elemwise_sum(inputs) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + CHECK_EQ(ograds.size(), 1); + std::vector ret; + for (size_t i = 0; i < n->inputs.size(); i++) { + ret.push_back(MakeNode("copy", n->attrs.name + "_grad_0", {ograds[0]})); + } + return ret; + }) +.set_support_level(4); + +NNVM_REGISTER_ELEMWISE_UNARY_OP(block_grad) +.describe(R"code(Blocks gradient computation for input. + +)code" NNVM_ADD_FILELINE) +.set_attr( + "FInplaceIdentity", [](const NodeAttrs& attrs){ + return std::vector{true}; +}) +.set_attr("FGradient", MakeZeroGradNodes) +.set_support_level(4); + +DMLC_REGISTER_PARAMETER(IndicatorParam); + +// indicator function +NNVM_REGISTER_INDICATOR_OP(greater) +.describe(R"code(Greater function that returns a mask tensor +with 1.0 if (left > right), otherwise 0.0 element-wise. + +)code" NNVM_ADD_FILELINE) +.add_argument("lhs", "Tensor", "First input") +.add_argument("rhs", "Tensor", "Second input") +.set_num_inputs(2) +.set_attr("FInferShape", ElemwiseShape<2, 1>) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::cast(topi::greater(inputs[0], inputs[1]), out_info[0]->dtype) }; +}) +.set_support_level(4); + + +NNVM_REGISTER_INDICATOR_OP(less) + .describe(R"code(Less function that returns a mask tensor +with 1.0 if (left < right), otherwise 0.0 element-wise. + +)code" NNVM_ADD_FILELINE) +.add_argument("lhs", "Tensor", "First input") +.add_argument("rhs", "Tensor", "Second input") +.set_num_inputs(2) +.set_attr("FInferShape", ElemwiseShape<2, 1>) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::cast(topi::less(inputs[0], inputs[1]), out_info[0]->dtype) }; +}) +.set_support_level(4); + +NNVM_REGISTER_INDICATOR_OP(_max_mask) + .describe(R"code(Function that returns a mask tensor +with 1.0 if the value is maximum over given axes, otherwise 0.0 element-wise. + +)code" NNVM_ADD_FILELINE) +.add_argument("data", "Tensor", "Input") +.set_num_inputs(1) +.add_arguments(IndicatorParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_support_level(1); + +NNVM_REGISTER_INDICATOR_OP(_min_mask) + .describe(R"code(Function that returns a mask tensor +with 1.0 if the value is minimum over given axes, otherwise 0.0 element-wise. 
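For __rpow_scalar__ the gradient above uses d(s^x)/dx = s^x * log(s), i.e. grad_0 = grad_y * y * log(scalar). A standalone numeric check of that identity (plain C++, no nnvm dependency):

```cpp
// Standalone check of the __rpow_scalar__ gradient rule used above.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double s = 3.0, x = 1.25, gy = 0.7;
  const double y = std::pow(s, x);
  const double analytic = gy * y * std::log(s);

  const double eps = 1e-6;  // central finite difference
  const double fd = gy * (std::pow(s, x + eps) - std::pow(s, x - eps)) / (2 * eps);

  assert(std::fabs(analytic - fd) < 1e-4);
  std::printf("analytic %.6f vs finite-difference %.6f\n", analytic, fd);
  return 0;
}
```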
+ +)code" NNVM_ADD_FILELINE) +.add_argument("data", "Tensor", "Input") +.set_num_inputs(1) +.add_arguments(IndicatorParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_support_level(1); + + +DMLC_REGISTER_PARAMETER(ClipParam); + +NNVM_REGISTER_OP(clip) +.describe(R"doc(Clips (limits) the values in an array. +Given an interval, values outside the interval are clipped to the interval edges. +Clipping ``x`` between `a_min` and `a_x` would be:: + clip(x, a_min, a_max) = max(min(x, a_max), a_min)) +Example:: + x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + clip(x,1,8) = [ 1., 1., 2., 3., 4., 5., 6., 7., 8., 8.] +)doc" NNVM_ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ClipParam params = get(attrs.parsed); + return Array{ + topi::clip(inputs[0], tvm::make_const(tvm::Float(32), params.a_min), + tvm::make_const(tvm::Float(32), params.a_max)) }; + }) +.add_argument("data", "NDArray-or-Symbol", "Input array.") +.add_arguments(ClipParam::__FIELDS__()) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + // y = clip(x, a_min, a_max) + // min_mask = greater_equal(x, a_min*ones_like(x)) + // => ones_like(x) - less(x, a_min) + // max_mask = less_equal(x, a_max*ones_like(x)) + // => ones_like(x) - greater(x, a_max) + // grad_x = min_mask * max_mask * grad_y + CHECK_EQ(ograds.size(), 1); + + NodeEntry sub0 = MakeNode("ones_like", n->attrs.name + "_grad_sub_0", + {n->inputs[0]}); + // min_mask + NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1", + {sub0}, {{"scalar", n->attrs.dict["a_min"]}}); + NodeEntry sub2 = MakeNode("less", n->attrs.name + "_grad_sub_2", + {n->inputs[0], sub1}); + NodeEntry sub3 = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub_3", + {sub0, sub2}); + + // max_mask + NodeEntry sub4 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_4", + {sub0}, {{"scalar", n->attrs.dict["a_max"]}}); + NodeEntry sub5 = MakeNode("greater", n->attrs.name + "_grad_sub_5", + {n->inputs[0], sub4}); + NodeEntry sub6 = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub_6", + {sub0, sub5}); + + // min_mask * max_mask + NodeEntry sub7 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_7", + {sub3, sub6}); + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad", + {sub7, ograds[0]}) + }; + }) +.set_support_level(4); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/tensor/matrix_op.cc b/nnvm/src/top/tensor/matrix_op.cc new file mode 100644 index 000000000000..b1810f40de20 --- /dev/null +++ b/nnvm/src/top/tensor/matrix_op.cc @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file matrix_op.cc + * \brief Matrix operators + */ +#include +#include +#include +#include +#include +#include +#include "../op_common.h" +#include "../elemwise_op_common.h" + +namespace nnvm { +namespace top { + +using namespace nnvm::compiler; + +DMLC_REGISTER_PARAMETER(MatMulParam); + +inline bool DotShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const MatMulParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + TShape lshape = (*in_attrs)[0]; + TShape rshape = (*in_attrs)[1]; + + if (lshape.ndim() == 1) lshape = TShape{1, lshape[0]}; + if (rshape.ndim() == 1) rshape = TShape{1, rshape[0]}; + + if (param.transpose_a) std::reverse(lshape.begin(), lshape.end()); + if (param.transpose_b) std::reverse(rshape.begin(), rshape.end()); + + CHECK_EQ(lshape[lshape.ndim() - 1], rshape[0]) + << "dot shape inconsistent: " << lshape << " X " << rshape; + + TShape oshape(lshape.ndim() + rshape.ndim() - 2); + for (uint32_t i = 0; i < lshape.ndim() - 1; i++) oshape[i] = lshape[i]; + for (uint32_t i = 1; i < rshape.ndim(); i++) oshape[i + lshape.ndim() - 2] = rshape[i]; + + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); + return true; +} + +inline bool DotCorrectLayout(const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + const MatMulParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(ilayouts->size(), 2U); + CHECK_EQ(olayouts->size(), 1U); + const Layout& lhs = last_ilayouts->at(0).defined() ? last_ilayouts->at(0) + : ilayouts->at(0); + const Layout& rhs = last_ilayouts->at(1).defined() ? last_ilayouts->at(1) + : ilayouts->at(1); + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, lhs); + NNVM_ASSIGN_LAYOUT(*ilayouts, 1, rhs); + + if (lhs.ndim() > 1 && rhs.ndim() > 1) { + // concat lhs and rhs layout + const Layout& lhs_out = param.transpose_a ? lhs.reverse() : lhs; + const Layout& rhs_out = param.transpose_b ? rhs.reverse() : rhs; + Layout out = lhs_out.sublayout(0, lhs_out.ndim()-1) + + rhs_out.sublayout(1, rhs_out.ndim()-1); + NNVM_ASSIGN_LAYOUT(*olayouts, 0, out); + } + return true; +} + +NNVM_REGISTER_OP(matmul) +.describe(R"doc(Matrix multiplication of two arrays. + +``dot``'s behavior depends on the input array dimensions: + +- 1-D arrays: inner product of vectors +- 2-D arrays: matrix multiplication +- N-D arrays: a sum product over the last axis of the first input and the first + axis of the second input + + For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the + result array will have shape `(n,m,r,s)`. 
It is computed by:: + + dot(x,y) = sum(x[i,j,:]*y[:,a,b]) + +)doc" NNVM_ADD_FILELINE) +.set_support_level(1) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.add_arguments(MatMulParam::__FIELDS__()) +.add_argument("lhs", "NDArray-or-Symbol", "The first input") +.add_argument("rhs", "NDArray-or-Symbol", "The second input") +.set_attr("FInferShape", DotShape) +.set_attr("FInferType", ElemwiseType<2, 1>) +.set_attr("FCorrectLayout", DotCorrectLayout) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const MatMulParam& param = nnvm::get(attrs.parsed); + return Array{ + topi::matmul(inputs[0], inputs[1], param.transpose_a, param.transpose_b) + }; + }) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + // z = x dot y + // xshape (n,m,k), yshape (k,r,s) + const MatMulParam& param = nnvm::get(n->attrs.parsed); + bool Ta = param.transpose_a; + bool Tb = param.transpose_b; + // Ta = false, Tb = false + // grad_x = grad_z dot y.T + // grad_y = x.T dot grad_z + if (!Ta && !Tb) { + return std::vector{ + MakeNode("matmul", n->attrs.name + "_grad_0", + {ograds[0], n->inputs[1]}, + {{"transpose_a", "false"}, + {"transpose_b", "true"}}), + MakeNode("matmul", n->attrs.name + "_grad_1", + {n->inputs[0], ograds[0]}, + {{"transpose_a", "true"}, + {"transpose_b", "false"}}) + }; + } else if (Ta && !Tb) { + // Ta = true, Tb = false + // grad_x = y dot grad_z.T + // grad_y = x dot grad_z + return std::vector{ + MakeNode("matmul", n->attrs.name + "_grad_0", + {n->inputs[1], ograds[0]}, + {{"transpose_a", "false"}, + {"transpose_b", "true"}}), + MakeNode("matmul", n->attrs.name + "_grad_1", + {n->inputs[0], ograds[0]}, + {{"transpose_a", "false"}, + {"transpose_b", "false"}}) + }; + } else if (!Ta && Tb) { + // Ta = false, Tb = true + // grad_x = grad_z dot y + // grad_y = grad_z.T dot x + return std::vector{ + MakeNode("matmul", n->attrs.name + "_grad_0", + {ograds[0], n->inputs[1]}, + {{"transpose_a", "false"}, + {"transpose_b", "false"}}), + MakeNode("matmul", n->attrs.name + "_grad_1", + {ograds[0], n->inputs[0]}, + {{"transpose_a", "true"}, + {"transpose_b", "false"}}) + }; + } else { + // Ta = true, Tb = true + // grad_x = y.T dot grad_z.T + // grad_y = grad_z.T dot x.T + return std::vector{ + MakeNode("matmul", n->attrs.name + "_grad_0", + {n->inputs[1], ograds[0]}, + {{"transpose_a", "true"}, + {"transpose_b", "true"}}), + MakeNode("matmul", n->attrs.name + "_grad_1", + {ograds[0], n->inputs[0]}, + {{"transpose_a", "true"}, + {"transpose_b", "true"}}) + }; + } +}); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/tensor/reduce.cc b/nnvm/src/top/tensor/reduce.cc new file mode 100644 index 000000000000..dd8e23cf6fe9 --- /dev/null +++ b/nnvm/src/top/tensor/reduce.cc @@ -0,0 +1,411 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file reduce.cc + * \brief reduce operator. + */ +#include +#include +#include +#include +#include +#include +#include +#include "../op_common.h" +#include "../elemwise_op_common.h" +#include "topi/detail/constant_utils.h" +#include "topi/elemwise.h" +#include "topi/reduction.h" +#include "topi/transform.h" + +namespace nnvm { +namespace top { +using namespace tvm; +using namespace nnvm::compiler; + + +// reduce +DMLC_REGISTER_PARAMETER(ReduceParam); + +inline TShape GetReduceAxes(const uint32_t indim, + const TShape& axis, + bool exclude) { + if (axis.ndim() == 0) { + TShape r_axes(indim); + std::iota(r_axes.begin(), r_axes.end(), 0); + return r_axes; + } + + CHECK_LT(axis[axis.ndim() - 1], indim) + << "Reduction axis " << axis[axis.ndim() - 1] + << " exceeds input dimensions " << indim; + + TShape in_axis = axis; + for (auto& i : in_axis) { + i = i < 0 ? i + indim : i; + CHECK_GE(i, 0) << "axis out of bounds in reduce operator"; + CHECK_LT(i, indim) << "axis out of bounds in reduce operator"; + } + std::sort(in_axis.begin(), in_axis.end()); + if (!exclude) return in_axis; + TShape r_axis(indim - in_axis.ndim()); + for (unsigned i = 0, j = 0, k = 0; i < indim; ++i) { + if (j < in_axis.ndim() && i == in_axis[j]) { + ++j; + continue; + } + r_axis[k++] = i; + } + return r_axis; +} + +inline TShape ReduceShapeImpl(const TShape& ishape, + const TShape& axis, + bool keepdims, + bool exclude) { + uint32_t indim = ishape.ndim(); + TShape r_axes = GetReduceAxes(indim, axis, exclude); + if (!r_axes.ndim()) return ishape; + if (r_axes.ndim() == indim) + return TShape(keepdims ? 
indim : 1); + + CHECK(r_axes.ndim() < indim); + if (keepdims) { + TShape oshape(ishape); + for (unsigned i = 0, j = 0; i < indim; ++i) { + if (j >= r_axes.ndim() || i != r_axes[j]) continue; + oshape[i] = 1; + ++j; + } + return oshape; + } + + TShape oshape(indim - r_axes.ndim()); + for (unsigned i = 0, j = 0, k = 0; i < indim; ++i) { + if (j < r_axes.ndim() && i == r_axes[j]) { + ++j; + continue; + } + oshape[k++] = ishape[i]; + } + return oshape; +} + +inline bool ReduceShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if ((*in_attrs)[0].ndim() == 0) return false; + const ReduceParam& param = nnvm::get(attrs.parsed); + NNVM_ASSIGN_OUTPUT_SHAPE( + attrs, *out_attrs, 0, + ReduceShapeImpl((*in_attrs)[0], param.axis, + param.keepdims, param.exclude)); + return true; +} + +inline bool CollapseShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + if ((*in_attrs)[0].ndim() == 1) return false; + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, (*in_attrs)[1]); + return true; +} + +template +inline void AxesParamParser(nnvm::NodeAttrs* attrs) { + PType param; + param.Init(attrs->dict); + std::sort(¶m.axis[0], ¶m.axis[param.axis.ndim()]); + attrs->parsed = std::move(param); +} + +#define NNVM_REGISTER_BASE_REDUCE_OP(op) \ + NNVM_REGISTER_OP(op) \ + .add_arguments(ReduceParam::__FIELDS__()) \ + .set_attr_parser(AxesParamParser) \ + .set_attr("FGetAttrDict", ParamGetAttrDict) \ + .set_num_outputs(1) + +#define NNVM_REGISTER_REDUCE_OP(op) \ + NNVM_REGISTER_BASE_REDUCE_OP(op) \ + .add_argument("data", "Tensor", "The input") \ + .set_attr("FInferShape", ReduceShape) \ + .set_attr("FInferType", ElemwiseType<1, 1>) \ + .set_attr("FCorrectLayout", \ + ElemwiseFixedLayoutUnknownOut<1, 1>) \ + .set_num_inputs(1) + +NNVM_REGISTER_REDUCE_OP(sum) +.describe(R"code(Computes the sum of array elements over given axes. + +Example:: + + data = [[[1,2],[2,3],[1,3]], + [[1,4],[4,3],[5,2]], + [[7,1],[7,2],[7,3]]] + + sum(data, axis=1) + [[ 4. 8.] + [ 10. 9.] + [ 21. 6.]] + + sum(data, axis=[1,2]) + [ 12. 19. 27.] + +)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + if (!r_axes.ndim()) return Array { topi::identity(inputs[0]) }; + auto axis = ShapeToIntArray(r_axes); + return Array{ + topi::sum(inputs[0], axis, param.keepdims, true) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + const ReduceParam& param = nnvm::get(n->attrs.parsed); + bool exclude = param.exclude; + TShape p_axis = param.axis; + if (!param.exclude && param.axis.ndim() == 0) { + exclude = true; + p_axis = TShape(); + } + std::ostringstream axis; axis << p_axis; + return std::vector{ + MakeNode("expand_like", n->attrs.name + "_grad", + {ograds[0], n->inputs[0]}, + {{"axis", axis.str()}, + {"exclude", std::to_string(exclude)}}) + }; +}); + +NNVM_REGISTER_REDUCE_OP(max) +.describe(R"code(Computes the max of array elements over given axes. 
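ReduceShapeImpl above drops the reduced axes from the input shape, or pins them to 1 when keepdims is set; an empty axis attribute means reducing over every axis. A simplified standalone sketch of that shape rule (plain C++, not part of the patch; the exclude mode, which reduces over all axes *not* listed, is omitted for brevity):

```cpp
// Standalone sketch mirroring the reduce output-shape rule above.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

std::vector<int> ReduceOutShape(const std::vector<int>& ishape,
                                std::vector<int> axes, bool keepdims) {
  if (axes.empty()) {  // empty axis list means "reduce over everything"
    axes.resize(ishape.size());
    std::iota(axes.begin(), axes.end(), 0);
  }
  std::sort(axes.begin(), axes.end());
  std::vector<int> oshape;
  for (int i = 0, j = 0; i < static_cast<int>(ishape.size()); ++i) {
    if (j < static_cast<int>(axes.size()) && i == axes[j]) {
      ++j;
      if (keepdims) oshape.push_back(1);  // reduced axis kept as size 1
    } else {
      oshape.push_back(ishape[i]);
    }
  }
  if (oshape.empty()) oshape.push_back(1);  // full reduction yields a scalar
  return oshape;
}

int main() {
  // sum over axis 1 of a (3, 4, 5) tensor: (3, 5), or (3, 1, 5) with keepdims.
  for (bool keep : {false, true}) {
    std::vector<int> o = ReduceOutShape({3, 4, 5}, {1}, keep);
    for (int d : o) std::printf("%d ", d);
    std::printf("\n");
  }
  return 0;
}
```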
+ +)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + auto axis = ShapeToIntArray(r_axes); + return Array{ + topi::max(inputs[0], axis, param.keepdims, true) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + const ReduceParam& param = nnvm::get(n->attrs.parsed); + std::ostringstream axis; axis << param.axis; + NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0", + {ograds[0], n->inputs[0]}, + {{"axis", axis.str()}, + {"exclude", std::to_string(param.exclude)}}); + NodeEntry sub1 = MakeNode("_max_mask", n->attrs.name + "_grad_sub1", + {ograds[0]}, + {{"axis", axis.str()}, + {"exclude", std::to_string(param.exclude)}}); + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1}) + }; +}); + +NNVM_REGISTER_REDUCE_OP(min) +.describe(R"code(Computes the min of array elements over given axes. + +)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + auto axis = ShapeToIntArray(r_axes); + return Array{ + topi::min(inputs[0], axis, param.keepdims, true) }; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + const ReduceParam& param = nnvm::get(n->attrs.parsed); + std::ostringstream axis; axis << param.axis; + NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0", + {ograds[0], n->inputs[0]}, + {{"axis", axis.str()}, + {"exclude", std::to_string(param.exclude)}}); + NodeEntry sub1 = MakeNode("_min_mask", n->attrs.name + "_grad_sub1", + {ograds[0]}, + {{"axis", axis.str()}, + {"exclude", std::to_string(param.exclude)}}); + return std::vector{ + MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1}) + }; +}); + +NNVM_REGISTER_BASE_REDUCE_OP(collapse_sum) +.add_argument("data", "Tensor", "The input") +.add_argument("as", "Tensor", "The reference") +.set_attr("FInferShape", CollapseShape) +.set_attr("FInferType", ElemwiseType<2, 1>) +.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<2, 1>) +.set_num_inputs(2) +.describe(R"code(Reduces lhs to the shape of rhs via sum)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + return Array{ topi::collapse_sum(inputs[0], inputs[1]->shape) }; +}); + +inline bool InferFixedType(const NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const ReduceParam& param = nnvm::get(attrs.parsed); + NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, param.dtype); + return true; +} + +NNVM_REGISTER_BASE_REDUCE_OP(argmax) +.describe(R"code(Creates an operation that finds the indices of the maximum +values over a given axis. 
+ +)code" NNVM_ADD_FILELINE) +.add_argument("data", "Tensor", "The input") +.set_attr("FInferShape", ReduceShape) +.set_attr("FInferType", InferFixedType) +.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) +.set_num_inputs(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + auto axis = ShapeToIntArray(r_axes); + Tensor out = topi::argmax(inputs[0], axis, param.keepdims, true); + if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype); + return Array{out}; +}); + +NNVM_REGISTER_BASE_REDUCE_OP(argmin) +.describe(R"code(Creates an operation that finds the indices of the minimum +values over a given axis. + +)code" NNVM_ADD_FILELINE) +.add_argument("data", "Tensor", "The input") +.set_attr("FInferShape", ReduceShape) +.set_attr("FInferType", InferFixedType) +.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) +.set_num_inputs(1) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + auto axis = ShapeToIntArray(r_axes); + Tensor out = topi::argmin(inputs[0], axis, param.keepdims, true); + if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype); + return Array{out}; +}); + +NNVM_REGISTER_REDUCE_OP(mean) + .describe(R"code(Computes the mean of array elements over given axes. + +Example:: + + data = [[[1,2],[2,3],[1,3]], + [[1,4],[4,3],[5,2]], + [[7,1],[7,2],[7,3]]] + + mean(data) + [3.22] + + mean(data, axis=[1,2]) + [ 2. 3.16666667 4.5] + +)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + if (!r_axes.ndim()) return Array { topi::identity(inputs[0]) }; + auto axis = ShapeToIntArray(r_axes); + + Expr count = make_const(inputs[0]->dtype, 1); + for (auto& i : r_axes) { + count *= cast(inputs[0]->dtype, inputs[0]->shape[i]); + } + + return Array{ + topi::divide(topi::sum(inputs[0], axis, param.keepdims, true), count) }; +}); + +NNVM_REGISTER_REDUCE_OP(prod) + .describe(R"code(Computes the products of array elements over given axes. + +Example:: + + data = [[[1,2],[2,3],[1,3]], + [[1,4],[4,3],[5,2]], + [[7,1],[7,2],[7,3]]] + + mean(data, axis=1) + [35562240] + + mean(data, axis=[1,2]) + [ 36 480 2058] + +)code" NNVM_ADD_FILELINE) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const ReduceParam& param = nnvm::get(attrs.parsed); + TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), + param.axis, param.exclude); + if (!r_axes.ndim()) return Array { topi::identity(inputs[0]) }; + auto axis = ShapeToIntArray(r_axes); + return Array{ + topi::prod(inputs[0], axis, param.keepdims, true) }; +}); + + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/tensor/state_op.cc b/nnvm/src/top/tensor/state_op.cc new file mode 100644 index 000000000000..23c7158aecd3 --- /dev/null +++ b/nnvm/src/top/tensor/state_op.cc @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file state_op.cc + * \brief Experimental operators + * Currently we only support assign + */ +#include +#include +#include +#include +#include +#include +#include "../op_common.h" +#include "../elemwise_op_common.h" + +namespace nnvm { +namespace top { + +using namespace tvm; +using namespace nnvm::compiler; + +NNVM_REGISTER_OP(_assign) +.describe(R"doc(Assign rhs to the lhs. + +lhs must be a Variable. +This is an experimental operator. + +)doc" NNVM_ADD_FILELINE) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr( + "FMutateInputs", [](const NodeAttrs& attrs) { + return std::vector{0}; +}) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + // This implementation is needed for the special + // logic handling assign in the compiler + // It simply copies the result of rhs the output + // The later decoration in compiler will change + // the memory assignment of assign to tie + // the lhs to the output. + return Array{ topi::identity(inputs[1]) }; +}) +.set_attr("FInferShape", SameShape) +.set_attr( + "FCorrectLayout", [](const NodeAttrs& attrs, + std::vector *in_layouts, + const std::vector *last_in_layouts, + std::vector *out_layouts) { + NNVM_ASSIGN_LAYOUT(*in_layouts, 1, (*in_layouts)[0]); + NNVM_ASSIGN_LAYOUT(*out_layouts, 0, (*in_layouts)[0]); + return true; +}) +.set_attr( + "FInplaceOption", [](const NodeAttrs& attrs) { + return std::vector >{{1, 0}}; +}) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds){ + return std::vector{ + MakeNode("zeros_like", n->attrs.name + "_zero_grad", + {n->inputs[0]}), + ograds[0] + }; +}); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/tensor/transform.cc b/nnvm/src/top/tensor/transform.cc index 5d0bf5c4d56e..efe24faae18d 100644 --- a/nnvm/src/top/tensor/transform.cc +++ b/nnvm/src/top/tensor/transform.cc @@ -477,7 +477,7 @@ NNVM_REGISTER_OP(cast) const Array& inputs, const Array& out_info) { const CastParam& param = nnvm::get(attrs.parsed); - DataType dtype = GetTVMType(param.dtype); + Type dtype = GetTVMType(param.dtype); return Array{ topi::cast(inputs[0], dtype) }; }) .set_support_level(1); @@ -1266,8 +1266,8 @@ NNVM_REGISTER_OP(slice_like) Array target_shape = inputs[1]->shape; Array begin_idx, end_idx, strides; for (size_t i = 0; i < src_shape.size(); ++i) { - begin_idx.push_back(make_const(tvm::DataType::Int(32), 0)); - strides.push_back(make_const(tvm::DataType::Int(32), 1)); + begin_idx.push_back(make_const(tvm::Int(32), 0)); + strides.push_back(make_const(tvm::Int(32), 1)); } end_idx = Array(src_shape); if (param.axis.ndim() == 0) { diff --git a/nnvm/src/top/vision/nms.cc b/nnvm/src/top/vision/nms.cc new file mode 100644 index 000000000000..ec97408284e5 --- /dev/null +++ b/nnvm/src/top/vision/nms.cc @@ -0,0 +1,107 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file nms.cc + * \brief Property def of SSD non-maximum suppression operator. + */ + +#include +#include +#include +#include +#include +#include +#include "../op_common.h" +#include "../elemwise_op_common.h" + +namespace nnvm { +namespace top { +using compiler::FTVMCompute; +using tvm::Tensor; +using tvm::Array; + +DMLC_REGISTER_PARAMETER(NonMaximumSuppressionParam); + +bool NMSShape(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const NonMaximumSuppressionParam& param = + nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 2U) << "Inputs: [data, valid_count]"; + TShape dshape = in_attrs->at(0); + TShape vshape = in_attrs->at(1); + CHECK_EQ(dshape.ndim(), 3U) << "Input data should be 3-D."; + CHECK_EQ(vshape.ndim(), 1U) << "Input valid count should be 1-D."; + CHECK_EQ(dshape[2], 6U) << "Data input should have shape " + "(batch_size, num_anchors, 6)."; + CHECK_EQ(dshape[0], vshape[0]) << "batch_size mismatch."; + out_attrs->clear(); + if (param.return_indices) { + TShape oshape = TShape(2); + oshape[0] = dshape[0]; + oshape[1] = dshape[1]; + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); + } else { + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, dshape); + } + return true; +} + +inline bool NMSInferType(const NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(0)); + return true; +} + +inline bool NMSInferLayout(const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + static const Layout kNCHW("NCHW"); + CHECK_EQ(ilayouts->size(), 2U); + CHECK_EQ(olayouts->size(), 1U); + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW); + NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kNCHW); + return true; +} + +NNVM_REGISTER_OP(non_max_suppression) + .describe(R"doc("Non-maximum suppression." 
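NMSShape above fixes the output shape from the data shape (batch_size, num_anchors, 6): with return_indices set the output is (batch_size, num_anchors), otherwise it keeps the shape of the data input. A standalone sketch of that rule (plain C++, not part of the patch):

```cpp
// Standalone sketch of the non_max_suppression output-shape rule above.
#include <cstdio>
#include <vector>

std::vector<int> NmsOutShape(const std::vector<int>& dshape,
                             bool return_indices) {
  if (return_indices) return {dshape[0], dshape[1]};  // (batch, num_anchors)
  return dshape;                                      // same as data input
}

int main() {
  std::vector<int> dshape = {4, 100, 6};  // (batch_size, num_anchors, 6)
  for (bool ri : {true, false}) {
    std::vector<int> o = NmsOutShape(dshape, ri);
    for (int d : o) std::printf("%d ", d);
    std::printf("\n");  // "4 100" then "4 100 6"
  }
  return 0;
}
```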
+)doc" NNVM_ADD_FILELINE) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", + ParamGetAttrDict) +.add_arguments(NonMaximumSuppressionParam::__FIELDS__()) +.add_argument("data", "Tensor", "Input data.") +.add_argument("valid_count", "Tensor", "Number of valid anchor boxes.") +.set_attr("FListInputNames", [](const NodeAttrs& attrs) { + return std::vector{"data", "valid_count"}; +}) +.set_attr("FInferShape", NMSShape) +.set_attr("FInferType", NMSInferType) +.set_attr("FCorrectLayout", NMSInferLayout) +.set_support_level(4); + +} // namespace top +} // namespace nnvm + diff --git a/nnvm/src/top/vision/ssd/mutibox_op.cc b/nnvm/src/top/vision/ssd/mutibox_op.cc new file mode 100644 index 000000000000..47f2f82a8664 --- /dev/null +++ b/nnvm/src/top/vision/ssd/mutibox_op.cc @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file multibox_op.cc + * \brief Property def of SSD multibox related operators. + */ + +#include +#include +#include +#include +#include +#include +#include "../../op_common.h" +#include "../../elemwise_op_common.h" + +namespace nnvm { +namespace top { +using compiler::FTVMCompute; +using tvm::Tensor; +using tvm::Array; + +DMLC_REGISTER_PARAMETER(MultiBoxPriorParam); + +bool MultiBoxPriorShape(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const MultiBoxPriorParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 1U) << "Inputs: [data]" << in_attrs->size(); + TShape dshape = in_attrs->at(0); + CHECK_GE(dshape.ndim(), 4U) << "Input data should be 4D: " + "[batch, channel, height, width]"; + int in_height = dshape[2]; + CHECK_GT(in_height, 0) << "Input height should > 0"; + int in_width = dshape[3]; + CHECK_GT(in_width, 0) << "Input width should > 0"; + // since input sizes are same in each batch, we could share MultiBoxPrior + TShape oshape = TShape(3); + int num_sizes = param.sizes.ndim(); + int num_ratios = param.ratios.ndim(); + oshape[0] = 1; + oshape[1] = in_height * in_width * (num_sizes + num_ratios - 1); + oshape[2] = 4; + CHECK_EQ(param.steps.ndim(), 2) << "Step ndim must be 2: (step_y, step_x)"; + CHECK_GE(param.steps[0] * param.steps[1], 0) << "Must specify both " + "step_y and step_x"; + out_attrs->clear(); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); + return true; +} + +inline bool MultiBoxPriorLayout(const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + static const Layout kNCHW("NCHW"); + CHECK_EQ(ilayouts->size(), 1U); + CHECK_EQ(olayouts->size(), 1U); + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW); + return true; +} + +NNVM_REGISTER_OP(multibox_prior) + .describe(R"doc("Generate prior(anchor) boxes from data, sizes and 
ratios." +)doc" NNVM_ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.add_arguments(MultiBoxPriorParam::__FIELDS__()) +.add_argument("data", "Tensor", "Input data") +.set_attr("FInferShape", MultiBoxPriorShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", MultiBoxPriorLayout) +.set_attr( + "FGradient", [](const NodePtr& n, + const std::vector& ograds) { + return std::vector{ + MakeNode("zeros_like", n->attrs.name + "_zero_grad", + {n->inputs[0]}), + ograds[0] + }; +}) +.set_support_level(4); + +DMLC_REGISTER_PARAMETER(MultiBoxTransformLocParam); + +bool MultiBoxTransformLocShape(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 3U) << "Inputs: [cls_prob, loc_pred, anchor]"; + TShape cshape = in_attrs->at(0); + TShape lshape = in_attrs->at(1); + TShape ashape = in_attrs->at(2); + CHECK_EQ(cshape.ndim(), 3U) << "Class probability should be 3-D."; + CHECK_EQ(lshape.ndim(), 2U) << "Location prediction should be 2-D."; + CHECK_EQ(ashape.ndim(), 3U) << "Anchor should be 3-D."; + CHECK_EQ(cshape[2], ashape[1]) << "Number of anchors mismatch."; + CHECK_EQ(cshape[2] * 4, lshape[1]) << "# anchors mismatch with # loc."; + CHECK_GT(ashape[1], 0U) << "Number of anchors must > 0."; + CHECK_EQ(ashape[2], 4U); + TShape oshape0 = TShape(3); + oshape0[0] = cshape[0]; + oshape0[1] = ashape[1]; + oshape0[2] = 6; // [id, prob, xmin, ymin, xmax, ymax] + TShape oshape1 = TShape(1); + oshape1[0] = cshape[0]; + out_attrs->clear(); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape0); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 1, oshape1); + return true; +} + +inline bool MultiBoxTransformLocLayout(const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + CHECK_EQ(ilayouts->size(), 3U); + CHECK_EQ(last_ilayouts->size(), 3U); + CHECK_EQ(olayouts->size(), 2U); + for (size_t i = 0; i < last_ilayouts->size(); ++i) { + const Layout& last_layout = last_ilayouts->at(i); + if (last_layout.defined()) { + NNVM_ASSIGN_LAYOUT(*ilayouts, i, last_layout); + } + } + return true; +} + +inline bool MultiBoxTransformLocInferType(const NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(0)); + DTYPE_ASSIGN(out_attrs->at(1), 4U); + return true; +} + +NNVM_REGISTER_OP(multibox_transform_loc) + .describe(R"doc("Location transformation for multibox detection." 
+)doc" NNVM_ADD_FILELINE) +.set_num_inputs(3) +.set_num_outputs(2) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", + ParamGetAttrDict) +.add_arguments(MultiBoxTransformLocParam::__FIELDS__()) +.add_argument("cls_prob", "Tensor", "Class probabilities.") +.add_argument("loc_pred", "Tensor", "Location regression predictions.") +.add_argument("anchor", "Tensor", "Multibox prior anchor boxes") +.set_attr("FListInputNames", [](const NodeAttrs& attrs) { + return std::vector{"cls_prob", "loc_pred", "anchor"}; +}) +.set_attr("FInferShape", MultiBoxTransformLocShape) +.set_attr("FInferType", MultiBoxTransformLocInferType) +.set_attr("FCorrectLayout", MultiBoxTransformLocLayout) +.set_support_level(4); + +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/vision/yolo/reorg.cc b/nnvm/src/top/vision/yolo/reorg.cc new file mode 100644 index 000000000000..c16d46ff4652 --- /dev/null +++ b/nnvm/src/top/vision/yolo/reorg.cc @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file reorg.cc + */ +#include +#include +#include +#include +#include "../../op_common.h" +#include "../../elemwise_op_common.h" +#include "reorg.h" + +namespace nnvm { +namespace top { + +// reorg +DMLC_REGISTER_PARAMETER(ReorgParam); + +inline bool ReorgInferShape(const nnvm::NodeAttrs &attrs, + std::vector *in_shape, + std::vector *out_shape) { + const ReorgParam ¶m = nnvm::get(attrs.parsed); + TShape dshape = in_shape->at(0); + if (dshape.ndim() == 0) + return false; + NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape); + CHECK_EQ(dshape.ndim(), 4) << "Input data should be 4D"; + CHECK_GT(param.stride, 0U) << "Stride value cannot be 0"; + TShape oshape({dshape[0], 0, 0, 0}); + oshape[1] = dshape[1] * param.stride * param.stride; + oshape[2] = dshape[2] / param.stride; + oshape[3] = dshape[3] / param.stride; + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + return true; +} + +NNVM_REGISTER_OP(yolo_reorg) +.describe(R"(Perform reorg operation on input array based on the stride value. +- **data**: Input is 4D array of shape (batch_size, channels, in_height, in_width). +- **out**: Output is 4D array of shape (batch_size, channels/(stride*stride), in_height*stride, in_width*stride). 
+)" NNVM_ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_support_level(5) +.add_argument("data", "Tensor", "Data input to reorganize") +.set_attr_parser(ParamParser) +.add_arguments(ReorgParam::__FIELDS__()) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferType", ElemwiseType<-1, 1>) +.set_attr("FInferShape", ReorgInferShape); +} // namespace top +} // namespace nnvm diff --git a/nnvm/src/top/vision/yolo/reorg.h b/nnvm/src/top/vision/yolo/reorg.h new file mode 100644 index 000000000000..53549df3634a --- /dev/null +++ b/nnvm/src/top/vision/yolo/reorg.h @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file reorg.h + */ +#ifndef NNVM_TOP_VISION_YOLO_REORG_H_ +#define NNVM_TOP_VISION_YOLO_REORG_H_ + +#include +#include +#include +#include +#include + +namespace nnvm { +namespace top { + +template +inline bool ReorgAttr(const nnvm::NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs, + const AttrType &none) { + AttrType dattr = none; + size_t in_size = in_attrs->size(); + size_t out_size = out_attrs->size(); + if (n_in != -1) { + in_size = static_cast(n_in); + } + if (n_out != -1) { + out_size = static_cast(n_out); + } + + auto deduce = [&](std::vector *vec, size_t size, const char *name) { + for (size_t i = 0; i < size; ++i) { + if (i == 0) { + CHECK(assign(&dattr, (*vec)[i])) + << "Incompatible attr in node " << attrs.name << " at " << i + << "-th " << name << ": " + << "expected " << attr_string(dattr) << ", got " + << attr_string((*vec)[i]); + } + } + }; + deduce(in_attrs, in_size, "input"); + + auto write = [&](std::vector *vec, size_t size, const char *name) { + for (size_t i = 0; i < size; ++i) { + CHECK(assign(&(*vec)[i], dattr)) + << "Incompatible attr in node " << attrs.name << " at " << i << "-th " + << name << ": " + << "expected " << attr_string(dattr) << ", got " + << attr_string((*vec)[i]); + } + }; + write(out_attrs, out_size, "output"); + + if (is_none(dattr)) { + return false; + } + return true; +} + +template +inline bool ReorgShape(const NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + if (n_in != -1) { + CHECK_EQ(in_attrs->size(), static_cast(n_in)) + << " in operator " << attrs.name; + } + if (n_out != -1) { + CHECK_EQ(out_attrs->size(), static_cast(n_out)) + << " in operator " << attrs.name; + } + return ReorgAttr( + attrs, in_attrs, out_attrs, TShape()); +} + +template +inline bool ReorgType(const NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + if (n_in != -1) { + CHECK_EQ(in_attrs->size(), static_cast(n_in)) + << " in operator " << attrs.name; + } + if (n_out != -1) { + CHECK_EQ(out_attrs->size(), static_cast(n_out)) + << " in operator " << attrs.name; + } + return ReorgAttr( + attrs, in_attrs, 
+template <int n_in, int n_out>
+inline bool ReorgType(const NodeAttrs &attrs,
+                      std::vector<int> *in_attrs,
+                      std::vector<int> *out_attrs) {
+  if (n_in != -1) {
+    CHECK_EQ(in_attrs->size(), static_cast<size_t>(n_in))
+        << " in operator " << attrs.name;
+  }
+  if (n_out != -1) {
+    CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out))
+        << " in operator " << attrs.name;
+  }
+  return ReorgAttr<int, type_is_none, type_assign, true, type_string>(
+      attrs, in_attrs, out_attrs, -1);
+}
+
+struct ReorgParam : public dmlc::Parameter<ReorgParam> {
+  int stride;
+
+  DMLC_DECLARE_PARAMETER(ReorgParam) {
+    DMLC_DECLARE_FIELD(stride).set_default(1).describe("Stride value");
+  }
+};
+}  // namespace top
+}  // namespace nnvm
+#endif  // NNVM_TOP_VISION_YOLO_REORG_H_
diff --git a/nnvm/tests/python/compiler/test_alter_op_layout.py b/nnvm/tests/python/compiler/test_alter_op_layout.py
new file mode 100644
index 000000000000..aad634f03843
--- /dev/null
+++ b/nnvm/tests/python/compiler/test_alter_op_layout.py
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Unittest cases for AlterOpLayout pass"""
+from nnvm import symbol as sym
+from nnvm.compiler import graph_attr
+from nnvm.top import registry as reg
+import nnvm.graph as graph
+
+def get_layouts(g):
+    ldict = {}
+    vlayout = g.json_attr("layout")
+    entry_ptr = g.index.entry_ptr
+    for i, n in enumerate(g.index.nodes):
+        begin, end = entry_ptr[i], entry_ptr[i + 1]
+        ldict[n["name"]] = vlayout[begin:end]
+    return ldict
+
+
+def test_alter_conv2d_layout():
+    data = sym.Variable("data", shape=(1, 32, 512, 512))
+    conv = sym.conv2d(data, name="conv", channels=16,
+                      kernel_size=(3,3), padding=(1,1),
+                      use_bias=False, layout="NCHW")
+    # split here
+    convs = sym.split(conv, indices_or_sections=2)
+    relus = [sym.relu(x, name="relu") for x in convs]
+    relu = sym.concatenate(*relus)
+    flatten = sym.flatten(relu, name="flatten")
+    softmax = sym.softmax(flatten, name="softmax")
+    g = graph.create(softmax)
+
+    g = g.apply("CorrectLayout")
+    g = graph_attr.set_dtype_inputs(g, "float32")
+    g = g.apply(["InferShape", "InferType"])
+    layouts_origin = get_layouts(g)
+
+    @reg.register_alter_op_layout("conv2d", level=100)
+    def alter_conv2d_layout(attrs, inputs, tinfos):
+        new_attrs = {k : attrs[k] for k in attrs.keys()}
+        new_attrs["layout"] = "NCHW16c"
+        new_attrs["kernel_layout"] = "NCHW16c"
+        new_attrs["name"] = "conv_alter"
+        return sym.conv2d(inputs[0], inputs[1], **new_attrs)
+
+    g = g.apply("AlterOpLayout")
+    layouts = get_layouts(g)
+
+    # check copy layouts
+    for node in ["data", "relu", "flatten", "softmax", "conv_weight"]:
+        assert layouts[node] == layouts_origin[node]
+    assert layouts["conv_alter"] == layouts_origin["conv"]
+
+
+def test_consecutive_alter_layout():
+    data = sym.Variable("data", shape=(1, 32, 512, 512))
+    pool1 = sym.global_avg_pool2d(data, name="global_avg_pool2d_1", layout="NCHW")
+    pool2 = sym.global_avg_pool2d(pool1, name="global_avg_pool2d_2", layout="NCHW")
+    relu = sym.relu(pool2, name="relu")
+
+    g = graph.create(relu)
+    g = g.apply("CorrectLayout")
+    g = graph_attr.set_dtype_inputs(g, "float32")
+    g = g.apply(["InferShape", "InferType"])
+    assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']
+
+    @reg.register_alter_op_layout("global_avg_pool2d",
level=100) + def alter_global_avg_pool2d_layout(attrs, inputs, tinfos): + new_attrs = {k : attrs[k] for k in attrs.keys()} + new_attrs["layout"] = "NCHW16c" + return sym.global_avg_pool2d(inputs[0], **new_attrs) + + g = g.apply("AlterOpLayout") + + # pool1 get replaced - output layout of pool1 is not recorded + # pool2 get replaced - input layout of pool2 is not recorded + # thus the second entry must be undefined - it can neither recover from pool1's output, + # nor from pool2's input. + assert g.json_attr("layout") == ['NCHW', '__undef__', 'NCHW', 'NCHW'] + + +def test_alter_func_return_none(): + data = sym.Variable("data", shape=(1, 32, 512, 512)) + pool1 = sym.global_max_pool2d(data, name="pool1", layout="NCHW") + pool2 = sym.global_max_pool2d(pool1, name="pool2", layout="NCHW") + relu = sym.relu(pool2, name="relu") + + g = graph.create(relu) + g = g.apply("CorrectLayout") + g = graph_attr.set_dtype_inputs(g, "float32") + g = g.apply(["InferShape", "InferType"]) + assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW'] + + @reg.register_alter_op_layout("global_max_pool2d", level=100) + def alter_global_max_pool2d_layout(attrs, inputs, tinfos): + return None + + g = g.apply("AlterOpLayout") + + # alter func return none, nothing get replaced, + # the layouts should remain the same + assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW'] + + +if __name__ == "__main__": + test_alter_conv2d_layout() + test_consecutive_alter_layout() + test_alter_func_return_none() diff --git a/nnvm/tests/python/compiler/test_autotvm_task_extraction.py b/nnvm/tests/python/compiler/test_autotvm_task_extraction.py new file mode 100644 index 000000000000..1ecbf053f923 --- /dev/null +++ b/nnvm/tests/python/compiler/test_autotvm_task_extraction.py @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
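+# Note on the assertions in this file: extract_from_graph is expected to
+# deduplicate identical workloads, so the task counts below are counts of
+# unique (symbol, shape, dtype) combinations rather than of operator nodes.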
+"""Test task extraction for autotvm""" + +import nnvm.testing +import nnvm.compiler +from tvm import autotvm + +def get_network(name, batch_size): + """Get the symbol definition and random weight of a network""" + input_shape = (batch_size, 3, 224, 224) + output_shape = (batch_size, 1000) + + if name == 'resnet-18': + net, params = nnvm.testing.resnet.get_workload(num_layers=18, batch_size=batch_size) + elif name == 'mobilenet': + net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size) + elif name == 'squeezenet v1.1': + net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1') + elif name == 'vgg-16': + net, params = nnvm.testing.vgg.get_workload(num_layers=16, batch_size=batch_size) + elif name == 'dcgan': + net, params = nnvm.testing.dcgan.get_workload(batch_size=batch_size) + input_shape = (batch_size, 100) + else: + raise ValueError("Unsupported network: " + name) + + return net, params, input_shape, output_shape + +def test_task_extraction(): + target = 'llvm' + dtype = 'float32' + + net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1) + tasks = autotvm.task.extract_from_graph(net, target=target, + shape={'data': input_shape}, dtype=dtype, + symbols=(nnvm.sym.conv2d,)) + assert len(tasks) == 12 + + net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1) + tasks = autotvm.task.extract_from_graph(net, target=target, + shape={'data': input_shape}, dtype=dtype, + symbols=(nnvm.sym.dense,)) + assert len(tasks) == 1 + + net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1) + tasks = autotvm.task.extract_from_graph(net, target=target, + shape={'data': input_shape}, dtype=dtype, + symbols=(nnvm.sym.conv2d, nnvm.sym.dense)) + assert len(tasks) == 13 + + net, params, input_shape, out_shape = get_network('mobilenet', batch_size=1) + tasks = autotvm.task.extract_from_graph(net, target=target, + shape={'data': input_shape}, dtype=dtype, + symbols=(nnvm.sym.conv2d, nnvm.sym.dense)) + assert len(tasks) == 20 + + net, params, input_shape, out_shape = get_network('dcgan', batch_size=1) + tasks = autotvm.task.extract_from_graph(net, target=target, + shape={'data': input_shape}, dtype=dtype, + symbols=(nnvm.sym.conv2d_transpose,)) + assert len(tasks) == 4 + +if __name__ == '__main__': + test_task_extraction() diff --git a/nnvm/tests/python/compiler/test_build.py b/nnvm/tests/python/compiler/test_build.py new file mode 100644 index 000000000000..a2a5ac659c8f --- /dev/null +++ b/nnvm/tests/python/compiler/test_build.py @@ -0,0 +1,176 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import numpy as np + +import tvm +from tvm.contrib import graph_runtime +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.compiler.build_module import _run_graph, precompute_prune + +def test_compile(): + x = sym.Variable("x") + y = sym.Variable("y") + z = sym.exp(y + x) + shape = (10, 128) + dtype = tvm.float32 + shape_dict = {"x": shape, "y": shape} + def verify(graph, lib): + m = graph_runtime.create(graph, lib, tvm.cpu(0)) + # get member functions + set_input, run, get_output = m["set_input"], m["run"], m["get_output"] + na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + # set inputs + set_input("x", na) + set_input("y", nb) + # execute + run() + # get outputs + out = tvm.nd.empty(shape, dtype) + get_output(0, out) + tvm.testing.assert_allclose( + out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy())) + + graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) + assert graph.index.num_nodes == 3 + verify(graph, lib) + + with nnvm.compiler.build_config(opt_level=0): + graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) + # print(graph.ir()) + assert graph.index.num_nodes == 4 + verify(graph, lib) + +def test_run(): + x = sym.Variable("x") + y = sym.Variable("y") + z = sym.exp(y + x) + shape = (10, 10) + dtype = tvm.float32 + nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + res = _run_graph(z, {"x": nx, "y": ny}) + tvm.testing.assert_allclose( + res[0].asnumpy(), np.exp(nx.asnumpy() + ny.asnumpy())) + + +def test_precompute_prune(): + x = sym.Variable("x") + 1 + a = sym.Variable("a") + y = sym.Variable("y") + z = y + x + a + shape = (10, 10) + dtype = tvm.float32 + nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + params = {"x": nx, "a": na} + graph, lib, params = nnvm.compiler.build( + z, "llvm", shape={"y": ny.shape}, params=params) + assert graph.index.num_nodes == 4 + m = graph_runtime.create(graph, lib, tvm.cpu(0)) + params["y"] = ny + res = tvm.nd.empty(shape) + m["load_params"](nnvm.compiler.save_param_dict(params)) + m.run() + out = m.get_output(0, out=res) + tvm.testing.assert_allclose( + res.asnumpy(), nx.asnumpy() + 1 + ny.asnumpy() + na.asnumpy()) + + +def test_dtypes(): + x = sym.Variable("x") + y = sym.relu(x) + dshape = (1, 3, 32, 32) + oshape = dshape + for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']: + graph, lib, _ = nnvm.compiler.build(y, 'llvm', {"x": dshape}, dtype=dtype) + m = graph_runtime.create(graph, lib, tvm.cpu()) + if 'float' in dtype: + data = np.random.uniform(size=dshape).astype(dtype) + elif 'int' in dtype: + data = np.random.randint(-127, 127, dshape).astype(dtype) + m.run(x=data) + data = (data > 0) * data + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), data, atol=1e-5, rtol=1e-5) + +def test_ndarray_output(): + x = sym.Variable("x") + y = sym.Variable("y") + z = x + y + shape = (10, 10) + dtype = tvm.float32 + nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + params = {"x": nx, "ny": ny} + graph, lib, params = nnvm.compiler.build( + z, "llvm", shape={"y": ny.shape, "x": nx.shape}, params=params) + m = graph_runtime.create(graph, lib, tvm.cpu(0)) + m.set_input("x", nx) + 
m.set_input("y", ny) + m.run() + out = m.get_output(0) + tvm.testing.assert_allclose( + out.asnumpy(), nx.asnumpy() + ny.asnumpy()) + +def test_ndarray_input(): + x = sym.Variable("x") + y = sym.Variable("y") + z = x + y + shape = (10, 10) + dtype = tvm.float32 + nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + params = {"x": nx, "ny": ny} + graph, lib, params = nnvm.compiler.build( + z, "llvm", shape={"y": ny.shape, "x": nx.shape}, params=params) + m = graph_runtime.create(graph, lib, tvm.cpu(0)) + m.set_input("x", nx) + m.set_input("y", ny) + in_x = tvm.nd.empty(shape, dtype) + in_y = tvm.nd.empty(shape, dtype) + m.get_input("x", in_x) + m.get_input("y", in_y) + tvm.testing.assert_allclose(nx.asnumpy(), in_x.asnumpy()) + tvm.testing.assert_allclose(ny.asnumpy(), in_y.asnumpy()) + in_nx = m.get_input("x") + in_ny = m.get_input("y") + tvm.testing.assert_allclose(nx.asnumpy(), in_nx.asnumpy()) + tvm.testing.assert_allclose(ny.asnumpy(), in_ny.asnumpy()) + +def test_num_outputs(): + x = sym.Variable('x') + z = sym.split(x, indices_or_sections=5, axis=1) + shape = (10, 10) + dtype = tvm.float32 + nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) + params = {"x": nx} + graph, lib, params = nnvm.compiler.build( + z, "llvm", shape={"x": nx.shape}, params=params) + m = graph_runtime.create(graph, lib, tvm.cpu(0)) + assert m.get_num_outputs() == 5 + +if __name__ == "__main__": + test_precompute_prune() + test_compile() + test_run() + test_dtypes() + test_ndarray_output() + test_ndarray_input() + test_num_outputs() diff --git a/nnvm/tests/python/compiler/test_fold_axis.py b/nnvm/tests/python/compiler/test_fold_axis.py new file mode 100644 index 000000000000..2bceb652162a --- /dev/null +++ b/nnvm/tests/python/compiler/test_fold_axis.py @@ -0,0 +1,174 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Unittest cases for fold_axis""" +import tvm +import nnvm +import nnvm.testing.resnet +import numpy as np +from nnvm import symbol as sym +from nnvm.compiler import graph_util, graph_attr + +def test_fold_axis_conv(): + # Before simplify + def before(x, conv_weight, conv_bias, in_scale, out_scale, channels): + x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2) + y = sym.conv2d(x, conv_weight, conv_bias, + channels=channels, + kernel_size=(3, 3), + padding=(1, 1), + name="conv") + y = sym.relu(y) + y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2) + return y + + def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels): + conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3) + conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=2) + conv_bias = conv_bias * out_scale + y = sym.conv2d(x, + conv_weight, + conv_bias, + channels=channels, + kernel_size=(3, 3), + padding=(1, 1), + name="conv") + y = sym.relu(y) + return y + + def check(shape, channels): + x = sym.Variable("x") + 1 + weight = sym.Variable("weight") + bias = sym.Variable("bias") + in_scale = sym.Variable("in_scale") + out_scale = sym.Variable("out_scale") + y1 = before(x, weight, bias, in_scale, out_scale, channels) + y2 = expected(x, weight, bias, in_scale, out_scale, channels) + ishape = {"x": shape, "out_scale": (channels,), "in_scale": (shape[1],)} + g1 = nnvm.graph.create(y1) + g2 = nnvm.graph.create(y2) + graph_attr.set_shape_inputs(g1, ishape) + g1 = g1.apply("InferShape").apply("FoldScaleAxis") + # assert graph equals as expected + graph_util.check_graph_equal(g1, g2) + + check((2, 4, 10, 10), 2) + +def test_fold_axis_depthwise_conv(): + # Before simplify + def before(x, conv_weight, conv_bias, in_scale, out_scale, channels): + x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2) + y = sym.conv2d(x, conv_weight, conv_bias, + channels=channels, + kernel_size=(3, 3), + padding=(1, 1), + groups=54, + name="depthiwise_conv") + y = sym.relu(y) + y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2) + return y + + def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels): + conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3) + conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=3) + conv_bias = conv_bias * out_scale + y = sym.conv2d(x, + conv_weight, + conv_bias, + channels=channels, + kernel_size=(3, 3), + padding=(1, 1), + groups=54, + name="depthiwise_conv") + y = sym.relu(y) + return y + + def check(shape, channels): + x = sym.Variable("x") + 1 + weight = sym.Variable("weight") + bias = sym.Variable("bias") + in_scale = sym.Variable("in_scale") + out_scale = sym.Variable("out_scale") + y1 = before(x, weight, bias, in_scale, out_scale, channels) + y2 = expected(x, weight, bias, in_scale, out_scale, channels) + ishape = {"x": shape, "out_scale": (channels,), "in_scale": (shape[1],)} + g1 = nnvm.graph.create(y1) + g2 = nnvm.graph.create(y2) + graph_attr.set_shape_inputs(g1, ishape) + g1 = g1.apply("InferShape").apply("FoldScaleAxis") + # assert graph equals as expected + graph_util.check_graph_equal(g1, g2) + + check((1, 54, 63, 127), 54) + +def test_fold_fail(): + # Before simplify + def before(x, scale, channels): + y = sym.conv2d(x, + channels=channels, + kernel_size=(3, 3), + padding=(1, 1), + name="conv") + y = y * sym.expand_dims(scale, axis=1, num_newaxis=1) + return y + + def check(shape, channels): + x = sym.Variable("x") + bias = sym.Variable("bias") + scale = 
sym.Variable("scale") + y1 = before(x, scale, channels) + ishape = {"x": shape, "scale": (channels,), "bias": (channels,)} + g1 = nnvm.graph.create(y1) + graph_attr.set_shape_inputs(g1, ishape) + g2 = g1.apply("InferShape").apply("FoldScaleAxis") + # assert graph equals as expected + graph_util.check_graph_equal(g1, g2) + + check((2, 10, 10, 10), 10) + + +def test_fold_resnet(): + batch_size = 1 + num_classes = 1000 + image_shape = (3, 224, 224) + data_shape = (batch_size,) +image_shape + net, params = nnvm.testing.resnet.get_workload( + batch_size=1, image_shape=image_shape) + ishape = {"data" : data_shape} + graph = nnvm.graph.create(net) + data = np.random.uniform(size=data_shape).astype("float32") + # Initial pass do shape type inference + shape, _ = graph_util.infer_shape(graph, **ishape) + ishape.update(zip(graph.index.input_names, shape)) + + def run_prune(graph, params, opt_level): + # Apply optimization + with nnvm.compiler.build_config(opt_level=0): + graph = nnvm.compiler.optimize(graph, ishape) + graph, params = nnvm.compiler.build_module.precompute_prune(graph, params) + params["data"] = data + return nnvm.compiler.build_module._run_graph(graph, params) + + x = run_prune(graph, params, 0) + y = run_prune(graph, params, 3) + tvm.testing.assert_allclose(y[0].asnumpy(), x[0].asnumpy()) + + +if __name__ == "__main__": + test_fold_resnet() + test_fold_axis_conv() + test_fold_fail() + test_fold_axis_depthwise_conv() diff --git a/nnvm/tests/python/compiler/test_graph_pass.py b/nnvm/tests/python/compiler/test_graph_pass.py new file mode 100644 index 000000000000..d65a2be9abf8 --- /dev/null +++ b/nnvm/tests/python/compiler/test_graph_pass.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unittest cases for graph pass""" +import nnvm +import nnvm.compiler +from nnvm import symbol as sym +from nnvm.compiler import graph_util, graph_attr + +def test_infer_attr(): + x = sym.Variable("x") + y = x * 2 + g = nnvm.graph.create(y) + ishape, oshape = graph_util.infer_shape(g, x=(10,20)) + assert tuple(oshape[0]) == (10, 20) + + itype, otype = graph_util.infer_dtype(g, x="float32") + assert otype[0] == "float32" + +if __name__ == "__main__": + test_infer_attr() diff --git a/nnvm/tests/python/compiler/test_nhwc_layout.py b/nnvm/tests/python/compiler/test_nhwc_layout.py new file mode 100644 index 000000000000..e3747daf8563 --- /dev/null +++ b/nnvm/tests/python/compiler/test_nhwc_layout.py @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import tvm +from tvm.contrib import graph_runtime as runtime +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list + +def get_sym(layout, kernel_layout, channels): + data = sym.Variable(name="data") + data = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1), + layout=layout, kernel_layout=kernel_layout, use_bias=True) + data = sym.max_pool2d(data=data, pool_size=(2, 2), strides=(2, 2), layout=layout) + data = sym.upsampling(data=data, scale=2, layout=layout) + softmax_axis = 1 + if layout == "NHWC": + softmax_axis = 3 + data = sym.softmax(data=data, axis=softmax_axis) + return data + + +def build_and_run(sym, params, data, out_shape): + ctx = tvm.cpu(0) + graph, lib, params = nnvm.compiler.build(sym, "llvm", shape={"data":data.shape}, params=params) + module = runtime.create(graph, lib, ctx) + module.set_input(**params) + module.set_input("data", data) + module.run() + out = module.get_output(0, tvm.nd.empty(out_shape)) + return out.asnumpy() + + +def test_nhwc(): + data_shape = (1, 3, 224, 224) + out_channel = 8 + nchw_sym = get_sym("NCHW", "OIHW", out_channel) + nhwc_sym = get_sym("NHWC", "HWIO", out_channel) + conv_weight = np.random.uniform(-1, 1, (out_channel, 3, 3, 3)).astype(np.float32) + conv_bias = np.random.uniform(-1, 1, (out_channel)).astype(np.float32) + nchw_params = { + "conv2d0_weight" : tvm.nd.array(conv_weight, ctx=tvm.cpu(0)), + "conv2d0_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0)) + } + nhwc_params = { + "conv2d1_weight" : tvm.nd.array(conv_weight.transpose(2, 3, 1, 0), ctx=tvm.cpu(0)), + "conv2d1_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0)) + } + + data = np.random.uniform(-1, 1, data_shape).astype(np.float32) + oshape = (1, out_channel, 224, 224) + oshape_nhwc = (1, 224, 224, out_channel) + nchw_output = build_and_run(nchw_sym, nchw_params, data, oshape) + nhwc_output = build_and_run(nhwc_sym, nhwc_params, data.transpose(0, 2, 3, 1), oshape_nhwc) + tvm.testing.assert_allclose(nchw_output, nhwc_output.transpose(0, 3, 1, 2), rtol=1e-5, atol=1e-5) + + +if __name__ == "__main__": + test_nhwc() diff --git a/nnvm/tests/python/compiler/test_op_fusion.py b/nnvm/tests/python/compiler/test_op_fusion.py new file mode 100644 index 000000000000..bc0caeecf58c --- /dev/null +++ b/nnvm/tests/python/compiler/test_op_fusion.py @@ -0,0 +1,248 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import nnvm +import numpy as np +import tvm +import topi.testing +from tvm.contrib import graph_runtime +from nnvm import symbol as sym +from nnvm.compiler import graph_util, graph_attr +from nnvm.testing import ctx_list, utils + +def test_ewise_injective(): + x = sym.Variable("x") + y = x * 2 + y = sym.flatten(y) + 1 + dshape = (10, 2, 3) + shape_dict = {"x": dshape} + dtype = "float32" + target = "llvm" + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + assert graph.index.num_nodes == 2 + m = graph_runtime.create(graph, lib, ctx) + x_np = np.random.uniform(size=dshape).astype(dtype) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty((10, 6))) + tvm.testing.assert_allclose( + out.asnumpy(), x_np.reshape(out.shape) * 2 + 1, + atol=1e-5, rtol=1e-5) + + +def test_conv_ewise_injective(): + x = sym.Variable("x") + y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32, + name="y", padding=(1,1)) + y = sym.flatten(y + 1) + 1 + dtype = "float32" + dshape = (1, 32, 18, 18) + kshape = (32, 1, 3, 3) + oshape = (1, 32* 18 * 18) + shape_dict = {"x": dshape} + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + # print(graph.ir(join_entry_attrs=["shape"])) + assert graph.index.num_nodes == 5 + # set input + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) + m.run(x=data, y_weight=kernel, y_bias=bias) + # get output + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + c_np = topi.testing.depthwise_conv2d_python_nchw( + data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') + c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + 1 + c_np = c_np.reshape(c_np.shape[0], np.prod(c_np.shape[1:])) + 1 + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + +def test_injective_reduce_injective(): + x = sym.Variable("x") + x = sym.flatten(x) + 1 + y = sym.sum(x, axis=1) + dtype = "float32" + dshape = (32, 1, 18, 18) + shape_dict = {"x": dshape} + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + assert graph.index.num_nodes == 2 + data = np.random.uniform(size=dshape).astype(dtype) + m.run(x=data) + c_np = np.sum(data.reshape(32, 18 * 18) + 1, axis=1) + # get output + out = m.get_output(0, tvm.nd.empty(c_np.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + +def test_injective_conv2d(): + channels = 16 + data = sym.Variable(name="data") + pool = sym.global_avg_pool2d(data=data) + weight = sym.reshape(pool, shape=[1, channels, 1, 1]) + residual = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1), + layout="NCHW", kernel_layout="OIHW", use_bias=False, name="conv") + net = weight * data + residual + size = 56 + dtype="float32" + dshape = (1, channels, size, size) + kshape = (channels, channels, 3, 3) + oshape = dshape + shape_dict = {"data": dshape} + + for 
target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(net, target, shape_dict) + # data, global_avg_pool, conv weight, conv op, fused elemwise add + assert graph.index.num_nodes == 5 + + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + m = graph_runtime.create(graph, lib, ctx) + m.run(data=data, conv_weight=kernel) + # get output + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + residual = topi.testing.conv2d_nchw_python( + data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') + weight = np.mean(data.asnumpy(), axis=(2, 3)) + c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + +def test_concatenate_conv2d(): + ch = 3 + size = 8 + data = sym.Variable(name="data") + concat = sym.concatenate(data, data, axis=1) + conv = sym.conv2d(data=concat, kernel_size=(1,1), channels=ch*2, use_bias=False, name="conv") + net = sym.elemwise_add(concat, conv) + + dtype="float32" + dshape = (1, ch, size, size) + kshape = (ch*2, ch*2, 1, 1) + oshape = (1, ch*2, size, size) + shape_dict = {"data": dshape} + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(net, target, shape_dict) + # data, conv weight, conv op, concat + assert graph.index.num_nodes == 4 + + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + m = graph_runtime.create(graph, lib, ctx) + m.run(data=data, conv_weight=kernel) + # get output + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + + concat = np.concatenate((data.asnumpy(), data.asnumpy()), axis=1) + conv = topi.testing.conv2d_nchw_python( + concat, kernel.asnumpy(), (1,1), 'SAME') + ref = concat + conv + tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5) + + +def test_residual_block_layout_transform(): + ch = 16 + size = 32 + data = sym.Variable(name="data") + conv1 = sym.conv2d(data=data, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv1") + layout_transform1 = sym.__layout_transform__(data=conv1, src_layout="NCHW", dst_layout="NCHW8c") + layout_transform2 = sym.__layout_transform__(data=layout_transform1, src_layout="NCHW8c", dst_layout="NCHW") + conv2 = sym.conv2d(data=conv1, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv2") + elemwise_sum = sym.elemwise_add(layout_transform2, conv2) + out = sym.relu(elemwise_sum) + + dtype="float32" + dshape = (1, ch, size, size) + kshape = (ch, ch, 3, 3) + oshape = (1, ch, size, size) + shape_dict = {"data": dshape} + + target = "llvm" # only test on llvm since it involves NCHW8c layout + ctx = tvm.context(target, 0) + graph, lib, _ = nnvm.compiler.build(out, target, shape_dict) + # data, conv1 weight, conv1, layout transform + elemwise add + relu, conv2 weight, conv2 op + assert graph.index.num_nodes == 6 + + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel1 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + kernel2 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + m = graph_runtime.create(graph, lib, ctx) + m.run(data=data, conv1_weight=kernel1, conv2_weight=kernel2) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + + conv1 = topi.testing.conv2d_nchw_python( + data.asnumpy(), kernel1.asnumpy(), (1,1), 'SAME') + conv2 = topi.testing.conv2d_nchw_python( + conv1, kernel2.asnumpy(), (1,1), 'SAME') + ref = np.maximum(conv1 + conv2, 0) + 
tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5) + + +def build_and_run(sym, params, data, out_shape, target, ctx, opt_level=2): + with nnvm.compiler.build_config(opt_level=opt_level): + graph, lib, params = nnvm.compiler.build(sym, target, shape={"data":data.shape}, params=params) + module = graph_runtime.create(graph, lib, ctx) + module.set_input(**params) + module.set_input("data", data) + module.run() + out = module.get_output(0, tvm.nd.empty(out_shape)) + return out.asnumpy(), graph + + +def test_fuse_conv2d_elu(): + def elu(data): + return -0.5 * sym.relu(1 - sym.exp(data)) + sym.relu(data) + + def get_sym(out_channel): + data = sym.Variable(name="data") + data = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channel, padding=(1, 1), + layout="NCHW", kernel_layout="OIHW", use_bias=True) + data = sym.batch_norm(data) + data = elu(data) + return data + + in_channel = 8 + out_channel = 16 + size = 64 + dshape = (1, in_channel, size, size) + oshape = (1, out_channel, size, size) + data = np.random.uniform(-1, 1, dshape).astype(np.float32) + + for target, ctx in ctx_list(): + sym1 = get_sym(out_channel) + sym2 = get_sym(out_channel) + _, params1 = utils.create_workload(sym1, 1, dshape[1:], seed=0) + _, params2 = utils.create_workload(sym2, 1, dshape[1:], seed=0) + output1, g1 = build_and_run(sym1, params1, data, oshape, target, ctx, opt_level=2) + output2, g2 = build_and_run(sym2, params2, data, oshape, target, ctx, opt_level=0) + tvm.testing.assert_allclose(output1, output2, rtol=1e-5, atol=1e-5) + # data, conv weight, bias, batch norm gamma, batch norm beta, conv op + assert g1.index.num_nodes == 6 + +if __name__ == "__main__": + test_injective_reduce_injective() + test_ewise_injective() + test_conv_ewise_injective() + test_fuse_conv2d_elu() + test_injective_conv2d() + test_concatenate_conv2d() + test_residual_block_layout_transform() diff --git a/nnvm/tests/python/compiler/test_optimizer.py b/nnvm/tests/python/compiler/test_optimizer.py new file mode 100644 index 000000000000..86a9b71b46dc --- /dev/null +++ b/nnvm/tests/python/compiler/test_optimizer.py @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
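+# The update rules re-implemented by hand in the tests below: SGD with
+# weight decay,
+#   w <- w - lr * (clip(rescale * grad) + wd * w)
+# and Adam with bias correction,
+#   m <- b1 * m + (1 - b1) * g;  v <- b2 * v + (1 - b2) * g^2
+#   w <- w - lr * sqrt(1 - b2^t) / (1 - b1^t) * (m / (sqrt(v) + eps) + wd * w)
+# with a FactorScheduler halving the learning rate after every step.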
+import numpy as np +import tvm +import nnvm +import nnvm.compiler.optimizer as optimizer +import nnvm.compiler.lr_scheduler as lr_scheduler + +from nnvm.testing.config import ctx_list +from tvm.contrib import graph_runtime + + +def helper(symbol, inputs, params, update_func, run_times, target, ctx, dtype="float32"): + ishapes = {} + np_inputs = {} + params_dict = {} + for (name, shape, s) in inputs: + ishapes.update({name: shape}) + np_inputs.update({name: np.random.uniform(size=shape).astype(dtype)}) + for (name, shape, s) in params: + np_inputs.update({name: np.random.uniform(size=shape).astype(dtype)}) + params_dict.update({name: np_inputs[name]}) + + graph, lib, rt_params = nnvm.compiler.build(symbol, target, shape=ishapes) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**np_inputs) + m.set_input(**rt_params) + for _ in range(run_times): + m.run() + y_np = update_func(**np_inputs) + out = m.get_output(0, tvm.nd.empty(y_np.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5) + + +def test_sgd(): + for target, ctx in ctx_list(): + data = nnvm.sym.Variable("data") + weight = nnvm.sym.Variable("weight") + out = nnvm.sym.elemwise_mul(data, weight ** 2) + + dshape = (1, 2, 3) + wshape = dshape + + base_lr = 0.1 + lr_factor = 0.5 + rescale_grad = 0.2 + wd = 0.1 + clip_gradient = 0.25 + + scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr, step=1, factor=lr_factor) + opt = optimizer.SGD(learning_rate=base_lr, lr_scheduler=scheduler, + rescale_grad=rescale_grad, clip_gradient=clip_gradient, + wd=wd) + opt_sym = opt.minimize(out, var=weight) + + inputs = [("data", dshape, data)] + params = [("weight", wshape, weight)] + + def update_func(data, weight): + gradient_0 = data * 2 * weight * rescale_grad + gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient) + weight_0 = weight - base_lr * lr_factor * (gradient_0 + wd * weight) + gradient_1 = data * 2 * weight_0 * rescale_grad + gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient) + weight_1 = weight_0 - base_lr * (lr_factor ** 2) * (gradient_1 + wd * weight_0) + return weight_1 + + helper(opt_sym, inputs, params, update_func, 2, target, ctx) + + + +def test_adam(): + for target, ctx in ctx_list(): + data = nnvm.sym.Variable("data") + weight = nnvm.sym.Variable("weight") + out = nnvm.sym.elemwise_mul(data, weight ** 2) + + dshape = (1, 2, 3) + wshape = dshape + + base_lr = 0.1 + beta1 = 0.9 + beta2 = 0.999 + epsilon = 1e-8 + lr_factor = 0.5 + rescale_grad = 0.2 + wd = 0.1 + clip_gradient = 0.25 + + scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr, step=1, factor=lr_factor) + opt = optimizer.Adam(learning_rate=base_lr, beta1=beta1, beta2=beta2, epsilon=epsilon, + lr_scheduler=scheduler, rescale_grad=rescale_grad, + clip_gradient=clip_gradient, wd=wd) + opt_sym = opt.minimize(out, var=weight) + + inputs = [("data", dshape, data)] + params = [("weight", wshape, weight)] + + def update_func(data, weight): + rate_0 = np.sqrt(1 - beta2) / (1 - beta1) + lr_0 = base_lr * lr_factor * rate_0 + gradient_0 = data * 2 * weight * rescale_grad + gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient) + m_0 = (1 - beta1) * gradient_0 + v_0 = (1 - beta2) * (gradient_0 ** 2) + weight_0 = weight - lr_0 * (m_0 / (np.sqrt(v_0) + epsilon) + wd * weight) + rate_1 = np.sqrt(1 - beta2 ** 2) / (1 - beta1 ** 2) + lr_1 = base_lr * (lr_factor ** 2) * rate_1 + gradient_1 = data * 2 * weight_0 * rescale_grad + gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient) + m_1 = beta1 * 
m_0 + (1 - beta1) * gradient_1 + v_1 = beta2 * v_0 + (1 - beta2) * (gradient_1 ** 2) + weight_1 = weight_0 - lr_1 * (m_1 / (np.sqrt(v_1) + epsilon) + wd * weight_0) + return weight_1 + + helper(opt_sym, inputs, params, update_func, 2, target, ctx) + +if __name__ == "__main__": + test_sgd() + test_adam() diff --git a/nnvm/tests/python/compiler/test_param_dict.py b/nnvm/tests/python/compiler/test_param_dict.py new file mode 100644 index 000000000000..b30f8f99082c --- /dev/null +++ b/nnvm/tests/python/compiler/test_param_dict.py @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os +import numpy as np +import nnvm.compiler +import tvm +import json +import base64 +from tvm._ffi.base import py_str +from tvm import rpc +from tvm.contrib import util, graph_runtime + + +def test_save_load(): + x = np.random.uniform(size=(10, 2)).astype("float32") + y = np.random.uniform(size=(1, 2, 3)).astype("float32") + x[:] = 1 + y[:] = 1 + params = {"x": x, "y": y} + param_bytes = nnvm.compiler.save_param_dict(params) + assert isinstance(param_bytes, bytearray) + param2 = nnvm.compiler.load_param_dict(param_bytes) + assert len(param2) == 2 + np.testing.assert_equal(param2["x"].asnumpy(), x) + np.testing.assert_equal(param2["y"].asnumpy(), y) + + +def test_ndarray_reflection(): + x = np.random.uniform(size=(10, 2)).astype("float32") + xx = tvm.nd.array(x) + xnode = tvm.make.node("NDArrayWrapper", name="xx", array=xx) + xnode2 = tvm.make.node("NDArrayWrapper", name="x2", array=xx) + assert xnode.array.same_as(xx) + json_str = tvm.save_json([xnode, xnode2]) + json_dict = json.loads(json_str) + b64_str = json_dict["b64ndarrays"][0] + decoded = py_str(base64.b64encode(base64.b64decode(b64_str))) + assert b64_str == decoded + xlist = tvm.load_json(json_str) + np.testing.assert_equal(xlist[0].array.asnumpy(), xx.asnumpy()) + assert xlist[1].array == xlist[0].array + + +def test_bigendian_rpc_param(): + """Test big endian rpc when there is a PowerPC RPC server available""" + host = os.environ.get("TVM_POWERPC_TEST_HOST", None) + port = os.environ.get("TVM_POWERPC_TEST_PORT", 9090) + if host is None: + return + + def verify_nnvm(remote, target, shape, dtype): + x = nnvm.sym.Variable("x") + y = x + 1 + graph, lib, _ = nnvm.compiler.build( + y, target, + shape={"x": shape}, + dtype={"x": dtype}) + + temp = util.tempdir() + path_dso = temp.relpath("dev_lib.o") + lib.save(path_dso) + remote.upload(path_dso) + lib = remote.load_module("dev_lib.o") + a = np.random.randint(0, 256, size=shape).astype(dtype) + a[:] = 1 + params = {"x" : a} + ctx = remote.cpu(0) + m = graph_runtime.create(graph, lib, ctx) + # uses save param_dict + m.load_params(nnvm.compiler.save_param_dict(params)) + m.run() + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx)) + 
tvm.testing.assert_allclose(a + 1, out.asnumpy()) + + print("Test RPC connection to PowerPC...") + remote = rpc.connect(host, port) + target = "llvm -mtriple=powerpc-linux-gnu" + for dtype in ["float32", "float64", "int32", "int8"]: + verify_nnvm(remote, target, (10,), dtype) + + + +if __name__ == "__main__": + test_ndarray_reflection() + test_save_load() + test_bigendian_rpc_param() diff --git a/nnvm/tests/python/compiler/test_rpc_exec.py b/nnvm/tests/python/compiler/test_rpc_exec.py new file mode 100644 index 000000000000..1584f7c589a4 --- /dev/null +++ b/nnvm/tests/python/compiler/test_rpc_exec.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import tvm +from tvm import rpc +from tvm.contrib import util, graph_runtime +import nnvm.symbol as sym +import nnvm.compiler +import numpy as np +import time + +def test_rpc_executor(): + host = "localhost" + port = 9021 + server = rpc.Server(host, port, use_popen=True) + time.sleep(1) + x = sym.Variable("x") + y = sym.Variable("y") + z = sym.exp(y + x) + shape = (10, 128) + dtype = tvm.float32 + shape_dict = {"x": shape, "y": shape} + tmp = util.tempdir() + lib_name = tmp.relpath("net.o") + + graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) + # save module + lib.save(lib_name) + remote = rpc.connect(host, port) + remote.upload(lib_name) + ctx = remote.cpu(0) + # load remote + rlib = remote.load_module("net.o") + + # Create remotemodule + m = graph_runtime.create(graph, rlib, remote.cpu(0)) + # get member functions + set_input, run, get_output = m["set_input"], m["run"], m["get_output"] + na = tvm.nd.array(np.ones(shape).astype(dtype), ctx) + nb = tvm.nd.array(np.ones(shape).astype(dtype), ctx) + # set inputs + set_input("x", na) + set_input("y", nb) + # execute + run() + # get outputs + out = tvm.nd.empty(shape, dtype, ctx) + get_output(0, out) + tvm.testing.assert_allclose( + out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy())) + server.terminate() + +if __name__ == "__main__": + test_rpc_executor() diff --git a/nnvm/tests/python/compiler/test_simplify_inference.py b/nnvm/tests/python/compiler/test_simplify_inference.py new file mode 100644 index 000000000000..2f520bd6c125 --- /dev/null +++ b/nnvm/tests/python/compiler/test_simplify_inference.py @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unittest cases for simplify batch_norm""" +import nnvm +from nnvm import symbol as sym +from nnvm.compiler import graph_util, graph_attr + +def test_simplify_batchnorm(): + def simple_bn(x, gamma, beta, moving_mean, moving_var, + axis=1, epsilon=1e-5, shape=None): + # expect = (x - moving_mean) / sym.sqrt(moving_var + eps) * gamma + beta + scale = sym.elemwise_mul(1 / sym.sqrt(moving_var + epsilon), gamma) + shift = sym.elemwise_add( + sym.elemwise_mul(sym.negative(moving_mean), scale), beta) + # for 2D + num_newaxis=len(shape) - axis - 1 + if num_newaxis: + scale = sym.expand_dims(scale, axis=1, num_newaxis=num_newaxis) + shift = sym.expand_dims(shift, axis=1, num_newaxis=num_newaxis) + return x * scale + shift + + + # Before simplify + def check(dim, axis, nstep): + eps = 0.01 + x = sym.Variable("x") + 1 + beta = sym.Variable("beta") + gamma = sym.Variable("gamma") + moving_var = sym.Variable("moving_var") + moving_mean = sym.Variable("moving_mean") + y1, y2 = x, sym.Variable("xx") + 1 + ishape = {"x": tuple(10 for i in range(dim))} + for i in range(nstep): + y1 = sym.batch_norm( + y1 + 1, gamma, beta, moving_mean, moving_var, epsilon=eps, axis=axis) + y1 = sym.dropout(y1) + y2 = simple_bn(y2 + 1, gamma, beta, moving_mean, moving_var, + epsilon=eps, axis=axis, shape=ishape["x"]) + g = nnvm.graph.create(y1) + g2 = nnvm.graph.create(y2) + graph_attr.set_shape_inputs(g, ishape) + g1 = g.apply("InferShape").apply("SimplifyInference") + # assert graph equals as expected + graph_util.check_graph_equal(g1, g2) + + check(2, 1, 1) + check(4, 0, 3) + check(4, 1, 2) + +if __name__ == "__main__": + test_simplify_batchnorm() diff --git a/nnvm/tests/python/compiler/test_to_relay.py b/nnvm/tests/python/compiler/test_to_relay.py new file mode 100644 index 000000000000..dac14a8c1f22 --- /dev/null +++ b/nnvm/tests/python/compiler/test_to_relay.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
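+# check_model runs the same network twice, once through the legacy
+# nnvm.compiler/graph_runtime path and once converted with to_relay.to_relay
+# and evaluated by the Relay graph executor, then asserts both outputs match.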
+import nnvm
+from nnvm import testing
+from nnvm import to_relay
+import tvm
+from tvm.relay import transform
+from tvm.relay import create_executor
+from tvm.contrib import graph_runtime
+import numpy as np
+
+def check_model(sym, shapes, dtypes, params):
+    net = nnvm.graph.create(sym)
+    graph_json, mod, params = nnvm.compiler.build(
+        net,
+        'llvm',
+        shape=shapes,
+        dtype=dtypes,
+        params=params)
+    nnvm_rts = graph_runtime.create(graph_json, mod, tvm.cpu(0))
+    inputs = {}
+    for name in shapes:
+        np_array = np.random.rand(*shapes[name]).astype('float32')
+        inputs[name] = tvm.nd.array(np_array)
+
+    nnvm_rts.set_input(**params)
+    nnvm_rts.run(**inputs)
+    nnvm_out = nnvm_rts.get_output(0)
+    relay_model, params = to_relay.to_relay(net, shapes, dtypes, params)
+    mod = tvm.relay.Module.from_expr(relay_model)
+    mod = transform.InferType()(mod)
+    relay_rts = create_executor(kind='graph', mod=mod, ctx=tvm.cpu(0), target='llvm')
+    inputs.update(params)
+    relay_out = relay_rts.evaluate()(*list(inputs.values()))
+    np.testing.assert_allclose(nnvm_out.asnumpy(), relay_out.asnumpy())
+
+# def test_mlp():
+#     mlp, params = testing.mlp.get_workload(1)
+#     shapes = { "data": (10, 3, 224, 224) }
+#     dtypes = { "data": 'float32' }
+#     check_model(mlp, shapes, dtypes, params)
+
+if __name__ == "__main__":
+    # test_mlp is disabled above; keep the script entry point a no-op so the
+    # file still runs without a NameError.
+    pass
diff --git a/nnvm/tests/python/compiler/test_top_assign.py b/nnvm/tests/python/compiler/test_top_assign.py
new file mode 100644
index 000000000000..dae0506edc36
--- /dev/null
+++ b/nnvm/tests/python/compiler/test_top_assign.py
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
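+# _assign makes the graph stateful: every m.run() first advances w to w + 1
+# and then stores w + 1 into w2, so w2 reads back as data + 2 after one run
+# and data + 3 after two, which is exactly what the checks below assert.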
+import numpy as np + +import tvm +from tvm.contrib import graph_runtime + +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list + + +def test_update(): + w = sym.Variable("w") + w2 = sym.Variable("w2") + w = sym._assign(w, w + 1) + w2 = sym._assign(w2, w + 1) + + dshape = (5, 3, 18, 18) + shape_dict = {"w": dshape, "w2":dshape} + dtype = "float32" + + def check(target, ctx): + graph, lib, _ = nnvm.compiler.build(w2, target, shape_dict) + + m = graph_runtime.create(graph, lib, ctx) + + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + m.set_input("w", data) + m.run() + out = m.get_input("w2", tvm.nd.empty(dshape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 2, rtol=1e-5) + + m.run() + out = m.get_input("w2", tvm.nd.empty(dshape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 3, rtol=1e-5) + + for target, ctx in ctx_list(): + check(target, ctx) + + +if __name__ == "__main__": + test_update() diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py new file mode 100644 index 000000000000..ae6266cdde54 --- /dev/null +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -0,0 +1,605 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
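+"""Unittest cases for level-1 operators and the check_function helper."""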
+import numpy as np +import tvm +from tvm.contrib import graph_runtime +import topi.testing +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list +from nnvm.testing.check_computation import check_function + +def test_check_function(): + # test the testing function + + x = sym.Variable("x") + y = sym.Variable("y") + + # different styles of returning gradients from the backward function + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: [head_grads, 2*head_grads], + shape={'x': (1, 2), y: (1, 2)}, dtype='float32') + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: (head_grads, 2*head_grads), + shape={'x': (1, 2), y: (1, 2)}, dtype='float32') + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads}, + shape={'x': (1, 2), y: (1, 2)}, dtype='float32') + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: {'y': 2*head_grads}, + shape={'x': (1, 2), y: (1, 2)}, dtype='float32') + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: [2*head_grads], + grad_input_vars=[y], + shape={'x': (1, 2), y: (1, 2)}, dtype='float32') + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: 2*head_grads, + grad_input_vars=[y], + shape={'x': (1, 2), y: (1, 2)}, dtype='float32') + check_function(x + 2*y, lambda x, y: x + 2*y, + lambda x, y, head_grads: 2*head_grads, + grad_input_vars=[y], + shape={'x': (1, 2), y: (1, 2)}, dtype='float64') + + # test just numerical gradients + # different styles of shape and dtype passing + check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, + numerical_grads=True) + check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype='float32', + numerical_grads=True) + check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype={x: 'float32', 'y': 'float32'}, + numerical_grads=True) + check_function(x + 2*y, shape=(1, 2), dtype='float32', + numerical_grads=True) + + # specifying variable attributes on variable creation + # (in this case type codes must be used) + x = sym.Variable("x", dtype=0, shape=(1, 2)) + check_function(x + 2*y, shape={y: (1, 2)}, dtype={'y': 'float32'}, numerical_grads=True) + y = sym.Variable("y", dtype=0, shape=(1, 2)) + + # shape overriding + def _fwd1(x, y): + assert x.shape == (1, 1) + assert y.shape == (1, 2) + return x + 2*y + check_function(x + 2*y, _fwd1, shape={x: (1, 1)}) + + # in_range + def _fwd2(x, y): + assert x.shape == (100,) + assert (x <= 0.9).all() + assert (x >= 0.8).all() + return x + 2*y + check_function(x + 2*y, _fwd2, shape=(100,), in_range=(0.8, 0.9), numerical_grads=False) + check_function(x + 2*y, _fwd2, shape=(100,), in_range={'x': (0.8, 0.9)}, numerical_grads=False) + check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0], + in_range={'head_grads_0': (1.0, 1.0)}) + # explicit passing of values + check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0], + values={'head_grads_0': np.full((1, 2), 1.0)}) + + # check that the function reports errors + def _check_function_must_fail(*args, **kwargs): + error = AssertionError + if 'error' in kwargs: + error = kwargs['error'] + del kwargs['error'] + try: + check_function(*args, quiet=True, **kwargs) + except error: + pass + else: + raise AssertionError("check_function didn't raise an exception") + + _check_function_must_fail(x + 2*y, error=ValueError) + _check_function_must_fail(x + 2*y, lambda x, y: x + y) + _check_function_must_fail(x + 2*y, backward=lambda x, y, 
head_grads: [1.0, 2.0])
+    _check_function_must_fail(sym.block_grad(x + 2*y), numerical_grads=True)
+    _check_function_must_fail(x*x, numerical_grads=True,
+                              numerical_grads_params={'atol': 0.0, 'rtol': 0.0})
+    _check_function_must_fail(sym.log(-x*x), numerical_grads=True, error=ValueError)
+
+    # different styles of returning results from the forward function
+    check_function(x + 2*y, lambda x, y: [x + 2*y], numerical_grads=False)
+    _check_function_must_fail(x + 2*y, lambda x, y: [x + 2*y, x], numerical_grads=False,
+                              error=ValueError)
+    _check_function_must_fail(x + 2*y, lambda x, y: [], numerical_grads=False,
+                              error=ValueError)
+
+    # multiple outputs
+    z = sym.Group([2*x + y, x + 2*y])
+    check_function(z, lambda x, y: [2*x + y, x + 2*y])
+    check_function(z, lambda x, y: (2*x + y, x + 2*y))
+    check_function(z, backward=lambda x, y, head_grads: [2*head_grads[0] + head_grads[1],
+                                                         head_grads[0] + 2*head_grads[1]])
+    _check_function_must_fail(z, backward=lambda x, y, head_grads: [2*head_grads[0],
+                                                                    2*head_grads[1]])
+    check_function(z, backward=lambda x, y, head_grads: [head_grads[1], 2*head_grads[1]],
+                   in_range={'head_grads_0': (0, 0)})
+    check_function(z, numerical_grads=True)
+
+    z = sym.Group([sym.block_grad(2*x + y), x + 2*y])
+    check_function(z, lambda x, y: [2*x + y, x + 2*y], numerical_grads=False)
+    _check_function_must_fail(z, lambda x, y: [2*x + y, x + 2*y])
+    _check_function_must_fail(z, numerical_grads=True)
+
+    z = sym.Group([2*x + y, sym.block_grad(x + 2*y)])
+    _check_function_must_fail(z, numerical_grads=True)
+
+    z = sym.Group([2*x + y, x + 2*y, x, y, sym.sum(x)])
+    check_function(z, lambda x, y: [2*x + y, x + 2*y, x, y, np.sum(x)])
+
+    # passing additional parameters to forward and backward
+    def _fwd3(x, p):
+        assert p == 'v'
+        return x + 1
+    def _bwd3(x, p, head_grads):
+        assert p == 'v'
+        return head_grads
+    check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'})
+
+    # implicitly created variables and shape/dtype inference for inputs
+    x = sym.Variable("x", shape=(2, 3), dtype=0)
+    b = sym.Variable("b")
+    y = sym.dense(data=x, bias=b, units=4)
+    # Don't check gradients on cuda because it doesn't yet support ewise after reduce
+    check_function(y, exclude_targets={'cuda'}, numerical_grads=True)
+    check_function(y, shape={'x': (3, 4)}, exclude_targets={'cuda'}, numerical_grads=True)
+    check_function(y, dtype={'x': 'float64'}, exclude_targets={'cuda'}, numerical_grads=True)
+
+    x = sym.Variable("x")
+    b = sym.Variable("b")
+    w = sym.Variable("w")
+    y = sym.dense(data=x, bias=b, weight=w, units=4)
+    def _fwd_dense(x, w, b):
+        return np.dot(x, w.T) + b
+    check_function(y, _fwd_dense, shape={'x': (1,2)}, dtype={'x': 'float32'}, numerical_grads=False)
+    check_function(y, _fwd_dense, shape={'x': (1,2)}, dtype={'w': 'float64'}, numerical_grads=False)
+    _check_function_must_fail(y, _fwd_dense, shape={'x': (1,2)},
+                              dtype={'w': 'float64', 'b': 'float32'},
+                              numerical_grads=False,
+                              error=nnvm._base.NNVMError)
+    # fails because no shape is given
+    _check_function_must_fail(y, _fwd_dense, numerical_grads=False, error=ValueError)
+    # ok because type is float32 by default
+    check_function(y, _fwd_dense, shape={'x': (1,2)}, numerical_grads=False)
+
+def test_relu():
+    x = sym.Variable("x")
+    y = sym.relu(sym.leaky_relu(x, alpha=0.3) - 0.2)
+
+    def forward(x):
+        x = (x < 0) * x * 0.3 + (x > 0) * x - 0.2
+        return (x > 0) * x
+
+    def backward(head_grads, x):
+        sub = (x < 0) * x * 0.3 + (x > 0) * x - 0.2
+        return [(sub > 0).astype("float") * \
+            ((x > 0).astype("float") + 0.3 * (x <
0).astype("float")) * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, shape=shape) + +def test_prelu_nchw(): + x = sym.Variable("x") + a = sym.Variable("a") + y = sym.prelu(data=x, alpha=a) + + def forward(x, a): + return (x < 0) * (x * a.reshape(3, 1, 1)) + (x>=0) * x + + shape = {'x': (1, 3, 32, 32), 'a': (3,)} + check_function(y, forward, shape=shape) + +def test_prelu_nhwc(): + x = sym.Variable("x") + a = sym.Variable("a") + y = sym.prelu(data=x, alpha=a, axis=3) + + def forward(x, a): + return (x < 0) * (x * a.reshape(1, 1, 3)) + (x>=0) * x + + shape = {'x': (1, 32, 32, 3), 'a': (3,)} + check_function(y, forward, shape=shape) + +def test_sym_scalar_pow(): + scalar = 3 + x = sym.Variable("x") + y = x**scalar + + def forward(x): + return x**scalar + + def backward(head_grads, x): + return [scalar * x**(scalar - 1) * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, shape=shape) + + +def test_scalar_sym_pow(): + scalar = 3 + x = sym.Variable("x") + y = scalar**x + + def forward(x): + return scalar**x + + def backward(head_grads, x): + return [np.log(scalar) * scalar**x * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, shape=shape) + + +def test_exp(): + x = sym.Variable("x") + y = sym.exp(x) + + def forward(x): + return np.exp(x) + + def backward(head_grads, x): + return [np.exp(x) * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, shape=shape) + + +def test_log(): + x = sym.Variable("x") + y = sym.log(x) + + def forward(x): + return np.log(x) + + def backward(head_grads, x): + return [1. / x * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, in_range=(0.002, 2.0), shape=shape) + + +def test_tanh(): + x = sym.Variable("x") + y = sym.tanh(x) + + def forward(x): + return np.sinh(x) / np.cosh(x) + + def backward(head_grads, x): + y_np = forward(x) + return [(1 - y_np**2) * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, shape=shape) + + +def test_sigmoid(): + x = sym.Variable("x") + y = sym.sigmoid(x) + + def forward(x): + return 1.0 / (1.0 + np.exp(-x)) + + def backward(head_grads, x): + y_np = forward(x) + return [y_np *(1 - y_np) * head_grads] + + shape = {'x': (1, 3, 32, 32)} + check_function(y, forward, backward, shape=shape) + + +def test_softmax(): + x = sym.Variable("x") + y = sym.softmax(x) + + def forward(x): + return topi.testing.softmax_python(x) + + def backward(head_grads, x): + y = topi.testing.softmax_python(x) + grad = y * (head_grads - np.sum(y * head_grads, axis=1, keepdims=True)) + return [grad] + + check_function(y, forward, backward, + shape={'x': (10, 1000)}, numerical_grads=False) + check_function(y, forward, backward, + shape={'x': (2, 10)}) + + +def test_log_softmax(): + x = sym.Variable("x") + y = sym.log_softmax(x) + + def forward(x): + return topi.testing.log_softmax_python(x) + + def backward(head_grads, x): + y = topi.testing.log_softmax_python(x) + grad = head_grads - np.exp(y) * np.sum(head_grads, axis=1, keepdims=True) + return [grad] + + check_function(y, forward, backward, + shape={'x': (10, 1000)}, numerical_grads=False) + check_function(y, forward, backward, + shape={'x': (2, 10)}) + + +def test_dense(): + x = sym.Variable("x", shape=(10, 100)) + w = sym.Variable("dense_weight", shape=(3, 100)) + b = sym.Variable("dense_bias", shape=(3,)) + y = sym.dense(x, w, b, use_bias=True, units=3, name="dense") + y = sym.flatten(y) + + def forward(x, 
dense_weight, dense_bias):
+        return np.dot(x, dense_weight.T) + dense_bias
+    shape = {
+        'x': (10, 100),
+        'w': (3, 100),
+        'b': (3,)
+    }
+    # Don't check gradients on cuda because it doesn't yet support ewise after reduce
+    check_function(y, forward, shape=shape,
+                   exclude_targets={'cuda'}, numerical_grads=True)
+    check_function(y, forward, shape=shape,
+                   only_targets={'cuda'}, numerical_grads=False)
+
+
+def test_batchnorm():
+    x = sym.Variable("x")
+    beta = sym.Variable("beta")
+    gamma = sym.Variable("gamma")
+    moving_var = sym.Variable("moving_var")
+    moving_mean = sym.Variable("moving_mean")
+    eps = 1e-5
+    y = sym.batch_norm(
+        x, gamma, beta, moving_mean, moving_var, epsilon=eps)
+
+    def forward(x, gamma, beta, moving_mean, moving_var):
+        return (x - moving_mean) / np.sqrt(moving_var + eps) * gamma + beta
+
+    shape = {
+        'x': (10, 20),
+        'gamma': (20,),
+        'beta': (20,),
+        'moving_mean': (20,),
+        'moving_var': (20,)
+    }
+
+    check_function(y, forward, in_range=(0.001, 1.0), shape=shape)
+
+
+def verify_concatenate(ishape, axis):
+    x = [sym.Variable("x%d" % i, shape=ishape[i]) for i in range(len(ishape))]
+    y = sym.concatenate(*x, axis=axis) + 1
+
+    def forward(**kwargs):
+        return np.concatenate(list(kwargs.values()), axis=axis) + 1
+
+    check_function(y, forward)
+
+
+def test_concatenate():
+    verify_concatenate([(2, 3, 4), (1, 3, 4)], axis=0)
+    verify_concatenate([(2, 4), (2, 7)], axis=1)
+
+
+def verify_split(ishape, indices_or_sections, axis):
+    x = sym.Variable("x", shape=ishape)
+    y = sym.split(x, indices_or_sections=indices_or_sections, axis=axis)
+
+    def forward(x):
+        return np.split(x, indices_or_sections, axis=axis)
+
+    check_function(y, forward)
+
+
+def test_split():
+    verify_split((2, 3), 2, axis=0)
+    verify_split((5, 3), [3], axis=0)
+    verify_split((5, 9, 3), [3, 4], axis=1)
+
+def verify_strided_slice(ishape, begin, end, strideinp=None):
+    stride = strideinp if strideinp else [1, 1, 1]
+    x = sym.Variable("x", shape=ishape)
+    if strideinp:
+        y = sym.strided_slice(x, begin = begin, end = end, stride = stride) + 1
+    else:
+        y = sym.strided_slice(x, begin = begin, end = end) + 1
+
+    for i in range(len(begin), 3):
+        begin.append(0)
+    for i in range(len(end), 3):
+        end.append(ishape[i])
+
+    def test_forward(x):
+        return x[begin[0]:end[0]:stride[0],
+                 begin[1]:end[1]:stride[1], begin[2]:end[2]:stride[2]] + 1
+
+    check_function(y, test_forward)
+
+def test_strided_slice():
+    verify_strided_slice((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2])
+    verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1])
+    verify_strided_slice((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1])
+    verify_strided_slice((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2])
+    verify_strided_slice((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1])
+    verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3])
+    verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 1000, 3])
+    verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4])
+    verify_strided_slice((3, 4, 3), [1, 1], [4, 4, 3])
+
+def verify_take(src_shape, indices_src, axis=None):
+    src_dtype = "float32"
+    indices_dtype = "int32"
+    indices_src = np.array(indices_src, dtype=indices_dtype)
+    a = sym.Variable("a", shape=src_shape)
+    indices = sym.Variable("indices", shape=indices_src.shape)
+    y = sym.take(a, indices, axis=axis)
+
+    def forward(a, indices):
+        return np.take(a, indices=indices, axis=axis)
+
+    a_src = np.arange(np.prod(src_shape), dtype=src_dtype).reshape(src_shape)
+
+    check_function(y, forward,
+                   dtype={'a': src_dtype, 'indices': indices_dtype},
+                   values={'a': a_src,
'indices': indices_src}) + +def test_take(): + verify_take((4,), [1]) + verify_take((4,), [[0,1,2,3]]) + verify_take((3,3,3), [[11,25]]) + verify_take((4,), [[0,1],[2,3]]) + verify_take((4,), [1], 0) + verify_take((2,2), [[[1,0],[0,1]]], 0) + verify_take((2,2), [[[1,0],[0,1]]], 1) + verify_take((4,3,5,6), [[2,1,0,0]], -2) + + +def verify_squeeze(shape, axis): + x = sym.Variable("x") + if axis is not None: + y = sym.squeeze(x, axis=axis) + else: + y = sym.squeeze(x) + y = y + 1 + + def forward(x): + return np.squeeze(x, axis=axis) + 1 + + def backward(head_grads, x): + return [np.reshape(head_grads, x.shape)] + + check_function(y, forward, backward, shape=shape) + + +def test_squeeze(): + verify_squeeze((1, 3, 2, 5), None) + verify_squeeze((1, 3, 1), axis=0) + verify_squeeze((1, 3, 2, 5, 1), axis=-1) + + +def test_pad(): + x = sym.Variable("x") + y = sym.pad(x, pad_width=((0, 0), (0, 0), (0, 1), (2, 3)), pad_value=1.) + + def forward(x): + return np.pad(x, + pad_width=((0, 0), (0, 0), (0, 1), (2, 3)), + mode='constant', constant_values=1.) + + shape = {'x': (1, 3, 28, 28)} + check_function(y, forward, shape=shape) + +def verify_lrn(ishape, size, axis, bias, alpha, beta): + x = sym.Variable("x", shape=ishape) + y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta) + + def forward1(x): + return topi.testing.lrn_python(x, size, axis, bias, alpha, beta) + + check_function(y, forward1) + + def forward2(x): + y = forward1(x) + return (y > 0)*y + + #Checking LRN op followed by elementwise op relu + check_function(sym.relu(y), forward2, in_range={'x': (-10.0, 10.0)}) + +def verify_l2_normalize(ishape, eps, axis): + x = sym.Variable("x", shape=ishape) + y = sym.l2_normalize(x, eps=eps, axis=axis) + + def forward1(x): + return topi.testing.l2_normalize_python(x, eps, axis) + + check_function(y, forward1) + + def forward2(x): + y = forward1(x) + return (y > 0)*y + + #Checking L2 normalization op followed by elementwise op relu + check_function(sym.relu(y), forward2, in_range={'x': (-10.0, 10.0)}) + +def test_lrn(): + verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) + +def test_l2_normalize(): + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) + +def verify_gather_nd(src_shape, indices_src): + src_dtype = "float32" + indices_dtype = "int32" + indices_src = np.array(indices_src, dtype=indices_dtype) + a = sym.Variable("a", shape=src_shape) + indices = sym.Variable("indices", shape=indices_src.shape) + y = sym.gather_nd(a, indices) + + def forward(a, indices): + return topi.testing.gather_nd_python(a, indices) + + a_src = np.arange(np.prod(src_shape), dtype=src_dtype).reshape(src_shape) + + check_function(y, forward, + dtype={'a': src_dtype, 'indices': indices_dtype}, + values={'a': a_src, 'indices': indices_src}) + +def test_gather_nd(): + verify_gather_nd((4,), [[1]]) + verify_gather_nd((4,), [[1, 3, 2]]) + verify_gather_nd((2, 3), [[1]]) + verify_gather_nd((2, 3), [[1], [0]]) + verify_gather_nd((2, 3), [[1, 0], [0, 2]]) + verify_gather_nd((2, 3, 4), [[1, 0], [0, 2]]) + verify_gather_nd((2, 3, 4), [[1, 0], [0, 2], [3, 1]]) + verify_gather_nd((2, 3, 4), [[[1, 0], [0, 1]], [[0, 2], [1, 2]], + [[3, 1], [0, 2]]]) + verify_gather_nd((2, 3, 4, 5), [[1, 0], [0, 2]]) + verify_gather_nd((2, 3, 4, 5), [[1, 0], [2, 1], [3, 2], [4, 2]]) + +if __name__ == "__main__": + test_check_function() + test_split() + test_concatenate() + test_log_softmax() + test_batchnorm() + test_dense() + test_relu() + 
test_prelu_nchw() + test_prelu_nhwc() + test_sym_scalar_pow() + test_scalar_sym_pow() + test_exp() + test_log() + test_tanh() + test_sigmoid() + test_softmax() + test_squeeze() + test_pad() + test_take() + test_lrn() + test_l2_normalize() + test_strided_slice() + test_gather_nd() diff --git a/nnvm/tests/python/compiler/test_top_level2.py b/nnvm/tests/python/compiler/test_top_level2.py new file mode 100644 index 000000000000..b558428f0144 --- /dev/null +++ b/nnvm/tests/python/compiler/test_top_level2.py @@ -0,0 +1,362 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np + +import tvm +from tvm.contrib import graph_runtime +import topi +import topi.testing +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list + + +def test_conv2d(): + def run_test_conv2d(sym, dtype, dshape, kshape, oshape, shape_dict, padding): + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(sym, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) + m.run(x=data, y_weight=kernel, y_bias=bias) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + c_np = topi.testing.conv2d_nchw_python( + data.asnumpy(), kernel.asnumpy(), 1, padding) + c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + x = sym.Variable("x") + y = sym.conv2d(x, channels=10, kernel_size=(3,3), + name="y", padding=(1,1)) + dtype = "float32" + dshape = (1, 3, 18, 18) + kshape = (10, 3, 3, 3) + oshape = (1, 10, 18, 18) + shape_dict = {"x": dshape} + run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (1,1)) + + x = sym.Variable("x") + y = sym.conv2d(x, channels=10, kernel_size=(1,3), + name="y", padding=(0,1)) + dtype = "float32" + dshape = (1, 3, 224, 224) + kshape = (10, 3, 1, 3) + oshape = (1, 10, 224, 224) + shape_dict = {"x": dshape} + run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (0,1)) + + +def test_mixed_precision(): + x = sym.Variable("x") + dtype = "int8" + out_dtype="int32" + y = sym.conv2d(x, + channels=10, + kernel_size=(3,3), + name="y", + padding=(1,1), + use_bias=False, + out_dtype="int32") + dshape = (1, 3, 18, 18) + kshape = (10, 3, 3, 3) + oshape = (1, 10, 18, 18) + shape_dict = {"x": dshape} + dtype_dict = {"x": dtype} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(-127, 127, size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(-127, 127, size=kshape).astype(dtype)) + 
m.run(x=data, y_weight=kernel) + out = m.get_output(0, tvm.nd.empty(oshape, out_dtype)) + c_np = topi.testing.conv2d_nchw_python( + data.asnumpy().astype(out_dtype), + kernel.asnumpy().astype(out_dtype), 1, 1) + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + +def test_dilated_conv2d(): + dilation = 3 + x = sym.Variable("x") + y = sym.conv2d(x, channels=10, kernel_size=(3, 3), dilation=(dilation, dilation), + name="y", padding=(1, 1)) + dtype = "float32" + dshape = (1, 3, 18, 18) + kshape = (10, 3, 3, 3) + oshape = (1, 10, 14, 14) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) + kernel_np = np.random.uniform(size=kshape).astype(dtype) + kernel = tvm.nd.array(kernel_np) + dkernel_np = topi.testing.dilate_python(kernel_np, (1, 1, dilation, dilation)) + m.run(x=data, y_weight=kernel, y_bias=bias) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + c_np = topi.testing.conv2d_nchw_python( + data.asnumpy(), dkernel_np, 1, 1) + c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + +def test_grouped_conv2d_nchw(): + x = sym.Variable("x") + y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32, + name="y", padding=(1,1)) + dtype = "float32" + dshape = (1, 32, 18, 18) + kshape = (32, 1, 3, 3) + oshape = (1, 32, 18, 18) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) + m.run(x=data, y_weight=kernel, y_bias=bias) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + c_np = topi.testing.depthwise_conv2d_python_nchw( + data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') + c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + +def test_grouped_conv2d_nhwc(): + x = sym.Variable("x") + y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32, + name="y", padding=(1,1), layout="NHWC", kernel_layout ='HWOI') + dtype = "float32" + dshape = (1, 18, 18, 32) + kshape = (3, 3, 32, 1) + oshape = (1, 18, 18, 32) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + bias = tvm.nd.array(np.random.uniform(size=kshape[2]).astype(dtype)) + m.run(x=data, y_weight=kernel, y_bias=bias) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + c_np = topi.testing.depthwise_conv2d_python_nhwc( + data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') + c_np = c_np + bias.asnumpy().reshape(1, 1, kshape[2]) + tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) + + +def test_conv2d_transpose(): + x = sym.Variable("x") + y = sym.conv2d_transpose(x, channels=10, kernel_size=(3,3), strides=(2,2), + name="y", padding=(1,1), output_padding=(2,2)) + dtype = "float32" + dshape = (1, 3, 18, 18) + kshape = (3, 10, 3, 3) + oshape = (1, 10, 37, 37) + shape_dict = 
{"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) + bias = tvm.nd.array(np.random.uniform(size=kshape[1]).astype(dtype)) + m.run(x=data, y_weight=kernel, y_bias=bias) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + c_np = topi.testing.conv2d_transpose_nchw_python( + data.asnumpy(), kernel.asnumpy(), 2, 1) + c_np = c_np + bias.asnumpy().reshape(kshape[1], 1, 1) + d_np = np.zeros(shape=oshape) + d_np[:,:,0:c_np.shape[2],0:c_np.shape[3]] = c_np + tvm.testing.assert_allclose(out.asnumpy(), d_np, rtol=1e-5) + + +def test_max_pool2d(): + x = sym.Variable("x") + y = sym.max_pool2d(x, pool_size=(2,2), strides=(2,2), + padding=(0,0), name="y", ceil_mode=True) + dtype = "float32" + dshape = (1, 3, 28, 28) + oshape = (1, 3, 14, 14) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = np.max(data.asnumpy().reshape(1,3,14,2,14,2), axis=(3,5)) + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) + + +def test_avg_pool2d(): + x = sym.Variable("x") + y = sym.avg_pool2d(x, pool_size=(2,2), strides=(2,2), padding=(0,0), name="y") + dtype = "float32" + dshape = (1, 3, 28, 28) + oshape = (1, 3, 14, 14) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = np.mean(data.asnumpy().reshape(1,3,14,2,14,2), axis=(3,5)) + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) + + +def test_avg_pool2d_no_count_pad(): + kh, kw = (4, 4) + sh, sw = (2, 2) + ph, pw = (2, 2) + + x = sym.Variable("x") + y = sym.avg_pool2d(x, pool_size=(kh, kw), strides=(sw, sw), padding=(ph, pw), + name="y", count_include_pad=False) + dtype = "float32" + n = 1 + (ic, ih, iw) = (3, 28, 28) + (oc, oh, ow) = (3, 15, 15) + + a_np = np.random.uniform(low=0.001, size=(n, ic, ih, iw)).astype(dtype) + pad_np = np.zeros(shape=(n, ic, ih+2*ph, iw+2*pw)).astype(dtype) + no_zero = (range(n), range(ic), (range(ph, ih+ph)), (range(pw, iw+pw))) + pad_np[np.ix_(*no_zero)] = a_np + b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype) + + for i in range(oh): + for j in range(ow): + pad_count = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2,3)) + b_np[:,:,i,j] = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], + axis=(2,3)) / np.maximum(pad_count, 1) + b_np = np.maximum(b_np, 0.0) + shape_dict = {"x": (n, ic, ih, iw)} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(a_np) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty((n, oc, oh, ow), dtype)) + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) + + +def test_global_max_pool2d(): + x = sym.Variable("x") + y = sym.global_max_pool2d(x, name="y") + dtype = "float32" + dshape = (1, 1024, 7, 7) + oshape = (1, 1024, 1, 1) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = 
nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = np.max(data.asnumpy(), axis=(2,3), keepdims=True) + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) + + +def test_global_avg_pool2d(): + x = sym.Variable("x") + y = sym.global_avg_pool2d(x, name="y") + dtype = "float32" + dshape = (1, 1024, 7, 7) + oshape = (1, 1024, 1, 1) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = np.mean(data.asnumpy(), axis=(2,3), keepdims=True) + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) + + +def test_upsampling_nearest_neighbor(): + x = sym.Variable("x") + scale = 2 + y = sym.upsampling(x, scale=scale, name="y") + dtype = "float32" + dshape = (1, 16, 32, 32) + oshape = (1, 16, 32*scale, 32*scale) + shape_dict = {"x": dshape} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) + m = graph_runtime.create(graph, lib, ctx) + a_np = np.random.uniform(size=dshape).astype(dtype) + data = tvm.nd.array(a_np) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = topi.testing.upsampling_python(a_np, (scale, scale), "NCHW") + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) + +def test_upsampling_bilinear(): + x = sym.Variable("x") + scale = 2 + y = sym.upsampling(x, scale=scale, method="BILINEAR", name="y", layout="NCHW") + dtype = "float32" + dshape = (1, 4, 32, 32) + oshape = (1, 4, 32*scale, 32*scale) + shape_dict = {"x": dshape} + dtype_dict = {"x": dtype} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict) + m = graph_runtime.create(graph, lib, ctx) + a_np = np.random.uniform(size=dshape).astype(dtype) + data = tvm.nd.array(a_np) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = topi.testing.bilinear_resize_python(a_np, (32*scale, 32*scale), "NCHW", align_corners=False) + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5) + +def test_resize_bilinear(): + x = sym.Variable("x") + y = sym.resize(x, size=(60, 60), method="BILINEAR", name="y", layout="NHWC", align_corners=True) + dtype = "float32" + dshape = (1, 32, 32, 4) + oshape = (1, 60, 60, 4) + shape_dict = {"x": dshape} + dtype_dict = {"x": dtype} + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict) + m = graph_runtime.create(graph, lib, ctx) + a_np = np.random.uniform(size=dshape).astype(dtype) + data = tvm.nd.array(a_np) + m.run(x=data) + out = m.get_output(0, tvm.nd.empty(oshape, dtype)) + b_np = topi.testing.bilinear_resize_python(a_np, (60, 60), "NHWC") + tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5) + +if __name__ == "__main__": + test_mixed_precision() + test_conv2d() + test_dilated_conv2d() + test_grouped_conv2d_nchw() + test_grouped_conv2d_nhwc() + test_conv2d_transpose() + test_max_pool2d() + test_avg_pool2d() + test_avg_pool2d_no_count_pad() + test_global_max_pool2d() + test_global_avg_pool2d() + test_upsampling_nearest_neighbor() + test_upsampling_bilinear() + test_resize_bilinear() diff --git 
a/nnvm/tests/python/compiler/test_top_level3.py b/nnvm/tests/python/compiler/test_top_level3.py new file mode 100644 index 000000000000..c60f0450b30a --- /dev/null +++ b/nnvm/tests/python/compiler/test_top_level3.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import tvm +from tvm.contrib import graph_runtime +import topi.testing +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list +from nnvm.testing.check_computation import check_function + +def check_map(symfunc, np_func, np_backward=None, dtype="float32", rnd_min=-1, rnd_max=1): + x = sym.Variable("x") + y = symfunc(x) + shape = {'x': (1, 3, 32, 32)} + check_function(y, lambda x: np_func(x), np_backward, + dtype=dtype, shape=shape, in_range=(rnd_min, rnd_max)) + + +def test_floor(): + check_map(sym.floor, np.floor) + +def test_ceil(): + check_map(sym.ceil, np.ceil) + +def test_trunc(): + check_map(sym.trunc, np.trunc) + +def test_round(): + check_map(sym.round, np.round) + +def test_abs(): + check_map(sym.abs, np.abs) + check_map(sym.abs, np.abs, dtype = "int32") + check_map(sym.abs, np.abs, dtype = "int8") + +def test_shift(): + n = 3 + for dtype in ["int32", "int8"]: + check_map(lambda x : x >> n, lambda x: x >> n, dtype=dtype, rnd_min=-100, rnd_max=100) + check_map(lambda x : x << n, lambda x: x << n, dtype=dtype, rnd_min=-100, rnd_max=100) + +if __name__ == "__main__": + test_shift() + test_floor() + test_ceil() + test_round() + test_abs() + test_trunc() diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py new file mode 100644 index 000000000000..691163974470 --- /dev/null +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -0,0 +1,746 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
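+"""Unittest cases for level-4 operators: transpose, reduce, broadcast and vision ops."""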
+import math
+import numpy as np
+import tvm
+from tvm.contrib import graph_runtime
+import topi
+import nnvm.symbol as sym
+import nnvm.compiler
+from nnvm.testing.config import ctx_list
+from nnvm.testing.check_computation import check_function
+
+def verify_transpose(dshape, axes):
+    x = sym.Variable("x")
+    if axes:
+        y = sym.transpose(x, axes=axes)
+    else:
+        y = sym.transpose(x)
+    y = y + 1
+    dtype = "float32"
+    for target, ctx in ctx_list():
+        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
+        m = graph_runtime.create(graph, lib, ctx)
+        # set input
+        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
+        m.run(x=data)
+        out_np = np.transpose(data.asnumpy(), axes=axes) + 1
+        out = m.get_output(0, tvm.nd.empty(out_np.shape))
+        tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
+
+def verify_reduce_explicit(dshape, data, result, fsym, oshape=None, otype='float32', **kwargs):
+    """ Verify reduce operations by comparing the result with `result` """
+    x = sym.Variable("x")
+    y = fsym(x + 0, **kwargs)
+    for target, ctx in ctx_list():
+        # TODO(yuruofei): remove when cuda reduce schedule is done
+        if target == 'cuda' and fsym == sym.mean:
+            continue
+        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
+        m = graph_runtime.create(graph, lib, ctx)
+        # set input
+        m.run(x=data)
+        # oshape set to None means do not test the shape-correctness
+        oshape = result.shape if isinstance(result, np.ndarray) else ((1,) if oshape is None else oshape)
+        out = m.get_output(0, tvm.nd.empty(oshape, dtype=otype))
+        if isinstance(result, np.ndarray):
+            np.testing.assert_equal(out.asnumpy().shape, result.shape)
+            tvm.testing.assert_allclose(out.asnumpy(), result, atol=1e-5, rtol=1e-5)
+        else:
+            tvm_out = out.asnumpy()
+            assert abs(result - tvm_out) <= (1e-5 + 1e-5 * abs(tvm_out))
+
+def verify_reduce(dshape, fnp, fsym, oshape=None, otype='float32', **kwargs):
+    """ Verify reduce operations by generating data at random and calling numpy
+    version as reference """
+    data = np.random.uniform(size=dshape).astype(otype)
+    result = fnp(data + 0, **kwargs)
+    verify_reduce_explicit(dshape, data, result, fsym, oshape=oshape, otype=otype, **kwargs)
+
+def verify_collapse(dshape, target_shape, fnp):
+    x = sym.Variable("x", shape=dshape)
+    t = sym.Variable("t", shape=target_shape)
+    y = sym.collapse_sum(x, t)
+    dtype = "float32"
+    for target, ctx in ctx_list():
+        graph, lib, _ = nnvm.compiler.build(y, target,
+                                            {"x": dshape, "t": target_shape})
+        m = graph_runtime.create(graph, lib, ctx)
+        data = np.random.uniform(size=dshape).astype(dtype)
+        m.run(x=data)
+        out = m.get_output(0, tvm.nd.empty(target_shape))
+        out_np = fnp(data)
+        tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
+
+
+def test_transpose():
+    verify_transpose((2, 3, 4), (0, 2, 1))
+    verify_transpose((2, 3, 4), None)
+
+
+def test_reduce():
+
+    def _with_keepdims(func):
+        """ Wrapper around numpy's argmax/argmin with `keepdims` argument supported """
+        def wrapper(data, axis=None, keepdims=False):
+            if not keepdims:
+                return func(data, axis=axis)
+            else:
+                if axis is not None:
+                    out_shape = list(data.shape)
+                    out_shape[axis] = 1
+                else:
+                    out_shape = [1 for _ in range(len(data.shape))]
+                return func(data, axis=axis).reshape(out_shape)
+        return wrapper
+
+    verify_reduce((2, 3, 4), np.max, sym.max, axis=1, keepdims=True)
+    verify_reduce((4, 4, 3), np.min, sym.min, keepdims=True)
+    verify_reduce((4, 4, 3), np.sum, sym.sum, axis=(0, 2))
+    verify_reduce((4, 4, 3), np.sum, sym.sum)
+
verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1), keepdims=False) + verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 2), keepdims=False) + verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1), keepdims=True) + verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 2), keepdims=True) + verify_reduce((128, 24, 128), np.mean, sym.mean, keepdims=True) + verify_reduce((128, 24, 128), np.mean, sym.mean, keepdims=False) + verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1, 2), keepdims=True) + + data = np.array([[[1,2],[3,4]],[[3,44],[5,6]]], dtype=np.float32) + verify_reduce_explicit([2,2,2], data, np.array([[1,1],[1,0]]), sym.argmax, otype='int32', axis=[0,2], exclude=True) + verify_reduce_explicit([2,2,2], data, np.array([[0,0],[0,1]]), sym.argmin, otype='int32', axis=[0,2], exclude=True) + shape = [4, 4, 3] + for axis in [None, 0, 1, 2]: + for keepdims in [True,False]: + kwargs = { 'keepdims':keepdims } + if axis is None: + # FIXME: NNVM doesn't support setting `axis=None` explicitly. + kwargs.update({'oshape': [1,1,1] if keepdims else [1] }) + else: + kwargs.update({'axis': axis}) + kwargs.update({'oshape': shape[:axis]+[1]+shape[axis+1:] if keepdims else shape[:axis]+shape[axis+1:]}) + + verify_reduce(shape, _with_keepdims(np.argmax), sym.argmax, otype='int32', **kwargs) + verify_reduce(shape, _with_keepdims(np.argmin), sym.argmin, otype='int32', **kwargs) + + +def test_collapse(): + verify_collapse((2, 3, 4), (1,), lambda x: x.sum()) + verify_collapse((2, 3, 4), (1, 1, 1), lambda x: x.sum(keepdims=True)) + verify_collapse((2, 3, 4), (1, 1), lambda x: x.sum().reshape(1, 1)) + verify_collapse((2, 3, 4), (1, 4), lambda x: x.reshape(-1, 4).sum(0, keepdims=True)) + verify_collapse((2, 3, 4), (3, 4), lambda x: x.sum(0)) + verify_collapse((2, 3, 4), (1, 3, 4), lambda x: x.sum(0, keepdims=True)) + verify_collapse((2, 3, 4), (1, 1, 4), lambda x: x.sum((0, 1), keepdims=True)) + verify_collapse((2, 3, 4), (2, 1, 4), lambda x: x.sum(1, keepdims=True)) + verify_collapse((2, 3, 4), (2, 1, 1), lambda x: x.sum((1, 2), keepdims=True)) + verify_collapse((2, 3, 4), (2, 3, 1), lambda x: x.sum(2, keepdims=True)) + verify_collapse((2, 3, 4), (2, 3, 4), lambda x: x) + + +def verify_flip(ishape, axis): + x = sym.Variable("x") + y = sym.flip(x, axis=axis) + 1 + dtype = "float32" + x_np = np.random.uniform(size=ishape).astype(dtype) + res = np.flip(x_np, axis) + 1 + + for target, ctx in ctx_list(): + # set input + graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(res.shape)) + tvm.testing.assert_allclose(out.asnumpy(), res, atol=1e-5, rtol=1e-5) + + +def test_flip(): + verify_flip((3, 4, 3), 1) + verify_flip((3, 4, 3), 0) + verify_flip((3, 4, 3), 2) + verify_flip((3, 4, 3), -1) + verify_flip((3, 4, 3), -3) + verify_flip((3, 4, 3), -2) + + +def verify_reshape(dshape, oshape): + x = sym.Variable("x") + y = sym.reshape(x, shape=oshape) + y = y + 1 + dtype = "float32" + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + m = graph_runtime.create(graph, lib, ctx) + # set input + data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) + m.run(x=data) + out_np = data.asnumpy().reshape(oshape) + 1 + out = m.get_output(0, tvm.nd.empty(out_np.shape)) + tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + + +def test_reshape(): + verify_reshape((2, 3, 4), (-1, 2, 1)) + verify_reshape((2, 3, 4), (8, 3)) 
+ verify_reshape((4, 7), (2, 7, 2)) + + +def test_clip(): + x = sym.Variable("x") + a_min=0.2 + a_max=0.75 + y = sym.clip(x, a_min=a_min, a_max=a_max) + + def forward(x): + return np.clip(x, a_min=a_min, a_max=a_max) + + def backward(head_grads, x): + mask1 = np.greater_equal(x, a_min).astype("float") + mask2 = np.less_equal(x, a_max).astype("float") + return [head_grads * mask1 * mask2] + + shape = {'x': (3, 4, 5)} + check_function(y, forward, backward, shape=shape) + + +def test_broadcast(): + a = sym.Variable("a") + b = sym.Variable("b") + shape = {'a': (3, 4, 5), 'b': (1, 5)} + + def _collapse(g): + return g.reshape(-1, shape['b'][-1]).sum(0, keepdims=True) + + y = sym.broadcast_add(a, b) + def _backward_add(head_grads, a, b): + da = head_grads + db = _collapse(head_grads) + return da, db + check_function(y, lambda a, b: a + b, _backward_add, shape=shape) + + y = sym.broadcast_sub(a, b) + def _backward_sub(head_grads, a, b): + da = head_grads + db = -_collapse(head_grads) + return da, db + check_function(y, lambda a, b: a - b, _backward_sub, shape=shape) + + y = sym.broadcast_mul(a, b) + def _backward_mul(head_grads, a, b): + da = head_grads * b + db = _collapse(head_grads * a) + return da, db + check_function(y, lambda a, b: a * b, _backward_mul, shape=shape) + + y = sym.broadcast_div(a, b) + def _backward_div(head_grads, a, b): + da = head_grads / b + db = _collapse(- head_grads * a / b**2) + return da, db + # We avoid computing numerical derivatives too close to zero here + check_function(y, lambda a, b: a / b, _backward_div, shape=shape, numerical_grads=False) + check_function(y, lambda a, b: a / b, _backward_div, shape=shape, + in_range={'b': (0.1, 20)}) + + y = sym.broadcast_mod(a, b) + check_function(y, + lambda a, b: np.mod(a, b), + in_range={'a': (0.001, 100), 'b': (1, 100)}, dtype='int32', shape=shape) + + y = sym.broadcast_max(a, b) + check_function(y, lambda a, b: np.maximum(a, b), shape=shape) + + y = sym.broadcast_min(a, b) + check_function(y, lambda a, b: np.minimum(a, b), shape=shape) + + y = sym.broadcast_pow(a, b) + check_function(y, + lambda a, b: np.power(a, b), + in_range={'a': (0.001, 100), 'b': (0.001, 2)}, shape=shape) + + y = sym.broadcast_left_shift(a, b) + check_function(y, lambda a, b: a << b, dtype='int32', shape=shape) + + y = sym.broadcast_right_shift(a, b) + check_function(y, lambda a, b: a >> b, dtype='int32', shape=shape) + + y = sym.broadcast_greater(a, b) + check_function(y, lambda a, b: np.greater(a, b), shape=shape) + + y = sym.broadcast_less(a, b) + check_function(y, lambda a, b: np.less(a, b), shape=shape) + + y = sym.broadcast_equal(a, b) + check_function(y, lambda a, b: np.equal(a, b), + in_range={'a': (-2, 2), 'b': (-2, 2)}, dtype='int32', shape=shape) + + y = sym.broadcast_not_equal(a, b) + check_function(y, lambda a, b: np.not_equal(a, b), + in_range={'a': (-2, 2), 'b': (-2, 2)}, dtype='int32', shape=shape) + + y = sym.broadcast_greater_equal(a, b) + check_function(y, lambda a, b: np.greater_equal(a, b), + in_range={'a': (-3, 3), 'b': (-3, 3)}, dtype='int32', shape=shape) + + y = sym.broadcast_less_equal(a, b) + check_function(y, lambda a, b: np.less_equal(a, b), + in_range={'a': (-3, 3), 'b': (-3, 3)}, dtype='int32', shape=shape) + +def test_greater(): + l = sym.Variable("l") + r = sym.Variable("r") + y = sym.greater(l, r) + + def forward(l, r): + return np.greater(l, r).astype("float32") + + def backward(head_grads, l, r): + return {'l': np.zeros_like(l)} + + shape = {'l': (3, 4, 5), 'r': (3, 4, 5)} + check_function(y, forward, backward, 
shape=shape) + + +def test_less(): + l = sym.Variable("l") + r = sym.Variable("r") + y = sym.less(l, r) + + def forward(l, r): + return np.less(l, r).astype("float32") + + def backward(head_grads, l, r): + return {'l': np.zeros_like(l)} + + shape = {'l': (3, 4, 5), 'r': (3, 4, 5)} + check_function(y, forward, backward, shape=shape) + + +def test_reshape_like(): + x = sym.Variable("x") + y = sym.Variable("y") + z = sym.reshape_like(x, y) + + def forward(x, y): + return np.reshape(x, y.shape) + + def backward(head_grads, x, y): + return [np.reshape(head_grads, x.shape), + np.zeros_like(y)] + + shape = {'x': (3, 4, 5), 'y': (5, 4, 3)} + check_function(z, forward, backward, shape=shape) + + +def verify_expand_like(in_shape, out_shape, axis, exclude): + x = sym.Variable("x") + y = sym.Variable("y") + z = sym.expand_like(x, y, axis=axis, exclude=exclude) + + def forward(x, y): + odim = len(out_shape) + + if len(x.shape) == len(y.shape): + return np.broadcast_to(x, y.shape) + + if x.shape == (1,) and len(y.shape) == odim: + x = np.reshape(x, ()) + + real_axis = [i if i >= 0 else i + odim for i in axis] + real_axis = sorted(real_axis) + if exclude: + real_axis = list(set(range(odim)) - set(real_axis)) + for i in real_axis: + x = np.expand_dims(x, i).astype(x.dtype) + for i in real_axis: + x = np.concatenate([x]*out_shape[i], axis=i).astype(x.dtype) + + return x + + def backward(head_grads, x, y): + odim = len(out_shape) + + keepdims = len(x.shape) == len(y.shape) + + if x.shape == (1,) and len(y.shape) == odim: + x = np.reshape(x, ()) + + real_axis = [i if i >= 0 else i + odim for i in axis] + real_axis = sorted(real_axis) + if exclude: + real_axis = list(set(range(odim)) - set(real_axis)) + return [np.sum(head_grads, axis=tuple(real_axis), keepdims=keepdims), + np.zeros_like(y)] + + + shape = {'x': in_shape, 'y': out_shape} + check_function(z, forward, backward, shape=shape) + + +def test_expand_like(): + verify_expand_like((3,), (3, 2), [1], False) + verify_expand_like((2,), (2, 3), [1], False) + verify_expand_like((3, 4), (3, 5, 4), [1], False) + verify_expand_like((5, 7), (5, 6, 7, 8), [0, 2], True) + verify_expand_like((2, 3), (2, 3), [], False) + verify_expand_like((1,), (2, 3), [0, 1], False) + verify_expand_like((1, 1), (2, 3), [0, 1], False) + verify_expand_like((2, 1), (2, 3), [1], False) + verify_expand_like((1, 3), (2, 3), [0], False) + + +def verify_elemwise_sum(num_args): + s = [sym.Variable("input" + str(i)) for i in range(num_args)] + y = sym.elemwise_sum(*s, num_args=num_args) + + def forward(**inputs): + return np.sum(np.array(list(inputs.values())), axis=0) + + def backward(head_grads, **inputs): + return [head_grads] * num_args + + shape = {s[i]: (3, 4, 5) for i in range(num_args)} + check_function(y, forward, backward, shape=shape) + + +def test_elemwise_sum(): + verify_elemwise_sum(1) + verify_elemwise_sum(5) + verify_elemwise_sum(7) + + +def test_block_grad(): + x = sym.Variable("x") + y = sym.block_grad(x) + + def forward(x): + return x + + def backward(head_grads, x): + return [np.zeros_like(head_grads)] + + + shape = {'x': (3, 4, 5)} + # Numerical grad checking would fail for this function + check_function(y, forward, backward, shape=shape, numerical_grads=False) + + +def test_full(): + shape = (3, 4, 5) + value = 7 + dtype = "float32" + for target, ctx in ctx_list(): + data = sym.Variable("data", dtype=dtype) + # full_like + s = sym.full_like(data=data, fill_value=value, name="s") + graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape}) + m = 
graph_runtime.create(graph, lib, ctx) + m.run(data=np.random.uniform(size=shape).astype(dtype)) + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) + tvm.testing.assert_allclose( + out.asnumpy(), + np.full(shape, fill_value=value, dtype=dtype), + atol=1e-5, rtol=1e-5) + # ones_like + s = sym.ones_like(data=data, fill_value=value, name="s") + graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(data=np.random.uniform(size=shape).astype(dtype)) + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) + tvm.testing.assert_allclose( + out.asnumpy(), + np.full(shape, fill_value=1, dtype=dtype), + atol=1e-5, rtol=1e-5) + # zeros_like + s = sym.zeros_like(data=data, fill_value=value, name="s") + graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(data=np.random.uniform(size=shape).astype(dtype)) + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) + tvm.testing.assert_allclose( + out.asnumpy(), + np.full(shape, fill_value=0, dtype=dtype), + atol=1e-5, rtol=1e-5) + # full + s = sym.full(shape=shape, dtype=dtype, fill_value=value, name="s") + graph, lib, _ = nnvm.compiler.build(s, target) + m = graph_runtime.create(graph, lib, ctx) + m.run() + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) + tvm.testing.assert_allclose( + out.asnumpy(), + np.full(shape, fill_value=value, dtype=dtype), + atol=1e-5, rtol=1e-5) + # ones + s = sym.ones(shape=shape, dtype=dtype, name="s") + graph, lib, _ = nnvm.compiler.build(s, target) + m = graph_runtime.create(graph, lib, ctx) + m.run() + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) + tvm.testing.assert_allclose( + out.asnumpy(), + np.full(shape, fill_value=1, dtype=dtype), + atol=1e-5, rtol=1e-5) + # zeros + s = sym.zeros(shape=shape, dtype=dtype, name="s") + graph, lib, _ = nnvm.compiler.build(s, target) + m = graph_runtime.create(graph, lib, ctx) + m.run() + out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) + tvm.testing.assert_allclose( + out.asnumpy(), + np.full(shape, fill_value=0, dtype=dtype), + atol=1e-5, rtol=1e-5) + +def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), + offsets=(0.5, 0.5), clip=False): + data = sym.Variable("data") + out = sym.multibox_prior(data=data, sizes=sizes, ratios=ratios, steps=steps, + offsets=offsets, clip=clip) + + in_height = dshape[2] + in_width = dshape[3] + num_sizes = len(sizes) + num_ratios = len(ratios) + size_ratio_concat = sizes + ratios + steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height + steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width + offset_h = offsets[0] + offset_w = offsets[1] + + oshape = (1, in_height * in_width * (num_sizes + num_ratios - 1), 4) + dtype = "float32" + np_out = np.zeros(oshape).astype(dtype) + + for i in range(in_height): + center_h = (i + offset_h) * steps_h + for j in range(in_width): + center_w = (j + offset_w) * steps_w + for k in range(num_sizes + num_ratios - 1): + w = size_ratio_concat[k] * in_height / in_width / 2.0 if k < num_sizes else \ + size_ratio_concat[0] * in_height / in_width * math.sqrt(size_ratio_concat[k + 1]) / 2.0 + h = size_ratio_concat[k] / 2.0 if k < num_sizes else \ + size_ratio_concat[0] / math.sqrt(size_ratio_concat[k + 1]) / 2.0 + count = i * in_width * (num_sizes + num_ratios - 1) + j * (num_sizes + num_ratios - 1) + k + np_out[0][count][0] = center_w - w + np_out[0][count][1] = center_h - h + np_out[0][count][2] = center_w + w + np_out[0][count][3] = center_h + h + 
if clip: + np_out = np.clip(np_out, 0, 1) + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) + m.run() + tvm_out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) + tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) + +def test_multibox_prior(): + verify_multibox_prior((1, 3, 50, 50)) + verify_multibox_prior((1, 3, 224, 224), sizes=(0.5, 0.25, 0.1), ratios=(1, 2, 0.5)) + verify_multibox_prior((1, 32, 32, 32), sizes=(0.5, 0.25), ratios=(1, 2), steps=(2, 2), clip=True) + +def test_multibox_transform_loc(): + batch_size = 1 + num_anchors = 3 + num_classes = 3 + cls_prob = sym.Variable("cls_prob") + loc_preds = sym.Variable("loc_preds") + anchors = sym.Variable("anchors") + transform_loc_data, valid_count = sym.multibox_transform_loc(cls_prob=cls_prob, loc_pred=loc_preds, + anchor=anchors) + out = sym.non_max_suppression(data=transform_loc_data, valid_count=valid_count, return_indices=False) + + # Manually create test case + np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]]) + np_loc_preds = np.array([[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]]) + np_anchors = np.array([[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]]) + + expected_np_out = np.array([[[1, 0.69999999, 0, 0, 0.10818365, 0.10008108], + [0, 0.44999999, 1, 1, 1, 1], + [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]]) + + dtype = "float32" + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes), + "loc_preds": (batch_size, num_anchors * 4), + "anchors": (1, num_anchors, 4)}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)}) + m.run() + tvm_out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) + tvm.testing.assert_allclose(tvm_out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) + +def test_non_max_suppression(): + dshape = (1, 5, 6) + data = sym.Variable("data") + valid_count = sym.Variable("valid_count", dtype="int32") + iou_threshold = 0.7 + force_suppress = True + top_k = 2 + out = sym.non_max_suppression(data=data, valid_count=valid_count, return_indices=False, + iou_threshold=iou_threshold, force_suppress=force_suppress, top_k=top_k) + + np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], + [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], + [1, 0.5, 100, 60, 70, 110]]]).astype("float32") + np_valid_count = np.array([4]).astype("int32") + np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], + [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], + [-1, -1, -1, -1, -1, -1]]]) + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)}, + dtype={"data": "float32", "valid_count": "int32"}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"data": np_data, "valid_count": np_valid_count}) + m.run() + tvm_out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32")) + tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) + +def np_slice_like(np_data, np_shape_like, axis=[]): + begin_idx = [0 for _ in np_data.shape] + end_idx = list(np_data.shape) + if len(axis) > 0: + for i in axis: + if i < 0: + i = 
len(np_data.shape) + i + end_idx[i] = np_shape_like.shape[i] + else: + for i in range(len(np_data.shape)): + if i < len(np_shape_like.shape): + end_idx[i] = np_shape_like.shape[i] + slice_idx = [] + for b, e in zip(begin_idx, end_idx): + slice_idx.append(slice(b, e)) + np_result = np_data[slice_idx] + return np_result + +def verify_slice_like(np_data, np_shape_like, axis=[]): + dtype = "float32" + np_data = np_data.astype(dtype) + np_shape_like = np_shape_like.astype(dtype) + np_result = np_slice_like(np_data, np_shape_like, axis) + data1 = sym.Variable("data1") + data2 = sym.Variable("data2") + net = sym.slice_like(data=data1, slice_like=data2, axis=axis) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(net, target, {"data1": np_data.shape, + "data2": np_shape_like.shape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"data1": np_data, "data2": np_shape_like}) + m.run() + out = m.get_output(0, tvm.nd.empty(np_result.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) + +def test_slice_like(): + np_data = np.random.uniform(size=(3, 4, 5)) + np_shape_like = np.random.uniform(size=(1, 2, 3)) + verify_slice_like(np_data, np_shape_like) + np_data = np.random.uniform(size=(3, 4, 5)) + np_shape_like = np.random.uniform(size=(1, 2)) + verify_slice_like(np_data, np_shape_like) + np_data = np.random.uniform(size=(3, 4, 5)) + np_shape_like = np.random.uniform(size=(1, 2, 3)) + axis = (1, 2) + verify_slice_like(np_data, np_shape_like, axis) + np_data = np.random.uniform(size=(3, 4, 5)) + np_shape_like = np.random.uniform(size=(1, 2, 3)) + axis = (-1, -3) + verify_slice_like(np_data, np_shape_like, axis) + np_data = np.random.uniform(size=(1, 3, 224, 224)) + np_shape_like = np.random.uniform(size=(1, 3, 112, 112)) + axis = (2, 3) + verify_slice_like(np_data, np_shape_like, axis) + +def verify_where(condition, x, y): + dtype = "float32" + if len(condition.shape) == 1: + np_out = np.array([xv if c else yv for (c,xv,yv) in zip(condition,x,y)]) + else: + np_out = np.where(condition, x, y) + cond_var = sym.Variable("condition") + x_var = sym.Variable("x") + y_var = sym.Variable("y") + net = sym.where(cond_var, x_var, y_var) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(net, target, {"condition": condition.shape, + "x": x.shape, "y": y.shape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"condition": condition, "x": x, "y": y}) + m.run() + out = m.get_output(0, tvm.nd.empty(x.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) + +def test_where(): + shape = (13, 8, 224, 224, 6) + condition = np.random.uniform(low=-1, high=1, size=shape).astype("float32") + x = np.random.uniform(size=shape).astype("float32") + y = np.random.uniform(size=shape).astype("float32") + verify_where(condition, x, y) + condition = np.random.uniform(low=-1, high=1, size=(shape[0],)).astype("float32") + x = np.random.uniform(size=shape).astype("float32") + y = np.random.uniform(size=shape).astype("float32") + verify_where(condition, x, y) + +def test_argmax(): + dshape = (204800, 2) + oshape = (1, 320, 640) + + dtype = "float32" + x = sym.Variable("x", shape=dshape, dtype=dtype) + x = sym.reshape(x, shape=(1, 320, 640, 2)) + x = sym.transpose(x, axes=(0, 3, 1, 2)) + y = sym.argmax(x, axis=1) + target_str = "llvm" + target = tvm.target.create(target_str) + ctx = tvm.context(target_str, 0) + with nnvm.compiler.build_config(opt_level=2): + graph, lib, _ = nnvm.compiler.build(y, 
target, {"x": dshape}) + m = graph_runtime.create(graph, lib, ctx) + data = np.random.uniform(size=dshape).astype(dtype) + m.run(x=data) + np_reshape = np.reshape(data, (1, 320, 640, 2)) + np_transpose = np.transpose(np_reshape, axes=(0, 3, 1, 2)) + np_argmax = np.argmax(np_transpose, axis=1) + out = m.get_output(0) + np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5) + +if __name__ == "__main__": + test_reshape() + test_broadcast() + test_reduce() + test_collapse() + test_transpose() + test_clip() + test_greater() + test_less() + test_reshape_like() + test_expand_like() + test_elemwise_sum() + test_block_grad() + test_full() + test_flip() + test_multibox_prior() + test_multibox_transform_loc() + test_non_max_suppression() + test_slice_like() + test_where() + test_argmax() + print(nnvm.compiler.engine.dump()) diff --git a/nnvm/tests/python/frontend/caffe2/model_zoo/__init__.py b/nnvm/tests/python/frontend/caffe2/model_zoo/__init__.py new file mode 100644 index 000000000000..2dc1f08f6ec9 --- /dev/null +++ b/nnvm/tests/python/frontend/caffe2/model_zoo/__init__.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Store for caffe2 examples and common models.""" +from __future__ import absolute_import as _abs +import os +import importlib + +models = [ + 'squeezenet', + 'resnet50', + 'vgg19', +] + +# skip download if model exist +for model in models: + try: + locals()['c2_' + model] = importlib.import_module('caffe2.python.models.' + model) + except ImportError: + os.system("python -m caffe2.python.models.download -i -f " + model) + locals()['c2_' + model] = importlib.import_module('caffe2.python.models.' + model) diff --git a/nnvm/tests/python/frontend/caffe2/model_zoo/squeezenet.py b/nnvm/tests/python/frontend/caffe2/model_zoo/squeezenet.py new file mode 100644 index 000000000000..2de2d1075494 --- /dev/null +++ b/nnvm/tests/python/frontend/caffe2/model_zoo/squeezenet.py @@ -0,0 +1,118 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
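The caffe2 model_zoo __init__ above retries the import after shelling out to the model downloader. A minimal equivalent sketch (hypothetical helper; subprocess.check_call is used instead of os.system so a failed download raises rather than being silently ignored):

    import importlib
    import subprocess

    def load_caffe2_model(name):
        """Import caffe2.python.models.<name>, downloading it on first use."""
        module = 'caffe2.python.models.' + name
        try:
            return importlib.import_module(module)
        except ImportError:
            subprocess.check_call(
                ['python', '-m', 'caffe2.python.models.download', '-i', '-f', name])
            return importlib.import_module(module)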
+
+# coding: utf-8
+# pylint: disable=unused-argument
+
+"""
+Symbol of SqueezeNet
+
+Reference:
+Iandola, Forrest N., et al.
+"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size." (2016).
+"""
+
+from nnvm import symbol as sym
+from nnvm.testing.utils import create_workload
+
+# Helpers
+def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
+    net = _make_fire_conv(net, squeeze_channels, 1, 0)
+
+    left = _make_fire_conv(net, expand1x1_channels, 1, 0)
+    right = _make_fire_conv(net, expand3x3_channels, 3, 1)
+    # NOTE: assumes NCHW layout here
+    net = sym.concatenate(left, right, axis=1)
+
+    return net
+
+def _make_fire_conv(net, channels, kernel_size, padding=0):
+    net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size),
+                     padding=(padding, padding))
+    net = sym.relu(net)
+    return net
+
+# Net
+def get_symbol(num_classes, version, **kwargs):
+    """Get symbol of SqueezeNet
+
+    Parameters
+    ----------
+    num_classes: int
+        The number of classification results
+
+    version : str, optional
+        SqueezeNet version; only "1.1" is currently supported
+    """
+    assert version == '1.1', ("Unsupported SqueezeNet version {version}: "
+                              "1.1 expected".format(version=version))
+    net = sym.Variable("data")
+
+    net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2))
+    net = sym.relu(net)
+    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
+    net = _make_fire(net, 16, 64, 64)
+    net = _make_fire(net, 16, 64, 64)
+    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
+    net = _make_fire(net, 32, 128, 128)
+    net = _make_fire(net, 32, 128, 128)
+    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
+    net = _make_fire(net, 48, 192, 192)
+    net = _make_fire(net, 48, 192, 192)
+    net = _make_fire(net, 64, 256, 256)
+    net = _make_fire(net, 64, 256, 256)
+
+    net = sym.dropout(net, rate=0.5)
+    net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1))
+    net = sym.relu(net)
+    net = sym.global_avg_pool2d(net)
+    return sym.softmax(net, axis=1)
+
+def get_workload(batch_size=1, num_classes=1000, version='1.1',
+                 image_shape=(3, 224, 224), dtype="float32", **kwargs):
+    """Get benchmark workload for SqueezeNet
+
+    Parameters
+    ----------
+    batch_size : int
+        The batch size used in the model
+
+    num_classes : int, optional
+        Number of classes
+
+    version : str, optional
+        SqueezeNet version; only "1.1" is currently supported
+
+    image_shape : tuple, optional
+        The input image shape
+
+    dtype : str, optional
+        The data type
+
+    kwargs : dict
+        Extra arguments
+
+    Returns
+    -------
+    net : nnvm.Symbol
+        The computational graph
+
+    params : dict of str to NDArray
+        The parameters.
+    """
+    net = get_symbol(num_classes=num_classes, version=version, **kwargs)
+    return create_workload(net, batch_size, image_shape, dtype)
diff --git a/nnvm/tests/python/frontend/caffe2/test_forward.py b/nnvm/tests/python/frontend/caffe2/test_forward.py
new file mode 100644
index 000000000000..2a216314ba1a
--- /dev/null
+++ b/nnvm/tests/python/frontend/caffe2/test_forward.py
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import nnvm +import tvm +from tvm.contrib import graph_runtime +from nnvm.testing.config import ctx_list +from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19 + +from caffe2.python import workspace + + +def get_tvm_output(model, + input_data, + target, + ctx, + output_shape, + output_dtype='float32'): + """ Generic function to execute and get tvm output""" + sym, params = nnvm.frontend.from_caffe2(model.init_net, model.predict_net) + + # supporting multiple inputs in caffe2 in a bit tricky, + # because the input names can appear at the beginning or end of model.predict_net.external_input + assert isinstance(input_data, np.ndarray) + + # here we use the first input blob to the first op to get the input name + input_names = model.predict_net.op[0].input[0] + shape_dict = {input_names: input_data.shape} + dtype_dict = {input_names: input_data.dtype} + + graph, lib, params = nnvm.compiler.build( + sym, target, shape=shape_dict, dtype=dtype_dict, params=params) + + m = graph_runtime.create(graph, lib, ctx) + + # set inputs + m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype))) + m.set_input(**params) + + # execute + m.run() + + # get outputs + if isinstance(output_shape, list) and isinstance(output_dtype, list): + tvm_output_list = [] + for i, s in enumerate(output_shape): + tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i])) + tvm_output_list.append(tvm_output.asnumpy()) + return tvm_output_list + else: + tvm_output = m.get_output(0, tvm.nd.empty((output_shape), + output_dtype)) + return tvm_output.asnumpy() + + +def get_caffe2_output(model, x, dtype='float32'): + workspace.RunNetOnce(model.init_net) + + input_blob = model.predict_net.op[0].input[0] + workspace.FeedBlob(input_blob, x.astype(dtype)) + workspace.RunNetOnce(model.predict_net) + + output_blob = model.predict_net.external_output[0] + c2_output = workspace.FetchBlob(output_blob) + return c2_output + + +def verify_caffe2_forward_impl(model, data_shape, out_shape): + dtype = 'float32' + data = np.random.uniform(size=data_shape).astype(dtype) + c2_out = get_caffe2_output(model, data, dtype) + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, data, target, ctx, out_shape, dtype) + tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5) + + +def test_squeezenet1_1(): + verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224), + (1, 1000, 1, 1)) + + +def test_resnet50(): + verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224), + (1, 1000)) + + +def test_vgg19(): + verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000)) + + +if __name__ == '__main__': + test_squeezenet1_1() + test_resnet50() + test_vgg19() diff --git a/nnvm/tests/python/frontend/caffe2/test_graph.py b/nnvm/tests/python/frontend/caffe2/test_graph.py new file mode 100644 index 000000000000..c8203815e6d0 --- /dev/null +++ b/nnvm/tests/python/frontend/caffe2/test_graph.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test graph equality of caffe2 models.""" +import nnvm +from nnvm.compiler import graph_util, graph_attr +from model_zoo import c2_squeezenet, squeezenet + +def compare_graph(init, predict, nnvm_sym, ishape): + caffe2_sym, params = nnvm.frontend.from_caffe2(init, predict) + g1 = nnvm.graph.create(caffe2_sym) + g2 = nnvm.graph.create(nnvm_sym) + input_name = predict.external_input[0] + ishapes = {input_name: ishape} + graph_attr.set_shape_inputs(g1, ishapes) + graph_attr.set_shape_inputs(g2, ishapes) + g1 = g1.apply("InferShape").apply("SimplifyInference") + g2 = g2.apply("InferShape").apply("SimplifyInference") + graph_util.check_graph_equal(g1, g2) + +def test_squeeze_net(): + symbol, params = squeezenet.get_workload(version='1.1') + compare_graph(c2_squeezenet.init_net, c2_squeezenet.predict_net, symbol, ishape=(1, 3, 224, 224)) + + +if __name__ == '__main__': + test_squeeze_net() diff --git a/nnvm/tests/python/frontend/coreml/model_zoo/.gitignore b/nnvm/tests/python/frontend/coreml/model_zoo/.gitignore new file mode 100644 index 000000000000..4242a1b2e2e0 --- /dev/null +++ b/nnvm/tests/python/frontend/coreml/model_zoo/.gitignore @@ -0,0 +1,3 @@ +*.mlmodel +*.jpg +*.png diff --git a/nnvm/tests/python/frontend/coreml/model_zoo/__init__.py b/nnvm/tests/python/frontend/coreml/model_zoo/__init__.py new file mode 100644 index 000000000000..2dbaf2b10483 --- /dev/null +++ b/nnvm/tests/python/frontend/coreml/model_zoo/__init__.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
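The get_cat_image helper below resizes to 224x224 and converts PIL's HWC layout into the NCHW batch the frontends expect. The same conversion in isolation (a sketch; works for any HWC image array):

    import numpy as np

    def hwc_to_nchw(img_hwc):
        """(H, W, C) -> (1, C, H, W) batch of one."""
        chw = np.transpose(np.asarray(img_hwc), (2, 0, 1))
        return chw[np.newaxis, :]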
+ +import os +from PIL import Image +import numpy as np +from tvm.contrib.download import download_testdata + +def get_mobilenet(): + url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel' + dst = 'mobilenet.mlmodel' + real_dst = download_testdata(url, dst, module='coreml') + return real_dst + +def get_resnet50(): + url = 'https://docs-assets.developer.apple.com/coreml/models/Resnet50.mlmodel' + dst = 'resnet50.mlmodel' + real_dst = download_testdata(url, dst, module='coreml') + return real_dst + +def get_cat_image(): + url = 'https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png' + dst = 'cat.png' + real_dst = download_testdata(url, dst, module='data') + img = Image.open(real_dst).resize((224, 224)) + img = np.transpose(img, (2, 0, 1))[np.newaxis, :] + return np.asarray(img) diff --git a/nnvm/tests/python/frontend/coreml/test_forward.py b/nnvm/tests/python/frontend/coreml/test_forward.py new file mode 100644 index 000000000000..7a9f294f4359 --- /dev/null +++ b/nnvm/tests/python/frontend/coreml/test_forward.py @@ -0,0 +1,370 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
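The model-zoo helpers above lean on download_testdata, which caches each file under a per-module test-data directory and returns the local path, so repeated runs skip the network. Minimal usage sketch (the URL is the same MobileNet asset used above; the printed path is an assumption about the default cache location):

    from tvm.contrib.download import download_testdata

    url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel'
    path = download_testdata(url, 'mobilenet.mlmodel', module='coreml')
    print(path)  # e.g. ~/.tvm_test_data/coreml/mobilenet.mlmodel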
+import numpy as np + +from coremltools.models.neural_network import NeuralNetworkBuilder +from coremltools.models import datatypes + +import tvm +from tvm.contrib import graph_runtime +import topi +import topi.testing +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list +from nnvm import frontend +import coremltools as cm +import model_zoo + +def get_tvm_output(symbol, x, params, target, ctx, + out_shape=(1, 1000), input_name='image', dtype='float32'): + shape_dict = {input_name : x.shape} + with nnvm.compiler.build_config(opt_level=2): + graph, lib, params = nnvm.compiler.build(symbol, target, shape_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + # set inputs + m.set_input(input_name, tvm.nd.array(x.astype(dtype))) + m.set_input(**params) + m.run() + # get outputs + out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) + return out.asnumpy() + +def run_model_checkonly(model_file, model_name=''): + model = cm.models.MLModel(model_file) + sym, params = nnvm.frontend.from_coreml(model) + x = model_zoo.get_cat_image() + for target, ctx in ctx_list(): + tvm_output = get_tvm_output(sym, x, params, target, ctx) + print(target, ctx, model_name, 'prediction id: ', np.argmax(tvm_output.flat)) + +def test_mobilenet_checkonly(): + model_file = model_zoo.get_mobilenet() + run_model_checkonly(model_file, 'mobilenet') + +def test_resnet50_checkonly(): + model_file = model_zoo.get_resnet50() + run_model_checkonly(model_file, 'resnet50') + +def run_tvm_graph(graph_def, input_data, input_name, output_shape, output_dtype='float32'): + """ Generic function to compile on nnvm and execute on tvm """ + + sym, params = nnvm.frontend.from_coreml(graph_def) + target = 'llvm' + if isinstance(input_data, list): + shape_dict = {} + dtype_dict = {} + for i, e in enumerate(input_name): + shape_dict[e] = input_data[i].shape + dtype_dict[e] = input_data[i].dtype + else: + shape_dict = {input_name: input_data.shape} + dtype_dict = {input_name: input_data.dtype} + + graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, + dtype=dtype_dict, params=params) + + ctx = tvm.cpu(0) + from tvm.contrib import graph_runtime + m = graph_runtime.create(graph, lib, ctx) + # set inputs + if isinstance(input_data, list): + for i, e in enumerate(input_name): + m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype))) + else: + m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype))) + + m.set_input(**params) + # execute + m.run() + # get outputs + if isinstance(output_shape, list) and isinstance(output_dtype, list): + tvm_output_list = [] + for i, s in enumerate(output_shape): + tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i])) + tvm_output_list.append(tvm_output.asnumpy()) + return tvm_output_list + else: + tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype)) + return tvm_output.asnumpy() + +def verify_AddLayerParams(input_dim, alpha=2): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.add(a_np1, a_np2) + alpha + inputs = [('input1', datatypes.Array(*input_dim)), + ('input2', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(inputs, output) + builder.add_elementwise(name='Add', + alpha=alpha, + input_names=['input1', 'input2'], + output_name='output', + mode='ADD') + model = cm.models.MLModel(builder.spec) + for target, ctx in 
ctx_list(): + out = run_tvm_graph(model, + [a_np1, a_np2], + ['input1', 'input2'], + b_np.shape, + dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_AddLayerParams(): + verify_AddLayerParams((1, 2, 2), 0) + verify_AddLayerParams((1, 2, 2), 1) + verify_AddLayerParams((1, 3, 3), 2) + +def verify_MultiplyLayerParams(input_dim, alpha): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.multiply(a_np1, a_np2) * alpha + inputs = [('input1', datatypes.Array(*input_dim)), + ('input2', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(inputs, output) + builder.add_elementwise(name='Mul', + alpha=alpha, + input_names=['input1', 'input2'], + output_name='output', + mode='MULTIPLY') + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, + [a_np1, a_np2], + ['input1', 'input2'], + b_np.shape, + dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_MultiplyLayerParams(): + verify_MultiplyLayerParams((1, 2, 2), 0) + verify_MultiplyLayerParams((1, 2, 2), 1) + verify_MultiplyLayerParams((1, 3, 3), 2) + +def verify_ConcatLayerParams(input1_dim, input2_dim): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input1_dim).astype(dtype) + a_np2 = np.random.uniform(size=input2_dim).astype(dtype) + + b_np = np.concatenate((a_np1, a_np2), axis=1) + inputs = [('input1', datatypes.Array(*input1_dim)), + ('input2', datatypes.Array(*input2_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(inputs, output) + builder.add_elementwise(name='Concate', + input_names=['input1', 'input2'], + output_name='output', + mode='CONCAT') + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, + [a_np1, a_np2], + ['input1', 'input2'], + b_np.shape, + dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_ConcatLayerParams(): + verify_ConcatLayerParams((1, 1, 2, 2), (1, 2, 2, 2)) + verify_ConcatLayerParams((1, 2, 4, 4), (1, 3, 4, 4)) + +def verify_UpsampleLayerParams(input_dim, scale, mode): + dtype = "float32" + + a_np = np.full(input_dim, 1, dtype=dtype) + if mode == 'NN': + b_np = topi.testing.upsampling_python(a_np, (scale, scale)) + else: + new_h = input_dim[2] * scale + new_w = input_dim[3] * scale + b_np = topi.testing.bilinear_resize_python(a_np, (new_h, new_w), 'NCHW') + + input = [('input', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(input, output) + builder.add_upsample(name='Upsample', + scaling_factor_h=scale, + scaling_factor_w=scale, + mode=mode, + input_name='input', + output_name='output') + + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_UpsampleLayerParams(): + verify_UpsampleLayerParams((1, 16, 32, 32), 2, 'NN') + verify_UpsampleLayerParams((1, 4, 6, 6), 3, 'BILINEAR') + +def verify_l2_normalize(input_dim, eps): + dtype = "float32" + + a_np = np.random.uniform(size=input_dim).astype(dtype) + b_np = topi.testing.l2_normalize_python(a_np, eps, 1) + + input = [('input', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(input, output) + 
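# (Reference math assumed here: topi.testing.l2_normalize_python with axis=1
# should reduce to the NumPy expression
#   a_np / np.sqrt(np.maximum(np.sum(a_np ** 2, axis=1, keepdims=True), eps))
# i.e. L2 normalization over channels, with eps as a lower bound on the
# squared norm.)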
builder.add_l2_normalize(name='L2', epsilon=eps, input_name='input', output_name='output') + + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_l2_normalize(): + verify_l2_normalize((1, 3, 20, 20), 0.001) + +def verify_lrn(input_dim, size, bias, alpha, beta): + dtype = "float32" + axis=1 + a_np = np.random.uniform(size=input_dim).astype(dtype) + b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) + + input = [('input', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(input, output) + builder.add_lrn(name='LRN', + input_name='input', + output_name='output', + alpha=alpha, + beta=beta, + k=bias, + local_size=size) + + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_lrn(): + verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5) + +def verify_average(input_dim1, input_dim2, axis=0): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim1).astype(dtype) + a_np2 = np.random.uniform(size=input_dim2).astype(dtype) + + b_np = np.mean((a_np1, a_np2), axis=axis) + + inputs = [('input1', datatypes.Array(*input_dim1)), + ('input2', datatypes.Array(*input_dim2))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(inputs, output) + builder.add_elementwise(name='MEAN', + input_names=['input1', 'input2'], + output_name='output', + mode='AVE') + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, + [a_np1, a_np2], + ['input1', 'input2'], + b_np.shape, + dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_average(): + verify_average((1, 3, 20, 20), (1, 3, 20, 20)) + verify_average((3, 20, 20), (1, 3, 20, 20)) + verify_average((20, 20), (1, 3, 20, 20)) + +def verify_max(input_dim): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + a_np3 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.max((a_np1, a_np2, a_np3), axis=0) + + inputs = [('input1', datatypes.Array(*input_dim)), + ('input2', datatypes.Array(*input_dim)), + ('input3', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(inputs, output) + builder.add_elementwise(name='Max', + input_names=['input1', 'input2', 'input3'], + output_name='output', + mode='MAX') + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, + [a_np1, a_np2, a_np3], + ['input1', 'input2', 'input3'], + b_np.shape, + dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_max(): + verify_max((1, 3, 20, 20)) + verify_max((20, 20)) + +def verify_min(input_dim): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + a_np3 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.min((a_np1, a_np2, a_np3), axis=0) + + inputs = [('input1', datatypes.Array(*input_dim)), + ('input2', datatypes.Array(*input_dim)), + ('input3', datatypes.Array(*input_dim))] + output = [('output', datatypes.Array(*b_np.shape))] + builder = NeuralNetworkBuilder(inputs, output) + 
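# (The np.min reference above is elementwise over the stacked inputs, so it
# is equivalent to the pairwise form the CoreML MIN mode is expected to
# compute:
#   np.minimum(a_np1, np.minimum(a_np2, a_np3))
# )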
builder.add_elementwise(name='Min', + input_names=['input1', 'input2', 'input3'], + output_name='output', + mode='MIN') + model = cm.models.MLModel(builder.spec) + for target, ctx in ctx_list(): + out = run_tvm_graph(model, + [a_np1, a_np2, a_np3], + ['input1', 'input2', 'input3'], + b_np.shape, + dtype) + tvm.testing.assert_allclose(out, b_np, rtol=1e-5) + +def test_forward_min(): + verify_min((1, 3, 20, 20)) + verify_min((20, 20)) + +if __name__ == '__main__': + test_mobilenet_checkonly() + test_resnet50_checkonly() + test_forward_AddLayerParams() + test_forward_ConcatLayerParams() + test_forward_MultiplyLayerParams() + test_forward_UpsampleLayerParams() + test_forward_l2_normalize() + test_forward_lrn() + test_forward_average() + test_forward_max() + test_forward_min() diff --git a/nnvm/tests/python/frontend/darknet/test_forward.py b/nnvm/tests/python/frontend/darknet/test_forward.py new file mode 100644 index 000000000000..4e62ff2e1f33 --- /dev/null +++ b/nnvm/tests/python/frontend/darknet/test_forward.py @@ -0,0 +1,525 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Compile Darknet Models +===================== +This article is a test script to test darknet models with NNVM. +All the required models and libraries will be downloaded from the internet +by the script. 
+""" +import numpy as np +import tvm +from tvm.contrib import graph_runtime +from tvm.contrib.download import download_testdata +download_testdata.__test__ = False +from nnvm import frontend +from tvm.relay.testing.darknet import LAYERTYPE +from tvm.relay.testing.darknet import __darknetffi__ +import nnvm.compiler + +DARKNET_LIB = 'libdarknet2.0.so' +DARKNETLIB_URL = 'https://github.com/siju-samuel/darknet/blob/master/lib/' \ + + DARKNET_LIB + '?raw=true' +LIB = __darknetffi__.dlopen(download_testdata(DARKNETLIB_URL, DARKNET_LIB, module='darknet')) + +DARKNET_TEST_IMAGE_NAME = 'dog.jpg' +DARKNET_TEST_IMAGE_URL = 'https://github.com/siju-samuel/darknet/blob/master/data/' + DARKNET_TEST_IMAGE_NAME +'?raw=true' +DARKNET_TEST_IMAGE_PATH = download_testdata(DARKNET_TEST_IMAGE_URL, DARKNET_TEST_IMAGE_NAME, module='data') + +def _read_memory_buffer(shape, data, dtype='float32'): + length = 1 + for x in shape: + length *= x + data_np = np.zeros(length, dtype=dtype) + for i in range(length): + data_np[i] = data[i] + return data_np.reshape(shape) + +def _get_tvm_output(net, data, build_dtype='float32'): + '''Compute TVM output''' + dtype = 'float32' + sym, params = frontend.darknet.from_darknet(net, dtype) + + target = 'llvm' + shape_dict = {'data': data.shape} + graph, library, params = nnvm.compiler.build(sym, target, shape_dict, + build_dtype, params=params) + # Execute on TVM + ctx = tvm.cpu(0) + m = graph_runtime.create(graph, library, ctx) + # set inputs + m.set_input('data', tvm.nd.array(data.astype(dtype))) + m.set_input(**params) + m.run() + # get outputs + tvm_out = [] + for i in range(m.get_num_outputs()): + tvm_out.append(m.get_output(i).asnumpy()) + return tvm_out + +def _load_net(cfg_url, cfg_name, weights_url, weights_name): + cfg_path = download_testdata(cfg_url, cfg_name, module='darknet') + weights_path = download_testdata(weights_url, weights_name, module='darknet') + net = LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0) + return net + +def verify_darknet_frontend(net, build_dtype='float32'): + '''Test network with given input image on both darknet and tvm''' + def get_darknet_output(net, img): + LIB.network_predict_image(net, img) + out = [] + for i in range(net.n): + layer = net.layers[i] + if layer.type == LAYERTYPE.REGION: + attributes = np.array([layer.n, layer.out_c, layer.out_h, + layer.out_w, layer.classes, + layer.coords, layer.background], + dtype=np.int32) + out.insert(0, attributes) + out.insert(0, _read_memory_buffer((layer.n*2, ), layer.biases)) + layer_outshape = (layer.batch, layer.out_c, + layer.out_h, layer.out_w) + out.insert(0, _read_memory_buffer(layer_outshape, layer.output)) + elif layer.type == LAYERTYPE.YOLO: + attributes = np.array([layer.n, layer.out_c, layer.out_h, + layer.out_w, layer.classes, + layer.total], + dtype=np.int32) + out.insert(0, attributes) + out.insert(0, _read_memory_buffer((layer.total*2, ), layer.biases)) + out.insert(0, _read_memory_buffer((layer.n, ), layer.mask, dtype='int32')) + layer_outshape = (layer.batch, layer.out_c, + layer.out_h, layer.out_w) + out.insert(0, _read_memory_buffer(layer_outshape, layer.output)) + elif i == net.n-1: + if layer.type == LAYERTYPE.CONNECTED: + darknet_outshape = (layer.batch, layer.out_c) + elif layer.type in [LAYERTYPE.SOFTMAX]: + darknet_outshape = (layer.batch, layer.outputs) + else: + darknet_outshape = (layer.batch, layer.out_c, + layer.out_h, layer.out_w) + out.insert(0, _read_memory_buffer(darknet_outshape, layer.output)) + return out + + dtype = 'float32' + + 
img = LIB.letterbox_image(LIB.load_image_color(DARKNET_TEST_IMAGE_PATH.encode('utf-8'), 0, 0), net.w, net.h) + darknet_output = get_darknet_output(net, img) + batch_size = 1 + data = np.empty([batch_size, img.c, img.h, img.w], dtype) + i = 0 + for c in range(img.c): + for h in range(img.h): + for k in range(img.w): + data[0][c][h][k] = img.data[i] + i = i + 1 + + tvm_out = _get_tvm_output(net, data, build_dtype) + for tvm_outs, darknet_out in zip(tvm_out, darknet_output): + tvm.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3) + +def verify_rnn_forward(net): + '''Test network with given input data on both darknet and tvm''' + def get_darknet_network_predict(net, data): + return LIB.network_predict(net, data) + from cffi import FFI + ffi = FFI() + np_arr = np.zeros([1, net.inputs], dtype='float32') + np_arr[0, 84] = 1 + cffi_arr = ffi.cast('float*', np_arr.ctypes.data) + tvm_out = _get_tvm_output(net, np_arr)[0] + darknet_output = get_darknet_network_predict(net, cffi_arr) + darknet_out = np.zeros(net.outputs, dtype='float32') + for i in range(net.outputs): + darknet_out[i] = darknet_output[i] + last_layer = net.layers[net.n-1] + darknet_outshape = (last_layer.batch, last_layer.outputs) + darknet_out = darknet_out.reshape(darknet_outshape) + tvm.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4) + +def test_forward_extraction(): + '''test extraction model''' + model_name = 'extraction' + cfg_name = model_name + '.cfg' + weights_name = model_name + '.weights' + cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' + weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' + net = _load_net(cfg_url, cfg_name, weights_url, weights_name) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_alexnet(): + '''test alexnet model''' + model_name = 'alexnet' + cfg_name = model_name + '.cfg' + weights_name = model_name + '.weights' + cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' + weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' + net = _load_net(cfg_url, cfg_name, weights_url, weights_name) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_resnet50(): + '''test resnet50 model''' + model_name = 'resnet50' + cfg_name = model_name + '.cfg' + weights_name = model_name + '.weights' + cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' + weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' + net = _load_net(cfg_url, cfg_name, weights_url, weights_name) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_yolov2(): + '''test yolov2 model''' + model_name = 'yolov2' + cfg_name = model_name + '.cfg' + weights_name = model_name + '.weights' + cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' + weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' + net = _load_net(cfg_url, cfg_name, weights_url, weights_name) + build_dtype = {} + verify_darknet_frontend(net, build_dtype) + LIB.free_network(net) + +def test_forward_yolov3(): + '''test yolov3 model''' + model_name = 'yolov3' + cfg_name = model_name + '.cfg' + weights_name = model_name + '.weights' + cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' + weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' + net = _load_net(cfg_url, cfg_name, 
weights_url, weights_name) + build_dtype = {} + verify_darknet_frontend(net, build_dtype) + LIB.free_network(net) + +def test_forward_convolutional(): + '''test convolutional layer''' + net = LIB.make_network(1) + layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) + net.layers[0] = layer + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_dense(): + '''test fully connected layer''' + net = LIB.make_network(1) + layer = LIB.make_connected_layer(1, 75, 20, 1, 0, 0) + net.layers[0] = layer + net.w = net.h = 5 + LIB.resize_network(net, 5, 5) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_dense_batchnorm(): + '''test fully connected layer with batchnorm''' + net = LIB.make_network(1) + layer = LIB.make_connected_layer(1, 12, 2, 1, 1, 0) + for i in range(5): + layer.rolling_mean[i] = np.random.rand(1) + layer.rolling_variance[i] = np.random.rand(1) + layer.scales[i] = np.random.rand(1) + net.layers[0] = layer + net.w = net.h = 2 + LIB.resize_network(net, 2, 2) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_maxpooling(): + '''test maxpooling layer''' + net = LIB.make_network(1) + layer = LIB.make_maxpool_layer(1, 224, 224, 3, 2, 2, 0) + net.layers[0] = layer + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_avgpooling(): + '''test avgerage pooling layer''' + net = LIB.make_network(1) + layer = LIB.make_avgpool_layer(1, 224, 224, 3) + net.layers[0] = layer + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_batch_norm(): + '''test batch normalization layer''' + net = LIB.make_network(1) + layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0) + for i in range(32): + layer.rolling_mean[i] = np.random.rand(1) + layer.rolling_variance[i] = np.random.rand(1) + net.layers[0] = layer + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_shortcut(): + '''test shortcut layer''' + net = LIB.make_network(3) + layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) + layer_2 = LIB.make_convolutional_layer(1, 111, 111, 32, 32, 1, 1, 1, 0, 1, 0, 0, 0, 0) + layer_3 = LIB.make_shortcut_layer(1, 0, 111, 111, 32, 111, 111, 32) + layer_3.activation = 1 + layer_3.alpha = 1 + layer_3.beta = 1 + net.layers[0] = layer_1 + net.layers[1] = layer_2 + net.layers[2] = layer_3 + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_reorg(): + '''test reorg layer''' + net = LIB.make_network(2) + layer_1 = LIB.make_convolutional_layer(1, 222, 222, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) + layer_2 = LIB.make_reorg_layer(1, 110, 110, 32, 2, 0, 0, 0) + net.layers[0] = layer_1 + net.layers[1] = layer_2 + net.w = net.h = 222 + LIB.resize_network(net, 222, 222) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_region(): + '''test region layer''' + net = LIB.make_network(2) + layer_1 = LIB.make_convolutional_layer(1, 19, 19, 3, 425, 1, 1, 1, 0, 1, 0, 0, 0, 0) + layer_2 = LIB.make_region_layer(1, 19, 19, 5, 80, 4) + layer_2.softmax = 1 + net.layers[0] = layer_1 + net.layers[1] = layer_2 + net.w = net.h = 19 + LIB.resize_network(net, 19, 19) + build_dtype = {} + 
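# (All the single-layer tests in this file share one skeleton; a hypothetical
# helper capturing it, assuming the same LIB handle:
#   def run_single_layer(layer, size, build_dtype='float32'):
#       net = LIB.make_network(1)
#       net.layers[0] = layer
#       net.w = net.h = size
#       LIB.resize_network(net, size, size)
#       verify_darknet_frontend(net, build_dtype)
#       LIB.free_network(net)
# build_dtype={} is passed for the region and yolo tests, presumably so the
# build infers dtypes per output, since those layers also emit int32
# attribute arrays alongside the float outputs.)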
verify_darknet_frontend(net, build_dtype) + LIB.free_network(net) + +def test_forward_yolo_op(): + '''test yolo layer''' + net = LIB.make_network(2) + layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 14, 1, 3, 2, 0, 1, 0, 0, 0, 0) + layer_2 = LIB.make_yolo_layer(1, 111, 111, 2, 9, __darknetffi__.NULL, 2) + net.layers[0] = layer_1 + net.layers[1] = layer_2 + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + build_dtype = {} + verify_darknet_frontend(net, build_dtype) + LIB.free_network(net) + +def test_forward_upsample(): + '''test upsample layer''' + net = LIB.make_network(1) + layer = LIB.make_upsample_layer(1, 19, 19, 3, 3) + layer.scale = 1 + net.layers[0] = layer + net.w = net.h = 19 + LIB.resize_network(net, 19, 19) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_l2normalize(): + '''test l2 normalization layer''' + net = LIB.make_network(1) + layer = LIB.make_l2norm_layer(1, 224*224*3) + layer.c = layer.out_c = 3 + layer.h = layer.out_h = 224 + layer.w = layer.out_w = 224 + net.layers[0] = layer + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_elu(): + '''test elu activation layer''' + net = LIB.make_network(1) + layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) + layer_1.activation = 8 + net.layers[0] = layer_1 + net.w = net.h = 224 + LIB.resize_network(net, 224, 224) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_softmax(): + '''test softmax layer''' + net = LIB.make_network(1) + layer_1 = LIB.make_softmax_layer(1, 75, 1) + layer_1.temperature = 1 + net.layers[0] = layer_1 + net.w = net.h = 5 + LIB.resize_network(net, net.w, net.h) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_softmax_temperature(): + '''test softmax layer''' + net = LIB.make_network(1) + layer_1 = LIB.make_softmax_layer(1, 75, 1) + layer_1.temperature = 0.8 + net.layers[0] = layer_1 + net.w = net.h = 5 + LIB.resize_network(net, net.w, net.h) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_rnn(): + '''test RNN layer''' + net = LIB.make_network(1) + batch = 1 + inputs = 256 + outputs = 256 + steps = 1 + activation = 1 + batch_normalize = 0 + adam = 0 + layer_1 = LIB.make_rnn_layer(batch, inputs, outputs, steps, activation, batch_normalize, adam) + net.layers[0] = layer_1 + net.inputs = inputs + net.outputs = outputs + net.w = net.h = 0 + LIB.resize_network(net, net.w, net.h) + verify_rnn_forward(net) + LIB.free_network(net) + +def _test_forward_crnn(): + '''test CRNN layer''' + net = LIB.make_network(1) + batch = 1 + c = 3 + h = 224 + w = 224 + hidden_filters = c + output_filters = c + steps = 1 + activation = 0 + batch_normalize = 0 + inputs = 256 + outputs = 256 + layer_1 = LIB.make_crnn_layer(batch, h, w, c, hidden_filters, output_filters, + steps, activation, batch_normalize) + net.layers[0] = layer_1 + net.inputs = inputs + net.outputs = output_filters * h * w + net.w = w + net.h = h + LIB.resize_network(net, net.w, net.h) + verify_darknet_frontend(net) + LIB.free_network(net) + +def test_forward_lstm(): + '''test LSTM layer''' + net = LIB.make_network(1) + batch = 1 + inputs = 256 + outputs = 256 + steps = 1 + batch_normalize = 0 + adam = 0 + layer_1 = LIB.make_lstm_layer(batch, inputs, outputs, steps, batch_normalize, adam) + net.layers[0] = layer_1 + net.inputs = inputs + net.outputs = outputs + net.w = net.h = 0 + LIB.resize_network(net, net.w, net.h) + 
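# (As in the RNN and GRU tests, net.w = net.h = 0 because recurrent layers
# have no spatial extent; verify_rnn_forward then probes both runtimes with
# a single one-hot vector:
#   np_arr = np.zeros([1, net.inputs], dtype='float32'); np_arr[0, 84] = 1
# where index 84 is arbitrary; any single active input exercises the cell.)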
verify_rnn_forward(net) + LIB.free_network(net) + +def test_forward_gru(): + '''test GRU layer''' + net = LIB.make_network(1) + batch = 1 + inputs = 256 + outputs = 256 + steps = 1 + batch_normalize = 0 + adam = 0 + layer_1 = LIB.make_gru_layer(batch, inputs, outputs, steps, batch_normalize, adam) + net.layers[0] = layer_1 + net.inputs = inputs + net.outputs = outputs + net.w = net.h = 0 + LIB.resize_network(net, net.w, net.h) + verify_rnn_forward(net) + LIB.free_network(net) + +def test_forward_activation_logistic(): + '''test logistic activation layer''' + net = LIB.make_network(1) + batch = 1 + h = 224 + w = 224 + c = 3 + n = 32 + groups = 1 + size = 3 + stride = 2 + padding = 0 + activation = 0 + batch_normalize = 0 + binary = 0 + xnor = 0 + adam = 0 + layer_1 = LIB.make_convolutional_layer(batch, h, w, c, n, groups, size, stride, padding, + activation, batch_normalize, binary, xnor, adam) + net.layers[0] = layer_1 + net.w = w + net.h = h + LIB.resize_network(net, net.w, net.h) + verify_darknet_frontend(net) + LIB.free_network(net) + +if __name__ == '__main__': + test_forward_resnet50() + test_forward_alexnet() + test_forward_extraction() + test_forward_yolov2() + test_forward_yolov3() + test_forward_convolutional() + test_forward_maxpooling() + test_forward_avgpooling() + test_forward_batch_norm() + test_forward_shortcut() + test_forward_dense() + test_forward_dense_batchnorm() + test_forward_softmax() + test_forward_softmax_temperature() + test_forward_rnn() + test_forward_reorg() + test_forward_region() + test_forward_yolo_op() + test_forward_upsample() + test_forward_l2normalize() + test_forward_elu() + test_forward_rnn() +# FIXME: Skip CRNN test since it causes segfault in libdarknet2.0.so +# _test_forward_crnn() + test_forward_lstm() + test_forward_gru() + test_forward_activation_logistic() diff --git a/nnvm/tests/python/frontend/keras/test_forward.py b/nnvm/tests/python/frontend/keras/test_forward.py new file mode 100644 index 000000000000..78e4204e8250 --- /dev/null +++ b/nnvm/tests/python/frontend/keras/test_forward.py @@ -0,0 +1,354 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import nnvm +import tvm +from tvm.contrib import graph_runtime +from nnvm.testing.config import ctx_list +import keras + +# prevent keras from using up all gpu memory +import tensorflow as tf +from keras.backend.tensorflow_backend import set_session +config = tf.ConfigProto() +config.gpu_options.per_process_gpu_memory_fraction = 0.5 +set_session(tf.Session(config=config)) + + +def verify_keras_frontend(keras_model, need_transpose=True): + # Keras frontend currently supports tensorflow backend only. 
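# (Keras tensors are NHWC by default while the NNVM build here is NCHW, hence
# the need_transpose flag and the channel helpers defined below; for a 4-D
# array they reduce to:
#   to_channels_first: x.transpose(0, 3, 1, 2)
#   to_channels_last:  x.transpose(0, 2, 3, 1)
# the generic versions below handle any rank.)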
+ assert(keras.backend.backend() == 'tensorflow') + + in_shapes = [] + for layer in keras_model._input_layers: + in_shapes.append(tuple(dim.value if dim.value is not None else 1 for dim in layer.input.shape)) + + def get_keras_output(xs, dtype='float32'): + return keras_model.predict(xs) + + def get_tvm_output(xs, target, ctx, dtype='float32'): + sym, params = nnvm.frontend.from_keras(keras_model) + shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)} + with nnvm.compiler.build_config(opt_level=2): + graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + for name, x in zip(keras_model.input_names, xs): + m.set_input(name, tvm.nd.array(x.astype(dtype))) + m.set_input(**params) + m.run() + + return [m.get_output(i).asnumpy() for i in range(m.get_num_outputs())] + + def to_channels_first(arr): + return arr.transpose([0, -1] + list(range(1, arr.ndim - 1))) + + def to_channels_last(arr): + return arr.transpose([0] + list(range(2, arr.ndim)) + [1]) + + xs = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes] + keras_out = get_keras_output(xs) + + keras_out = keras_out if isinstance(keras_out, list) else [keras_out] + for target, ctx in ctx_list(): + tvm_out = get_tvm_output([to_channels_first(x) for x in xs] if need_transpose else xs, target, ctx) + for kout, tout in zip(keras_out, tvm_out): + if need_transpose: + tout = to_channels_last(tout) + tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5) + +def test_forward_elemwise_add(): + r = [] + data = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.Conv2D(8, (3, 3), padding="same")(data) + r.append(x) + x = keras.layers.Conv2D(8, (3, 3), padding="same")(x) + r.append(x) + x = keras.layers.Conv2D(8, (3, 3), padding="same")(x) + # add two symbols + y = keras.layers.add([keras.layers.add([x, r[0]]), r[1]]) + y = keras.layers.GlobalAveragePooling2D()(y) + keras_model = keras.models.Model(data, y) + verify_keras_frontend(keras_model) + # add three symbols + y = keras.layers.add([x, r[0], r[1]]) + y = keras.layers.GlobalAveragePooling2D()(y) + keras_model = keras.models.Model(data, y) + verify_keras_frontend(keras_model) + + +def _test_forward_dense(): + data = keras.layers.Input(shape=(32,32,1)) + x = keras.layers.Flatten()(data) + x = keras.layers.Dropout(0.5)(x) + x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(x) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + +def _test_forward_dense_with_3d_inp(): + data = keras.layers.Input(shape=(1, 20)) + x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model, need_transpose=False) + +def test_forward_dense(): + _test_forward_dense() + _test_forward_dense_with_3d_inp() + +def test_forward_pool(): + data = keras.layers.Input(shape=(32,32,1)) + # maxpool + x = keras.layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + # avgpool + y = keras.layers.AveragePooling2D((3, 3), strides=(1, 1), padding='same')(data) + keras_model = keras.models.Model(data, y) + verify_keras_frontend(keras_model) + + +def test_forward_conv(): + data = keras.layers.Input(shape=(32,32,3)) + conv_funcs = [keras.layers.Conv2D(filters=10, kernel_size=(3,3), + strides=(2,2), padding='same'), + keras.layers.Conv2D(filters=10, kernel_size=(3,3), + 
dilation_rate=(2,2), padding='same'), + keras.layers.DepthwiseConv2D(kernel_size=(3,3), padding='same'), + keras.layers.Conv2DTranspose(filters=10, kernel_size=(3,3), padding='valid'), + keras.layers.SeparableConv2D(filters=10, kernel_size=(3,3), padding='same')] + for conv_func in conv_funcs: + x = conv_func(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + + +def test_forward_upsample(): + data = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.UpSampling2D(size=(3,3))(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + + +def test_forward_reshape(): + data = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.Reshape(target_shape=(32,32,3))(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + + +def test_forward_crop(): + data = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.Cropping2D(cropping=((1, 1), (1, 1)))(data) + x = keras.layers.Cropping2D(cropping=(1, 1))(x) + x = keras.layers.Cropping2D(cropping=1)(x) + x = keras.layers.Cropping2D(cropping=((0, 1), (1, 0)))(x) + x = keras.layers.Cropping2D(cropping=(1, 0))(x) + x = keras.layers.Cropping2D(cropping=0)(x) + x = keras.layers.Add()([x, x]) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + + +def test_forward_vgg16(): + keras_model = keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', + input_shape=(224,224,3), classes=1000) + verify_keras_frontend(keras_model) + + +def test_forward_xception(): + keras_model = keras.applications.xception.Xception(include_top=True, weights='imagenet', + input_shape=(299,299,3), classes=1000) + verify_keras_frontend(keras_model) + + +def test_forward_resnet50(): + keras_model = keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', + input_shape=(224,224,3), classes=1000) + verify_keras_frontend(keras_model) + + +def test_forward_mobilenet(): + keras_model = keras.applications.mobilenet.MobileNet(include_top=True, weights='imagenet', + input_shape=(224,224,3), classes=1000) + verify_keras_frontend(keras_model) + + +def test_forward_activations(): + data = keras.layers.Input(shape=(32,32,3)) + weights = np.random.rand(1, 32, 32, 3) + act_funcs = [keras.layers.Activation('softmax'), + keras.layers.Activation('softplus'), + keras.layers.ReLU(), + keras.layers.ReLU(max_value=6.), + keras.layers.LeakyReLU(alpha=0.3), + keras.layers.PReLU(weights=weights, alpha_initializer="zero"), + keras.layers.ELU(alpha=0.5), + keras.layers.Activation('selu'), + keras.layers.ThresholdedReLU(theta=0.5), + keras.layers.Activation('softsign'), + keras.layers.Activation('hard_sigmoid'), + keras.layers.Activation('sigmoid'), + keras.layers.Activation('tanh'), + keras.layers.Activation('linear')] + for act_func in act_funcs: + x = act_func(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model) + + +def test_forward_multi_inputs(): + data1 = keras.layers.Input(shape=(32,32,3)) + data2 = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.Conv2D(8, (3, 3), padding="same")(data1) + y = keras.layers.Conv2D(8, (3, 3), padding="same")(data2) + z = keras.layers.add([x, y]) + z = keras.layers.GlobalAveragePooling2D()(z) + keras_model = keras.models.Model([data1, data2], z) + verify_keras_frontend(keras_model) + + +def test_forward_multi_outputs(): + data = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.Conv2D(8, (3, 3), padding="same")(data) + x = keras.layers.GlobalAveragePooling2D()(x) + y = 
keras.layers.Conv2D(8, (3, 3), padding="same")(data) + y = keras.layers.GlobalAveragePooling2D()(y) + keras_model = keras.models.Model(data, [x, y]) + verify_keras_frontend(keras_model) + + +def test_forward_reuse_layers(): + # reuse conv2d + data = keras.layers.Input(shape=(32,32,3)) + conv2d = keras.layers.Conv2D(8, (3, 3), padding="same") + x = conv2d(data) + y = conv2d(data) + z = keras.layers.add([x, y]) + z = keras.layers.GlobalAveragePooling2D()(z) + keras_model = keras.models.Model(data, z) + verify_keras_frontend(keras_model) + + # reuse add + data = keras.layers.Input(shape=(32,32,3)) + x = keras.layers.Conv2D(8, (3, 3), padding="same")(data) + add = keras.layers.Add() + x = add([x, x]) + x = add([x, x]) + z = keras.layers.GlobalAveragePooling2D()(x) + keras_model = keras.models.Model(data, z) + verify_keras_frontend(keras_model) + +def _test_LSTM(time_steps, inputs, hidden, return_state=True): + data = keras.layers.Input(shape=(time_steps, inputs)) + lstm_out = keras.layers.LSTM(hidden, + return_state=return_state, + recurrent_activation='sigmoid', + activation='tanh') + x = lstm_out(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model, need_transpose=False) + +def _test_LSTM_MultiLayer(inputs, hidden): + inputs = keras.layers.Input(shape=(1, inputs)) + layer = keras.layers.LSTM(hidden, return_state=True, return_sequences=True, + recurrent_activation='sigmoid', + activation='tanh') + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + output = keras.layers.LSTM(hidden, recurrent_activation='sigmoid', + activation='tanh')(output, initial_state=state) + keras_model = keras.models.Model(inputs, output) + verify_keras_frontend(keras_model, need_transpose=False) + + +def test_forward_LSTM(): + _test_LSTM(1, 8, 8, return_state=True) + _test_LSTM(1, 4, 4, return_state=False) + _test_LSTM(20, 16, 256, return_state=False) + _test_LSTM_MultiLayer(4, 4) + +def _test_RNN(inputs, units): + data = keras.layers.Input(shape=(1, inputs)) + rnn_out = keras.layers.SimpleRNN(units, return_state=True, + activation='tanh') + x = rnn_out(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model, need_transpose=False) + +def _test_RNN_MultiLayer(inputs, units): + inputs = keras.layers.Input(shape=(1, inputs)) + layer = keras.layers.SimpleRNN(units, return_state=True, return_sequences=True, + activation='tanh') + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + output = keras.layers.SimpleRNN(units, activation='tanh')(output, initial_state=state) + keras_model = keras.models.Model(inputs, output) + verify_keras_frontend(keras_model, need_transpose=False) + +def test_forward_RNN(): + _test_RNN(2, 4) + _test_RNN(4, 3) + _test_RNN_MultiLayer(4, 12) + +def _test_GRU(inputs, units): + data = keras.layers.Input(shape=(1, inputs)) + gru_out = keras.layers.GRU(units, + return_state=True, + recurrent_activation='sigmoid', + activation='tanh') + x = gru_out(data) + keras_model = keras.models.Model(data, x) + verify_keras_frontend(keras_model, need_transpose=False) + +def _test_GRU_MultiLayer(inputs, units): + inputs = keras.layers.Input(shape=(1, inputs)) + layer = keras.layers.GRU(units, + return_state=True, + return_sequences=True, + recurrent_activation='sigmoid', + activation='tanh') + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + output = keras.layers.GRU(units, recurrent_activation='sigmoid', + activation='tanh')(output, initial_state=state) + keras_model = keras.models.Model(inputs, output) 
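# (Each *_MultiLayer test chains state the same way: the first recurrent
# layer returns sequences plus its state, and the second consumes that state
# via initial_state. The pattern in isolation, for any of SimpleRNN/GRU/LSTM:
#   outputs = layer1(x)                          # [seq, state...]
#   out_seq, state = outputs[0], outputs[1:]
#   y = layer2(out_seq, initial_state=state)
# )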
+ verify_keras_frontend(keras_model, need_transpose=False) + +def test_forward_GRU(): + _test_GRU(2, 4) + _test_GRU(4, 3) + _test_GRU_MultiLayer(4, 4) + +if __name__ == '__main__': + test_forward_elemwise_add() + test_forward_activations() + test_forward_dense() + test_forward_pool() + test_forward_conv() + test_forward_upsample() + test_forward_reshape() + test_forward_crop() + test_forward_vgg16() + test_forward_xception() + test_forward_resnet50() + test_forward_mobilenet() + + test_forward_multi_inputs() + test_forward_multi_outputs() + test_forward_reuse_layers() + test_forward_LSTM() + test_forward_RNN() + test_forward_GRU() diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/__init__.py b/nnvm/tests/python/frontend/mxnet/model_zoo/__init__.py new file mode 100644 index 000000000000..3922ba673f2f --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/__init__.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""MXNet and NNVM model zoo.""" +from __future__ import absolute_import +from . import mlp, resnet, vgg, dqn, dcgan, squeezenet, inception_v3 +import nnvm.testing + +_num_class = 1000 + +# mlp fc +mx_mlp = mlp.get_symbol(_num_class) +nnvm_mlp = nnvm.testing.mlp.get_workload(1, _num_class)[0] + +# resnet fc +mx_resnet = {} +nnvm_resnet = {} +for num_layer in [18, 34, 50, 101, 152, 200, 269]: + mx_resnet[num_layer] = resnet.get_symbol(_num_class, num_layer, '3,224,224') + nnvm_resnet[num_layer] = nnvm.testing.resnet.get_workload( + 1, _num_class, num_layers=num_layer)[0] + +# vgg fc +mx_vgg = {} +nnvm_vgg = {} +for num_layer in [11, 13, 16, 19]: + mx_vgg[num_layer] = vgg.get_symbol(_num_class, num_layer) + nnvm_vgg[num_layer] = nnvm.testing.vgg.get_workload( + 1, _num_class, num_layers=num_layer)[0] + +# squeezenet +mx_squeezenet = {} +nnvm_squeezenet = {} +for version in ['1.0', '1.1']: + mx_squeezenet[version] = squeezenet.get_symbol(version=version) + nnvm_squeezenet[version] = nnvm.testing.squeezenet.get_workload(1, version=version)[0] + +# inception +mx_inception_v3 = inception_v3.get_symbol() +nnvm_inception_v3 = nnvm.testing.inception_v3.get_workload(1)[0] + +# dqn +mx_dqn = dqn.get_symbol() +nnvm_dqn = nnvm.testing.dqn.get_workload(1)[0] + +# dcgan generator +mx_dcgan = dcgan.get_symbol() +nnvm_dcgan = nnvm.testing.dcgan.get_workload(1)[0] diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py b/nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py new file mode 100644 index 000000000000..e606b78e1597 --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=unused-argument +""" +The MXNet symbol of DCGAN generator + +Adopted from: +https://github.com/tqchen/mxnet-gan/blob/master/mxgan/generator.py + +Reference: +Radford, Alec, Luke Metz, and Soumith Chintala. +"Unsupervised representation learning with deep convolutional generative adversarial networks." +arXiv preprint arXiv:1511.06434 (2015). +""" + +import mxnet as mx + +def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)): + """a deconv layer that enlarges the feature map""" + target_shape = (oshape[-2], oshape[-1]) + pad_y = (kshape[0] - 1) // 2 + pad_x = (kshape[1] - 1) // 2 + adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0] + adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1] + + net = mx.sym.Deconvolution(data, + kernel=kshape, + stride=stride, + pad=(pad_y, pad_x), + adj=(adj_y, adj_x), + num_filter=oshape[0], + no_bias=True, + name=name) + return net + +def deconv2d_bn_relu(data, prefix, **kwargs): + """a block of deconv + batch norm + relu""" + eps = 1e-5 + 1e-12 + + net = deconv2d(data, name="%s_deconv" % prefix, **kwargs) + net = mx.sym.BatchNorm(net, eps=eps, name="%s_bn" % prefix) + net = mx.sym.Activation(net, name="%s_act" % prefix, act_type='relu') + return net + +def get_symbol(oshape=(3, 64, 64), ngf=128, code=None): + """get symbol of dcgan generator""" + assert oshape[-1] == 64, "Only support 64x64 image" + assert oshape[-2] == 64, "Only support 64x64 image" + + code = mx.sym.Variable("data") if code is None else code + net = mx.sym.FullyConnected(code, name="g1", num_hidden=ngf*8*4*4, no_bias=True, flatten=False) + net = mx.sym.Activation(net, act_type='relu') + # 4 x 4 + net = mx.sym.reshape(net, shape=(-1, ngf * 8, 4, 4)) + # 8 x 8 + net = deconv2d_bn_relu( + net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2") + # 16x16 + net = deconv2d_bn_relu( + net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3") + # 32x32 + net = deconv2d_bn_relu( + net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4") + # 64x64 + net = deconv2d( + net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv") + net = mx.sym.Activation(net, act_type='tanh') + return net diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/dqn.py b/nnvm/tests/python/frontend/mxnet/model_zoo/dqn.py new file mode 100644 index 000000000000..e661e18debcb --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/dqn.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+The MXNet symbol of the Nature DQN
+
+Reference:
+Mnih, Volodymyr, et al.
+"Human-level control through deep reinforcement learning."
+Nature 518.7540 (2015): 529.
+"""
+
+import mxnet as mx
+
+def get_symbol(num_action=18):
+    data = mx.sym.Variable(name='data')
+    net = mx.sym.Convolution(data, kernel=(8, 8), stride=(4, 4),
+                             num_filter=32, name='conv1')
+    net = mx.sym.Activation(net, act_type='relu', name='relu1')
+    net = mx.sym.Convolution(net, kernel=(4, 4), stride=(2, 2),
+                             num_filter=64, name='conv2')
+    net = mx.sym.Activation(net, act_type='relu', name='relu2')
+    net = mx.sym.Convolution(net, kernel=(3, 3), stride=(1, 1),
+                             num_filter=64, name='conv3')
+    net = mx.sym.Activation(net, act_type='relu', name='relu3')
+    net = mx.sym.FullyConnected(net, num_hidden=512, name='fc4')
+    net = mx.sym.Activation(net, act_type='relu', name='relu4')
+    net = mx.sym.FullyConnected(net, num_hidden=num_action, name='fc5', flatten=False)
+
+    return net
diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/inception_v3.py b/nnvm/tests/python/frontend/mxnet/model_zoo/inception_v3.py
new file mode 100644
index 000000000000..8e8f36a3e644
--- /dev/null
+++ b/nnvm/tests/python/frontend/mxnet/model_zoo/inception_v3.py
@@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Inception V3, suitable for images of around 299 x 299 pixels
+
+Reference:
+Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015).
+ +Adopted from https://github.com/apache/incubator-mxnet/blob/ + master/example/image-classification/symbols/inception-v3.py +""" +import mxnet as mx +import numpy as np + +def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''): + conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix)) + bn = mx.sym.BatchNorm(data=conv, eps=2e-5, name='%s%s_batchnorm' % (name, suffix)) + act = mx.sym.Activation(data=bn, act_type='relu', name='%s%s_relu' %(name, suffix)) + return act + + +def Inception7A(data, + num_1x1, + num_3x3_red, num_3x3_1, num_3x3_2, + num_5x5_red, num_5x5, + pool, proj, + name): + tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name)) + tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv') + tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1') + tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv') + tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') + tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2') + pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) + cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv') + concat = mx.sym.Concat(*[tower_1x1, tower_5x5, tower_3x3, cproj], name='ch_concat_%s_chconcat' % name) + return concat + +# First Downsample +def Inception7B(data, + num_3x3, + num_d3x3_red, num_d3x3_1, num_d3x3_2, + pool, + name): + tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name)) + tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv') + tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1') + tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2') + pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('max_pool_%s_pool' % name)) + concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name) + return concat + +def Inception7C(data, + num_1x1, + num_d7_red, num_d7_1, num_d7_2, + num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4, + pool, proj, + name): + tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) + tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv') + tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1') + tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2') + tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3') + tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), 
name=('%s_tower_1' % name), suffix='_conv_4') + pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) + cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') + # concat + concat = mx.sym.Concat(*[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name) + return concat + +def Inception7D(data, + num_3x3_red, num_3x3, + num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, + pool, + name): + tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), suffix='_conv') + tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1') + tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), suffix='_conv') + tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1') + tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2') + tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3') + pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) + # concat + concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name) + return concat + +def Inception7E(data, + num_1x1, + num_d3_red, num_d3_1, num_d3_2, + num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, + pool, proj, + name): + tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) + tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv') + tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv') + tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1') + tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), suffix='_conv') + tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') + tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv') + tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1') + pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) + cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') + # concat + concat = mx.sym.Concat(*[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name) + return concat + +def get_symbol(num_classes=1000, **kwargs): + data = mx.sym.Variable(name="data") + # stage 1 + conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv") + conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1") + conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2") + pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool") + # stage 2 + conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3") + conv_4 = Conv(conv_3, 192, 
kernel=(3, 3), name="conv_4") + pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1") + + # # stage 3 + in3a = Inception7A(pool1, 64, + 64, 96, 96, + 48, 64, + "avg", 32, "mixed") + in3b = Inception7A(in3a, 64, + 64, 96, 96, + 48, 64, + "avg", 64, "mixed_1") + in3c = Inception7A(in3b, 64, + 64, 96, 96, + 48, 64, + "avg", 64, "mixed_2") + in3d = Inception7B(in3c, 384, + 64, 96, 96, + "max", "mixed_3") + # stage 4 + in4a = Inception7C(in3d, 192, + 128, 128, 192, + 128, 128, 128, 128, 192, + "avg", 192, "mixed_4") + in4b = Inception7C(in4a, 192, + 160, 160, 192, + 160, 160, 160, 160, 192, + "avg", 192, "mixed_5") + in4c = Inception7C(in4b, 192, + 160, 160, 192, + 160, 160, 160, 160, 192, + "avg", 192, "mixed_6") + in4d = Inception7C(in4c, 192, + 192, 192, 192, + 192, 192, 192, 192, 192, + "avg", 192, "mixed_7") + in4e = Inception7D(in4d, 192, 320, + 192, 192, 192, 192, + "max", "mixed_8") + # stage 5 + in5a = Inception7E(in4e, 320, + 384, 384, 384, + 448, 384, 384, 384, + "avg", 192, "mixed_9") + in5b = Inception7E(in5a, 320, + 384, 384, 384, + 448, 384, 384, 384, + "max", 192, "mixed_10") + # pool + pool = mx.sym.Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", name="global_pool") + flatten = mx.sym.Flatten(data=pool, name="flatten") + fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1', flatten=False) + softmax = mx.sym.SoftmaxOutput(data=fc1, name='softmax') + return softmax diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/mlp.py b/nnvm/tests/python/frontend/mxnet/model_zoo/mlp.py new file mode 100644 index 000000000000..922b208749bf --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/mlp.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
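+
+# get_symbol below first tries mx.symbol.FullyConnected(..., flatten=False) and
+# falls back to the older MXNet API without the flatten argument when that
+# keyword is not available.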
+ +""" +a simple multilayer perceptron +""" +import mxnet as mx + +def get_symbol(num_classes=10, **kwargs): + data = mx.symbol.Variable('data') + data = mx.sym.Flatten(data=data) + try: + fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128, flatten=False) + act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") + fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64, flatten=False) + act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") + fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes, flatten=False) + mlp = mx.symbol.softmax(data = fc3, name = 'softmax') + except: + fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128) + act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") + fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64) + act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") + fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes) + mlp = mx.symbol.softmax(data = fc3, name = 'softmax') + return mlp diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/resnet.py b/nnvm/tests/python/frontend/mxnet/model_zoo/resnet.py new file mode 100644 index 000000000000..3f9a870d31c0 --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/resnet.py @@ -0,0 +1,199 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +''' +Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py +Original author Wei Wu + +Implemented the following paper: + +Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. 
"Identity Mappings in Deep Residual Networks" +''' +import mxnet as mx +import numpy as np + +def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False): + """Return ResNet Unit symbol for building ResNet + Parameters + ---------- + data : str + Input data + num_filter : int + Number of output channels + bnf : int + Bottle neck channels factor with regard to num_filter + stride : tuple + Stride used in convolution + dim_match : Boolean + True means channel number between input and output is the same, otherwise means differ + name : str + Base name of the operators + workspace : int + Workspace used in convolution operator + """ + if bottle_neck: + bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1') + act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1') + conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=stride, pad=(0,0), + no_bias=True, workspace=workspace, name=name + '_conv1') + bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2') + act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2') + conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=(1,1), pad=(1,1), + no_bias=True, workspace=workspace, name=name + '_conv2') + bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3') + act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3') + conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True, + workspace=workspace, name=name + '_conv3') + if dim_match: + shortcut = data + else: + shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True, + workspace=workspace, name=name+'_sc') + if memonger: + shortcut._set_attr(mirror_stage='True') + return conv3 + shortcut + else: + bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1') + act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1') + conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1), + no_bias=True, workspace=workspace, name=name + '_conv1') + bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2') + act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2') + conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1), + no_bias=True, workspace=workspace, name=name + '_conv2') + if dim_match: + shortcut = data + else: + shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True, + workspace=workspace, name=name+'_sc') + if memonger: + shortcut._set_attr(mirror_stage='True') + return conv2 + shortcut + +def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, dtype='float32', memonger=False): + """Return ResNet symbol of + Parameters + ---------- + units : list + Number of units in each stage + num_stages : int + Number of stage + filter_list : list + Channel size of each stage + num_classes : int + Ouput size of symbol + dataset : str + Dataset type, only cifar10 and imagenet supports + workspace : int + Workspace used in convolution operator + dtype : str + Precision (float32 or float16) + """ + 
num_unit = len(units) + assert(num_unit == num_stages) + data = mx.sym.Variable(name='data') + if dtype == 'float32': + # data = mx.sym.identity(data=data, name='id') + data = data + else: + if dtype == 'float16': + data = mx.sym.Cast(data=data, dtype=np.float16) + data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data') + (nchannel, height, width) = image_shape + if height <= 32: # such as cifar10 + body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1), + no_bias=True, name="conv0", workspace=workspace) + else: # often expected to be 224 such as imagenet + body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3), + no_bias=True, name="conv0", workspace=workspace) + body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0') + body = mx.sym.Activation(data=body, act_type='relu', name='relu0') + body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max') + + for i in range(num_stages): + body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False, + name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace, + memonger=memonger) + for j in range(units[i]-1): + body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2), + bottle_neck=bottle_neck, workspace=workspace, memonger=memonger) + bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1') + relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1') + # Although kernel is not used here when global_pool=True, we should put one + pool1 = mx.sym.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1') + flat = mx.sym.Flatten(data=pool1) + try: + fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1', flatten=False) + except: + fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1') + if dtype == 'float16': + fc1 = mx.sym.Cast(data=fc1, dtype=np.float32) + return mx.sym.softmax(data=fc1, name='softmax') + +def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, dtype='float32', **kwargs): + """ + Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py + Original author Wei Wu + """ + image_shape = [int(l) for l in image_shape.split(',')] + (nchannel, height, width) = image_shape + if height <= 28: + num_stages = 3 + if (num_layers-2) % 9 == 0 and num_layers >= 164: + per_unit = [(num_layers-2)//9] + filter_list = [16, 64, 128, 256] + bottle_neck = True + elif (num_layers-2) % 6 == 0 and num_layers < 164: + per_unit = [(num_layers-2)//6] + filter_list = [16, 16, 32, 64] + bottle_neck = False + else: + raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers)) + units = per_unit * num_stages + else: + if num_layers >= 50: + filter_list = [64, 256, 512, 1024, 2048] + bottle_neck = True + else: + filter_list = [64, 64, 128, 256, 512] + bottle_neck = False + num_stages = 4 + if num_layers == 18: + units = [2, 2, 2, 2] + elif num_layers == 34: + units = [3, 4, 6, 3] + elif num_layers == 50: + units = [3, 4, 6, 3] + elif num_layers == 101: + units = [3, 4, 23, 3] + elif num_layers == 152: + units = [3, 8, 36, 3] + elif num_layers == 200: + units = [3, 24, 36, 3] + elif num_layers == 269: + units = [3, 30, 48, 8] + else: + raise ValueError("no experiments done on num_layers {}, you can 
do it yourself".format(num_layers)) + + return resnet(units = units, + num_stages = num_stages, + filter_list = filter_list, + num_classes = num_classes, + image_shape = image_shape, + bottle_neck = bottle_neck, + workspace = conv_workspace, + dtype = dtype) diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/squeezenet.py b/nnvm/tests/python/frontend/mxnet/model_zoo/squeezenet.py new file mode 100644 index 000000000000..093da51a78a7 --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/squeezenet.py @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Symbol of SqueezeNet + +Reference: +Iandola, Forrest N., et al. +"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). +""" + +import mxnet as mx + +# Helpers +def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): + net = _make_fire_conv(net, squeeze_channels, 1, 0) + + left = _make_fire_conv(net, expand1x1_channels, 1, 0) + right = _make_fire_conv(net, expand3x3_channels, 3, 1) + # NOTE : Assume NCHW layout here + net = mx.sym.concat(left, right, dim=1) + + return net + +def _make_fire_conv(net, channels, kernel_size, padding=0): + net = mx.sym.Convolution(net, num_filter=channels, kernel=(kernel_size, kernel_size), + pad=(padding, padding)) + net = mx.sym.Activation(net, act_type='relu') + return net + +# Net +def get_symbol(num_classes=1000, version='1.0', **kwargs): + """Get symbol of SqueezeNet + + Parameters + ---------- + num_classes: int + The number of classification results + + version : str, optional + "1.0" or "1.1" of SqueezeNet + """ + assert version in ['1.0', '1.1'], ("Unsupported SqueezeNet version {version}:" + "1.0 or 1.1 expected".format(version=version)) + net = mx.sym.Variable("data") + if version == '1.0': + net = mx.sym.Convolution(net, num_filter=96, kernel=(7, 7), stride=(2, 2), pad=(3, 3)) + net = mx.sym.Activation(net, act_type='relu') + net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 32, 128, 128) + net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) + net = _make_fire(net, 32, 128, 128) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 64, 256, 256) + net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) + net = _make_fire(net, 64, 256, 256) + else: + net = mx.sym.Convolution(net, num_filter=64, kernel=(3, 3), stride=(2, 2), pad=(1, 1)) + net = mx.sym.Activation(net, act_type='relu') + net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 16, 64, 64) + net = 
mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) + net = _make_fire(net, 32, 128, 128) + net = _make_fire(net, 32, 128, 128) + net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 64, 256, 256) + net = _make_fire(net, 64, 256, 256) + net = mx.sym.Dropout(net, p=0.5) + net = mx.sym.Convolution(net, num_filter=num_classes, kernel=(1, 1)) + net = mx.sym.Activation(net, act_type='relu') + net = mx.sym.Pooling(data=net, global_pool=True, kernel=(13, 13), pool_type='avg') + net = mx.sym.flatten(net) + return mx.sym.softmax(net) diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/vgg.py b/nnvm/tests/python/frontend/mxnet/model_zoo/vgg.py new file mode 100644 index 000000000000..68215bb80aaa --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/model_zoo/vgg.py @@ -0,0 +1,85 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""References: + +Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for +large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014). 
+""" + +import mxnet as mx +import numpy as np + +def get_feature(internel_layer, layers, filters, batch_norm = False, **kwargs): + for i, num in enumerate(layers): + for j in range(num): + internel_layer = mx.sym.Convolution(data = internel_layer, kernel=(3, 3), pad=(1, 1), num_filter=filters[i], name="conv%s_%s" %(i + 1, j + 1)) + if batch_norm: + internel_layer = mx.symbol.BatchNorm(data=internel_layer, name="bn%s_%s" %(i + 1, j + 1)) + internel_layer = mx.sym.Activation(data=internel_layer, act_type="relu", name="relu%s_%s" %(i + 1, j + 1)) + internel_layer = mx.sym.Pooling(data=internel_layer, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool%s" %(i + 1)) + return internel_layer + +def get_classifier(input_data, num_classes, **kwargs): + flatten = mx.sym.Flatten(data=input_data, name="flatten") + try: + fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6", flatten=False) + relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6") + drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6") + fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7", flatten=False) + relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7") + drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7") + fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8", flatten=False) + except: + fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6") + relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6") + drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6") + fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7") + relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7") + drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7") + fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8") + return fc8 + +def get_symbol(num_classes, num_layers=11, batch_norm=False, dtype='float32', **kwargs): + """ + Parameters + ---------- + num_classes : int, default 1000 + Number of classification classes. + num_layers : int + Number of layers for the variant of densenet. Options are 11, 13, 16, 19. + batch_norm : bool, default False + Use batch normalization. + dtype: str, float32 or float16 + Data precision. + """ + vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]), + 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]), + 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), + 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])} + if num_layers not in vgg_spec: + raise ValueError("Invalide num_layers {}. Possible choices are 11,13,16,19.".format(num_layers)) + layers, filters = vgg_spec[num_layers] + data = mx.sym.Variable(name="data") + if dtype == 'float16': + data = mx.sym.Cast(data=data, dtype=np.float16) + feature = get_feature(data, layers, filters, batch_norm) + classifier = get_classifier(feature, num_classes) + if dtype == 'float16': + classifier = mx.sym.Cast(data=classifier, dtype=np.float32) + symbol = mx.sym.softmax(data=classifier, name='softmax') + return symbol diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py new file mode 100644 index 000000000000..dd315c6f87b0 --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -0,0 +1,333 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np + +import topi +import tvm +from tvm.contrib import graph_runtime +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing.config import ctx_list +from nnvm import frontend +import mxnet as mx +from mxnet import gluon +from mxnet.gluon.model_zoo import vision +import model_zoo + + +def verify_mxnet_frontend_impl(mx_symbol, data_shape=(1, 3, 224, 224), out_shape=(1, 1000), + gluon_impl=False, name=None, dtype='float32'): + """Use name different from test to avoid pytest picking it up""" + if gluon_impl: + def get_gluon_output(name, x): + net = vision.get_model(name) + net.collect_params().initialize(mx.init.Xavier()) + net_sym = gluon.nn.SymbolBlock(outputs=net(mx.sym.var('data')), + inputs=mx.sym.var('data'), + params=net.collect_params()) + out = net_sym(mx.nd.array(x.astype(dtype))).asnumpy() + return out, net_sym + else: + def get_mxnet_output(symbol, x, dtype='float32'): + from collections import namedtuple + Batch = namedtuple('Batch', ['data']) + mod = mx.mod.Module(symbol, label_names=None) + mod.bind(data_shapes=[('data', x.shape)], for_training=False) + mod.init_params() + mod.forward(Batch([mx.nd.array(x.astype(dtype))])) + out = mod.get_outputs()[0].asnumpy() + args, auxs = mod.get_params() + return out, args, auxs + + def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'): + if gluon_impl: + new_sym, params = frontend.from_mxnet(symbol) + else: + new_sym, params = frontend.from_mxnet(symbol, args, auxs) + + dshape = x.shape + shape_dict = {'data': dshape} + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + # set inputs + m.set_input("data", tvm.nd.array(x.astype(dtype))) + m.set_input(**params) + m.run() + # get outputs + out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) + return out.asnumpy() + + # random input + x = np.random.uniform(size=data_shape) + if gluon_impl: + gluon_out, gluon_sym = get_gluon_output(name, x) + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(gluon_sym, x, None, None, target, ctx, dtype) + tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5) + else: + mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype) + assert "data" not in args + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype) + tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_mlp(): + mlp = model_zoo.mx_mlp + verify_mxnet_frontend_impl(mlp) + +def test_forward_vgg(): + for n in [11]: + mx_sym = model_zoo.mx_vgg[n] + verify_mxnet_frontend_impl(mx_sym) + +def test_forward_resnet(): + for n in [18]: + mx_sym = model_zoo.mx_resnet[n] + verify_mxnet_frontend_impl(mx_sym) + +def test_forward_elu(): + data = mx.sym.var('data') + data = mx.sym.concat(data, -data, dim=1) # negative part explicitly + mx_sym = mx.sym.LeakyReLU(data, 
act_type='elu') + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) + +def test_forward_rrelu(): + data = mx.sym.var('data') + data = mx.sym.concat(data, -data, dim=1) # negative part explicitly + mx_sym = mx.sym.LeakyReLU(data, act_type='rrelu', lower_bound=0.3, upper_bound=0.7) + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) + +def test_forward_prelu(): + data = mx.sym.var('data') + data = mx.sym.concat(data, -data, dim=1) # negative part explicitly + mx_sym = mx.sym.LeakyReLU(data, act_type='prelu') + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) + +def test_forward_softrelu(): + data = mx.sym.var('data') + data = mx.sym.concat(data, -data, dim=1) # negative part explicitly + mx_sym = mx.sym.Activation(data, act_type='softrelu') + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) + +def test_forward_fc_flatten(): + # test flatten=True option in mxnet 0.11.1 + data = mx.sym.var('data') + try: + mx_sym = mx.sym.FullyConnected(data, num_hidden=100, flatten=True) + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100)) + mx_sym = mx.sym.FullyConnected(mx.sym.Flatten(data), num_hidden=100, flatten=False) + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100)) + except: + pass + +def test_forward_clip(): + data = mx.sym.var('data') + data = mx.sym.concat(data, -data, dim=1) # negative part explicitly + mx_sym = mx.sym.clip(data, a_min=0, a_max=1) + verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) + +def test_forward_split(): + data = mx.sym.var('data') + mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False) + verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1)) + +def test_forward_split_squeeze(): + data = mx.sym.var('data') + mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True) + verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1)) + +def test_forward_expand_dims(): + data = mx.sym.var('data') + mx_sym = mx.sym.expand_dims(data, axis=1) + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4)) + +def test_forward_pooling(): + data = mx.sym.var('data') + mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='avg') + verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8)) + + mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='max') + verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8)) + +def test_forward_lrn(): + data = mx.sym.var('data') + mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5) + verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24)) + +def test_forward_ones(): + data = mx.sym.var('data') + ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32') + mx_sym = mx.sym.elemwise_add(data, ones) + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) + +def test_forward_zeros(): + data = mx.sym.var('data') + zeros = mx.sym.zeros(shape=(2, 3, 4), dtype='float32') + mx_sym = mx.sym.elemwise_add(data, zeros) + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) + +def test_forward_ones_like(): + data = mx.sym.var('data') + mx_sym = mx.sym.ones_like(data, dtype='float32') + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) + +def test_forward_zeros_like(): + data = mx.sym.var('data') + mx_sym = mx.sym.zeros_like(data, dtype='float32') + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) + +def test_forward_argmax(): + data = mx.sym.var('data') + mx_sym = mx.sym.argmax(data, axis=1) + verify_mxnet_frontend_impl(mx_sym, 
(5, 3), (5,)) + +def test_forward_argmin(): + data = mx.sym.var('data') + mx_sym = mx.sym.argmin(data, axis=0) + verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,)) + +def test_forward_where(): + cond = mx.sym.var('cond') + x = mx.sym.var('x') + y = mx.sym.var('y') + dshape = (2, 2) + dtype = 'float32' + mx_sym = mx.sym.where(cond, x, y) + np_cond = np.array([[0, 1], [-1, 0]]).astype(dtype) + np_x = np.random.uniform(size=dshape).astype(dtype) + np_y = np.random.uniform(size=dshape).astype(dtype) + mx_cond = mx.nd.array(np_cond) + mx_x = mx.nd.array(np_x) + mx_y = mx.nd.array(np_y) + mod = mx.mod.Module(mx_sym, label_names=None, data_names=['cond', 'x', 'y']) + mod.bind(data_shapes=[('cond', dshape), ('x', dshape), ('y', dshape)], for_training=False) + mod.init_params() + args, auxs = mod.get_params() + mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy() + out_shape = dshape + new_sym, params = frontend.from_mxnet(mx_sym, args, auxs) + shape_dict = {'cond': dshape, 'x': dshape, 'y': dshape} + for target, ctx in ctx_list(): + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + # set inputs + m.set_input("cond", tvm.nd.array(np_cond)) + m.set_input("x", tvm.nd.array(np_x)) + m.set_input("y", tvm.nd.array(np_y)) + m.set_input(**params) + m.run() + # get outputs + tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() + tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_slice(): + data = mx.sym.var('data') + mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4)) + verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 3)) + mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2)) + verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2)) + +def test_forward_maximum(): + a = mx.sym.var('a') + b = mx.sym.var('b') + dshape = (10, 20) + dtype = 'float32' + mx_sym = mx.sym._internal._maximum(a, b) + np_a = np.random.uniform(size=dshape).astype(dtype) + np_b = np.random.uniform(size=dshape).astype(dtype) + mx_a = mx.nd.array(np_a) + mx_b = mx.nd.array(np_b) + mod = mx.mod.Module(mx_sym, label_names=None, data_names=['a', 'b']) + mod.bind(data_shapes=[('a', dshape), ('b', dshape)], for_training=False) + mod.init_params() + args, auxs = mod.get_params() + mx_out = mx.nd._internal._maximum(mx_a, mx_b).asnumpy() + out_shape = dshape + new_sym, params = frontend.from_mxnet(mx_sym, args, auxs) + shape_dict = {'a': dshape, 'b': dshape} + for target, ctx in ctx_list(): + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + # set inputs + m.set_input("a", tvm.nd.array(np_a)) + m.set_input("b", tvm.nd.array(np_b)) + m.set_input(**params) + m.run() + # get outputs + tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() + tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_minimum(): + a = mx.sym.var('a') + b = mx.sym.var('b') + dshape = (10, 20) + dtype = 'float32' + mx_sym = mx.sym._internal._minimum(a, b) + np_a = np.random.uniform(size=dshape).astype(dtype) + np_b = np.random.uniform(size=dshape).astype(dtype) + mx_a = mx.nd.array(np_a) + mx_b = mx.nd.array(np_b) + mod = mx.mod.Module(mx_sym, label_names=None, data_names=['a', 'b']) + mod.bind(data_shapes=[('a', dshape), ('b', dshape)], for_training=False) + mod.init_params() + args, auxs = mod.get_params() + mx_out = 
mx.nd._internal._minimum(mx_a, mx_b).asnumpy() + out_shape = dshape + new_sym, params = frontend.from_mxnet(mx_sym, args, auxs) + shape_dict = {'a': dshape, 'b': dshape} + for target, ctx in ctx_list(): + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + # set inputs + m.set_input("a", tvm.nd.array(np_a)) + m.set_input("b", tvm.nd.array(np_b)) + m.set_input(**params) + m.run() + # get outputs + tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() + tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) + + +if __name__ == '__main__': + test_forward_mlp() + test_forward_vgg() + test_forward_resnet() + test_forward_elu() + test_forward_rrelu() + test_forward_prelu() + test_forward_softrelu() + test_forward_fc_flatten() + test_forward_clip() + test_forward_split() + test_forward_split_squeeze() + test_forward_expand_dims() + test_forward_pooling() + test_forward_lrn() + test_forward_ones() + test_forward_zeros() + test_forward_ones_like() + test_forward_zeros_like() + test_forward_argmax() + test_forward_argmin() + test_forward_where() + test_forward_slice() + test_forward_maximum() + test_forward_minimum() diff --git a/nnvm/tests/python/frontend/mxnet/test_graph.py b/nnvm/tests/python/frontend/mxnet/test_graph.py new file mode 100644 index 000000000000..1bbd0a97e8e1 --- /dev/null +++ b/nnvm/tests/python/frontend/mxnet/test_graph.py @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
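+
+# These tests convert each MXNet symbol with nnvm.frontend.from_mxnet and check it
+# for structural equality against the hand-written NNVM graph in model_zoo, after
+# applying the InferShape and SimplifyInference passes to both graphs.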
+import mxnet as mx +import nnvm +from nnvm.compiler import graph_util, graph_attr +import model_zoo + +def compare_graph(sym1, sym2, ishape=(2, 3, 224, 224)): + g1 = nnvm.graph.create(sym1) + g2 = nnvm.graph.create(sym2) + graph_attr.set_shape_inputs(g1, {'data':ishape}) + graph_attr.set_shape_inputs(g2, {'data':ishape}) + g1 = g1.apply("InferShape").apply("SimplifyInference") + g2 = g2.apply("InferShape").apply("SimplifyInference") + graph_util.check_graph_equal(g1, g2) + +def test_mlp(): + mx_sym = model_zoo.mx_mlp + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_mlp + compare_graph(from_mx_sym, nnvm_sym) + +def test_vgg(): + for n in [11, 13, 16, 19]: + mx_sym = model_zoo.mx_vgg[n] + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_vgg[n] + compare_graph(from_mx_sym, nnvm_sym) + +def test_resnet(): + for n in [18, 34, 50, 101]: + mx_sym = model_zoo.mx_resnet[n] + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_resnet[n] + compare_graph(from_mx_sym, nnvm_sym) + +def test_squeezenet(): + for version in ['1.0', '1.1']: + mx_sym = model_zoo.mx_squeezenet[version] + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_squeezenet[version] + compare_graph(from_mx_sym, nnvm_sym) + +def test_inception_v3(): + mx_sym = model_zoo.mx_inception_v3 + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_inception_v3 + compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 3, 299, 299)) + +def test_dqn(): + mx_sym = model_zoo.mx_dqn + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_dqn + compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 4, 84, 84)) + +def test_dcgan(): + mx_sym = model_zoo.mx_dcgan + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = model_zoo.nnvm_dcgan + compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 100)) + +def test_multi_outputs(): + def compose(F, **kwargs): + x = F.sym.Variable('x') + y = F.sym.Variable('y') + z = F.sym.split(x, **kwargs) + return F.sym.broadcast_sub(F.sym.broadcast_add(z[0], z[2]), y) + mx_sym = compose(mx, num_outputs=3, axis=1) + from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) + nnvm_sym = compose(nnvm, indices_or_sections=3, axis=1) + compare_graph(from_mx_sym, nnvm_sym) + +if __name__ == '__main__': + test_mlp() + test_vgg() + test_resnet() + test_multi_outputs() + test_dqn() + test_dcgan() + test_squeezenet() + test_inception_v3() diff --git a/nnvm/tests/python/frontend/onnx/model_zoo/__init__.py b/nnvm/tests/python/frontend/onnx/model_zoo/__init__.py new file mode 100644 index 000000000000..f5eb604acfd7 --- /dev/null +++ b/nnvm/tests/python/frontend/onnx/model_zoo/__init__.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
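+
+# The download loop below fetches each ONNX file into the test-data cache and then
+# publishes its local path as a module-level name (e.g. super_resolution), which
+# test_forward.py imports directly from this package.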
+ +"""Store for onnx examples and common models.""" +from __future__ import absolute_import as _abs +import os +import logging +from .super_resolution import get_super_resolution +from tvm.contrib.download import download_testdata + + +URLS = { + 'super_resolution.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/super_resolution_0.2.onnx', + 'squeezenet1_1.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/squeezenet1_1_0.2.onnx', + 'lenet.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/lenet_0.2.onnx', + 'resnet18_1_0.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/b385b1b242dc89a35dd808235b885ed8a19aedc1/resnet18_1.0.onnx'} + +# download and add paths +for k, v in URLS.items(): + name = k.split('.')[0] + relpath = os.path.join('onnx', k) + abspath = download_testdata(v, relpath, module='onnx') + locals()[name] = abspath + +# symbol for graph comparison +super_resolution_sym = get_super_resolution() diff --git a/nnvm/tests/python/frontend/onnx/model_zoo/squeezenet.py b/nnvm/tests/python/frontend/onnx/model_zoo/squeezenet.py new file mode 100644 index 000000000000..2de2d1075494 --- /dev/null +++ b/nnvm/tests/python/frontend/onnx/model_zoo/squeezenet.py @@ -0,0 +1,118 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable=unused-argument + +""" +Symbol of SqueezeNet + +Reference: +Iandola, Forrest N., et al. +"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). 
+""" + +from nnvm import symbol as sym +from nnvm.testing.utils import create_workload + +# Helpers +def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): + net = _make_fire_conv(net, squeeze_channels, 1, 0) + + left = _make_fire_conv(net, expand1x1_channels, 1, 0) + right = _make_fire_conv(net, expand3x3_channels, 3, 1) + # NOTE : Assume NCHW layout here + net = sym.concatenate(left, right, axis=1) + + return net + +def _make_fire_conv(net, channels, kernel_size, padding=0): + net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size), + padding=(padding, padding)) + net = sym.relu(net) + return net + +# Net +def get_symbol(num_classes, version, **kwargs): + """Get symbol of SqueezeNet + + Parameters + ---------- + num_classes: int + The number of classification results + + version : str, optional + "1.0" or "1.1" of SqueezeNet + """ + assert version == '1.1', ("Unsupported SqueezeNet version {version}:" + "1.1 expected".format(version=version)) + net = sym.Variable("data") + + net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2)) + net = sym.relu(net) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 16, 64, 64) + net = _make_fire(net, 16, 64, 64) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 32, 128, 128) + net = _make_fire(net, 32, 128, 128) + net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 48, 192, 192) + net = _make_fire(net, 64, 256, 256) + net = _make_fire(net, 64, 256, 256) + + net = sym.dropout(net, rate=0.5) + net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1)) + net = sym.relu(net) + net = sym.global_avg_pool2d(net) + return sym.softmax(net, axis=1) + +def get_workload(batch_size=1, num_classes=1000, version='1.0', + image_shape=(3, 224, 224), dtype="float32", **kwargs): + """Get benchmark workload for SqueezeNet + + Parameters + ---------- + batch_size : int + The batch size used in the model + + num_classes : int, optional + Number of classes + + version : str, optional + "1.0" or "1.1" of SqueezeNet + + image_shape : tuple, optional + The input image shape + + dtype : str, optional + The data type + + kwargs : dict + Extra arguments + + Returns + ------- + net : nnvm.Symbol + The computational graph + + params : dict of str to NDArray + The parameters. + """ + net = get_symbol(num_classes=num_classes, version=version, **kwargs) + return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/tests/python/frontend/onnx/model_zoo/super_resolution.py b/nnvm/tests/python/frontend/onnx/model_zoo/super_resolution.py new file mode 100644 index 000000000000..a98478e58307 --- /dev/null +++ b/nnvm/tests/python/frontend/onnx/model_zoo/super_resolution.py @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +"""NNVM symbol corresponding to super_resolution.onnx example.""" +from nnvm import sym + +def get_super_resolution(): + factor = 3 + size = 224 + data = sym.Variable(name='9') + conv1 = sym.conv2d(data, channels=64, kernel_size=(5, 5), padding=(2, 2), use_bias=False) + relu1 = sym.relu(conv1 + sym.expand_dims(sym.Variable(name='2', shape=(64)), axis=1, num_newaxis=2)) + conv2 = sym.conv2d(relu1, channels=64, kernel_size=(3, 3), padding=(1, 1), use_bias=False) + relu2 = sym.relu(conv2 + sym.expand_dims(sym.Variable(name='4', shape=(64)), axis=1, num_newaxis=2)) + conv3 = sym.conv2d(relu2, channels=32, kernel_size=(3, 3), padding=(1, 1), use_bias=False) + relu3 = sym.relu(conv3 + sym.expand_dims(sym.Variable(name='6', shape=(32)), axis=1, num_newaxis=2)) + conv4 = sym.conv2d(relu3, channels=factor**2, kernel_size=(3, 3), padding=(1, 1), use_bias=False) + conv4 = conv4 + sym.expand_dims(sym.Variable(name='8', shape=(factor**2)), axis=1, num_newaxis=2) + # TODO(zhreshold): allow shape inference for batch size > 1 + r1 = sym.reshape(conv4, shape=(1, 1, factor, factor, size, size)) + t1 = sym.transpose(r1, axes=(0, 1, 4, 2, 5, 3)) + r2 = sym.reshape(t1, shape=(1, 1, size * factor, size * factor)) + return r2 diff --git a/nnvm/tests/python/frontend/onnx/test_forward.py b/nnvm/tests/python/frontend/onnx/test_forward.py new file mode 100644 index 000000000000..8cb6876956c4 --- /dev/null +++ b/nnvm/tests/python/frontend/onnx/test_forward.py @@ -0,0 +1,1099 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
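+# The tests below all follow the same compile-and-compare pattern. A minimal
+# sketch of that flow (the model file name is illustrative):
+#
+#   model = onnx.load_model('super_resolution.onnx')   # ONNX protobuf
+#   sym, params = nnvm.frontend.from_onnx(model)       # import into NNVM
+#   graph, lib, params = nnvm.compiler.build(
+#       sym, target, {input_name: x.shape}, params=params)
+#   m = graph_runtime.create(graph, lib, ctx)
+#   m.set_input(input_name, tvm.nd.array(x))
+#   m.set_input(**params)
+#   m.run()
+#   tvm_out = m.get_output(0).asnumpy()                # compare vs. reference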
+import numpy as np
+import math
+import nnvm
+import topi
+import topi.testing
+import tvm
+from tvm.contrib import graph_runtime
+from nnvm.testing.config import ctx_list
+import onnx
+from model_zoo import super_resolution, squeezenet1_1, lenet, resnet18_1_0
+from onnx import helper, TensorProto
+
+def get_tvm_output(graph_def, input_data, target, ctx, output_shape=None, output_dtype='float32'):
+    """ Generic function to execute and get tvm output"""
+
+    sym, params = nnvm.frontend.from_onnx(graph_def)
+    # compile for the target requested by the caller (see ctx_list())
+    if isinstance(input_data, list):
+        input_names = {}
+        shape_dict = {}
+        dtype_dict = {}
+        for i, _ in enumerate(input_data):
+            input_names[i] = graph_def.graph.input[i].name
+            shape_dict[input_names[i]] = input_data[i].shape
+            dtype_dict[input_names[i]] = input_data[i].dtype
+    else:
+        input_names = graph_def.graph.input[0].name
+        shape_dict = {input_names: input_data.shape}
+        dtype_dict = {input_names: input_data.dtype}
+
+    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict,
+                                             dtype=dtype_dict, params=params)
+
+    # run on the matching context (graph_runtime is already imported at the
+    # top of this file)
+    m = graph_runtime.create(graph, lib, ctx)
+    # set inputs
+    if isinstance(input_data, list):
+        for i, e in enumerate(input_names):
+            m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
+    else:
+        m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
+
+    m.set_input(**params)
+    # execute
+    m.run()
+    # get outputs
+    if isinstance(output_shape, list) and isinstance(output_dtype, list):
+        tvm_output_list = []
+        for i, _ in enumerate(output_shape):
+            tvm_output = m.get_output(i)
+            tvm_output_list.append(tvm_output.asnumpy())
+        return tvm_output_list
+    else:
+        tvm_output = m.get_output(0)
+        return tvm_output.asnumpy()
+
+def get_caffe2_output(model, x, dtype='float32'):
+    import caffe2.python.onnx.backend
+    prepared_backend = caffe2.python.onnx.backend.prepare(model)
+    W = {model.graph.input[0].name: x.astype(dtype)}
+    c2_out = prepared_backend.run(W)[0]
+    return c2_out
+
+
+def verify_onnx_forward_impl(graph_file, data_shape, out_shape):
+    dtype = 'float32'
+    x = np.random.uniform(size=data_shape)
+    model = onnx.load_model(graph_file)
+    c2_out = get_caffe2_output(model, x, dtype)
+    for target, ctx in ctx_list():
+        tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype)
+        tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
+
+def verify_super_resolution_example():
+    verify_onnx_forward_impl(super_resolution, (1, 1, 224, 224), (1, 1, 672, 672))
+
+def verify_squeezenet1_1():
+    verify_onnx_forward_impl(squeezenet1_1, (1, 3, 224, 224), (1, 1000))
+
+def verify_lenet():
+    verify_onnx_forward_impl(lenet, (1, 1, 28, 28), (1, 10))
+
+def verify_resnet18():
+    verify_onnx_forward_impl(resnet18_1_0, (1, 3, 224, 224), (1, 1000))
+
+
+def test_reshape():
+    in_shape = (4, 3, 3, 4)
+    ref_shape = (3, 4, 4, 3)
+
+    ref_array = np.array(ref_shape)
+    ref_node = onnx.helper.make_node('Constant',
+                                     inputs=[],
+                                     outputs=['ref_in'],
+                                     value=onnx.helper.make_tensor(name = 'const_tensor',
+                                                                   data_type = onnx.TensorProto.INT32,
+                                                                   dims = ref_array.shape,
+                                                                   vals = ref_array.flatten().astype(int)))
+    reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"])
+
+    graph = helper.make_graph([ref_node, reshape_node],
+                              "reshape_test",
+                              inputs = [helper.make_tensor_value_info("in",
+                                            TensorProto.FLOAT, list(in_shape))],
+                              outputs = [helper.make_tensor_value_info("out",
+                                             TensorProto.FLOAT, list(ref_shape))])
+
+    model =
helper.make_model(graph, producer_name='reshape_test') + + for target, ctx in ctx_list(): + x = np.random.uniform(size=in_shape).astype('int32') + tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32') + + tvm.testing.assert_allclose(ref_shape, tvm_out.shape) + +def test_reshape_like(): + in_shape = (4, 3, 3, 4) + ref_shape = (3, 4, 4, 3) + + ref_array = np.random.uniform(size=ref_shape).astype('float32') + ref_node = onnx.helper.make_node('Constant', + inputs=[], + outputs=['ref_in'], + value=onnx.helper.make_tensor(name = 'const_tensor', + data_type = onnx.TensorProto.FLOAT, + dims = ref_array.shape, + vals = ref_array.flatten().astype(float))) + copy_node = helper.make_node("Identity", ["ref_in"], ["copy_in"]) + reshape_node = helper.make_node("Reshape", ["in", "copy_in"], ["out"]) + + graph = helper.make_graph([ref_node, copy_node, reshape_node], + "reshape_like_test", + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(ref_shape))]) + + model = helper.make_model(graph, producer_name='reshape_like_test') + + for target, ctx in ctx_list(): + x = np.random.uniform(size=in_shape).astype('float32') + tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32') + + tvm.testing.assert_allclose(ref_shape, tvm_out.shape) + +def _test_power_iteration(x_shape, y_shape): + if isinstance(y_shape, int): + y_shape = [y_shape] + + x = np.random.uniform(size=x_shape).astype(np.float32) + y = np.random.uniform(size=y_shape).astype(np.float32) + + np_res = np.power(x, y).astype(np.float32) + + res = helper.make_node("Pow", ['x', 'y'], ['out']) + + graph = helper.make_graph([res], + 'power_test', + inputs = [helper.make_tensor_value_info("x", + TensorProto.FLOAT, list(x_shape)), + helper.make_tensor_value_info("y", + TensorProto.FLOAT, list(y_shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(np_res.shape))]) + + model = helper.make_model(graph, producer_name='power_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [x, y], target, ctx, np_res.shape) + tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5) + +def test_power(): + _test_power_iteration((1, 3), (1)) + _test_power_iteration((2, 3), (2, 3)) + _test_power_iteration((2, 3), (1, 3)) + +def test_squeeze(): + in_shape = (1, 3, 1, 3, 1, 1) + out_shape = (3, 3) + y = helper.make_node("Squeeze", ['in'], ['out'], axes=[0, 2, 4, 5]) + + graph = helper.make_graph([y], + 'squeeze_test', + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(out_shape))]) + + model = helper.make_model(graph, producer_name='squeeze_test') + + for target, ctx in ctx_list(): + x = np.random.uniform(size=in_shape).astype('float32') + tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32') + + tvm.testing.assert_allclose(out_shape, tvm_out.shape) + +def test_unsqueeze(): + in_shape = (3, 3) + axis = (0, 3, 4) + out_shape = (1, 3, 3, 1, 1) + y = helper.make_node("Unsqueeze", ['in'], ['out'], axes=list(axis)) + + graph = helper.make_graph([y], + 'squeeze_test', + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(out_shape))]) + + model = helper.make_model(graph, producer_name='squeeze_test') + + for target, ctx in ctx_list(): + x = 
np.random.uniform(size=in_shape).astype('float32') + tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32') + + tvm.testing.assert_allclose(out_shape, tvm_out.shape) + +def verify_gather(in_shape, indices, axis, dtype): + x = np.random.uniform(size=in_shape).astype(dtype) + indices = np.array(indices, dtype="int32") + out_np = np.take(x, indices, axis=axis) + + y = helper.make_node("Gather", ['in', 'indices'], ['out'], axis=axis) + + graph = helper.make_graph([y], + 'gather_test', + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(in_shape)), + helper.make_tensor_value_info("indices", + TensorProto.INT32, list(indices.shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(out_np.shape))]) + model = helper.make_model(graph, producer_name='gather_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [x, indices], target, ctx, out_np.shape) + tvm.testing.assert_allclose(out_np, tvm_out) + +def test_gather(): + verify_gather((4,), [1], 0, 'int32') + verify_gather((1,4), [0], 0, 'int32') + verify_gather((4,), [[[1,0],[0,1]]], 0, 'float32') + verify_gather((2,2), [[[1,0],[0,1]]], 1, 'int32') + verify_gather((3,3,3), [[[1,0]]], -1, 'int32') + verify_gather((4,3,5,6), [[2,1,0,0]], 0, 'float32') + +def _test_slice_iteration(indata, outdata, starts, ends, axes=None): + if axes: + y = helper.make_node("Slice", ['in'], ['out'], axes=axes, starts=starts, ends=ends) + else: + y = helper.make_node("Slice", ['in'], ['out'], starts=starts, ends=ends) + + graph = helper.make_graph([y], + 'slice_test', + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(indata.shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(outdata.shape))]) + + model = helper.make_model(graph, producer_name='slice_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32') + + tvm.testing.assert_allclose(outdata, tvm_out) + +def test_slice(): + x = np.random.randn(20, 10, 5).astype(np.float32) + _test_slice_iteration(x, x[0:3, 0:10], (0, 0), (3, 10), (0, 1)) + _test_slice_iteration(x, x[:, :, 3:4], (0, 0, 3), (20, 10, 4)) + _test_slice_iteration(x, x[:, 1:1000], (1), (1000), (1)) + _test_slice_iteration(x, x[:, 0:-1], (0), (-1), (1)) + +def _test_onnx_op_elementwise(inshape, outfunc, npargs, dtype, opname, kwargs, rtol=1e-7, atol=1e-7): + indata = np.random.uniform(-1, 1, size=inshape).astype(dtype) + outdata = outfunc(indata, **npargs) + + y = helper.make_node(opname, ['in'], ['out'], **kwargs) + + graph = helper.make_graph([y], + opname+'_test', + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(indata.shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(outdata.shape))]) + + model = helper.make_model(graph, producer_name=opname+'_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, dtype) + + tvm.testing.assert_allclose(outdata, tvm_out, rtol=rtol, atol=atol) + +def test_floor(): + _test_onnx_op_elementwise((2, 4, 5, 6), np.floor, {}, 'float32', 'Floor', {}) + +def test_ceil(): + _test_onnx_op_elementwise((2, 4, 5, 6), np.ceil, {}, 'float32', 'Ceil', {}) + +def test_clip(): + _test_onnx_op_elementwise((2, 4, 5, 6), + np.clip, + {'a_min': -1.0, 'a_max': 1.0}, + 'float32', + 'Clip', + {'min': -1.0, 'max': 1.0}) + +def test_matmul(): + a_shape = (4, 3) + b_shape = (3, 4) + + a_array = 
np.random.uniform(size=a_shape).astype('float32') + b_array = np.random.uniform(size=b_shape).astype('float32') + out_np = np.matmul(a_array, b_array) + + mul_node = helper.make_node("MatMul", ["a", "b"], ["out"]) + + graph = helper.make_graph([mul_node], + "matmul_test", + inputs = [helper.make_tensor_value_info("a", + TensorProto.FLOAT, list(a_shape)), + helper.make_tensor_value_info("b", + TensorProto.FLOAT, list(b_shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(out_np.shape))]) + + model = helper.make_model(graph, producer_name='matmul_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_array, b_array], target, ctx, out_np.shape) + tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5) + +def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None): + in_array = np.random.uniform(size=shape).astype(dtype) + + if alpha == None and beta == None and bias==None: + alpha = 0.0001 + beta = 0.75 + bias = 1.0 + node = onnx.helper.make_node('LRN', inputs=['in'], outputs=['out'], size=nsize) + else: + node = onnx.helper.make_node('LRN', inputs=['in'], outputs=['out'], alpha=alpha, + beta=beta, bias=bias, size=nsize) + + graph = helper.make_graph([node], + "lrn_test", + inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(shape))], + outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(shape))]) + model = helper.make_model(graph, producer_name='lrn_test') + + def _get_python_lrn(): + square_sum = np.zeros(shape).astype(dtype) + for n, c, h, w in np.ndindex(in_array.shape): + square_sum[n, c, h, w] = sum(in_array[n, + max(0, c - int(math.floor((nsize - 1) / 2))): \ + min(5, c + int(math.ceil((nsize - 1) / 2)) + 1), + h, + w] ** 2) + py_out = in_array / ((bias + (alpha / nsize) * square_sum) ** beta) + return py_out + + for target, ctx in ctx_list(): + new_sym, params = nnvm.frontend.from_onnx(model) + + input_name = model.graph.input[0].name + shape_dict = {input_name: in_array.shape} + dtype_dict = {input_name: dtype} + graph, lib, params = nnvm.compiler.build(new_sym, target, + shape_dict, dtype_dict, params=params) + m = graph_runtime.create(graph, lib, ctx) + # set inputs + m.set_input(input_name, tvm.nd.array(in_array.astype(dtype))) + m.set_input(**params) + m.run() + # get outputs + tvm_out = m.get_output(0, tvm.nd.empty(shape, dtype)) + py_out = _get_python_lrn() + tvm.testing.assert_allclose(py_out, tvm_out.asnumpy(), rtol=1e-5, atol=1e-5) + +def test_lrn(): + verify_lrn((5, 5, 5, 5), 3, 'float32') + verify_lrn((5, 5, 5, 5), 3, 'float32', alpha=0.0002, beta=0.5, bias=2.0) + +def _test_upsample_nearest(): + scale = 2 + in_shape = (1, 1, 3, 3) + out_shape = (1, 1, 3*scale, 3*scale) + y = helper.make_node("Upsample", ['in'], ['out'], mode='nearest', scales=[1.0, 1.0, 2.0, 2.0]) + + in_array = np.random.uniform(size=in_shape).astype(np.float32) + out_array = topi.testing.upsampling_python(in_array, (scale, scale), "NCHW") + + graph = helper.make_graph([y], + 'upsample_nearest_test', + inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))]) + + model = helper.make_model(graph, producer_name='upsample_nearest_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32') + tvm.testing.assert_allclose(out_array, tvm_out) + +def _test_upsample_bilinear(): + scale = 2 + in_shape = (1, 1, 3, 3) + out_shape = 
(1, 1, 3*scale, 3*scale) + y = helper.make_node("Upsample", ['in'], ['out'], mode='linear', scales=[1.0, 1.0, 2.0, 2.0]) + + in_array = np.random.uniform(size=in_shape).astype(np.float32) + out_array = topi.testing.bilinear_resize_python(in_array, (3*scale, 3*scale), "NCHW", align_corners=False) + + graph = helper.make_graph([y], + 'upsample_bilinear_test', + inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))]) + + model = helper.make_model(graph, producer_name='upsample_bilinear_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32') + tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5) + +def _test_upsample_bilinear_opset9(): + scale = 2 + in_shape = (1, 1, 3, 3) + out_shape = (1, 1, 3*scale, 3*scale) + y = helper.make_node("Upsample", ['in','scales'], ['out'], mode='linear') + scales=[1.0, 1.0, 2.0, 2.0] + in_array = np.random.uniform(size=in_shape).astype(np.float32) + out_array = topi.testing.bilinear_resize_python(in_array, (3*scale, 3*scale), "NCHW", align_corners=False) + + ref_array = np.array(scales) + ref_node = helper.make_node('Constant', + inputs=[], + outputs=['scales'], + value=onnx.helper.make_tensor(name = 'const_tensor', + data_type = TensorProto.FLOAT, + dims = ref_array.shape, + vals = ref_array.flatten().astype(float))) + + graph = helper.make_graph([ref_node, y], + 'upsample_bilinear_opset9_test', + inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))]) + + model = helper.make_model(graph, producer_name='upsample_bilinear_opset9_test') + inputs = [] + inputs.append(in_array) + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, inputs, target, ctx, out_shape, 'float32') + tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5) + +def test_upsample(): + _test_upsample_nearest() + _test_upsample_bilinear() + _test_upsample_bilinear_opset9() + +def _test_softmax(inshape, axis): + opname = 'Softmax' + indata = np.random.uniform(size=inshape).astype(np.float32) + outshape = inshape + outdata = topi.testing.softmax_python(indata) + if isinstance(axis, int): + y = helper.make_node(opname, ['in'], ['out'], axis = axis) + elif axis is None: + y = helper.make_node(opname, ['in'], ['out']) + + graph = helper.make_graph([y], + opname+'_test', + inputs = [helper.make_tensor_value_info("in", + TensorProto.FLOAT, list(indata.shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(outdata.shape))]) + + model = helper.make_model(graph, producer_name=opname+'_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, indata, target, ctx, outshape, 'float32') + tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5) + +def test_softmax(): + _test_softmax((1, 10), None) + _test_softmax((1, 10), 1) + +def verify_min(input_dim): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + a_np3 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.min((a_np1, a_np2, a_np3), axis=0) + + min_node = helper.make_node("Min", ["a_np1", "a_np2", "a_np3"], ["out"]) + + graph = helper.make_graph([min_node], + "Min_test", + inputs = [helper.make_tensor_value_info("a_np1", + TensorProto.FLOAT, list(input_dim)), + 
helper.make_tensor_value_info("a_np2", + TensorProto.FLOAT, list(input_dim)), + helper.make_tensor_value_info("a_np3", + TensorProto.FLOAT, list(input_dim))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(b_np.shape))]) + + model = helper.make_model(graph, producer_name='Min_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape) + tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_min(): + verify_min((1, 3, 20, 20)) + verify_min((20, 20)) + +def verify_max(input_dim): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + a_np3 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.max((a_np1, a_np2, a_np3), axis=0) + + max_node = helper.make_node("Max", ["a_np1", "a_np2", "a_np3"], ["out"]) + + graph = helper.make_graph([max_node], + "Max_test", + inputs = [helper.make_tensor_value_info("a_np1", + TensorProto.FLOAT, list(input_dim)), + helper.make_tensor_value_info("a_np2", + TensorProto.FLOAT, list(input_dim)), + helper.make_tensor_value_info("a_np3", + TensorProto.FLOAT, list(input_dim))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(b_np.shape))]) + + model = helper.make_model(graph, producer_name='Max_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape) + tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_max(): + verify_max((1, 3, 20, 20)) + verify_max((20, 20)) + +def verify_mean(input_dim): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + a_np2 = np.random.uniform(size=input_dim).astype(dtype) + a_np3 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.mean((a_np1, a_np2, a_np3), axis=0) + + mean_node = helper.make_node("Mean", ["a_np1", "a_np2", "a_np3"], ["out"]) + + graph = helper.make_graph([mean_node], + "Mean_test", + inputs = [helper.make_tensor_value_info("a_np1", + TensorProto.FLOAT, list(input_dim)), + helper.make_tensor_value_info("a_np2", + TensorProto.FLOAT, list(input_dim)), + helper.make_tensor_value_info("a_np3", + TensorProto.FLOAT, list(input_dim))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(b_np.shape))]) + + model = helper.make_model(graph, producer_name='Mean_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape) + tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_mean(): + verify_mean((1, 3, 20, 20)) + verify_mean((20, 20)) + +def verify_hardsigmoid(input_dim, alpha, beta): + dtype = 'float32' + + a_np1 = np.random.uniform(size=input_dim).astype(dtype) + + b_np = np.clip(a_np1 * alpha + beta, 0, 1) + + hardsigmoid_node = helper.make_node("HardSigmoid", ["a_np1"], ["out"], alpha=alpha, beta=beta) + + graph = helper.make_graph([hardsigmoid_node], + "HardSigmoid_test", + inputs = [helper.make_tensor_value_info("a_np1", + TensorProto.FLOAT, list(input_dim))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(b_np.shape))]) + + model = helper.make_model(graph, producer_name='HardSigmoid_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape) + tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_hardsigmoid(): + 
verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6) + verify_hardsigmoid((20, 20), 0.3, 0.4) + +def verify_argmin(input_dim, axis=None, keepdims=None): + def _argmin_numpy(data, axis=0, keepdims=True): + result = np.argmin(data, axis=axis) + if (keepdims == 1): + result = np.expand_dims(result, axis) + return result.astype(data.dtype) + + a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32) + if keepdims is None and axis is None: + b_np = _argmin_numpy(a_np1) + node = onnx.helper.make_node('ArgMin', + inputs=['a_np1'], + outputs=['out']) + elif axis is None: + b_np = _argmin_numpy(a_np1, keepdims=keepdims) + node = onnx.helper.make_node('ArgMin', + inputs=['a_np1'], + outputs=['out'], + keepdims=keepdims) + elif keepdims is None: + b_np = _argmin_numpy(a_np1, axis=axis) + node = onnx.helper.make_node('ArgMin', + inputs=['a_np1'], + outputs=['out'], + axis=axis) + else: + b_np = _argmin_numpy(a_np1, axis=axis, keepdims=keepdims) + node = onnx.helper.make_node('ArgMin', + inputs=['a_np1'], + outputs=['out'], + axis=axis, + keepdims=keepdims) + graph = helper.make_graph([node], + "argmin_test", + inputs = [helper.make_tensor_value_info("a_np1", + TensorProto.INT32, list(a_np1.shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.INT32, list(b_np.shape))]) + + model = helper.make_model(graph, producer_name='argmin_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype) + tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) + +def verify_argmax(input_dim, axis=None, keepdims=None): + def _argmax_numpy(data, axis=0, keepdims=True): + result = np.argmax(data, axis=axis) + if (keepdims == 1): + result = np.expand_dims(result, axis) + return result.astype(data.dtype) + + a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32) + + if keepdims is None and axis is None: + b_np = _argmax_numpy(a_np1) + node = onnx.helper.make_node('ArgMax', + inputs=['a_np1'], + outputs=['out']) + elif axis is None: + b_np = _argmax_numpy(a_np1, keepdims=keepdims) + node = onnx.helper.make_node('ArgMax', + inputs=['a_np1'], + outputs=['out'], + keepdims=keepdims) + elif keepdims is None: + b_np = _argmax_numpy(a_np1, axis=axis) + node = onnx.helper.make_node('ArgMax', + inputs=['a_np1'], + outputs=['out'], + axis=axis) + else: + b_np = _argmax_numpy(a_np1, axis=axis, keepdims=keepdims) + node = onnx.helper.make_node('ArgMax', + inputs=['a_np1'], + outputs=['out'], + axis=axis, + keepdims=keepdims) + + graph = helper.make_graph([node], + "argmax_test", + inputs = [helper.make_tensor_value_info("a_np1", + TensorProto.INT32, list(a_np1.shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.INT32, list(b_np.shape))]) + + model = helper.make_model(graph, producer_name='argmax_test') + + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype) + tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) + +def test_forward_arg_min_max(): + '''Verify argmin and argmax''' + verify_argmin([3,4,4]) + verify_argmax([3,4,4]) + verify_argmin([3,4,4], axis=1) + verify_argmax([3,4,4], axis=0) + verify_argmin([3,4,4], keepdims=0) + verify_argmax([3,4,4], keepdims=1) + for axis in [0,1,2]: + for keepdims in [True,False]: + verify_argmin([3,4,4], axis, keepdims) + verify_argmax([3,4,4], axis, keepdims) + +def verify_constantfill(is_shape, input_dim, out_dim, value, dtype, **kwargs): + input_a = np.random.uniform(size=input_dim).astype(dtype) + out = 
np.empty(shape=out_dim, dtype=dtype)
+    out.fill(value)
+
+    if is_shape:
+        fill_node = helper.make_node("ConstantFill", [], ["out"], shape=input_dim, value=value, **kwargs)
+    else:
+        fill_node = helper.make_node("ConstantFill", ["input_a"], ["out"], value=value, dtype=dtype, **kwargs)
+
+    graph = helper.make_graph([fill_node],
+                              "fill_test",
+                              inputs = [helper.make_tensor_value_info("input_a",
+                                            TensorProto.FLOAT, list(input_dim))],
+                              outputs = [helper.make_tensor_value_info("out",
+                                             TensorProto.FLOAT, list(out.shape))])
+
+    model = helper.make_model(graph, producer_name='fill_test')
+
+    for target, ctx in ctx_list():
+        if is_shape:
+            tvm_out = get_tvm_output(model, [], target, ctx, out.shape)
+        else:
+            tvm_out = get_tvm_output(model, [input_a], target, ctx, out.shape)
+
+        tvm.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5)
+
+def test_constantfill():
+    verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32')
+    verify_constantfill(False, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32')
+    verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5, 4, 5, 6), 10, 'float32', extra_shape=(4, 5, 6))
+
+
+def verify_pad(indata, pads, value=0.0):
+    indata = np.array(indata).astype(np.float32)
+    # numpy expect result
+    len_dim = len(pads) // 2
+    np_pads = [(pads[i], pads[i+len_dim]) for i in range(len_dim)]
+    outdata = np.pad(indata, pad_width=np_pads, mode='constant', constant_values=value)
+    # onnx graph
+    node = helper.make_node(
+        'Pad',
+        inputs=['input'],
+        outputs=['output'],
+        mode='constant',
+        pads=pads,
+        value=value
+    )
+    graph = helper.make_graph([node],
+                              'pad_test',
+                              inputs = [helper.make_tensor_value_info("input",
+                                            TensorProto.FLOAT, list(indata.shape))],
+                              outputs = [helper.make_tensor_value_info("output",
+                                             TensorProto.FLOAT, list(outdata.shape))])
+    model = helper.make_model(graph, producer_name='pad_test')
+    # tvm result
+    for target, ctx in ctx_list():
+        tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32')
+        tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
+
+def test_pad():
+    verify_pad(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], 0.0)
+    verify_pad(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], 0.0)
+    verify_pad(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], 5.0)
+
+def verify_reduce_x(name, indata, axis, keepdims):
+    indata = np.array(indata).astype(np.float32)
+    # numpy expect result
+    if name == 'ReduceMax':
+        outdata = np.maximum.reduce(indata, axis=axis, keepdims=keepdims == 1)
+    elif name == 'ReduceMin':
+        outdata = np.minimum.reduce(indata, axis=axis, keepdims=keepdims == 1)
+    elif name == 'ReduceSum':
+        outdata = np.sum(indata, axis=axis, keepdims=keepdims == 1)
+    elif name == 'ReduceMean':
+        outdata = np.mean(indata, axis=axis, keepdims=keepdims == 1)
+    else:
+        raise Exception('unsupported op: {}'.format(name))
+    if len(np.asarray(outdata).shape) == 0:
+        outdata = np.asarray([outdata])
+    # onnx graph
+    if axis is None:
+        node = helper.make_node(name, inputs=['input'], outputs=['output'],
+                                keepdims=keepdims)
+    else:
+        node = helper.make_node(name, inputs=['input'], outputs=['output'],
+                                axis=axis, keepdims=keepdims)
+    graph = helper.make_graph([node],
+                              '{}_test'.format(name),
+                              inputs = [helper.make_tensor_value_info("input",
+                                            TensorProto.FLOAT, list(indata.shape))],
+                              outputs = [helper.make_tensor_value_info("output",
+                                             TensorProto.FLOAT, list(outdata.shape))])
+    model = helper.make_model(graph, producer_name='{}_test'.format(name))
+    # tvm result
+    for target, ctx
in ctx_list(): + tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32') + tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5) + +def test_reduce_max(): + verify_reduce_x("ReduceMax", + np.random.randn(3, 2, 2).astype(np.float32), + axis=None, keepdims=1) + verify_reduce_x("ReduceMax", + np.random.randn(3, 2, 3).astype(np.float32), + axis=None, keepdims=0) + verify_reduce_x("ReduceMax", + np.random.randn(3, 3, 3).astype(np.float32), + axis=(1,), keepdims=1) + +def test_reduce_min(): + verify_reduce_x("ReduceMin", + np.random.randn(3, 2, 2).astype(np.float32), + axis=None, keepdims=1) + verify_reduce_x("ReduceMin", + np.random.randn(3, 2, 3).astype(np.float32), + axis=None, keepdims=0) + verify_reduce_x("ReduceMin", + np.random.randn(3, 3, 3).astype(np.float32), + axis=(1,), keepdims=1) + +def test_reduce_sum(): + verify_reduce_x("ReduceSum", + np.random.randn(3, 2, 2).astype(np.float32), + axis=None, keepdims=1) + verify_reduce_x("ReduceSum", + np.random.randn(3, 2, 3).astype(np.float32), + axis=None, keepdims=0) + verify_reduce_x("ReduceSum", + np.random.randn(3, 3, 3).astype(np.float32), + axis=(1,), keepdims=1) + +def test_reduce_mean(): + verify_reduce_x("ReduceMean", + np.random.randn(3, 2, 2).astype(np.float32), + axis=None, keepdims=1) + verify_reduce_x("ReduceMean", + np.random.randn(3, 2, 3).astype(np.float32), + axis=None, keepdims=0) + verify_reduce_x("ReduceMean", + np.random.randn(3, 3, 3).astype(np.float32), + axis=(1,), keepdims=1) + +def verify_split(indata, outdatas, split, axis=0): + indata = np.array(indata).astype(np.float32) + outdatas = [np.array(o).astype(np.float32) for o in outdatas] + node = helper.make_node( + 'Split', + inputs=['input'], + outputs=['output_{}'.format(i) for i in range(len(split))], + axis=axis, + split=split + ) + graph = helper.make_graph([node], + 'split_test', + inputs = [helper.make_tensor_value_info("input", + TensorProto.FLOAT, list(indata.shape))], + outputs = [helper.make_tensor_value_info("output_{}".format(i), + TensorProto.FLOAT, list(outdatas[i].shape)) + for i in range(len(split)) + ]) + model = helper.make_model(graph, producer_name='split_test') + + for target, ctx in ctx_list(): + output_shape = [o.shape for o in outdatas] + output_type = ['float32', 'float32', 'float32'] + tvm_out = get_tvm_output(model, indata, target, ctx, output_shape, output_type) + for o, t in zip(outdatas, tvm_out): + tvm.testing.assert_allclose(o, t) + +def test_split(): + # 1D + verify_split([1., 2., 3., 4., 5., 6.], [[1., 2.], [3., 4.], [5., 6.]], [2, 2, 2], 0) + verify_split([1., 2., 3., 4., 5., 6.], [[1., 2.], [3.], [4., 5., 6.]], [2, 1, 3], 0) + # 2D + verify_split([[1., 2., 3., 4.], [7., 8., 9., 10.]], + [[[1., 2.], [7., 8.]], [[3., 4.], [9., 10.]]], [2, 2], 1) + +def test_binary_ops(): + in_shape = (1, 2, 3, 3) + dtype = "float32" + out_shape = in_shape + + def verify_binary_ops(op, x, y, out_np, broadcast=None, rtol=1e-7, atol=1e-7): + if broadcast is None: + z = helper.make_node(op, ['in1', 'in2'], ['out']) + else: + z = helper.make_node(op, ['in1', 'in2'], ['out'], broadcast=1) + graph = helper.make_graph([z], + '_test', + inputs = [helper.make_tensor_value_info("in1", + TensorProto.FLOAT, list(in_shape)), + helper.make_tensor_value_info("in2", + TensorProto.FLOAT, list(in_shape))], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(out_shape))]) + model = helper.make_model(graph, producer_name='_test') + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [x, y], 
target, ctx) + tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol) + + x = np.random.uniform(size=in_shape).astype(dtype) + y = np.random.uniform(size=in_shape).astype(dtype) + z = np.random.uniform(size=(3,)).astype(dtype) + verify_binary_ops("Add",x, y, x + y, broadcast=None) + verify_binary_ops("Add", x, z, x + z, broadcast=True) + verify_binary_ops("Sub", x, y, x - y, broadcast=None) + verify_binary_ops("Sub", x, z, x - z, broadcast=True) + verify_binary_ops("Mul",x, y, x * y, broadcast=None) + verify_binary_ops("Mul", x, z, x * z, broadcast=True) + verify_binary_ops("Div", x, y, x / y, broadcast=None, rtol=1e-5, atol=1e-5) + verify_binary_ops("Div", x, z, x / z, broadcast=True, rtol=1e-5, atol=1e-5) + verify_binary_ops("Sum", x, y, x + y, broadcast=None) + +def test_single_ops(): + in_shape = (1, 2, 3, 3) + dtype = "float32" + out_shape = in_shape + + def verify_single_ops(op, x, out_np, rtol=1e-7, atol=1e-7): + z = helper.make_node(op, ['in1'], ['out']) + graph = helper.make_graph([z], + '_test', + inputs = [helper.make_tensor_value_info("in1", + TensorProto.FLOAT, list(in_shape)),], + outputs = [helper.make_tensor_value_info("out", + TensorProto.FLOAT, list(out_shape))]) + model = helper.make_model(graph, producer_name='_test') + for target, ctx in ctx_list(): + tvm_out = get_tvm_output(model, [x], target, ctx) + tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol) + + x = np.random.uniform(size=in_shape).astype(dtype) + verify_single_ops("Neg",x, -x) + verify_single_ops("Abs",x, np.abs(x)) + verify_single_ops("Reciprocal",x, 1/x, rtol=1e-5, atol=1e-5) + verify_single_ops("Sqrt",x, np.sqrt(x), rtol=1e-5, atol=1e-5) + verify_single_ops("Relu",x, np.maximum(x, 0)) + verify_single_ops("Exp",x, np.exp(x), rtol=1e-5, atol=1e-5) + verify_single_ops("Log",x, np.log(x), rtol=1e-5, atol=1e-5) + verify_single_ops("Log",x, np.log(x), rtol=1e-5, atol=1e-5) + verify_single_ops("Tanh",x, np.tanh(x), rtol=1e-5, atol=1e-5) + verify_single_ops("Sigmoid",x, 1 / (1 + np.exp(-x)), rtol=1e-5, atol=1e-5) + verify_single_ops("Softsign",x, x / (1 + np.abs(x)), rtol=1e-5, atol=1e-5) + verify_single_ops("SoftPlus",x, np.log(1 + np.exp(x)), rtol=1e-5, atol=1e-5) + +def test_leaky_relu(): + def leaky_relu_x(x, alpha): + return np.where(x >= 0, x, x * alpha) + _test_onnx_op_elementwise((2, 4, 5, 6), + leaky_relu_x, + {'alpha': 0.25}, + 'float32', + 'LeakyRelu', + {'alpha': 0.25}) + +def test_elu(): + def elu_x(x, alpha): + return np.where(x > 0, x, alpha * (np.exp(x) - 1.0)) + _test_onnx_op_elementwise((2, 4, 5, 6), + elu_x, + {'alpha': 0.25}, + 'float32', + 'Elu', + {'alpha': 0.25}) + +def test_selu(): + def selu_x(x, alpha, gamma): + return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0)) + _test_onnx_op_elementwise((2, 4, 5, 6), + selu_x, + {'alpha': 0.25, 'gamma': 0.3}, + 'float32', + 'Selu', + {'alpha': 0.25, 'gamma': 0.3}) + +def test_ThresholdedRelu(): + def ThresholdedRelu_x(x, alpha): + out_np = np.clip(x, alpha, np.inf) + out_np[out_np == alpha] = 0 + return out_np + _test_onnx_op_elementwise((2, 4, 5, 6), + ThresholdedRelu_x, + {'alpha': 0.25}, + 'float32', + 'ThresholdedRelu', + {'alpha': 0.25}) + +def test_ScaledTanh(): + def ScaledTanh_x(x, alpha, beta): + return alpha * np.tanh(beta * x) + _test_onnx_op_elementwise((2, 4, 5, 6), + ScaledTanh_x, + {'alpha': 0.25, 'beta': 0.3}, + 'float32', + 'ScaledTanh', + {'alpha': 0.25, 'beta': 0.3}) + +def test_ParametricSoftplus(): + def ParametricSoftplus_x(x, alpha, beta): + return alpha * np.log(np.exp(beta * x) + 1) + 
_test_onnx_op_elementwise((2, 4, 5, 6), + ParametricSoftplus_x, + {'alpha': 0.25, 'beta': 0.3}, + 'float32', + 'ParametricSoftplus', + {'alpha': 0.25, 'beta': 0.3}) + +def test_Scale(): + def Scale_x(x, scale): + return scale * x + _test_onnx_op_elementwise((2, 4, 5, 6), + Scale_x, + {'scale': 0.25}, + 'float32', + 'Scale', + {'scale': 0.25}) + +def test_LogSoftmax(): + _test_onnx_op_elementwise((1, 4), + topi.testing.log_softmax_python, + {}, + 'float32', + 'LogSoftmax', + {'axis': 1}, + rtol=1e-5, + atol=1e-5) + +if __name__ == '__main__': + # verify_super_resolution_example() + # verify_squeezenet1_1() + # verify_lenet() + verify_resnet18() + test_reshape() + test_reshape_like() + test_power() + test_squeeze() + test_unsqueeze() + test_slice() + test_floor() + test_ceil() + test_clip() + test_matmul() + test_gather() + test_lrn() + test_upsample() + test_forward_min() + test_forward_max() + test_forward_mean() + test_forward_hardsigmoid() + test_forward_arg_min_max() + test_softmax() + test_constantfill() + test_pad() + test_reduce_max() + test_reduce_min() + test_reduce_sum() + test_reduce_mean() + test_split() + test_binary_ops() + test_single_ops() + test_leaky_relu() + test_elu() + test_selu() + test_ThresholdedRelu() + test_ScaledTanh() + test_ParametricSoftplus() + test_Scale() + test_LogSoftmax() diff --git a/nnvm/tests/python/unittest/test_correct_layout.py b/nnvm/tests/python/unittest/test_correct_layout.py new file mode 100644 index 000000000000..5d313fbacb3e --- /dev/null +++ b/nnvm/tests/python/unittest/test_correct_layout.py @@ -0,0 +1,379 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
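+# The helper below drives everything in this file: create a graph from a
+# symbol, pin the input layouts, run the "CorrectLayout" pass, and read the
+# resulting per-entry layouts back. A minimal sketch (names illustrative):
+#
+#   g = graph.create(sym.dense(sym.Variable("data", shape=(10, 20)),
+#                              units=30, name="fc"))
+#   graph_attr.set_layout_inputs(g, "HW")
+#   g = g.apply("CorrectLayout")
+#   layouts = g.json_attr("layout")   # one layout string per node entry
+#
+# Running the pass a second time with a different input layout (e.g.
+# "HW16w") inserts __layout_transform__ nodes, which appear below under
+# names such as "data_HW".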
+import nnvm
+import nnvm.symbol as sym
+import nnvm.graph as graph
+from nnvm.compiler import graph_attr
+
+def correct_layout(g, layout=None):
+    if isinstance(g, nnvm.symbol.Symbol):
+        g = graph.create(g)
+    if layout:
+        graph_attr.set_layout_inputs(g, layout)
+    g = g.apply("CorrectLayout")
+    ldict = {}
+    vlayout = g.json_attr("layout")
+    entry_ptr = g.index.entry_ptr
+    for i, n in enumerate(g.index.nodes):
+        begin, end = entry_ptr[i], entry_ptr[i + 1]
+        ldict[n["name"]] = vlayout[begin:end]
+    return g, ldict
+
+
+# Level 1
+def test_dense():
+    x = sym.Variable("data", shape=(10, 20))
+    y = sym.dense(x, units=30, name="fc")
+    g, ldict = correct_layout(y, "HW")
+    assert(ldict["data"][0] == "HW")
+    assert(ldict["fc"][0] == "HW")
+    assert(ldict["fc_bias"][0] == "__undef__")
+    # second pass will insert layout transform
+    _, ldict = correct_layout(g, "HW16w")
+    assert(ldict["data"][0] == "HW16w")
+    assert(ldict["data_HW"][0] == "HW")
+    assert(ldict["fc"][0] == "HW")
+    assert(ldict["fc_bias"][0] == "__undef__")
+
+
+def test_matmul():
+    a = sym.Variable("a", shape=(10, 20))
+    b = sym.Variable("b", shape=(20, 30))
+    c = sym.matmul(a, b, name="matmul")
+    g, ldict = correct_layout(c, {"a" : "HW", "b" : "WC"})
+    assert(ldict["a"][0] == "HW")
+    assert(ldict["b"][0] == "WC")
+    assert(ldict["matmul"][0] == "HC")
+    # second pass will insert layout transform
+    _, ldict = correct_layout(g, {"a" : "HW16w", "b" : "WC16c"})
+    assert(ldict["a"][0] == "HW16w")
+    assert(ldict["a_HW"][0] == "HW")
+    assert(ldict["b"][0] == "WC16c")
+    assert(ldict["b_WC"][0] == "WC")
+    assert(ldict["matmul"][0] == "HC")
+    a = sym.Variable("a", shape=(20, 10))
+    c = sym.matmul(a, b, name="matmul", transpose_a=True)
+    g, ldict = correct_layout(c, {"a" : "HW", "b" : "HC"})
+    assert(ldict["a"][0] == "HW")
+    assert(ldict["b"][0] == "HC")
+    assert(ldict["matmul"][0] == "WC")
+    b = sym.Variable("b", shape=(30, 20))
+    c = sym.matmul(a, b, name="matmul", transpose_b=True)
+    g, ldict = correct_layout(c, {"a" : "HW", "b" : "CW"})
+    assert(ldict["a"][0] == "HW")
+    assert(ldict["b"][0] == "CW")
+    assert(ldict["matmul"][0] == "HC")
+    a = sym.Variable("a", shape=(20, 10))
+    b = sym.Variable("b", shape=(30, 20))
+    c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True)
+    g, ldict = correct_layout(c, {"a" : "HW", "b" : "CH"})
+    assert(ldict["a"][0] == "HW")
+    assert(ldict["b"][0] == "CH")
+    assert(ldict["matmul"][0] == "WC")
+
+
+def test_concatenate():
+    x1 = sym.Variable("x", shape=(10, 20))
+    x2 = sym.Variable("y", shape=(10, 30))
+    z = sym.concatenate(x1, x2, name="concat")
+    g, ldict = correct_layout(z, {"x": "HW", "y": "HW"})
+    assert(ldict["x"][0] == "HW")
+    assert(ldict["y"][0] == "HW")
+    assert(ldict["concat"][0] == "HW")
+    # second pass will insert layout transform
+    _, ldict = correct_layout(g, {"x": "HW16w", "y": "HW16w"})
+    assert(ldict["x"][0] == "HW16w")
+    assert(ldict["y"][0] == "HW16w")
+    assert(ldict["concat"][0] == "HW16w")
+
+    x1 = sym.Variable("x", shape=(10, 20, 60))
+    x2 = sym.Variable("y", shape=(10, 20, 40))
+    z = sym.concatenate(x1, x2, axis=2, name="concat")
+    g, ldict = correct_layout(z, {"x": "H20wW", "y": "H20wW"})
+    assert(ldict["x"][0] == "H20wW")
+    assert(ldict["y"][0] == "H20wW")
+    assert(ldict["concat"][0] == "H20wW")
+    # second pass will insert layout transform
+    _, ldict = correct_layout(g, {"x": "HW", "y": "HW"})
+    assert(ldict["x_H20wW"][0] == "H20wW")
+    assert(ldict["y_H20wW"][0] == "H20wW")
+    assert(ldict["concat"][0] == "H20wW")
+
+
+def test_expand_dims():
+    x = sym.Variable("x",
shape=(10, 20)) + y = sym.expand_dims(x, axis=1, name="y") + g, ldict = correct_layout(y, "HW") + assert(ldict["x"][0] == "HW") + assert(ldict["y"][0] == "__undef__") + # second pass will insert layout transform + _, ldict = correct_layout(g, "HW16w") + assert(ldict["x"][0] == "HW16w") + assert(ldict["x_HW"][0] == "HW") + assert(ldict["y"][0] == "__undef__") + + +def test_split(): + x = sym.Variable("x", shape=(10, 20)) + y = sym.split(x, indices_or_sections=[11], name="y") + g, ldict = correct_layout(y, "HW") + assert(ldict["x"][0] == "HW") + assert(ldict["y"][0] == "__undef__") + # second pass will insert layout transform + _, ldict = correct_layout(g, "HW16w") + assert(ldict["x"][0] == "HW16w") + assert(ldict["x_HW"][0] == "HW") + assert(ldict["y"][0] == "__undef__") + + +def test_batchnorm(): + x = sym.Variable("data", shape=(10, 20, 30, 40)) + y = sym.batch_norm(x, axis=1, epsilon=2e-5, name="bn") + g, ldict = correct_layout(y, "NCHW") + assert(ldict["data"][0] == "NCHW") + assert(ldict["bn"][0] == "NCHW") + assert(ldict["bn"][1] == "C") + assert(ldict["bn"][2] == "C") + assert(ldict["bn_beta"][0] == "C") + assert(ldict["bn_gamma"][0] == "C") + assert(ldict["bn_moving_mean"][0] == "C") + assert(ldict["bn_moving_var"][0] == "C") + # batch_norm can deal with sub-dim of C at the last dim. + g, ldict = correct_layout(g, "NCHW16c") + assert(ldict["data"][0] == "NCHW16c") + assert(ldict["bn"][0] == "NCHW16c") + assert(ldict["bn"][1] == "C16c") + assert(ldict["bn"][2] == "C16c") + assert(ldict["bn_beta"][0] == "C") + assert(ldict["bn_beta_C16c"][0] == "C16c") + assert(ldict["bn_gamma"][0] == "C") + assert(ldict["bn_gamma_C16c"][0] == "C16c") + assert(ldict["bn_moving_mean"][0] == "C") + assert(ldict["bn_moving_mean_C16c"][0] == "C16c") + assert(ldict["bn_moving_var"][0] == "C") + assert(ldict["bn_moving_var_C16c"][0] == "C16c") + # but for other layout, it does a layout transform for data + g, ldict = correct_layout(g, "NCH16cW") + assert(ldict["data"][0] == "NCH16cW") + assert(ldict["data_NCHW16c"][0] == "NCHW16c") + assert(ldict["bn"][0] == "NCHW16c") + assert(ldict["bn"][1] == "C16c") + assert(ldict["bn"][2] == "C16c") + assert(ldict["bn_beta"][0] == "C") + assert(ldict["bn_beta_C16c"][0] == "C16c") + assert(ldict["bn_gamma"][0] == "C") + assert(ldict["bn_gamma_C16c"][0] == "C16c") + assert(ldict["bn_moving_mean"][0] == "C") + assert(ldict["bn_moving_mean_C16c"][0] == "C16c") + assert(ldict["bn_moving_var"][0] == "C") + assert(ldict["bn_moving_var_C16c"][0] == "C16c") + + +def test_flatten(): + x = sym.Variable("x", shape=(10, 20, 10, 10)) + y = sym.flatten(x, name="y") + g, ldict = correct_layout(y, "NCHW") + assert(ldict["x"][0] == "NCHW") + assert(ldict["y"][0] == "__undef__") + # second pass will insert layout transform + _, ldict = correct_layout(g, "NCHW16c") + assert(ldict["x"][0] == "NCHW16c") + assert(ldict["x_NCHW"][0] == "NCHW") + assert(ldict["y"][0] == "__undef__") + + +def test_softmax(): + x = sym.Variable("x", shape=(10, 20, 10, 10)) + y = sym.softmax(x, name="y") + g, ldict = correct_layout(y, "NCHW") + assert(ldict["x"][0] == "NCHW") + assert(ldict["y"][0] == "NCHW") + # second pass will insert layout transform + _, ldict = correct_layout(g, "NCHW16c") + assert(ldict["x"][0] == "NCHW16c") + assert(ldict["x_NCHW"][0] == "NCHW") + assert(ldict["y"][0] == "NCHW") + + +# Level 2 +def test_conv2d(): + x = sym.Variable("data", shape=(1, 32, 512, 512)) + y = sym.conv2d(x, name="conv", channels=12, + kernel_size=(3,3), padding=(1,1), layout="NCHW") + _, ldict = correct_layout(y) 
+ assert(ldict["data"][0] == "NCHW") + assert(ldict["conv_weight"][0] == "OIHW") + assert(ldict["conv_bias"][0] == "C") + assert(ldict["conv"][0] == "NCHW") + y = sym.conv2d(x, name="conv", channels=12, + kernel_size=(3,3), padding=(1,1), layout="NCHW16c", + kernel_layout="OIHW16i16o", out_layout="NCHW8c") + _, ldict = correct_layout(y) + assert(ldict["data"][0] == "NCHW16c") + assert(ldict["conv_weight"][0] == "OIHW16i16o") + assert(ldict["conv_bias"][0] == "C8c") + assert(ldict["conv"][0] == "NCHW8c") + y = sym.conv2d(x, name="conv", channels=12, + kernel_size=(3,3), padding=(1,1), layout="N16cHWC") + _, ldict = correct_layout(y) + assert(ldict["data"][0] == "N16cHWC") + assert(ldict["conv_weight"][0] == "OIHW") + assert(ldict["conv_bias"][0] == "16cC") + assert(ldict["conv"][0] == "N16cHWC") + + +def test_conv2d_transpose(): + x = sym.Variable("data", shape=(1, 32, 512, 512)) + y = sym.conv2d_transpose(x, name="conv", channels=12, + kernel_size=(3,3), padding=(1,1), layout="NCHW") + _, ldict = correct_layout(y) + assert(ldict["data"][0] == "NCHW") + assert(ldict["conv_weight"][0] == "OIHW") + assert(ldict["conv_bias"][0] == "C") + assert(ldict["conv"][0] == "NCHW") + + +def test_max_pool2d(): + x = sym.Variable("data", shape=(1, 32, 512, 512)) + y = sym.max_pool2d(x, name="pool", pool_size=(3,3), + padding=(1,1), layout="NCHW") + g, ldict = correct_layout(y) + assert(ldict["data"][0] == "NCHW") + assert(ldict["pool"][0] == "NCHW") + # if index of H and W remain the same, + # pool2d does not convert the layout. + g, ldict = correct_layout(g, "NCHW16c") + assert(ldict["data"][0] == "NCHW16c") + assert(ldict["pool"][0] == "NCHW16c") + # for other layout it requires a layout transform. + g, ldict = correct_layout(g, "NHWC") + assert(ldict["data"][0] == "NHWC") + assert(ldict["data_NCHW"][0] == "NCHW") + assert(ldict["pool"][0] == "NCHW") + + +def test_global_pool2d(): + x = sym.Variable("data", shape=(1, 32, 512, 512)) + y = sym.global_max_pool2d(x, name="pool", layout="NCHW") + g, ldict = correct_layout(y) + assert(ldict["data"][0] == "NCHW") + assert(ldict["pool"][0] == "NCHW") + # if index of H and W remain the same, + # pool2d does not convert the layout. + g, ldict = correct_layout(g, "NCHW16c") + assert(ldict["data"][0] == "NCHW16c") + assert(ldict["pool"][0] == "NCHW16c") + # for other layout it requires a layout transform. 
+ g, ldict = correct_layout(g, "NHWC") + assert(ldict["data"][0] == "NHWC") + assert(ldict["data_NCHW"][0] == "NCHW") + assert(ldict["pool"][0] == "NCHW") + + +# Level 3 +def test_reshape(): + x = sym.Variable("x", shape=(4,)) + y = sym.reshape(x, shape=(2,2), name="y") + g, ldict = correct_layout(y, "C") + assert(ldict["x"][0] == "C") + assert(ldict["y"][0] == "__undef__") + # second pass will insert layout transform + g, ldict = correct_layout(g, "C16c") + assert(ldict["x"][0] == "C16c") + assert(ldict["x_C"][0] == "C") + assert(ldict["y"][0] == "__undef__") + + +def test_transpose(): + x = sym.Variable("x", shape=(1, 32, 512, 512)) + y = sym.transpose(x, name="y", axes=(0, 2, 3, 1)) + g, ldict = correct_layout(y, "NCHW") + assert(ldict["x"][0] == "NCHW") + assert(ldict["y"][0] == "NHWC") + # second pass will insert layout transform + g, ldict = correct_layout(g, "NCHW16c") + assert(ldict["x"][0] == "NCHW16c") + assert(ldict["x_NCHW"][0] == "NCHW") + assert(ldict["y"][0] == "NHWC") + + +def test_broadcast_to(): + x = sym.Variable("x", shape=(4, 1)) + y = sym.broadcast_to(x, shape=(0, 4), name="y") + g, ldict = correct_layout(y, "HW") + assert(ldict["x"][0] == "HW") + assert(ldict["y"][0] == "__undef__") + # second pass will insert layout transform + g, ldict = correct_layout(g, "HW16h") + assert(ldict["x"][0] == "HW16h") + assert(ldict["x_HW"][0] == "HW") + assert(ldict["y"][0] == "__undef__") + + +def test_broadcast_binary(): + x = sym.Variable("x", shape=(1, 16, 512, 512)) + y = sym.Variable("y", shape=(16, 512, 512)) + z = sym.broadcast_add(x, y, name="z") + g, ldict = correct_layout(z, {"x": "NCHW", "y": "CHW"}) + assert(ldict["x"][0] == "NCHW") + assert(ldict["y"][0] == "CHW") + assert(ldict["z"][0] == "NCHW") + # prior to keep the left layout if they do not match. + g, ldict = correct_layout(g, {"x": "NCHW16c", "y": "CHW"}) + assert(ldict["x"][0] == "NCHW16c") + assert(ldict["y"][0] == "CHW") + assert(ldict["y_CHW16c"][0] == "CHW16c") + assert(ldict["z"][0] == "NCHW16c") + # broadcast_add(HCW16c, N16nCH16cW) + g, ldict = correct_layout(z, {"x": "HCW16c", "y": "N16nCH16cW"}) + assert(ldict["x"][0] == "HCW16c") + assert(ldict["y"][0] == "N16nCH16cW") + assert(ldict["x_CH16cW"][0] == "CH16cW") + assert(ldict["z"][0] == "N16nCH16cW") + + +def test_reduce(): + x = sym.Variable("x", shape=(1, 16, 512, 512)) + y = sym.sum(x, name="y", axis=1) + g, ldict = correct_layout(y, "NCHW") + assert(ldict["x"][0] == "NCHW") + assert(ldict["y"][0] == "__undef__") + # second pass will insert layout transform + g, ldict = correct_layout(g, "NCHW16c") + assert(ldict["x"][0] == "NCHW16c") + assert(ldict["x_NCHW"][0] == "NCHW") + assert(ldict["y"][0] == "__undef__") + + +if __name__ == "__main__": + test_dense() + test_matmul() + test_concatenate() + test_expand_dims() + test_split() + test_batchnorm() + test_flatten() + test_softmax() + test_conv2d() + test_conv2d_transpose() + test_max_pool2d() + test_global_pool2d() + test_reshape() + test_transpose() + test_broadcast_to() + test_broadcast_binary() + test_reduce() diff --git a/nnvm/tests/python/unittest/test_graph.py b/nnvm/tests/python/unittest/test_graph.py new file mode 100644 index 000000000000..1ba0a2487cee --- /dev/null +++ b/nnvm/tests/python/unittest/test_graph.py @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import json +import nnvm.symbol as sym +import nnvm.graph as graph +import nnvm.compiler.graph_util as graph_util + +def test_json_pass(): + x = sym.Variable('x') + y = sym.dense(data=x, name='conv', units=30) + g = graph.create(y) + ret = g.apply('SaveJSON') + ret._set_json_attr('json', ret.json_attr('json')) + g2 = ret.apply('LoadJSON') + assert g2.apply('SaveJSON').json_attr('json') == ret.json_attr('json') + json = g.json() + g2 = graph.load_json(json) + assert json == g2.json() + + +def test_json_pass_with_attr(): + x = sym.Variable('x') + y = sym.dense(data=x, name='fc', units=30) + g = graph.create(y) + g._set_json_attr('version', '0.1.0') + ret = g.apply('SaveJSON') + json_str = ret.json_attr('json') + ret._set_json_attr('json', json_str) + g2 = ret.apply('LoadJSON') + assert g2.json_attr('version') == '0.1.0' + + +def test_graph_json_attr(): + x = sym.Variable('x') + y = sym.dense(data=x, name='fc', units=30) + g = graph.create(y) + g._set_json_attr('ilist', [1,2,3], 'list_int') + assert g.json_attr('ilist') == [1,2,3] + +def test_list_args(): + x = sym.Variable('x') + z = sym.Variable('z') + y = sym.dense(data=x, name='fc', units=30) + y = sym.elemwise_add(y, z, name='add1') + +def test_infer_shape(): + x = sym.Variable('x', shape=(2, 4, 2)) + y = sym.elemwise_add(x, x, name='add1') + y = sym.flatten(y, name="flatten") + g = graph.create(y) + g._set_json_attr("shape_attr_key", "shape") + g = g.apply('InferShape') + jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) + jnodes = jgraph['nodes'] + jnode_row_ptr = jgraph['node_row_ptr'] + nindex = {n['name']: i for i, n in enumerate(jnodes)} + assert g.json_attr('shape')[jnode_row_ptr[nindex["flatten"]]] == [2, 8] + assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [2, 4, 2] + +def test_infer_shape_known_partial(): + x = sym.Variable('x') + y = sym.elemwise_add(x, x, name='add1') + y = sym.flatten(y, name="flatten1") + g = graph.create(y) + jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) + shape = [[2, 4, 2], [] , []] + g._set_json_attr("shape", shape, 'list_shape') + g = g.apply("InferShape") + jnodes = jgraph['nodes'] + jnode_row_ptr = jgraph['node_row_ptr'] + nindex = {n['name']: i for i, n in enumerate(jnodes)} + assert g.json_attr('shape')[jnode_row_ptr[nindex["flatten1"]]] == [2, 8] + assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [2, 4, 2] + +def test_infer_type(): + x = sym.Variable('x', dtype=0) + y = sym.elemwise_add(x, x, name='add1') + y = sym.cast(y, dtype="float64", name="cast1") + g = graph.create(y) + g._set_json_attr("dtype_attr_key", "dtype") + g = g.apply('InferType') + jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) + jnodes = jgraph['nodes'] + jnode_row_ptr = jgraph['node_row_ptr'] + nindex = {n['name']: i for i, n in enumerate(jnodes)} + assert g.json_attr('dtype')[jnode_row_ptr[nindex["cast1"]]] == 1 + assert g.json_attr('dtype')[jnode_row_ptr[nindex["add1"]]] == 0 + +def 
test_plan_memory(): + x = sym.Variable('x', shape=(4, 2)) + x2 = sym.elemwise_add(x, x, name='addk') + y = sym.flatten(x2, name="reshapek") + y = sym.elemwise_add(y, x2, name="add2") + y = sym.elemwise_add(y, y) + g = graph.create(y) + g._set_json_attr("shape_attr_key", "shape") + g = g.apply(["InferShape", "InferType", "PlanMemory"]) + jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) + jnodes = jgraph['nodes'] + jnode_row_ptr = jgraph['node_row_ptr'] + storage_id = g.json_attr('storage_id') + nindex = {n['name']: i for i, n in enumerate(jnodes)} + assert (storage_id[jnode_row_ptr[nindex["addk"]]] != + storage_id[jnode_row_ptr[nindex["reshapek"]]]) + assert (storage_id[jnode_row_ptr[nindex["add2"]]] == + storage_id[jnode_row_ptr[nindex["reshapek"]]]) + +def test_print_graph_ir(): + x = sym.Variable("x", shape=(1, 1, 10, 20)) + y = sym.conv2d(x + 1, name="y", channels=10, kernel_size=(3,3)) + g = graph.create(y) + g = g.apply("InferShape") + ir1 = g.ir() + ir2 = g.ir(join_entry_attrs=["shape"]) + assert("y_bias" in ir1) + assert("shape=" in ir2) + +def test_gradient(): + x = sym.Variable("x") + y = sym.Variable("y") + z1 = sym.elemwise_add(x, sym.sqrt(y)) + z2 = sym.log(x) + gradient = graph_util.gradients([z1, z2], [x, y]) + assert len(gradient) == 2 + + g1 = sym.Variable("g1") + g2 = sym.Variable("g2") + grad_ys = [g1, g2] + gradient = graph_util.gradients(sym.Group([z1, z2]), + sym.Group([x, y]), grad_ys=grad_ys) + g_graph = graph.create(sym.Group(gradient)).ir() + assert len(gradient) == 2 + assert "g1" in g_graph + assert "g2" in g_graph + +if __name__ == "__main__": + test_print_graph_ir() + test_json_pass_with_attr() + test_graph_json_attr() + test_json_pass() + test_infer_shape() + test_infer_shape_known_partial() + test_infer_type() + test_plan_memory() + test_list_args() + test_gradient() diff --git a/nnvm/tests/python/unittest/test_graph_gradient.py b/nnvm/tests/python/unittest/test_graph_gradient.py new file mode 100644 index 000000000000..4ae6053c946f --- /dev/null +++ b/nnvm/tests/python/unittest/test_graph_gradient.py @@ -0,0 +1,152 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
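+# A minimal sketch of the two gradient utilities exercised below (shapes
+# and names are illustrative): graph_util.gradients returns one gradient
+# symbol per requested input, while graph_util.get_gradient_graph returns
+# the full backward graph so that shape/dtype inference can be checked on it.
+#
+#   x = sym.Variable("x", shape=(4, 2))
+#   y = sym.relu(x, name="y")
+#   grads = graph_util.gradients(y, [x])            # [grad_x]
+#   grad_g = graph_util.get_gradient_graph(y, [x])
+#   in_shapes, out_shapes = graph_util.infer_shape(grad_g)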
+import nnvm.symbol as sym +from nnvm.compiler import graph_util + +def test_cnn_gradients(): + # input data + h = 128 + w = 128 + data_shape = (1000, 3, h, w) + data = sym.Variable('data', shape=data_shape, dtype=0) + + # conv2d + num_channels = 64 + kernel_size = 32 + conv_w_shape = (num_channels, 3, kernel_size, kernel_size) + conv_b_shape = (num_channels,) + conv_w = sym.Variable('conv_w', shape=conv_w_shape) + conv_b = sym.Variable('conv_b', shape=conv_b_shape) + conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b, + channels=num_channels, kernel_size=(kernel_size, kernel_size), + name='conv1') + # relu1 + relu1 = sym.relu(data=conv1, name='relu1') + # max pooling + max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2), name='max_pooling1') + # flatten + flatten1 = sym.flatten(data=max_pooling1) + # shape after flatten + flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels + # dense1 + dense1_hidden_units = 100 + dense1 = sym.dense(data=flatten1, name='dense1', units=dense1_hidden_units) + # relu2 + relu2 = sym.relu(data=dense1, name='relu2') + # dense2 + dense2_hidden_units = 10 + dense2 = sym.dense(data=relu2, name='dense2', units=dense2_hidden_units) + # softmax + mlp = sym.softmax(data=dense2, name='softmax') + # fake non-sparse label + label = sym.full_like(mlp, fill_value=1) + # cross entropy loss + ce_loss = sym.sum( + sym.elemwise_mul(sym.log_softmax(dense2), label), + axis=1, + keepdims=True, + name="ce_loss") + + # input variables: + # print grad_g.symbol.list_input_names() + # >> ['data', 'conv_w', 'conv_b', + # 'dense1_weight', 'dense1_bias', + # 'dense2_weight', 'dense2_bias'] + + # output gradient variables: + # print grad_g.symbol.list_output_names() + # >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias', + # 'dense1_grad_weight', 'dense1_grad_bias', + # 'dense2_grad_weight', 'dense2_grad_bias'] + grad_g = graph_util.get_gradient_graph(ce_loss, ce_loss.list_input_variables()) + + # infer shape + in_shapes, out_shapes = graph_util.infer_shape(grad_g) + + # forward graph shape + assert in_shapes == [list(data_shape), list(conv_w_shape), list(conv_b_shape), + [dense1_hidden_units, flatten_out_shape], [dense1_hidden_units], + [dense2_hidden_units, dense1_hidden_units], [dense2_hidden_units]] + # input grads shape should be equal with input shape + assert in_shapes == out_shapes + + # output grads w.r.t input variables + grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables()) + + # gradients number should be equal with grad_input number + assert len(grads) == len(ce_loss.list_input_variables()) + + # infer type + in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g) + assert out_dtypes == ['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32'] + +def test_multi_loss_graph_gradients(): + # input data + shape1 = (1000, 100) + data1 = sym.Variable('data1', shape=(1000, 100), dtype=0) + + # fake non-sparse label + label = sym.full(fill_value=3) + + # square loss + sub1 = sym.elemwise_sub(data1, label, name="sub1") + square_loss = sym.sum(data=sub1**2, axis=1, name="square_loss") + + # fake loss1 + shape2 = (1000, ) + data2 = sym.Variable('data2', shape=shape2, dtype=0) + loss1 = sym.sqrt(data2, name="loss1") + + # fake loss2 + loss2 = sym.relu(data1, name='loss2') + + # block loss1 + total_loss = sym.elemwise_sum( + sym.block_grad(loss1), + square_loss, + num_args=2, + name="total_loss") + + # grad_g.symbol.list_output_names() + # >> ['loss1_grad_0_output', 'grad_sum_output'] + grad_g = 
graph_util.get_gradient_graph([total_loss, loss2], total_loss.list_input_variables()) + # infer shape + in_shapes, out_shapes = graph_util.infer_shape(grad_g) + assert out_shapes == [list(shape2), list(shape1)] + + # grad_data1 is elemwise_sum of grad_loss2, grad_square_loss + grad_data1 = grad_g.symbol[1] + assert grad_data1.list_attr()['num_args'] == '2' + + # block grad should return zero grad + grad_data2 = grad_g.symbol[0] + assert 'zeros_like' in grad_g.ir() + + # test reverse infer shape for label + assert grad_g.apply('InferShape').json_attr('shape_num_unknown_nodes') == 0 + + # infer type + in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g) + assert out_dtypes == ['float32', 'float32'] + + # test reverse infer type for label + assert grad_g.apply('InferType').json_attr('dtype_num_unknown_nodes') == 0 + + +if __name__ == "__main__": + test_cnn_gradients() + test_multi_loss_graph_gradients() diff --git a/nnvm/tests/python/unittest/test_infer_shape.py b/nnvm/tests/python/unittest/test_infer_shape.py new file mode 100644 index 000000000000..c394fab562f2 --- /dev/null +++ b/nnvm/tests/python/unittest/test_infer_shape.py @@ -0,0 +1,415 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import json +import nnvm.symbol as sym +import nnvm.graph as graph + +def infer_shape(sym): + g = graph.create(sym) + g._set_json_attr("shape_attr_key", "shape") + g = g.apply("InferShape") + sdict = {} + vshape = g.json_attr("shape") + entry_ptr = g.index.entry_ptr + for i, n in enumerate(g.index.nodes): + begin, end = entry_ptr[i], entry_ptr[i + 1] + sdict[n["name"]] = vshape[begin:end] + return sdict + +# Level 1 +def test_dense(): + x = sym.Variable("x", shape=(10, 20)) + y = sym.dense(x, units=30, name="fc") + sdict = infer_shape(y) + assert(sdict["fc"][0] == [10, 30]) + assert(sdict["fc_bias"][0] == [30]) + + +def test_matmul(): + a = sym.Variable('a', shape=(10, 20)) + b = sym.Variable('b', shape=(20, 30)) + c = sym.matmul(a, b, name="matmul") + sdict = infer_shape(c) + assert(sdict["matmul"][0] == [10, 30]) + a = sym.Variable('a', shape=(20, 10)) + c = sym.matmul(a, b, name="matmul", transpose_a=True) + sdict = infer_shape(c) + assert(sdict["matmul"][0] == [10, 30]) + b = sym.Variable('b', shape=(30, 20)) + c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True) + sdict = infer_shape(c) + assert(sdict["matmul"][0] == [10, 30]) + a = sym.Variable('a', shape=(10, 20)) + c = sym.matmul(a, b, name="matmul", transpose_b=True) + sdict = infer_shape(c) + assert(sdict["matmul"][0] == [10, 30]) + a = sym.Variable('a', shape=(10, 20, 30)) + b = sym.Variable('b', shape=(30, 40, 50)) + c = sym.matmul(a, b, name="matmul") + sdict = infer_shape(c) + assert(sdict["matmul"][0] == [10, 20, 40, 50]) + a = sym.Variable('a', shape=(30, 20, 10)) + b = sym.Variable('b', shape=(50, 40, 30)) + c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True) + sdict = infer_shape(c) + assert(sdict["matmul"][0] == [10, 20, 40, 50]) + + +def test_concatenate(): + x1 = sym.Variable("x", shape=(10, 20)) + x2 = sym.Variable("y", shape=(10, 30)) + z = sym.concatenate(x1, x2, name="concat") + sdict = infer_shape(z) + assert(sdict["concat"][0] == [10, 50]) + z = sym.concatenate(x1, x1, axis=0, name="concat") + sdict = infer_shape(z) + assert(sdict["concat"][0] == [20, 20]) + + +def test_expand_dims(): + x = sym.Variable("x", shape=(10, 20)) + y = sym.expand_dims(x, axis=1, name="y") + sdict = infer_shape(y) + assert(sdict["y"][0] == [10, 1, 20]) + y = sym.expand_dims(x, axis=-1, name="y", num_newaxis=2) + sdict = infer_shape(y) + assert(sdict["y"][0] == [10, 20, 1, 1]) + + +def test_split(): + x1 = sym.Variable("x", shape=(10, 20)) + z = sym.split(x1, indices_or_sections=[11], name="y") + sdict = infer_shape(z) + assert(sdict["y"][0] == [10, 11]) + assert(sdict["y"][1] == [10, 9]) + z = sym.split(x1, indices_or_sections=2, name="y") + sdict = infer_shape(z) + assert(sdict["y"][0] == [10, 10]) + assert(sdict["y"][1] == [10, 10]) + z = sym.split(x1, indices_or_sections=[6], axis=-1, name="y") + sdict = infer_shape(z) + assert(sdict["y"][0] == [10, 6]) + assert(sdict["y"][1] == [10, 14]) + + +def test_batchnorm(): + x = sym.Variable("x", shape=(10, 20)) + y = sym.batch_norm(1 / x, name="bn") + sdict = infer_shape(y) + assert(sdict["bn_gamma"][0] == [20]) + + x = sym.Variable("x", shape=(10, 20, 30, 40)) + y = sym.batch_norm(data=x, axis=0, epsilon=2e-5, name='bn') + sdict = infer_shape(y) + assert(sdict['bn_moving_var'][0] == [10]) + + y = sym.batch_norm(data=x, axis=1, epsilon=2e-5, name='bn') + sdict = infer_shape(y) + assert(sdict['bn_gamma'][0] == [20]) + + y = sym.batch_norm(data=x, axis=2, epsilon=2e-5, name='bn') + sdict = infer_shape(y) + assert(sdict['bn_beta'][0] == [30]) + + y = 
sym.batch_norm(data=x, axis=3, epsilon=2e-5, name='bn') + sdict = infer_shape(y) + assert(sdict['bn_moving_mean'][0] == [40]) + +def test_flatten(): + x = sym.Variable("x", shape=(10, 20, 10)) + y = sym.flatten(x) * 2 + y = sym.exp(y, name="y") + sdict = infer_shape(y) + assert(sdict["y"][0] == [10, 200]) + +def test_squeeze(): + x = sym.Variable("x", shape=(1, 1, 1, 10)) + y = sym.squeeze(x, axis=(1,2), name='squeeze') + sdict = infer_shape(y) + assert(sdict['squeeze'][0] == [1, 10]) + + x = sym.Variable("x", shape=(1, 3, 1)) + y = sym.squeeze(x, name='squeeze') + sdict = infer_shape(y) + assert(sdict['squeeze'][0] == [3]) + + y = sym.squeeze(x, axis=(0), name='squeeze') + sdict = infer_shape(y) + assert(sdict['squeeze'][0] == [3, 1]) + + y = sym.squeeze(x, axis=(0,2), name='squeeze') + sdict = infer_shape(y) + assert(sdict['squeeze'][0] == [3]) + +# Level 2 +def test_conv2d(): + def check(in_shape, out_shape, **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.conv2d(x, name="y", **kwargs) + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 10, 10, 12), + (4, 12, 10, 12), + channels=12, + kernel_size=(3,3), + padding=(1,1)) + check((4, 10, 12, 4), + (4, 8, 8, 5), + channels=5, + kernel_size=(3, 5), + layout="NHWC") + check((4, 10, 12, 4), + (4, 6, 8, 5), + channels=5, + dilation=(2, 2), + kernel_size=(3, 3), + layout="NHWC") + check((4, 10, 12, 4), + (4, 5, 6, 5), + channels=5, + strides=(2, 2), + kernel_size=(3, 3), + padding=(1, 1), + layout="NHWC") + + +def test_conv2d_packed(): + def check(in_shape, + out_shape, + kernel_shape, + **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.conv2d(x, name="y", **kwargs) + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + assert(tuple(sdict["y_weight"][0]) == tuple(kernel_shape)) + + check((4, 10, 10, 12, 1, 8), + (4, 10, 10, 2, 1, 8), + (2, 12, 3, 3, 8, 8), + channels=8 * 2, + kernel_size=(3,3), + padding=(1,1), + layout="NHWC1n8c", + kernel_layout="OIHW8o8i") + + +def test_conv2d_transpose(): + def check(in_shape, out_shape, **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.conv2d_transpose(x, name="y", **kwargs) + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 10, 10, 12), + (4, 15, 10, 12), + channels=15, + kernel_size=(3,3), + padding=(1,1)) + check((4, 10, 10, 12), + (4, 15, 10, 14), + channels=15, + kernel_size=(3, 5), + padding=(1, 1)) + check((4, 10, 10, 12), + (4, 15, 11, 15), + channels=15, + kernel_size=(3, 5), + padding=(1, 1), + output_padding=(1, 1)) + check((4, 10, 10, 12), + (4, 15, 15, 11), + channels=11, + kernel_size=(5, 5), + output_padding=(1, 1), + layout="NHWC") + + +def test_max_pool2d(): + def check(in_shape, out_shape, **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.max_pool2d(x, name="y", **kwargs) + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 10, 12, 12), + (4, 10, 12, 12), + pool_size=(3,3), + padding=(1,1)) + check((4, 10, 12, 12), + (4, 10, 6, 6), + pool_size=(3, 3), + padding=(1, 1), + strides=(2, 2)) + check((4, 10, 12, 12), + (4, 10, 7, 7), + pool_size=(3, 3), + padding=(1, 1), + strides=(2, 2), + ceil_mode=True) + check((4, 12, 14, 10), + (4, 6, 7, 10), + pool_size=(3, 3), + padding=(1, 1), + strides=(2, 2), + layout="NHWC") + + +def test_global_pool2d(): + def check(in_shape, out_shape, **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.global_max_pool2d(x, name="y", **kwargs) + sdict = infer_shape(y) + 
assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 10, 12, 12), + (4, 10, 1, 1)) + check((4, 10, 12, 12), + (4, 1, 1, 12), + layout="NHWC") + + +# Level 3 +def test_reshape(): + def check(in_shape, tshape, out_shape): + x = sym.Variable("x", shape=in_shape) + y = sym.reshape(x, shape=tshape, name="y") + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4,), (2, 2), (2, 2)) + check((2, 3, 4), (4, 0, 2), (4, 3, 2)) + check((2, 3, 4), (2, 0, 0), (2, 3, 4)) + check((2, 3, 4), (6, 1, -1), (6, 1, 4)) + check((2, 3, 4), (3, -1, 8), (3, 1, 8)) + check((2, 3, 4), (-1,), (24,)) + check((2, 3, 4), (-2,), (2, 3, 4)) + check((2, 3, 4), (2, -2), (2, 3, 4)) + check((2, 3, 4), (-2, 1, 1), (2, 3, 4, 1, 1)) + check((2, 3, 4), (-3, 4), (6, 4)) + check((2, 3, 4, 5), (-3, -3), (6, 20)) + check((2, 3, 4), (0, -3), (2, 12)) + check((2, 3, 4), (-3, -2), (6, 4)) + check((2, 3, 4), (-4, 1, 2, -2), (1, 2, 3, 4)) + check((2, 3, 4), (2, -4, -1, 3, -2), (2, 1, 3, 4)) + + +def test_prelu(): + def check(in_shape, axis, out_shape): + x = sym.Variable("x", shape=in_shape) + w = sym.Variable("w") + y = sym.prelu(x, w, axis=axis, name="y") + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + check((1, 3, 2, 2), 1, (1, 3, 2, 2)) + check((1, 2, 2, 3), 3, (1, 2, 2, 3)) + + +# Level 4 +def test_transpose(): + def check(in_shape, out_shape, **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.transpose(x, name="y", **kwargs) + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 1), (1, 4)) + check((0, 1, 2, 3), (1, 2, 3, 0), axes=(1, 2, 3, 0)) + + +def test_broadcast_to(): + def check(in_shape, tshape, out_shape): + x = sym.Variable("x", shape=in_shape) + y = sym.broadcast_to(x, shape=tshape, name="y") + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 1), (0, 4), (4, 4)) + check((4, 1, 5), (0, 4, 5), (4, 4, 5)) + + +def test_broadcast_binary(): + def check(lhs_shape, rhs_shape, out_shape): + x = sym.Variable("x", shape=lhs_shape) + y = sym.Variable("y", shape=rhs_shape) + z = sym.broadcast_add(x, y, name="y") + sdict = infer_shape(z) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 1), (4), (4, 4)) + check((5, 1, 1), (1, 4, 4), (5, 4, 4)) + check((6, 1, 4), (5, 4), (6, 5, 4)) + + +def test_reduce(): + def check(in_shape, out_shape, **kwargs): + x = sym.Variable("x", shape=in_shape) + y = sym.sum(x, name="y", **kwargs) + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4, 5), (4,), axis=1) + check((4, 5), (4, 1), axis=1, keepdims=True) + check((4, 5), (1, 5), axis=0, keepdims=True) + check((4, 5), (1, 1), axis=(), keepdims=True) + check((4, 5), (1,), axis=()) + check((4, 5, 10), (5,), axis=(0, 2)) + check((4, 5, 10), (1, 5, 1), axis=(0, 2), keepdims=True) + + +def test_gather_nd(): + def check(data_shape, indices_shape, out_shape): + x = sym.Variable("x", shape=data_shape) + indices = sym.Variable("indices", shape=indices_shape) + y = sym.gather_nd(x, indices, name="y") + sdict = infer_shape(y) + assert(tuple(sdict["y"][0]) == tuple(out_shape)) + + check((4,), (1, 1), (1,)) + check((4,), (1, 3), (3,)) + check((2, 3), (1, 1), (1, 3)) + check((2, 3), (2, 1), (1,)) + check((2, 3), (2, 5, 6), (5, 6)) + check((2, 3, 4), (1, 1), (1, 3, 4)) + check((2, 3, 4), (2, 1), (1, 4)) + check((2, 3, 4), (2, 5), (5, 4)) + check((2, 3, 4), (2, 5, 6), (5, 6, 4)) + check((2, 3, 4, 5), (2, 6, 7), (6, 7, 4, 5)) + + +if __name__ == "__main__": + 
test_conv2d_packed() + test_expand_dims() + test_dense() + test_matmul() + test_concatenate() + test_split() + test_batchnorm() + test_flatten() + test_conv2d() + test_conv2d_transpose() + test_max_pool2d() + test_global_pool2d() + test_reshape() + test_broadcast_to() + test_broadcast_binary() + test_reduce() + test_transpose() + test_prelu() + test_squeeze() + test_gather_nd() diff --git a/nnvm/tests/python/unittest/test_pass_saveload_json.py b/nnvm/tests/python/unittest/test_pass_saveload_json.py new file mode 100644 index 000000000000..a8b067c8fe24 --- /dev/null +++ b/nnvm/tests/python/unittest/test_pass_saveload_json.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import nnvm +from tvm.contrib import util + + +def test_variable_node_parsed(): + sym = nnvm.sym.Variable('data') + tempdir = util.tempdir() + json_filename = 'test_nnvm_symbol.json' + with open(tempdir.relpath(json_filename), 'w') as fo: + fo.write(nnvm.graph.create(sym).json()) + sym_str = open(tempdir.relpath(json_filename), 'r').read() + sym = nnvm.graph.load_json(sym_str).symbol() + sym = nnvm.sym.relu(sym) + + +if __name__ == '__main__': + test_variable_node_parsed() diff --git a/nnvm/tests/python/unittest/test_symbol.py b/nnvm/tests/python/unittest/test_symbol.py new file mode 100644 index 000000000000..a54dec170aae --- /dev/null +++ b/nnvm/tests/python/unittest/test_symbol.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import nnvm.symbol as sym +from nnvm import NNVMError + +def test_dense(): + x = sym.Variable('x') + y = sym.dense(x, units=30, name="fc") + assert y.list_input_names() == ["x", "fc_weight", "fc_bias"] + +def test_batch_norm(): + x = sym.Variable('x') + y = sym.dense(x, units=30, name="fc") + z = sym.batch_norm(x, name='bn') + assert z.list_input_names('aux_state') == ['bn_moving_mean', 'bn_moving_var'] + assert z.list_input_names('read_only') == ['x', 'bn_gamma', 'bn_beta'] + +def test_compose(): + x = sym.Variable('x') + z = sym.Variable('z') + y = sym.exp(sym.elemwise_add(x, x, name='add', gpu=2), + name='exp', gpu=1, attr={"kk": "1"}) + + assert y.list_input_names() == ['x'] + assert y.list_output_names() == ["exp_output"] + assert y.list_attr()['gpu'] == '1' + z = y.get_internals() + assert z['add_output'].list_output_names() == ['add_output'] + assert y.list_attr(recursive=True)['add$gpu'] == '2' + +def test_default_input(): + x = sym.Variable('x') + y = sym.dense(data=x, units=30, name='fc', use_bias=False) + assert y.list_input_names() == ['x', 'fc_weight'] + tname = [z.list_output_names()[0] for z in y.list_input_variables()] + assert tname == y.list_input_names() + try: + z = sym.elemwise_add(x) + assert False + except NNVMError: + pass + +def test_copy(): + x = sym.Variable('x') + z = sym.Variable('z') + y = sym.exp(sym.elemwise_add(x, x, name='add', gpu=2), + name='exp', gpu=1, attr={"kk": "1"}) + assert y.__copy__().debug_str() == y.debug_str() + + +def test_op_name(): + x = sym.Variable('x') + y = sym.exp(x) + op_name = y.attr("op_name") + op_func = sym.__dict__[op_name] + z = op_func(x) + +if __name__ == "__main__": + test_op_name() + test_copy() + test_default_input() + test_compose() + test_batch_norm() diff --git a/nnvm/tests/python/unittest/test_top_level1.py b/nnvm/tests/python/unittest/test_top_level1.py new file mode 100644 index 000000000000..2d646dc16ae4 --- /dev/null +++ b/nnvm/tests/python/unittest/test_top_level1.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import nnvm.symbol as sym +import nnvm.graph as graph + +def test_dense(): + x = sym.Variable('x') + x1 = sym.dense(x, units=3, name="dense") + x2 = sym.flatten(x1) + x3 = sym.softmax(x2) + assert x3.list_input_names() == ['x', 'dense_weight', 'dense_bias'] + + +def test_concatenate_split(): + x = sym.Variable('x') + y = sym.Variable('y') + y = sym.concatenate(x, y) + assert y.list_input_names() == ['x', 'y'] + z = sym.split(y, indices_or_sections=10) + assert len(z.list_output_names()) == 10 + z = sym.split(y, indices_or_sections=[10, 20]) + assert len(z.list_output_names()) == 3 + +def test_expand_dims(): + x = sym.Variable('x') + y = sym.expand_dims(x, axis=1, num_newaxis=2) + assert y.list_input_names() == ['x'] + + +def test_unary(): + x = sym.Variable('x') + x = sym.exp(x) + x = sym.log(x) + x = sym.sigmoid(x) + x = sym.tanh(x) + x = sym.relu(x) + assert x.list_input_names() == ['x'] + + +def test_batchnorm(): + x = sym.Variable('x') + x = sym.batch_norm(x, name="bn") + assert x.list_input_names() == [ + "x", "bn_gamma", "bn_beta", "bn_moving_mean", "bn_moving_var"] + + +if __name__ == "__main__": + test_concatenate_split() + test_expand_dims() + test_dense() + test_unary() + test_batchnorm() diff --git a/nnvm/tests/python/unittest/test_top_level2.py b/nnvm/tests/python/unittest/test_top_level2.py new file mode 100644 index 000000000000..b327356b5cc0 --- /dev/null +++ b/nnvm/tests/python/unittest/test_top_level2.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import nnvm.symbol as sym + +def test_conv2d(): + x = sym.Variable('x') + y = sym.conv2d(x, channels=3, kernel_size=(3, 3), + name="y", use_bias=False) + assert y.list_input_names() == ["x", "y_weight"] + + +def test_max_pool2d(): + x = sym.Variable('x') + y = sym.max_pool2d(x, pool_size=(3, 3), name="y") + y = sym.global_max_pool2d(y) + assert y.list_input_names() == ["x"] + + +if __name__ == "__main__": + test_conv2d() + test_max_pool2d() diff --git a/nnvm/tests/python/unittest/test_top_level3.py b/nnvm/tests/python/unittest/test_top_level3.py new file mode 100644 index 000000000000..f19e1fd4376e --- /dev/null +++ b/nnvm/tests/python/unittest/test_top_level3.py @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import nnvm.symbol as sym + +def test_reshape(): + x = sym.Variable("x") + y = sym.reshape(x, shape=(10, 20), name="y") + assert(y.list_input_names() == ["x"]) + + +def test_scalar_op(): + x = sym.Variable("x") + y = (1 / (x * 2) - 1) ** 2 + assert(y.list_input_names() == ["x"]) + +def test_leaky_relu(): + x = sym.Variable("x") + y = sym.leaky_relu(x, alpha=0.1) + assert(y.list_input_names() == ["x"]) + +def test_prelu(): + x = sym.Variable("x") + w = sym.Variable("w") + y = sym.prelu(x, w) + assert(y.list_input_names()[0] == 'x') + assert(y.list_input_names()[1] == 'w') + +if __name__ == "__main__": + test_scalar_op() + test_reshape() + test_leaky_relu() + test_prelu() diff --git a/nnvm/tests/python/unittest/test_top_level4.py b/nnvm/tests/python/unittest/test_top_level4.py new file mode 100644 index 000000000000..ad0829b59283 --- /dev/null +++ b/nnvm/tests/python/unittest/test_top_level4.py @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import nnvm.symbol as sym + +def test_binary_broadcast(): + x = sym.Variable('x') + y = sym.Variable('y') + z = x + y + z = x * y + z = x - y + z = x / y + + +def test_broadcast_to(): + x = sym.Variable('x') + y = sym.broadcast_to(x, shape=(3, 3)) + assert y.list_input_names() == ["x"] + + +if __name__ == "__main__": + test_binary_broadcast() + test_broadcast_to() diff --git a/nnvm/tutorials/.gitignore b/nnvm/tutorials/.gitignore new file mode 100644 index 000000000000..5f8a03c46b89 --- /dev/null +++ b/nnvm/tutorials/.gitignore @@ -0,0 +1,11 @@ +*.pb +*.mlmodel +*.ttf +*.txt +*synset*txt +*.cfg +ssd_model +*.names +*.jpg +*.pbtxt +*.weights diff --git a/nnvm/tutorials/README.txt b/nnvm/tutorials/README.txt new file mode 100644 index 000000000000..334409cd8a28 --- /dev/null +++ b/nnvm/tutorials/README.txt @@ -0,0 +1,4 @@ +.. _tutorial-nnvm: + +NNVM Compiler Tutorials +----------------------- diff --git a/nnvm/tutorials/deploy_model_on_mali_gpu.py b/nnvm/tutorials/deploy_model_on_mali_gpu.py new file mode 100644 index 000000000000..d90b0955048c --- /dev/null +++ b/nnvm/tutorials/deploy_model_on_mali_gpu.py @@ -0,0 +1,229 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+.. _tutorial-deploy-model-on-mali-gpu:
+
+Deploy the Pretrained Model on ARM Mali GPU
+===========================================
+**Author**: `Lianmin Zheng `_, `Ziheng Jiang `_
+
+This is an example of using NNVM to compile a ResNet model and
+deploy it on Firefly-RK3399 with ARM Mali GPU. We will use the
+Mali-T860 MP4 GPU on this board to accelerate the inference.
+"""
+
+import tvm
+import nnvm.compiler
+import nnvm.testing
+from tvm import rpc
+from tvm.contrib import util, graph_runtime as runtime
+from tvm.contrib.download import download_testdata
+
+######################################################################
+# Build TVM Runtime on Device
+# ---------------------------
+#
+# The first step is to build the TVM runtime on the remote device.
+#
+# .. note::
+#
+#   All instructions in both this section and the next section should be
+#   executed on the target device, e.g. RK3399, and we assume it
+#   is running Linux.
+#
+# Since we do compilation on the local machine, the remote device is only used
+# for running the generated code. We only need to build the TVM runtime on
+# the remote device. Make sure you have the OpenCL driver on your board.
+# You can refer to `tutorial `_
+# to set up the OS and OpenCL driver for the RK3399.
+#
+# .. code-block:: bash
+#
+#   git clone --recursive https://github.com/apache/incubator-tvm tvm
+#   cd tvm
+#   cp cmake/config.cmake .
+#   sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
+#   make runtime -j4
+#
+# After building the runtime successfully, we need to set environment variables
+# in the :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
+# using :code:`vi ~/.bashrc` and add the line below (assuming your TVM
+# directory is in :code:`~/tvm`):
+#
+# .. code-block:: bash
+#
+#   export PYTHONPATH=$PYTHONPATH:~/tvm/python
+#
+# To update the environment variables, execute :code:`source ~/.bashrc`.
+
+######################################################################
+# Set Up RPC Server on Device
+# ---------------------------
+# To start an RPC server, run the following command on your remote device
+# (which is the RK3399 in our example).
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
+#
+# If you see the line below, it means the RPC server started
+# successfully on your device.
+#
+# .. code-block:: bash
+#
+#   INFO:root:RPCServer: bind to 0.0.0.0:9090
+#
+
+######################################################################
+# Prepare the Pre-trained Model
+# -----------------------------
+# Back to the host machine, which should have a full TVM installed (with LLVM).
+#
+# We will use a pre-trained model from the
+# `MXNet Gluon model zoo `_.
+# You can find more details about this part in the tutorial :ref:`tutorial-from-mxnet`.
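+
+######################################################################
+# .. note::
+#
+#   The next steps run on the host and require the ``mxnet`` and
+#   ``Pillow`` Python packages. If they are missing, one typical way to
+#   get them (the exact command may differ in your environment) is:
+#
+#   .. code-block:: bash
+#
+#     pip install --user mxnet Pillow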
+
+from mxnet.gluon.model_zoo.vision import get_model
+from PIL import Image
+import numpy as np
+
+# only one line to get the model
+block = get_model('resnet18_v1', pretrained=True)
+
+######################################################################
+# In order to test our model, here we download an image of a cat and
+# transform its format.
+img_name = 'cat.png'
+img_path = download_testdata('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true',
+                             img_name, module='data')
+image = Image.open(img_path).resize((224, 224))
+
+def transform_image(image):
+    image = np.array(image) - np.array([123., 117., 104.])
+    image /= np.array([58.395, 57.12, 57.375])
+    image = image.transpose((2, 0, 1))
+    image = image[np.newaxis, :]
+    return image
+
+x = transform_image(image)
+
+######################################################################
+# The synset is used to transform the label from an ImageNet class index
+# to a word humans can understand.
+synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
+                      '4d0b62f3d01426887599d4f7ede23ee5/raw/',
+                      '596b27d23537e5a1b5751d2b0481ef172f58b539/',
+                      'imagenet1000_clsid_to_human.txt'])
+
+synset_name = 'imagenet1000_clsid_to_human.txt'
+synset_path = download_testdata(synset_url, synset_name, module='data')
+with open(synset_path) as f:
+    synset = eval(f.read())
+
+######################################################################
+# Now we would like to port the Gluon model to a portable computational graph.
+# It's as easy as several lines.
+
+# We support MXNet static graph (symbol) and HybridBlock in mxnet.gluon
+net, params = nnvm.frontend.from_mxnet(block)
+# we want a probability so add a softmax operator
+net = nnvm.sym.softmax(net)
+
+######################################################################
+# Here are some basic data workload configurations.
+batch_size = 1
+num_classes = 1000
+image_shape = (3, 224, 224)
+data_shape = (batch_size,) + image_shape
+
+######################################################################
+# Compile The Graph
+# -----------------
+# To compile the graph, we call the :any:`nnvm.compiler.build` function
+# with the graph configuration and parameters. As we use OpenCL for
+# GPU computing, TVM will generate both OpenCL kernel code and ARM
+# CPU host code. The CPU host code is used for calling OpenCL kernels.
+# In order to generate correct CPU code, we need to specify the target
+# triplet for the host ARM device by setting the parameter :code:`target_host`.
+
+######################################################################
+# If we run the example on our x86 server for demonstration, we can simply
+# set it as :code:`llvm`. If running it on the RK3399, we need to
+# specify its instruction set. Set :code:`local_demo` to False if you
+# want to run this tutorial with a real device.
+
+local_demo = True
+
+if local_demo:
+    target_host = "llvm"
+    target = "llvm"
+else:
+    # Here is the setting for my rk3399 board
+    # If you don't use rk3399, you can query your target triple by
+    # executing `gcc -v` on your board.
+    target_host = "llvm -target=aarch64-linux-gnu"
+
+    # set target as `tvm.target.mali` instead of 'opencl' to enable
+    # optimization for mali
+    target = tvm.target.mali()
+
+with nnvm.compiler.build_config(opt_level=3):
+    graph, lib, params = nnvm.compiler.build(net, target=target,
+            shape={"data": data_shape}, params=params, target_host=target_host)
+
+# After `nnvm.compiler.build`, you will get three return values: graph,
+# library, and the new parameters, since we do some optimization that will
+# change the parameters but keep the result of the model the same.
+
+# Save the library to a local temporary directory.
+tmp = util.tempdir()
+lib_fname = tmp.relpath('net.tar')
+lib.export_library(lib_fname)
+
+######################################################################
+# Deploy the Model Remotely by RPC
+# --------------------------------
+# With RPC, you can deploy the model remotely from your host machine
+# to the remote device.
+
+# obtain an RPC session from the remote device.
+if local_demo:
+    remote = rpc.LocalSession()
+else:
+    # The following is my environment, change this to the IP address of your target device
+    host = '10.77.1.145'
+    port = 9090
+    remote = rpc.connect(host, port)
+
+# upload the library to the remote device and load it
+remote.upload(lib_fname)
+rlib = remote.load_module('net.tar')
+
+# create the remote runtime module
+ctx = remote.cl(0) if not local_demo else remote.cpu(0)
+module = runtime.create(graph, rlib, ctx)
+# set parameter (upload params to the remote device. This may take a while)
+module.set_input(**params)
+# set input data
+module.set_input('data', tvm.nd.array(x.astype('float32')))
+# run
+module.run()
+# get output
+out = module.get_output(0)
+# get top1 result
+top1 = np.argmax(out.asnumpy())
+print('TVM prediction top-1: {}'.format(synset[top1]))
diff --git a/nnvm/tutorials/deploy_model_on_rasp.py b/nnvm/tutorials/deploy_model_on_rasp.py
new file mode 100644
index 000000000000..576b517f3aa5
--- /dev/null
+++ b/nnvm/tutorials/deploy_model_on_rasp.py
@@ -0,0 +1,220 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+.. _tutorial-deploy-model-on-rasp:
+
+Deploy the Pretrained Model on Raspberry Pi
+===========================================
+**Author**: `Ziheng Jiang `_
+
+This is an example of using NNVM to compile a ResNet model and deploy
+it on Raspberry Pi.
+"""
+
+import tvm
+import nnvm.compiler
+import nnvm.testing
+from tvm import rpc
+from tvm.contrib import util, graph_runtime as runtime
+from tvm.contrib.download import download_testdata
+
+######################################################################
+# .. _build-tvm-runtime-on-device:
+#
+# Build TVM Runtime on Device
+# ---------------------------
+#
+# The first step is to build the TVM runtime on the remote device.
+#
+# .. note::
+#
+#   All instructions in both this section and the next section should be
+#   executed on the target device, e.g. the Raspberry Pi, and we assume it
+#   is running Linux.
+#
+# Since we do compilation on the local machine, the remote device is only used
+# for running the generated code. We only need to build the TVM runtime on
+# the remote device.
+#
+# .. code-block:: bash
+#
+#   git clone --recursive https://github.com/apache/incubator-tvm tvm
+#   cd tvm
+#   make runtime -j4
+#
+# After building the runtime successfully, we need to set environment variables
+# in the :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
+# using :code:`vi ~/.bashrc` and add the line below (assuming your TVM
+# directory is in :code:`~/tvm`):
+#
+# .. code-block:: bash
+#
+#   export PYTHONPATH=$PYTHONPATH:~/tvm/python
+#
+# To update the environment variables, execute :code:`source ~/.bashrc`.
+
+######################################################################
+# Set Up RPC Server on Device
+# ---------------------------
+# To start an RPC server, run the following command on your remote device
+# (which is the Raspberry Pi in our example).
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
+#
+# If you see the line below, it means the RPC server started
+# successfully on your device.
+#
+# .. code-block:: bash
+#
+#   INFO:root:RPCServer: bind to 0.0.0.0:9090
+#
+
+######################################################################
+# Prepare the Pre-trained Model
+# -----------------------------
+# Back to the host machine, which should have a full TVM installed (with LLVM).
+#
+# We will use a pre-trained model from the
+# `MXNet Gluon model zoo `_.
+# You can find more details about this part in the tutorial :ref:`tutorial-from-mxnet`.
+
+from mxnet.gluon.model_zoo.vision import get_model
+from PIL import Image
+import numpy as np
+
+# one line to get the model
+block = get_model('resnet18_v1', pretrained=True)
+
+######################################################################
+# In order to test our model, here we download an image of a cat and
+# transform its format.
+img_name = 'cat.png'
+img_path = download_testdata('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true',
+                             img_name, module='data')
+image = Image.open(img_path).resize((224, 224))
+
+def transform_image(image):
+    image = np.array(image) - np.array([123., 117., 104.])
+    image /= np.array([58.395, 57.12, 57.375])
+    image = image.transpose((2, 0, 1))
+    image = image[np.newaxis, :]
+    return image
+
+x = transform_image(image)
+
+######################################################################
+# The synset is used to transform the label from an ImageNet class index
+# to a word humans can understand.
+synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
+                      '4d0b62f3d01426887599d4f7ede23ee5/raw/',
+                      '596b27d23537e5a1b5751d2b0481ef172f58b539/',
+                      'imagenet1000_clsid_to_human.txt'])
+synset_name = 'imagenet1000_clsid_to_human.txt'
+synset_path = download_testdata(synset_url, synset_name, module='data')
+with open(synset_path) as f:
+    synset = eval(f.read())
+
+######################################################################
+# Now we would like to port the Gluon model to a portable computational graph.
+# It's as easy as several lines.
+
+# We support MXNet static graph (symbol) and HybridBlock in mxnet.gluon
+net, params = nnvm.frontend.from_mxnet(block)
+# we want a probability so add a softmax operator
+net = nnvm.sym.softmax(net)
+
+######################################################################
+# Here are some basic data workload configurations.
+batch_size = 1
+num_classes = 1000
+image_shape = (3, 224, 224)
+data_shape = (batch_size,) + image_shape
+
+######################################################################
+# Compile The Graph
+# -----------------
+# To compile the graph, we call the :any:`nnvm.compiler.build` function
+# with the graph configuration and parameters. However, you cannot deploy
+# an x86 program on a device with an ARM instruction set. This means NNVM
+# also needs to know the compilation options of the target device, apart
+# from the arguments :code:`net` and :code:`params` that specify the deep
+# learning workload. The options matter: different options will lead to
+# very different performance.
+
+######################################################################
+# If we run the example on our x86 server for demonstration, we can simply
+# set it as :code:`llvm`. If running it on the Raspberry Pi, we need to
+# specify its instruction set. Set :code:`local_demo` to False if you want
+# to run this tutorial with a real device.
+
+local_demo = True
+
+if local_demo:
+    target = tvm.target.create('llvm')
+else:
+    target = tvm.target.arm_cpu('rasp3b')
+    # The above line is a simple form of
+    # target = tvm.target.create('llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon')
+
+with nnvm.compiler.build_config(opt_level=3):
+    graph, lib, params = nnvm.compiler.build(
+        net, target, shape={"data": data_shape}, params=params)
+
+# After `nnvm.compiler.build`, you will get three return values: graph,
+# library, and the new parameters, since we do some optimization that will
+# change the parameters but keep the result of the model the same.
+
+# Save the library to a local temporary directory.
+tmp = util.tempdir()
+lib_fname = tmp.relpath('net.tar')
+lib.export_library(lib_fname)
+
+######################################################################
+# Deploy the Model Remotely by RPC
+# --------------------------------
+# With RPC, you can deploy the model remotely from your host machine
+# to the remote device.
+
+# obtain an RPC session from the remote device.
+if local_demo:
+    remote = rpc.LocalSession()
+else:
+    # The following is my environment, change this to the IP address of your target device
+    host = '10.77.1.162'
+    port = 9090
+    remote = rpc.connect(host, port)
+
+# upload the library to the remote device and load it
+remote.upload(lib_fname)
+rlib = remote.load_module('net.tar')
+
+# create the remote runtime module
+ctx = remote.cpu(0)
+module = runtime.create(graph, rlib, ctx)
+# set parameter (upload params to the remote device. This may take a while)
+module.set_input(**params)
+# set input data
+module.set_input('data', tvm.nd.array(x.astype('float32')))
+# run
+module.run()
+# get output
+out = module.get_output(0)
+# get top1 result
+top1 = np.argmax(out.asnumpy())
+print('TVM prediction top-1: {}'.format(synset[top1]))
diff --git a/nnvm/tutorials/deploy_ssd_mxnet.py b/nnvm/tutorials/deploy_ssd_mxnet.py
new file mode 100644
index 000000000000..c88c61984293
--- /dev/null
+++ b/nnvm/tutorials/deploy_ssd_mxnet.py
@@ -0,0 +1,180 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Deploy Single Shot Multibox Detector (SSD) Model
+================================================
+**Author**: `Yao Wang `_, \
+`Leyuan Wang `_
+
+This article is an introductory tutorial to deploy SSD models with TVM.
+We will use the MXNet pretrained SSD model with ResNet50 as the body network
+and convert it to an NNVM graph.
+"""
+import os
+import zipfile
+import tvm
+import mxnet as mx
+import cv2
+import numpy as np
+
+from nnvm import compiler
+from nnvm.frontend import from_mxnet
+from tvm import relay
+from tvm.contrib.download import download_testdata
+from tvm.contrib import graph_runtime
+from mxnet.model import load_checkpoint
+
+
+######################################################################
+# Preliminary and Set parameters
+# ------------------------------
+# We should build TVM with sort support; in the TVM root directory:
+#
+# .. code-block:: bash
+#
+#   echo "set(USE_SORT ON)" > config.mk
+#   make -j8
+#
+
+model_name = "ssd_resnet50_512"
+model_file = "%s.zip" % model_name
+test_image = "dog.jpg"
+dshape = (1, 3, 512, 512)
+dtype = "float32"
+
+# Target settings
+# Use these commented settings to build for cuda.
+#target = 'cuda'
+#ctx = tvm.gpu(0)
+# Use these commented settings to build for opencl.
+#target = 'opencl'
+#ctx = tvm.opencl(0)
+target = "llvm"
+ctx = tvm.cpu()

+######################################################################
+# Download MXNet SSD pre-trained model and demo image
+# ---------------------------------------------------
+# The pre-trained model is available at
+# https://github.com/apache/incubator-mxnet/tree/master/example/ssd
+
+model_url = "https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/" \
+            "resnet50_ssd_512_voc0712_trainval.zip"
+image_url = "https://cloud.githubusercontent.com/assets/3307514/20012567/" \
+            "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg"
+inference_symbol_folder = \
+    "c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26"
+inference_symbol_url = "https://gist.github.com/kevinthesun/c1904e900848df4548ce5dfb18c719c7/" \
+                       "archive/a28c4856c827fe766aa3da0e35bad41d44f0fb26.zip"
+
+model_file_path = download_testdata(model_url, model_file, module=["mxnet", "ssd_model"])
+inference_symbol_path = download_testdata(inference_symbol_url, "inference_model.zip",
+                                          module=["mxnet", "ssd_model"])
+test_image_path = download_testdata(image_url, test_image, module="data")
+model_dir = os.path.dirname(model_file_path)
+
+zip_ref = zipfile.ZipFile(model_file_path, 'r')
+zip_ref.extractall(model_dir)
+zip_ref.close()
+zip_ref = zipfile.ZipFile(inference_symbol_path)
+zip_ref.extractall(model_dir)
+zip_ref.close()
+
+######################################################################
+# Convert and compile model with NNVM or Relay for CPU.
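+
+# The frontend is selected with a command-line flag, handled by the
+# ``argparse`` block below. For example, since this script lives at
+# ``nnvm/tutorials/deploy_ssd_mxnet.py``, the Relay path can be chosen with:
+#
+# .. code-block:: bash
+#
+#   python deploy_ssd_mxnet.py --frontend relay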
+ +sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (model_dir, inference_symbol_folder)) +_, arg_params, aux_params = load_checkpoint("%s/%s" % (model_dir, model_name), 0) + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument( + "-f", "--frontend", + help="Frontend for compilation, nnvm or relay", + type=str, + default="nnvm") +args = parser.parse_args() +if args.frontend == "relay": + net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params, \ + aux_params=aux_params) + with relay.build_config(opt_level=3): + graph, lib, params = relay.build(net, target, params=params) +elif args.frontend == "nnvm": + net, params = from_mxnet(sym, arg_params, aux_params) + with compiler.build_config(opt_level=3): + graph, lib, params = compiler.build( + net, target, {"data": dshape}, params=params) +else: + parser.print_help() + parser.exit() + +###################################################################### +# Create TVM runtime and do inference + +# Preprocess image +image = cv2.imread(test_image_path) +img_data = cv2.resize(image, (dshape[2], dshape[3])) +img_data = img_data[:, :, (2, 1, 0)].astype(np.float32) +img_data -= np.array([123, 117, 104]) +img_data = np.transpose(np.array(img_data), (2, 0, 1)) +img_data = np.expand_dims(img_data, axis=0) +# Build TVM runtime +m = graph_runtime.create(graph, lib, ctx) +m.set_input('data', tvm.nd.array(img_data.astype(dtype))) +m.set_input(**params) +# execute +m.run() +# get outputs +tvm_output = m.get_output(0) + + +###################################################################### +# Display result + +class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor"] +def display(img, out, thresh=0.5): + import random + import matplotlib as mpl + import matplotlib.pyplot as plt + mpl.rcParams['figure.figsize'] = (10, 10) + pens = dict() + plt.clf() + plt.imshow(img) + for det in out: + cid = int(det[0]) + if cid < 0: + continue + score = det[1] + if score < thresh: + continue + if cid not in pens: + pens[cid] = (random.random(), random.random(), random.random()) + scales = [img.shape[1], img.shape[0]] * 2 + xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)] + rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, + edgecolor=pens[cid], linewidth=3) + plt.gca().add_patch(rect) + text = class_names[cid] + plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score), + bbox=dict(facecolor=pens[cid], alpha=0.5), + fontsize=12, color='white') + plt.show() + +image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) +display(image, tvm_output.asnumpy()[0], thresh=0.45) diff --git a/nnvm/tutorials/from_coreml.py b/nnvm/tutorials/from_coreml.py new file mode 100644 index 000000000000..3eaced18728e --- /dev/null +++ b/nnvm/tutorials/from_coreml.py @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile CoreML Models
+=====================
+**Author**: `Joshua Z. Zhang `_
+
+This article is an introductory tutorial to deploy CoreML models with NNVM.
+
+To begin, the coremltools module is required to be installed.
+
+A quick solution is to install via pip
+
+.. code-block:: bash
+
+    pip install -U coremltools --user
+
+or please refer to the official site
+https://github.com/apple/coremltools
+"""
+import nnvm
+import tvm
+import coremltools as cm
+import numpy as np
+from PIL import Image
+from tvm.contrib.download import download_testdata
+
+######################################################################
+# Load pretrained CoreML model
+# ----------------------------
+# We will download and load a pretrained mobilenet classification network
+# provided by Apple in this example.
+model_url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel'
+model_file = 'mobilenet.mlmodel'
+model_path = download_testdata(model_url, model_file, module='coreml')
+# now you have mobilenet.mlmodel on disk
+mlmodel = cm.models.MLModel(model_path)
+# we can load the graph as an NNVM-compatible model
+sym, params = nnvm.frontend.from_coreml(mlmodel)
+
+######################################################################
+# Load a test image
+# -----------------
+# A single cat dominates the examples!
+from PIL import Image
+img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
+img_path = download_testdata(img_url, 'cat.png', module='data')
+img = Image.open(img_path).resize((224, 224))
+#x = np.transpose(img, (2, 0, 1))[np.newaxis, :]
+image = np.asarray(img)
+image = image.transpose((2, 0, 1))
+x = image[np.newaxis, :]
+
+######################################################################
+# Compile the model on NNVM
+# -------------------------
+# We should be familiar with the process by now.
+import nnvm.compiler
+target = 'cuda'
+shape_dict = {'image': x.shape}
+with nnvm.compiler.build_config(opt_level=2, add_pass=['AlterOpLayout']):
+    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
+
+######################################################################
+# Execute on TVM
+# --------------
+# The process is no different from other examples.
+from tvm.contrib import graph_runtime
+ctx = tvm.gpu(0)
+dtype = 'float32'
+m = graph_runtime.create(graph, lib, ctx)
+# set inputs
+m.set_input('image', tvm.nd.array(x.astype(dtype)))
+m.set_input(**params)
+# execute
+m.run()
+# get outputs
+tvm_output = m.get_output(0)
+top1 = np.argmax(tvm_output.asnumpy()[0])
+
+#####################################################################
+# Look up synset name
+# -------------------
+# Look up the prediction's top-1 index in the 1000-class synset.
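+
+# If you also want the top-5 predictions, a minimal sketch using the
+# ``tvm_output`` computed above would be:
+#
+# .. code-block:: python
+#
+#   top5 = np.argsort(tvm_output.asnumpy()[0])[::-1][:5]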
+synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
+                      '4d0b62f3d01426887599d4f7ede23ee5/raw/',
+                      '596b27d23537e5a1b5751d2b0481ef172f58b539/',
+                      'imagenet1000_clsid_to_human.txt'])
+synset_name = 'imagenet1000_clsid_to_human.txt'
+synset_path = download_testdata(synset_url, synset_name, module='data')
+with open(synset_path) as f:
+    synset = eval(f.read())
+print('Top-1 id', top1, 'class name', synset[top1])
diff --git a/nnvm/tutorials/from_darknet.py b/nnvm/tutorials/from_darknet.py
new file mode 100644
index 000000000000..d2ab647da1b3
--- /dev/null
+++ b/nnvm/tutorials/from_darknet.py
@@ -0,0 +1,177 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile YOLO-V2 and YOLO-V3 in DarkNet Models
+=============================================
+**Author**: `Siju Samuel `_
+
+This article is an introductory tutorial to deploy darknet models with NNVM.
+All the required models and libraries will be downloaded from the internet by the script.
+This script runs the YOLO-V2 and YOLO-V3 models with bounding boxes.
+Darknet parsing depends on the CFFI and CV2 libraries, so please install
+CFFI and CV2 before executing this script.
+
+.. code-block:: bash
+
+  pip install cffi
+  pip install opencv-python
+"""
+
+import nnvm
+import nnvm.frontend.darknet
+import tvm.relay.testing.yolo_detection
+import tvm.relay.testing.darknet
+import matplotlib.pyplot as plt
+import numpy as np
+import tvm
+import sys
+
+from ctypes import *
+from tvm.contrib.download import download_testdata
+from tvm.relay.testing.darknet import __darknetffi__
+
+# Model name
+MODEL_NAME = 'yolov3'
+
+######################################################################
+# Download required files
+# -----------------------
+# Download the cfg and weights files if running for the first time.
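+
+# .. note::
+#
+#   The same script also handles YOLO-V2: changing ``MODEL_NAME`` above is
+#   enough, since the file names below and the output decoding later in
+#   this script are derived from it, e.g.:
+#
+#   .. code-block:: python
+#
+#     MODEL_NAME = 'yolov2'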
+CFG_NAME = MODEL_NAME + '.cfg' +WEIGHTS_NAME = MODEL_NAME + '.weights' +REPO_URL = 'https://github.com/siju-samuel/darknet/blob/master/' +CFG_URL = REPO_URL + 'cfg/' + CFG_NAME + '?raw=true' +WEIGHTS_URL = 'https://pjreddie.com/media/files/' + WEIGHTS_NAME + +cfg_path = download_testdata(CFG_URL, CFG_NAME, module="darknet") +weights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module="darknet") + +# Download and Load darknet library +if sys.platform in ['linux', 'linux2']: + DARKNET_LIB = 'libdarknet2.0.so' + DARKNET_URL = REPO_URL + 'lib/' + DARKNET_LIB + '?raw=true' +elif sys.platform == 'darwin': + DARKNET_LIB = 'libdarknet_mac2.0.so' + DARKNET_URL = REPO_URL + 'lib_osx/' + DARKNET_LIB + '?raw=true' +else: + err = "Darknet lib is not supported on {} platform".format(sys.platform) + raise NotImplementedError(err) + +lib_path = download_testdata(DARKNET_URL, DARKNET_LIB, module="darknet") + +DARKNET_LIB = __darknetffi__.dlopen(lib_path) +net = DARKNET_LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0) +dtype = 'float32' +batch_size = 1 + +print("Converting darknet to nnvm symbols...") +sym, params = nnvm.frontend.darknet.from_darknet(net, dtype) + +###################################################################### +# Compile the model on NNVM +# ------------------------- +# compile the model +target = 'llvm' +ctx = tvm.cpu(0) +data = np.empty([batch_size, net.c, net.h, net.w], dtype) +shape = {'data': data.shape} +print("Compiling the model...") +dtype_dict = {} +with nnvm.compiler.build_config(opt_level=2): + graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype_dict, params) + +[neth, netw] = shape['data'][2:] # Current image shape is 608x608 +###################################################################### +# Load a test image +# -------------------------------------------------------------------- +test_image = 'dog.jpg' +print("Loading the test image...") +img_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + \ + test_image + '?raw=true' +img_path = download_testdata(img_url, test_image, "data") + +data = tvm.relay.testing.darknet.load_image(img_path, netw, neth) +###################################################################### +# Execute on TVM Runtime +# ---------------------- +# The process is no different from other examples. 
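+# (An illustrative aside, not part of the original tutorial: before
+# running, the three build artifacts could also be persisted to disk for
+# later deployment, as the other NNVM tutorials do; the file names below
+# are placeholders.)
+#
+# .. code-block:: python
+#
+#     lib.export_library("deploy_lib.so")
+#     with open("deploy_graph.json", "w") as f:
+#         f.write(graph.json())
+#     with open("deploy_param.params", "wb") as f:
+#         f.write(nnvm.compiler.save_param_dict(params))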
+from tvm.contrib import graph_runtime + +m = graph_runtime.create(graph, lib, ctx) + +# set inputs +m.set_input('data', tvm.nd.array(data.astype(dtype))) +m.set_input(**params) +# execute +print("Running the test image...") + +m.run() +# get outputs +tvm_out = [] +if MODEL_NAME == 'yolov2': + layer_out = {} + layer_out['type'] = 'Region' + # Get the region layer attributes (n, out_c, out_h, out_w, classes, coords, background) + layer_attr = m.get_output(2).asnumpy() + layer_out['biases'] = m.get_output(1).asnumpy() + out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0], + layer_attr[2], layer_attr[3]) + layer_out['output'] = m.get_output(0).asnumpy().reshape(out_shape) + layer_out['classes'] = layer_attr[4] + layer_out['coords'] = layer_attr[5] + layer_out['background'] = layer_attr[6] + tvm_out.append(layer_out) + +elif MODEL_NAME == 'yolov3': + for i in range(3): + layer_out = {} + layer_out['type'] = 'Yolo' + # Get the yolo layer attributes (n, out_c, out_h, out_w, classes, total) + layer_attr = m.get_output(i*4+3).asnumpy() + layer_out['biases'] = m.get_output(i*4+2).asnumpy() + layer_out['mask'] = m.get_output(i*4+1).asnumpy() + out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0], + layer_attr[2], layer_attr[3]) + layer_out['output'] = m.get_output(i*4).asnumpy().reshape(out_shape) + layer_out['classes'] = layer_attr[4] + tvm_out.append(layer_out) + +# do the detection and bring up the bounding boxes +thresh = 0.5 +nms_thresh = 0.45 +img = tvm.relay.testing.darknet.load_image_color(img_path) +_, im_h, im_w = img.shape +dets = tvm.relay.testing.yolo_detection.fill_network_boxes((netw, neth), (im_w, im_h), thresh, + 1, tvm_out) +last_layer = net.layers[net.n - 1] +tvm.relay.testing.yolo_detection.do_nms_sort(dets, last_layer.classes, nms_thresh) + +coco_name = 'coco.names' +coco_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + coco_name + '?raw=true' +font_name = 'arial.ttf' +font_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + font_name + '?raw=true' +coco_path = download_testdata(coco_url, coco_name, module='data') +font_path = download_testdata(font_url, font_name, module='data') + +with open(coco_path) as f: + content = f.readlines() + +names = [x.strip() for x in content] + +tvm.relay.testing.yolo_detection.draw_detections(font_path, img, dets, thresh, names, last_layer.classes) +plt.imshow(img.transpose(1, 2, 0)) +plt.show() diff --git a/nnvm/tutorials/from_mxnet.py b/nnvm/tutorials/from_mxnet.py new file mode 100644 index 000000000000..e4a30aa2c0e0 --- /dev/null +++ b/nnvm/tutorials/from_mxnet.py @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +.. _tutorial-from-mxnet: + +Compile MXNet Models +==================== +**Author**: `Joshua Z. 
Zhang `_
+
+This article is an introductory tutorial to deploy mxnet models with NNVM.
+
+To begin, the mxnet module is required to be installed.
+
+A quick solution is
+
+.. code-block:: bash
+
+    pip install mxnet --user
+
+or please refer to the official installation guide:
+https://mxnet.incubator.apache.org/versions/master/install/index.html
+"""
+# some standard imports
+import mxnet as mx
+import numpy as np
+import nnvm
+import tvm
+from tvm.contrib.download import download_testdata
+
+######################################################################
+# Download Resnet18 model from Gluon Model Zoo
+# ---------------------------------------------
+# In this section, we download a pretrained imagenet model and classify an image.
+from mxnet.gluon.model_zoo.vision import get_model
+from PIL import Image
+from matplotlib import pyplot as plt
+block = get_model('resnet18_v1', pretrained=True)
+img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
+img_name = 'cat.png'
+synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
+                      '4d0b62f3d01426887599d4f7ede23ee5/raw/',
+                      '596b27d23537e5a1b5751d2b0481ef172f58b539/',
+                      'imagenet1000_clsid_to_human.txt'])
+synset_name = 'imagenet1000_clsid_to_human.txt'
+img_path = download_testdata(img_url, img_name, module='data')
+synset_path = download_testdata(synset_url, synset_name, module='data')
+with open(synset_path) as f:
+    synset = eval(f.read())
+image = Image.open(img_path).resize((224, 224))
+plt.imshow(image)
+plt.show()
+
+def transform_image(image):
+    image = np.array(image) - np.array([123., 117., 104.])
+    image /= np.array([58.395, 57.12, 57.375])
+    image = image.transpose((2, 0, 1))
+    image = image[np.newaxis, :]
+    return image
+
+x = transform_image(image)
+print('x', x.shape)
+
+######################################################################
+# Compile the Graph
+# -----------------
+# Now we would like to port the Gluon model to a portable computational graph.
+# It's as easy as several lines.
+# We support MXNet static graphs (symbol) and HybridBlocks in mxnet.gluon.
+sym, params = nnvm.frontend.from_mxnet(block)
+# we want a probability so add a softmax operator
+sym = nnvm.sym.softmax(sym)
+
+######################################################################
+# now compile the graph
+import nnvm.compiler
+target = 'cuda'
+shape_dict = {'data': x.shape}
+with nnvm.compiler.build_config(opt_level=3):
+    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
+
+######################################################################
+# Execute the portable graph on TVM
+# ---------------------------------
+# Now, we would like to reproduce the same forward computation using TVM.
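+#
+# (A sanity check, as an illustrative aside that is not part of the
+# original tutorial: the TVM result below could be compared against
+# Gluon's own forward pass on the same preprocessed input.)
+#
+# .. code-block:: python
+#
+#     mx_prob = block(mx.nd.array(x)).softmax()
+#     print('MXNet top-1:', np.argmax(mx_prob.asnumpy()[0]))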
+from tvm.contrib import graph_runtime
+ctx = tvm.gpu(0)
+dtype = 'float32'
+m = graph_runtime.create(graph, lib, ctx)
+# set inputs
+m.set_input('data', tvm.nd.array(x.astype(dtype)))
+m.set_input(**params)
+# execute
+m.run()
+# get outputs
+tvm_output = m.get_output(0)
+top1 = np.argmax(tvm_output.asnumpy()[0])
+print('TVM prediction top-1:', top1, synset[top1])
+
+######################################################################
+# Use MXNet symbol with pretrained weights
+# ----------------------------------------
+# MXNet often uses `arg_params` and `aux_params` to store network parameters
+# separately; here we show how to use these weights with the existing API.
+def block2symbol(block):
+    data = mx.sym.Variable('data')
+    sym = block(data)
+    args = {}
+    auxs = {}
+    for k, v in block.collect_params().items():
+        args[k] = mx.nd.array(v.data().asnumpy())
+    return sym, args, auxs
+mx_sym, args, auxs = block2symbol(block)
+# usually we would save/load it as a checkpoint
+mx.model.save_checkpoint('resnet18_v1', 0, mx_sym, args, auxs)
+# there are 'resnet18_v1-0000.params' and 'resnet18_v1-symbol.json' on disk
+
+######################################################################
+# for a normal mxnet model, we start from here
+mx_sym, args, auxs = mx.model.load_checkpoint('resnet18_v1', 0)
+# now we use the same API to get an NNVM compatible symbol
+nnvm_sym, nnvm_params = nnvm.frontend.from_mxnet(mx_sym, args, auxs)
+# repeat the same steps to run this model using TVM
diff --git a/nnvm/tutorials/from_mxnet_to_webgl.py b/nnvm/tutorials/from_mxnet_to_webgl.py
new file mode 100644
index 000000000000..a54704cca381
--- /dev/null
+++ b/nnvm/tutorials/from_mxnet_to_webgl.py
@@ -0,0 +1,515 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Deploy Deep Learning Models to OpenGL and WebGL
+===============================================
+**Author**: `Zhixun Tan `_
+
+This example shows how to build a neural network with the NNVM python frontend and
+generate a runtime library for WebGL running in a browser with TVM.
+To run this notebook, you need to install tvm and nnvm.
+Notice that you need to build tvm with OpenGL.
+"""
+
+######################################################################
+# Overview
+# --------
+# In this tutorial, we will download a pre-trained resnet18 model from Gluon
+# Model Zoo, and run image classification in 3 different ways:
+#
+# - Run locally:
+#   We will compile the model into a TVM library with OpenGL device code and
+#   directly run it locally.
+#
+# - Run in a browser through RPC:
+#   We will compile the model into a JavaScript TVM library with WebGL device
+#   code, and upload it to an RPC server that is hosting a JavaScript TVM runtime
+#   to run it.
+# +# - Export a JavaScript library and run in a browser: +# We will compile the model into a JavaScript TVM library with WebGL device +# code, combine it with JavaScript TVM runtime, and pack everything together. +# Then we will run it directly in a browser. +# +from __future__ import print_function + +import numpy as np +import tvm +from tvm.contrib.download import download_testdata +import nnvm.compiler +import nnvm.testing + +# This tutorial must be run with OpenGL backend enabled in TVM. +# The NNVM CI does not enable OpenGL yet. But the user can run this script. +opengl_enabled = tvm.module.enabled("opengl") + +# To run the local demo, set this flag to True. +run_deploy_local = False + +# To run the RPC demo, set this flag to True. +run_deploy_rpc = False + +# To run the WebGL deploy demo, set this flag to True. +run_deploy_web = False + +###################################################################### +# Download a Pre-trained Resnet18 Model +# ------------------------------------- +# Here we define 2 functions: +# +# - A function that downloads a pre-trained resnet18 model from Gluon Model Zoo. +# The model that we download is in MXNet format, we then transform it into an +# NNVM computation graph. +# +# - A function that downloads a file that contains the name of all the image +# classes in this model. +# +def load_mxnet_resnet(): + """Load a pretrained resnet model from MXNet and transform that into NNVM + format. + + Returns + ------- + net : nnvm.Symbol + The loaded resnet computation graph. + + params : dict[str -> NDArray] + The pretrained model parameters. + + data_shape: tuple + The shape of the input tensor (an image). + + out_shape: tuple + The shape of the output tensor (probability of all classes). + """ + + print("Loading pretrained resnet model from MXNet...") + + # Download a pre-trained mxnet resnet18_v1 model. + from mxnet.gluon.model_zoo.vision import get_model + block = get_model('resnet18_v1', pretrained=True) + + # Transform the mxnet model into NNVM. + # We want a probability so add a softmax operator. + sym, params = nnvm.frontend.from_mxnet(block) + sym = nnvm.sym.softmax(sym) + + print("- Model loaded!") + return sym, params, (1, 3, 224, 224), (1, 1000) + +def download_synset(): + """Download a dictionary from class index to name. + This lets us know what our prediction actually is. + + Returns + ------- + synset : dict[int -> str] + The loaded synset. + """ + + print("Downloading synset...") + + url = "https://gist.githubusercontent.com/zhreshold/" + \ + "4d0b62f3d01426887599d4f7ede23ee5/raw/" + \ + "596b27d23537e5a1b5751d2b0481ef172f58b539/" + \ + "imagenet1000_clsid_to_human.txt" + file_name = "imagenet1000_clsid_to_human.txt" + + file_path = download_testdata(url, file_name, module='data') + with open(file_path) as f: + synset = eval(f.read()) + + print("- Synset downloaded!") + return synset + +###################################################################### +# Download Input Image +# -------------------- +# Here we define 2 functions that prepare an image that we want to perform +# classification on. +# +# - A function that downloads a cat image. +# +# - A function that performs preprocessing to an image so that it fits the +# format required by the resnet18 model. +# +def download_image(): + """Download a cat image and resize it to 224x224 which fits resnet. + + Returns + ------- + image : PIL.Image.Image + The loaded and resized image. 
+    """
+
+    print("Downloading cat image...")
+
+    from matplotlib import pyplot as plt
+    from PIL import Image
+
+    url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
+    img_name = "cat.png"
+
+    img_path = download_testdata(url, img_name, module='data')
+    image = Image.open(img_path).resize((224, 224))
+
+    print("- Cat image downloaded!")
+
+    plt.imshow(image)
+    plt.show()
+
+    return image
+
+def transform_image(image):
+    """Perform necessary preprocessing to input image.
+
+    Parameters
+    ----------
+    image : numpy.ndarray
+        The raw image.
+
+    Returns
+    -------
+    image : numpy.ndarray
+        The preprocessed image.
+    """
+
+    image = np.array(image) - np.array([123., 117., 104.])
+    image /= np.array([58.395, 57.12, 57.375])
+    image = image.transpose((2, 0, 1))
+    image = image[np.newaxis, :]
+    return image
+
+######################################################################
+# Compile the Model
+# -----------------
+# Here we define a function that invokes the NNVM compiler.
+#
+def compile_net(net, target_host, target, data_shape, params):
+    """Compiles an NNVM computation graph.
+
+    Parameters
+    ----------
+    net : nnvm.Graph
+        The NNVM computation graph.
+
+    target_host : str
+        The target to compile the host portion of the library.
+
+    target : str
+        The target to compile the device portion of the library.
+
+    data_shape : tuple
+        The shape of the input data (image).
+
+    params : dict[str -> NDArray]
+        Model parameters.
+
+    Returns
+    -------
+    graph : Graph
+        The final execution graph.
+
+    libmod : tvm.Module
+        The module that comes with the execution graph.
+
+    params : dict[str -> NDArray]
+        The updated parameters of the graph if params is passed.
+        This can be different from the params passed in.
+    """
+
+    print("Compiling the neural network...")
+
+    with nnvm.compiler.build_config(opt_level=0):
+        deploy_graph, lib, deploy_params = nnvm.compiler.build(
+            net,
+            target_host=target_host,
+            target=target,
+            shape={"data": data_shape},
+            params=params)
+
+    print("- Compilation completed!")
+    return deploy_graph, lib, deploy_params
+
+######################################################################
+# Demo 1: Deploy Locally
+# ----------------------
+# In this demo, we will compile the model targeting the local machine.
+#
+# Then we will demonstrate how to save the compiled model as a shared library
+# and load it back.
+#
+# Finally, we will run the model.
+#
+def deploy_local():
+    """Runs the demo that deploys a model locally.
+    """
+
+    # Load resnet model.
+    net, params, data_shape, out_shape = load_mxnet_resnet()
+
+    # Compile the model.
+    # Note that we specify the host target as "llvm".
+    deploy_graph, lib, deploy_params = compile_net(
+        net,
+        target_host="llvm",
+        target="opengl",
+        data_shape=data_shape,
+        params=params)
+
+    # Save the compiled module.
+    # Note we need to save all three files returned from the NNVM compiler.
+    print("Saving the compiled module...")
+    from tvm.contrib import util
+    temp = util.tempdir()
+
+    path_lib = temp.relpath("deploy_lib.so")
+    path_graph_json = temp.relpath("deploy_graph.json")
+    path_params = temp.relpath("deploy_param.params")
+
+    lib.export_library(path_lib)
+    with open(path_graph_json, "w") as fo:
+        fo.write(deploy_graph.json())
+    with open(path_params, "wb") as fo:
+        fo.write(nnvm.compiler.save_param_dict(deploy_params))
+
+    print("- Saved files:", temp.listdir())
+
+    # Load the module back.
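+    # (tvm.module.load picks the loader from the file extension; a
+    # ".so" file such as the one saved above goes through the system
+    # dynamic loader.)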
+    print("Loading the module back...")
+    loaded_lib = tvm.module.load(path_lib)
+    with open(path_graph_json) as fi:
+        loaded_graph_json = fi.read()
+    with open(path_params, "rb") as fi:
+        loaded_params = bytearray(fi.read())
+    print("- Module loaded!")
+
+    # Run the model! We will perform prediction on an image.
+    print("Running the graph...")
+    from tvm.contrib import graph_runtime
+
+    module = graph_runtime.create(loaded_graph_json, loaded_lib, tvm.opengl(0))
+    module.load_params(loaded_params)
+
+    image = transform_image(download_image())
+    input_data = tvm.nd.array(image.astype("float32"), ctx=tvm.opengl(0))
+
+    module.set_input("data", input_data)
+    module.run()
+
+    # Retrieve the output.
+    out = module.get_output(0, tvm.nd.empty(out_shape, ctx=tvm.opengl(0)))
+    top1 = np.argmax(out.asnumpy())
+    synset = download_synset()
+    print('TVM prediction top-1:', top1, synset[top1])
+
+if run_deploy_local and opengl_enabled:
+    deploy_local()
+
+######################################################################
+# Demo 2: Deploy the Model to WebGL Remotely with RPC
+# -------------------------------------------------------
+# Following the steps above, we can also compile the model for WebGL.
+# TVM provides the rpc module to help with remote deployment.
+#
+# When we deploy a model locally to OpenGL, the model consists of two parts:
+# the host LLVM part and the device GLSL part. Now that we want to deploy to
+# WebGL, we need to leverage Emscripten to transform LLVM into JavaScript. In
+# order to do that, we will need to specify the host target as
+# ``llvm -target=asmjs-unknown-emscripten -system-lib``. Then we call Emscripten
+# to compile the LLVM binary output into a JavaScript file.
+#
+# First, we need to manually start an RPC server. Please follow the instructions
+# in `tvm/web/README.md`. After following the steps, you should have a web page
+# opened in a browser, and a Python script running a proxy.
+#
+def deploy_rpc():
+    """Runs the demo that deploys a model remotely through RPC.
+    """
+    from tvm import rpc
+    from tvm.contrib import util, emscripten
+
+    # As usual, load the resnet18 model.
+    net, params, data_shape, out_shape = load_mxnet_resnet()
+
+    # Compile the model.
+    # Note that this time we are changing the target.
+    # This is because we want to translate the host library into JavaScript
+    # through Emscripten.
+    graph, lib, params = compile_net(
+        net,
+        target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
+        target="opengl",
+        data_shape=data_shape,
+        params=params)
+
+    # Now we want to deploy our model through RPC.
+    # First we need to prepare the module files locally.
+    print("Saving the compiled module...")
+
+    temp = util.tempdir()
+    path_obj = temp.relpath("deploy.bc")  # host LLVM part
+    path_dso = temp.relpath("deploy.js")  # host JavaScript part
+    path_gl = temp.relpath("deploy.gl")   # device GLSL part
+    path_json = temp.relpath("deploy.tvm_meta.json")
+
+    lib.save(path_obj)
+    emscripten.create_js(path_dso, path_obj, side_module=True)
+    lib.imported_modules[0].save(path_gl)
+
+    print("- Saved files:", temp.listdir())
+
+    # Connect to the RPC server.
+    print("Connecting to RPC server...")
+    proxy_host = 'localhost'
+    proxy_port = 9090
+    remote = rpc.connect(proxy_host, proxy_port, key="js")
+    print("- Connected to RPC server!")
+
+    # Upload module to RPC server.
+    print("Uploading module to RPC server...")
+    remote.upload(path_dso, "deploy.dso")
+    remote.upload(path_gl)
+    remote.upload(path_json)
+    print("- Upload completed!")
+
+    # Load remote library.
+ print("Loading remote library...") + fdev = remote.load_module("deploy.gl") + fhost = remote.load_module("deploy.dso") + fhost.import_module(fdev) + rlib = fhost + print("- Remote library loaded!") + + ctx = remote.opengl(0) + + # Upload the parameters. + print("Uploading parameters...") + rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()} + print("- Parameters uploaded!") + + # Create the remote runtime module. + print("Running remote module...") + from tvm.contrib import graph_runtime + module = graph_runtime.create(graph, rlib, ctx) + + # Set parameter. + module.set_input(**rparams) + + # Set input data. + input_data = np.random.uniform(size=data_shape) + module.set_input('data', tvm.nd.array(input_data.astype('float32'))) + + # Run. + module.run() + print("- Remote module execution completed!") + + out = module.get_output(0, out=tvm.nd.empty(out_shape, ctx=ctx)) + # Print first 10 elements of output. + print(out.asnumpy()[0][0:10]) + +if run_deploy_rpc and opengl_enabled: + deploy_rpc() + +###################################################################### +# Demo 3: Deploy the Model to WebGL SystemLib +# ----------------------------------------------- +# This time we are not using RPC. Instead, we will compile the model and link it +# with the entire tvm runtime into a single giant JavaScript file. Then we will +# run the model using JavaScript. +# +def deploy_web(): + """Runs the demo that deploys to web. + """ + + import base64 + import json + import os + import shutil + import SimpleHTTPServer, SocketServer + + from tvm.contrib import emscripten + + curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(os.getcwd()))) + working_dir = os.getcwd() + output_dir = os.path.join(working_dir, "resnet") + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # As usual, load the resnet18 model. + net, params, data_shape, out_shape = load_mxnet_resnet() + + # As usual, compile the model. + graph, lib, params = compile_net( + net, + target_host="llvm -target=asmjs-unknown-emscripten -system-lib", + target="opengl", + data_shape=data_shape, + params=params) + + # Now we save the model and link it with the TVM web runtime. + path_lib = os.path.join(output_dir, "resnet.js") + path_graph = os.path.join(output_dir, "resnet.json") + path_params = os.path.join(output_dir, "resnet.params") + path_data_shape = os.path.join(output_dir, "data_shape.json") + path_out_shape = os.path.join(output_dir, "out_shape.json") + + lib.export_library(path_lib, emscripten.create_js, options=[ + "-s", "USE_GLFW=3", + "-s", "USE_WEBGL2=1", + "-lglfw", + "-s", "TOTAL_MEMORY=1073741824", + ]) + with open(path_graph, "w") as fo: + fo.write(graph.json()) + with open(path_params, "w") as fo: + fo.write(base64.b64encode(nnvm.compiler.save_param_dict(params))) + + shutil.copyfile(os.path.join(curr_path, "../tvm/web/tvm_runtime.js"), + os.path.join(output_dir, "tvm_runtime.js")) + shutil.copyfile(os.path.join(curr_path, "web/resnet.html"), + os.path.join(output_dir, "resnet.html")) + + # Now we want to save some extra files so that we can execute the model from + # JavaScript. 
+    # - data shape
+    with open(path_data_shape, "w") as fo:
+        json.dump(list(data_shape), fo)
+    # - out shape
+    with open(path_out_shape, "w") as fo:
+        json.dump(list(out_shape), fo)
+    # - input image
+    image = download_image()
+    image.save(os.path.join(output_dir, "data.png"))
+    # - synset
+    synset = download_synset()
+    with open(os.path.join(output_dir, "synset.json"), "w") as fo:
+        json.dump(synset, fo)
+
+    print("Output files are in", output_dir)
+
+    # Finally, we fire up a simple web server to serve all the exported files.
+    print("Now running a simple server to serve the files...")
+    os.chdir(output_dir)
+    port = 8080
+    handler = SimpleHTTPServer.SimpleHTTPRequestHandler
+    httpd = SocketServer.TCPServer(("", port), handler)
+    print("Please open http://localhost:" + str(port) + "/resnet.html")
+    httpd.serve_forever()
+
+if run_deploy_web and opengl_enabled:
+    deploy_web()
diff --git a/nnvm/tutorials/from_onnx.py b/nnvm/tutorials/from_onnx.py
new file mode 100644
index 000000000000..97d154615e67
--- /dev/null
+++ b/nnvm/tutorials/from_onnx.py
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile ONNX Models
+===================
+**Author**: `Joshua Z. Zhang `_
+
+This article is an introductory tutorial to deploy ONNX models with NNVM.
+
+To begin, the onnx module is required to be installed.
+
+A quick solution is to install the protobuf compiler, and
+
+.. code-block:: bash
+
+    pip install onnx --user
+
+or please refer to the official site:
+https://github.com/onnx/onnx
+"""
+import nnvm
+import tvm
+from tvm.contrib.download import download_testdata
+import onnx
+import numpy as np
+
+######################################################################
+# Load pretrained ONNX model
+# ---------------------------------------------
+# The example super resolution model used here is exactly the same model as in the onnx tutorial
+# http://pytorch.org/tutorials/advanced/super_resolution_with_caffe2.html
+# we skip the pytorch model construction part, and download the saved onnx model
+model_url = ''.join(['https://gist.github.com/zhreshold/',
+                     'bcda4716699ac97ea44f791c24310193/raw/',
+                     '93672b029103648953c4e5ad3ac3aadf346a4cdc/',
+                     'super_resolution_0.2.onnx'])
+model_path = download_testdata(model_url, 'super_resolution.onnx', module='onnx')
+# now you have super_resolution.onnx on disk
+onnx_model = onnx.load_model(model_path)
+# we can load the graph as an NNVM compatible model
+sym, params = nnvm.frontend.from_onnx(onnx_model)
+
+######################################################################
+# Load a test image
+# ---------------------------------------------
+# A single cat dominates the examples!
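+# (The super resolution model operates on the luminance channel only;
+# this is why the image is converted to YCbCr below and only the Y
+# plane is fed to the network, while Cb/Cr are upsampled separately at
+# the end.)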
+from PIL import Image
+img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
+img_path = download_testdata(img_url, 'cat.png', module='data')
+img = Image.open(img_path).resize((224, 224))
+img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
+img_y, img_cb, img_cr = img_ycbcr.split()
+x = np.array(img_y)[np.newaxis, np.newaxis, :, :]
+
+######################################################################
+# Compile the model on NNVM
+# ---------------------------------------------
+# We should be familiar with the process by now.
+import nnvm.compiler
+target = 'cuda'
+# assume the first input name is data
+input_name = sym.list_input_names()[0]
+shape_dict = {input_name: x.shape}
+with nnvm.compiler.build_config(opt_level=3):
+    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
+
+######################################################################
+# Execute on TVM
+# ---------------------------------------------
+# The process is no different from other examples.
+from tvm.contrib import graph_runtime
+ctx = tvm.gpu(0)
+dtype = 'float32'
+m = graph_runtime.create(graph, lib, ctx)
+# set inputs
+m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
+m.set_input(**params)
+# execute
+m.run()
+# get outputs
+output_shape = (1, 1, 672, 672)
+tvm_output = m.get_output(0, tvm.nd.empty(output_shape, dtype)).asnumpy()
+
+######################################################################
+# Display results
+# ---------------------------------------------
+# We put the input and output images side by side.
+from matplotlib import pyplot as plt
+out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode='L')
+out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
+out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
+result = Image.merge('YCbCr', [out_y, out_cb, out_cr]).convert('RGB')
+canvas = np.full((672, 672*2, 3), 255)
+canvas[0:224, 0:224, :] = np.asarray(img)
+canvas[:, 672:, :] = np.asarray(result)
+plt.imshow(canvas.astype(np.uint8))
+plt.show()
diff --git a/nnvm/tutorials/from_tensorflow.py b/nnvm/tutorials/from_tensorflow.py
new file mode 100644
index 000000000000..6a30443dba60
--- /dev/null
+++ b/nnvm/tutorials/from_tensorflow.py
@@ -0,0 +1,239 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile Tensorflow Models
+=========================
+This article is an introductory tutorial to deploy tensorflow models with TVM.
+
+To begin, the tensorflow python module is required to be installed.
+
+Please refer to https://www.tensorflow.org/install
+"""
+
+# tvm and nnvm
+import nnvm
+import tvm
+
+# os and numpy
+import numpy as np
+import os.path
+
+# Tensorflow imports
+import tensorflow as tf
+from tensorflow.core.framework import graph_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_util
+
+# Tensorflow utility functions
+import tvm.relay.testing.tf as tf_testing
+
+# Base location for model related files.
+repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
+
+# Test image
+img_name = 'elephant-299.jpg'
+image_url = os.path.join(repo_base, img_name)
+
+######################################################################
+# Tutorials
+# ---------
+# .. note::
+#
+#     protobuf should be exported with the :any:`add_shapes=True` option.
+#     You could use https://github.com/dmlc/web-data/tree/master/tensorflow/scripts/tf-to-nnvm.py
+#     to add shapes to existing models.
+#
+# Please refer to docs/frontend/tensorflow.md for more details on various models
+# from tensorflow.
+
+model_name = 'classify_image_graph_def-with_shapes.pb'
+model_url = os.path.join(repo_base, model_name)
+
+# Image label map
+map_proto = 'imagenet_2012_challenge_label_map_proto.pbtxt'
+map_proto_url = os.path.join(repo_base, map_proto)
+
+# Human readable text for labels
+label_map = 'imagenet_synset_to_human_label_map.txt'
+label_map_url = os.path.join(repo_base, label_map)
+
+# Target settings
+# Use these commented settings to build for cuda.
+#target = 'cuda'
+#target_host = 'llvm'
+#layout = "NCHW"
+#ctx = tvm.gpu(0)
+target = 'llvm'
+target_host = 'llvm'
+layout = None
+ctx = tvm.cpu(0)
+
+######################################################################
+# Download required files
+# -----------------------
+# Download the files listed above.
+from tvm.contrib.download import download_testdata
+
+img_path = download_testdata(image_url, img_name, module='data')
+model_path = download_testdata(model_url, model_name, module=['tf', 'InceptionV1'])
+map_proto_path = download_testdata(map_proto_url, map_proto, module='data')
+label_path = download_testdata(label_map_url, label_map, module='data')
+
+######################################################################
+# Import model
+# ------------
+# Creates a tensorflow graph definition from the protobuf file.
+
+with tf.gfile.FastGFile(model_path, 'rb') as f:
+    graph_def = tf.GraphDef()
+    graph_def.ParseFromString(f.read())
+    graph = tf.import_graph_def(graph_def, name='')
+    # Call the utility to import the graph definition into default graph.
+    graph_def = tf_testing.ProcessGraphDefParam(graph_def)
+    # Add shapes to the graph.
+    with tf.Session() as sess:
+        graph_def = tf_testing.AddShapesToGraphDef(sess, 'softmax')
+
+######################################################################
+# Decode image
+# ------------
+# .. note::
+#
+#     The tensorflow frontend import doesn't support preprocessing ops like JpegDecode.
+#     JpegDecode is bypassed (it just returns the source node).
+#     Hence we supply a decoded frame to TVM instead.
+#
+
+from PIL import Image
+image = Image.open(img_path).resize((299, 299))
+
+x = np.array(image)
+
+######################################################################
+# Import the graph to NNVM
+# ------------------------
+# Import the tensorflow graph definition to nnvm.
+#
+# Results:
+#   sym: nnvm graph for the given tensorflow protobuf.
+#   params: params converted from tensorflow params (tensor protobuf).
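+#
+# (An illustrative aside, not part of the original tutorial: after the
+# import, the input names the compiler expects can be listed, which
+# helps when a protobuf uses non-obvious node names.)
+#
+# .. code-block:: python
+#
+#     # should include 'DecodeJpeg/contents' for this model
+#     print(sym.list_input_names()[:5])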
+sym, params = nnvm.frontend.from_tensorflow(graph_def, layout=layout) + +print("Tensorflow protobuf imported as nnvm graph") +###################################################################### +# NNVM Compilation +# ---------------- +# Compile the graph to llvm target with given input specification. +# +# Results: +# graph: Final graph after compilation. +# params: final params after compilation. +# lib: target library which can be deployed on target with tvm runtime. + +import nnvm.compiler +shape_dict = {'DecodeJpeg/contents': x.shape} +dtype_dict = {'DecodeJpeg/contents': 'uint8'} +graph, lib, params = nnvm.compiler.build(sym, shape=shape_dict, target=target, target_host=target_host, dtype=dtype_dict, params=params) + +###################################################################### +# Execute the portable graph on TVM +# --------------------------------- +# Now we can try deploying the NNVM compiled model on target. + +from tvm.contrib import graph_runtime +dtype = 'uint8' +m = graph_runtime.create(graph, lib, ctx) +# set inputs +m.set_input('DecodeJpeg/contents', tvm.nd.array(x.astype(dtype))) +m.set_input(**params) +# execute +m.run() +# get outputs +tvm_output = m.get_output(0, tvm.nd.empty(((1, 1008)), 'float32')) + +###################################################################### +# Process the output +# ------------------ +# Process the model output to human readable text for InceptionV1. +predictions = tvm_output.asnumpy() +predictions = np.squeeze(predictions) + +# Creates node ID --> English string lookup. +node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, + uid_lookup_path=label_path) + +# Print top 5 predictions from TVM output. +top_k = predictions.argsort()[-5:][::-1] +for node_id in top_k: + human_string = node_lookup.id_to_string(node_id) + score = predictions[node_id] + print('%s (score = %.5f)' % (human_string, score)) + +###################################################################### +# Inference on tensorflow +# ----------------------- +# Run the corresponding model on tensorflow + +def create_graph(): + """Creates a graph from saved GraphDef file and returns a saver.""" + # Creates graph from saved graph_def.pb. + with tf.gfile.FastGFile(model_path, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + graph = tf.import_graph_def(graph_def, name='') + # Call the utility to import the graph definition into default graph. + graph_def = tf_testing.ProcessGraphDefParam(graph_def) + +def run_inference_on_image(image): + """Runs inference on an image. + + Parameters + ---------- + image: String + Image file name. + + Returns + ------- + Nothing + """ + if not tf.gfile.Exists(image): + tf.logging.fatal('File does not exist %s', image) + image_data = tf.gfile.FastGFile(image, 'rb').read() + + # Creates graph from saved GraphDef. + create_graph() + + with tf.Session() as sess: + softmax_tensor = sess.graph.get_tensor_by_name('softmax:0') + predictions = sess.run(softmax_tensor, + {'DecodeJpeg/contents:0': image_data}) + + predictions = np.squeeze(predictions) + + # Creates node ID --> English string lookup. + node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, + uid_lookup_path=label_path) + + # Print top 5 predictions from tensorflow. 
+        top_k = predictions.argsort()[-5:][::-1]
+        print("===== TENSORFLOW RESULTS =======")
+        for node_id in top_k:
+            human_string = node_lookup.id_to_string(node_id)
+            score = predictions[node_id]
+            print('%s (score = %.5f)' % (human_string, score))
+
+run_inference_on_image(img_path)
diff --git a/nnvm/tutorials/get_started.py b/nnvm/tutorials/get_started.py
new file mode 100644
index 000000000000..46f711e7d347
--- /dev/null
+++ b/nnvm/tutorials/get_started.py
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Get Started with NNVM
+=====================
+**Author**: `Tianqi Chen `_
+
+This article is an introductory tutorial to the workflow in NNVM.
+"""
+import nnvm.compiler
+import nnvm.symbol as sym
+
+######################################################################
+# Declare Computation
+# -------------------
+# We start by describing our computation with a computational graph.
+# Most deep learning frameworks use computation graphs to describe
+# their computation. In this example, we directly use
+# NNVM's API to construct the computational graph.
+#
+# .. note::
+#
+#     In a typical deep learning compilation workflow,
+#     we can get the models from :any:`nnvm.frontend`
+#
+# The following code snippet describes :math:`z = x + \sqrt{y}`
+# and creates an nnvm graph from the description.
+# We can print out the graph IR to check the graph content.
+
+x = sym.Variable("x")
+y = sym.Variable("y")
+z = sym.elemwise_add(x, sym.sqrt(y))
+compute_graph = nnvm.graph.create(z)
+print("-------compute graph-------")
+print(compute_graph.ir())
+
+######################################################################
+# Compile
+# -------
+# We can call :any:`nnvm.compiler.build` to compile the graph.
+# The build function takes a shape parameter which specifies the
+# input shape requirement. Here we only need to pass in the shape of ``x``
+# and the other one will be inferred automatically by NNVM.
+#
+# The function returns three values. ``deploy_graph`` contains
+# the final compiled graph structure. ``lib`` is a :any:`tvm.module.Module`
+# that contains compiled CUDA functions. We do not need the ``params``
+# in this case.
+shape = (4,)
+deploy_graph, lib, params = nnvm.compiler.build(
+    compute_graph, target="cuda", shape={"x": shape}, dtype="float32")
+
+######################################################################
+# We can print out the IR of ``deploy_graph`` to understand what just
+# happened under the hood. We can find that ``deploy_graph`` only
+# contains a single operator ``tvm_op``. This is because NNVM
+# automatically fused the operators together into one operator.
+#
+print("-------deploy graph-------")
+print(deploy_graph.ir())
+
+######################################################################
+# Let us also peek into the content of ``lib``.
+# Typically a compiled TVM CUDA module contains a host module (``lib``)
+# and a device module (``lib.imported_modules[0]``) that contains the CUDA code.
+# We print out the generated device code here.
+# This is exactly a fused CUDA version of the kernel that the graph points to.
+#
+print("-------deploy library-------")
+print(lib.imported_modules[0].get_source())
+
+######################################################################
+# Deploy and Run
+# --------------
+# Now that we have the compiled module, let us run it.
+# We can use :any:`graph_runtime `
+# in tvm to create a deployable :any:`GraphModule `.
+# We can use the :any:`set_input `,
+# :any:`run ` and
+# :any:`get_output ` functions
+# to set the input, execute the graph and get the output we need.
+#
+import tvm
+import numpy as np
+from tvm.contrib import graph_runtime, util
+
+module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0))
+x_np = np.array([1, 2, 3, 4]).astype("float32")
+y_np = np.array([4, 4, 4, 4]).astype("float32")
+# set input to the graph module
+module.set_input(x=x_np, y=y_np)
+# run forward computation
+module.run()
+# get the first output
+out = module.get_output(0, out=tvm.nd.empty(shape))
+print(out.asnumpy())
+
+######################################################################
+# Provide Model Parameters
+# ------------------------
+# Most deep learning models contain two types of inputs: parameters
+# that remain fixed during inference and data inputs that need to
+# change for each inference task. It is helpful to provide this
+# information to NNVM. Let us assume that ``y`` is the parameter
+# in our example. We can provide the model parameter information
+# by the params argument to :any:`nnvm.compiler.build`.
+#
+deploy_graph, lib, params = nnvm.compiler.build(
+    compute_graph, target="cuda", shape={"x": shape}, params={"y": y_np})
+
+######################################################################
+# This time we will need the params value returned by :any:`nnvm.compiler.build`.
+# NNVM applies optimizations to pre-compute the intermediate values in
+# the graph that can be determined by parameters. In this case
+# :math:`\sqrt{y}` can be pre-computed. The pre-computed values
+# are returned as new params. We can print out the new compiled library
+# to confirm that the fused kernel now only contains the add.
+#
+print("-----optimized params-----")
+print(params)
+print("-------deploy library-------")
+print(lib.imported_modules[0].get_source())
+
+######################################################################
+# Save the Deployed Module
+# ------------------------
+# We can save the ``deploy_graph``, ``lib`` and ``params`` separately
+# and load them back later. We can use :any:`tvm.module.Module` to export
+# the compiled library. ``deploy_graph`` is saved in json format and ``params``
+# is serialized into a bytearray.
+#
+temp = util.tempdir()
+path_lib = temp.relpath("deploy.so")
+lib.export_library(path_lib)
+with open(temp.relpath("deploy.json"), "w") as fo:
+    fo.write(deploy_graph.json())
+with open(temp.relpath("deploy.params"), "wb") as fo:
+    fo.write(nnvm.compiler.save_param_dict(params))
+print(temp.listdir())
+
+######################################################################
+# We can load the module back.
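+# (A quick numerical check, as an illustrative aside that is not part
+# of the original tutorial: the output of the reloaded module below
+# should match :math:`z = x + \sqrt{y}` computed directly in numpy.)
+#
+# .. code-block:: python
+#
+#     np.testing.assert_allclose(
+#         out.asnumpy(), x_np + np.sqrt(y_np), rtol=1e-5)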
+loaded_lib = tvm.module.load(path_lib)
+loaded_json = open(temp.relpath("deploy.json")).read()
+loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read())
+module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
+params = nnvm.compiler.load_param_dict(loaded_params)
+# directly load from byte array
+module.load_params(loaded_params)
+module.run(x=x_np)
+# get the first output
+out = module.get_output(0, out=tvm.nd.empty(shape))
+print(out.asnumpy())
+
+######################################################################
+# Deploy using Another Language
+# -----------------------------
+# We use python in this example for demonstration.
+# We can also deploy the compiled modules with other languages
+# supported by TVM such as C++, Java, and JavaScript.
+# The graph module itself is fully embedded in the TVM runtime.
+#
+# The following block demonstrates how we can directly use TVM's
+# runtime API to execute the compiled module.
+# You can find a similar runtime API in the TVMRuntime of other languages.
+#
+fcreate = tvm.get_global_func("tvm.graph_runtime.create")
+ctx = tvm.gpu(0)
+gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id)
+set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"]
+set_input("x", tvm.nd.array(x_np))
+gmodule["load_params"](loaded_params)
+run()
+out = tvm.nd.empty(shape)
+get_output(0, out)
+print(out.asnumpy())
diff --git a/nnvm/tutorials/nlp/from_darknet_rnn.py b/nnvm/tutorials/nlp/from_darknet_rnn.py
new file mode 100644
index 000000000000..1bc9627dd62f
--- /dev/null
+++ b/nnvm/tutorials/nlp/from_darknet_rnn.py
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile Darknet Models for RNN
+==============================
+**Author**: `Siju Samuel `_
+
+This article is an introductory tutorial to deploy darknet rnn models with NNVM.
+
+This script will run a character prediction model.
+Each module consists of 3 fully-connected layers. The input layer propagates information from the
+input to the current state. The recurrent layer propagates information through time from the
+previous state to the current one.
+
+The input to the network is a 1-hot encoding of ASCII characters. We train the network to predict
+the next character in a stream of characters. The output is constrained to be a probability
+distribution using a softmax layer.
+
+Since each recurrent layer contains information about the current character and the past
+characters, it can use this context to predict the future characters in a word or phrase.
+
+All the required models and libraries will be downloaded from the internet
+by the script.
+""" +import random +import numpy as np +import tvm +from tvm.contrib import graph_runtime +from tvm.contrib.download import download_testdata +from nnvm.testing.darknet import __darknetffi__ +import nnvm +import nnvm.frontend.darknet + +# Set the parameters +# ----------------------- +# Set the seed value and the number of characters to predict + +#Model name +MODEL_NAME = 'rnn' +#Seed value +seed = 'Thus' +#Number of characters to predict +num = 1000 + +# Download required files +# ----------------------- +# Download cfg and weights file if first time. +CFG_NAME = MODEL_NAME + '.cfg' +WEIGHTS_NAME = MODEL_NAME + '.weights' +REPO_URL = 'https://github.com/dmlc/web-data/blob/master/darknet/' +CFG_URL = REPO_URL + 'cfg/' + CFG_NAME + '?raw=true' +WEIGHTS_URL = REPO_URL + 'weights/' + WEIGHTS_NAME + '?raw=true' + +cfg_path = download_testdata(CFG_URL, CFG_NAME, module='darknet') +weights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module='darknet') + +# Download and Load darknet library +DARKNET_LIB = 'libdarknet.so' +DARKNET_URL = REPO_URL + 'lib/' + DARKNET_LIB + '?raw=true' +lib_path = download_testdata(DARKNET_URL, DARKNET_LIB, module='darknet') +DARKNET_LIB = __darknetffi__.dlopen(lib_path) +net = DARKNET_LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0) +dtype = 'float32' +batch_size = 1 + +# Import the graph to NNVM +# ------------------------ +# Import darknet graph definition to nnvm. +# +# Results: +# sym: nnvm graph for rnn model +# params: params converted from darknet weights +print("Converting darknet rnn model to nnvm symbols...") +sym, params = nnvm.frontend.darknet.from_darknet(net, dtype) + +# Compile the model on NNVM +data = np.empty([1, net.inputs], dtype)#net.inputs + +target = 'llvm' +shape = {'data': data.shape} +print("Compiling the model...") + +shape_dict = {'data': data.shape} +dtype_dict = {'data': data.dtype} + +with nnvm.compiler.build_config(opt_level=2): + graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, dtype_dict, params) + +# Execute the portable graph on TVM +# --------------------------------- +# Now we can try deploying the NNVM compiled model on cpu target. 
+
+# Set the cpu context
+ctx = tvm.cpu(0)
+# Create graph runtime
+m = graph_runtime.create(graph, lib, ctx)
+# Set the params to runtime
+m.set_input(**params)
+
+def _init_state_memory(rnn_cells_count, dtype):
+    '''Initialize memory for states'''
+    states = {}
+    state_shape = (1024,)
+    for i in range(rnn_cells_count):
+        k = 'rnn' + str(i) + '_state'
+        states[k] = tvm.nd.array(np.zeros(state_shape, dtype).astype(dtype))
+    return states
+
+def _set_state_input(runtime, states):
+    '''Set the state inputs'''
+    for state in states:
+        runtime.set_input(state, states[state])
+
+def _get_state_output(runtime, states):
+    '''Get the state outputs and save'''
+    i = 1
+    for state in states:
+        data = states[state]
+        states[state] = runtime.get_output((i), tvm.nd.empty(data.shape, data.dtype))
+        i += 1
+
+def _proc_rnn_output(out_data):
+    '''Generate the characters from the output array'''
+    sum_array = 0
+    n = out_data.size
+    r = random.uniform(0, 1)
+    for j in range(n):
+        if out_data[j] < 0.0001:
+            out_data[j] = 0
+        sum_array += out_data[j]
+
+    for j in range(n):
+        out_data[j] *= float(1.0) / sum_array
+        r = r - out_data[j]
+        if r <= 0:
+            return j
+    return n-1
+
+print("RNN generating text...")
+
+out_shape = (net.outputs,)
+rnn_cells_count = 3
+
+# Initialize state memory
+# -----------------------
+states = _init_state_memory(rnn_cells_count, dtype)
+
+len_seed = len(seed)
+count = len_seed + num
+out_txt = ""
+
+# Initialize random seed
+random.seed(0)
+c = ord(seed[0])
+inp_data = np.zeros([net.inputs], dtype)
+
+# Run the model
+# -------------
+
+# Predict character by character until `num` characters are generated
+for i in range(count):
+    inp_data[c] = 1
+
+    # Set the input data
+    m.set_input('data', tvm.nd.array(inp_data.astype(dtype)))
+    inp_data[c] = 0
+
+    # Set the state inputs
+    _set_state_input(m, states)
+
+    # Run the model
+    m.run()
+
+    # Get the output
+    tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
+
+    # Get the state outputs
+    _get_state_output(m, states)
+
+    # Get the predicted character and keep buffering it
+    c = ord(seed[i]) if i < len_seed else _proc_rnn_output(tvm_out)
+    out_txt += chr(c)
+
+print("Predicted Text =", out_txt)
diff --git a/nnvm/tutorials/nlp/keras_s2s_translate.py b/nnvm/tutorials/nlp/keras_s2s_translate.py
new file mode 100644
index 000000000000..16c737418c6f
--- /dev/null
+++ b/nnvm/tutorials/nlp/keras_s2s_translate.py
@@ -0,0 +1,254 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Keras LSTM Sequence to Sequence Model for Translation
+=====================================================
+**Author**: `Siju Samuel `_
+
+This script demonstrates how to implement a basic character-level sequence-to-sequence model.
+We apply it to translating short English sentences into short French sentences,
+character-by-character.
+
+# Summary of the algorithm
+
+- We start with input sequences from a domain (e.g. English sentences)
+  and corresponding target sequences from another domain
+  (e.g. French sentences).
+- An encoder LSTM turns input sequences to 2 state vectors
+  (we keep the last LSTM state and discard the outputs).
+- A decoder LSTM is trained to turn the target sequences into
+  the same sequence but offset by one timestep in the future,
+  a training process called "teacher forcing" in this context.
+  It uses the state vectors from the encoder as its initial state.
+  Effectively, the decoder learns to generate `targets[t+1...]`
+  given `targets[...t]`, conditioned on the input sequence.
+
+This script loads the s2s.h5 model saved in the repository
+https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/lstm_seq2seq.py
+and generates sequences from it. It assumes that no changes have been made (for example:
+latent_dim is unchanged, and the input data and model architecture are unchanged).
+
+# References
+
+- Sequence to Sequence Learning with Neural Networks
+  https://arxiv.org/abs/1409.3215
+- Learning Phrase Representations using
+  RNN Encoder-Decoder for Statistical Machine Translation
+  https://arxiv.org/abs/1406.1078
+
+See lstm_seq2seq.py for more details on the model architecture and how it is trained.
+"""
+
+from keras.models import Model, load_model
+from keras.layers import Input
+import random
+import os
+import numpy as np
+import keras
+import tvm
+import nnvm
+
+######################################################################
+# Download required files
+# -----------------------
+# Download the files listed below from the dmlc web-data repo.
+model_file = "s2s_translate.h5"
+data_file = "fra-eng.txt"
+
+# Base location for model related files.
+repo_base = 'https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/'
+model_url = os.path.join(repo_base, model_file)
+data_url = os.path.join(repo_base, data_file)
+
+# Download files listed below.
+from tvm.contrib.download import download_testdata
+model_path = download_testdata(model_url, model_file, module='keras')
+data_path = download_testdata(data_url, data_file, module='data')
+
+latent_dim = 256  # Latent dimensionality of the encoding space.
+test_samples = 10000  # Number of samples used for testing.
+
+######################################################################
+# Process the data file
+# ---------------------
+# Vectorize the data. We use the same approach as the training script.
+# NOTE: the data must be identical, in order for the character -> integer
+# mappings to be consistent.
+input_texts = []
+target_texts = []
+input_characters = set()
+target_characters = set()
+with open(data_path, 'r', encoding='utf-8') as f:
+    lines = f.read().split('\n')
+test_samples = min(test_samples, len(lines))
+max_encoder_seq_length = 0
+max_decoder_seq_length = 0
+for line in lines[:test_samples]:
+    input_text, target_text = line.split('\t')
+    # We use "tab" as the "start sequence" character
+    # for the targets, and "\n" as the "end sequence" character.
+    target_text = '\t' + target_text + '\n'
+    max_encoder_seq_length = max(max_encoder_seq_length, len(input_text))
+    max_decoder_seq_length = max(max_decoder_seq_length, len(target_text))
+    for char in input_text:
+        if char not in input_characters:
+            input_characters.add(char)
+    for char in target_text:
+        if char not in target_characters:
+            target_characters.add(char)
+
+input_characters = sorted(list(input_characters))
+target_characters = sorted(list(target_characters))
+num_encoder_tokens = len(input_characters)
+num_decoder_tokens = len(target_characters)
+input_token_index = dict(
+    [(char, i) for i, char in enumerate(input_characters)])
+target_token_index = dict(
+    [(char, i) for i, char in enumerate(target_characters)])
+
+# Reverse-lookup token index to decode sequences back to something readable.
+reverse_target_char_index = dict(
+    (i, char) for char, i in target_token_index.items())
+
+######################################################################
+# Load Keras Model
+# ----------------
+# Restore the model and construct the encoder and decoder.
+model = load_model(model_path)
+encoder_inputs = model.input[0]  # input_1
+
+encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output  # lstm_1
+encoder_states = [state_h_enc, state_c_enc]
+encoder_model = Model(encoder_inputs, encoder_states)
+
+decoder_inputs = model.input[1]  # input_2
+decoder_state_input_h = Input(shape=(latent_dim,), name='input_3')
+decoder_state_input_c = Input(shape=(latent_dim,), name='input_4')
+decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
+decoder_lstm = model.layers[3]
+decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
+    decoder_inputs, initial_state=decoder_states_inputs)
+decoder_states = [state_h_dec, state_c_dec]
+decoder_dense = model.layers[4]
+decoder_outputs = decoder_dense(decoder_outputs)
+decoder_model = Model(
+    [decoder_inputs] + decoder_states_inputs,
+    [decoder_outputs] + decoder_states)
+
+######################################################################
+# Compile both the encoder and decoder models with NNVM
+# ------------------------------------------------------
+# Create the NNVM graph definitions from the Keras models.
+from tvm.contrib import graph_runtime
+target = 'llvm'
+ctx = tvm.cpu(0)
+
+# Parse Encoder model
+sym, params = nnvm.frontend.from_keras(encoder_model)
+inp_enc_shape = (1, max_encoder_seq_length, num_encoder_tokens)
+shape_dict = {'input_1': inp_enc_shape}
+
+# Build Encoder model
+with nnvm.compiler.build_config(opt_level=2):
+    enc_graph, enc_lib, enc_params = nnvm.compiler.build(sym, target, shape_dict, params=params)
+print("Encoder build ok.")
+
+# Create graph runtime for encoder model
+tvm_enc = graph_runtime.create(enc_graph, enc_lib, ctx)
+tvm_enc.set_input(**enc_params)
+
+# Parse Decoder model
+inp_dec_shape = (1, 1, num_decoder_tokens)
+shape_dict = {'input_2': inp_dec_shape,
+              'input_3': (1, latent_dim),
+              'input_4': (1, latent_dim)}
+
+# Build Decoder model
+sym, params = nnvm.frontend.from_keras(decoder_model)
+with nnvm.compiler.build_config(opt_level=2):
+    dec_graph, dec_lib, dec_params = nnvm.compiler.build(sym, target, shape_dict, params=params)
+print("Decoder build ok.")
+
+# Create graph runtime for decoder model
+tvm_dec = graph_runtime.create(dec_graph, dec_lib, ctx)
+tvm_dec.set_input(**dec_params)
+
+# Decodes an input sequence.
+def decode_sequence(input_seq):
+    # Set the input for encoder model.
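+    # (input_seq is a one-hot array of shape
+    # (1, max_encoder_seq_length, num_encoder_tokens),
+    # as produced by generate_input_seq below.)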
+    tvm_enc.set_input('input_1', input_seq)
+
+    # Run encoder model
+    tvm_enc.run()
+
+    # Get states from encoder network
+    h = tvm_enc.get_output(0).asnumpy()
+    c = tvm_enc.get_output(1).asnumpy()
+
+    # Populate the first character of target sequence with the start character.
+    sampled_token_index = target_token_index['\t']
+
+    # Sampling loop for a batch of sequences
+    decoded_sentence = ''
+    while True:
+        # Generate empty target sequence of length 1.
+        target_seq = np.zeros((1, 1, num_decoder_tokens), dtype='float32')
+        # Update the target sequence (of length 1).
+        target_seq[0, 0, sampled_token_index] = 1.
+
+        # Set the input and states for decoder model.
+        tvm_dec.set_input('input_2', target_seq)
+        tvm_dec.set_input('input_3', h)
+        tvm_dec.set_input('input_4', c)
+        # Run decoder model
+        tvm_dec.run()
+
+        output_tokens = tvm_dec.get_output(0).asnumpy()
+        h = tvm_dec.get_output(1).asnumpy()
+        c = tvm_dec.get_output(2).asnumpy()
+
+        # Sample a token
+        sampled_token_index = np.argmax(output_tokens[0, -1, :])
+        sampled_char = reverse_target_char_index[sampled_token_index]
+
+        # Exit condition: either hit max length or find stop character.
+        if sampled_char == '\n':
+            break
+
+        # Update the sentence
+        decoded_sentence += sampled_char
+        if len(decoded_sentence) > max_decoder_seq_length:
+            break
+    return decoded_sentence
+
+def generate_input_seq(input_text):
+    input_seq = np.zeros((1, max_encoder_seq_length, num_encoder_tokens), dtype='float32')
+    for t, char in enumerate(input_text):
+        input_seq[0, t, input_token_index[char]] = 1.
+    return input_seq
+
+######################################################################
+# Run the model
+# -------------
+# Randomly pick some text from the test samples and translate it.
+for seq_index in range(100):
+    # Take one sentence randomly and try to decode.
+    index = random.randint(0, test_samples - 1)
+    input_text, _ = lines[index].split('\t')
+    input_seq = generate_input_seq(input_text)
+    decoded_sentence = decode_sequence(input_seq)
+    print((seq_index + 1), ": ", input_text, "==>", decoded_sentence)
diff --git a/nnvm/tutorials/tune_nnvm_arm.py b/nnvm/tutorials/tune_nnvm_arm.py
new file mode 100644
index 000000000000..d61130b852cc
--- /dev/null
+++ b/nnvm/tutorials/tune_nnvm_arm.py
@@ -0,0 +1,427 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Auto-tuning a convolutional network for ARM CPU (NNVM)
+======================================================
+**Author**: `Lianmin Zheng `_, `Zhao Wu `_
+
+Auto-tuning for a specific ARM device is critical for getting the best
+performance. This is a tutorial on how to tune a whole convolutional
+network.
+
+The operator implementation for ARM CPU in TVM is written in template form.
+The template has many tunable knobs (tile factor, vectorization, unrolling, etc).
+
+We will tune all convolution and depthwise convolution operators
+in the neural network. After tuning, we produce a log file which stores
+the best knob values for all required operators. When the tvm compiler compiles
+these operators, it will query this log file to get the best knob values.
+
+We also released pre-tuned parameters for some ARM devices. You can go to
+`ARM CPU Benchmark `_
+to see the results.
+"""
+
+######################################################################
+# Install dependencies
+# --------------------
+# To use the autotvm package in tvm, we need to install some extra dependencies.
+# (change "3" to "2" if you use python2):
+#
+# .. code-block:: bash
+#
+#   pip3 install --user psutil xgboost tornado
+#
+# To make tvm run faster during tuning, it is recommended to use cython
+# as the FFI of tvm. In the root directory of tvm, execute
+# (change "3" to "2" if you use python2):
+#
+# .. code-block:: bash
+#
+#   pip3 install --user cython
+#   sudo make cython3
+#
+# Now return to python code. Import packages.
+
+import os
+
+import numpy as np
+
+import nnvm.testing
+import nnvm.compiler
+import tvm
+from tvm import autotvm
+from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
+from tvm.contrib.util import tempdir
+import tvm.contrib.graph_runtime as runtime
+
+#################################################################
+# Define network
+# --------------
+# First we need to define the network in nnvm symbol API.
+# We can load some pre-defined networks from :code:`nnvm.testing`.
+# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
+# tutorials :ref:`tutorial-nnvm` for more details).
+
+def get_network(name, batch_size):
+    """Get the symbol definition and random weight of a network"""
+    input_shape = (batch_size, 3, 224, 224)
+    output_shape = (batch_size, 1000)
+
+    if "resnet" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif "vgg" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif name == 'mobilenet':
+        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
+    elif name == 'squeezenet_v1.1':
+        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
+    elif name == 'inception_v3':
+        input_shape = (1, 3, 299, 299)
+        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
+    elif name == 'custom':
+        # an example for custom network
+        from nnvm.testing import utils
+        net = nnvm.sym.Variable('data')
+        net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
+        net = nnvm.sym.flatten(net)
+        net = nnvm.sym.dense(net, units=1000)
+        net, params = utils.create_workload(net, batch_size, (3, 224, 224))
+    elif name == 'mxnet':
+        # an example for mxnet model
+        from mxnet.gluon.model_zoo.vision import get_model
+        block = get_model('resnet18_v1', pretrained=True)
+        net, params = nnvm.frontend.from_mxnet(block)
+        net = nnvm.sym.softmax(net)
+    else:
+        raise ValueError("Unsupported network: " + name)
+
+    return net, params, input_shape, output_shape
+
+
+#################################################################
+# Start RPC Tracker
+# -----------------
+# TVM uses an RPC session to communicate with ARM boards.
+# During tuning, the tuner will send the generated code to the board and
+# measure the speed of the code on the board.
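+#
+# Under the hood this goes through :code:`tvm.rpc`. As a minimal sketch of such
+# a session (assuming a device is already registered under the key "rk3399";
+# see the following sections for starting the tracker and registering devices):
+#
+# .. code-block:: python
+#
+#   from tvm import rpc
+#   tracker = rpc.connect_tracker('localhost', 9190)
+#   remote = tracker.request('rk3399')  # blocks until a device is free
+#   print(remote.cpu(0))  # a context on the remote device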
+#
+# To scale up the tuning, TVM uses RPC Tracker to manage distributed devices.
+# The RPC Tracker is a centralized master node. We can register all devices to
+# the tracker. For example, if we have 10 phones, we can register all of them
+# to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
+#
+# To start an RPC tracker, run this command on the host machine. The tracker is
+# required during the whole tuning process, so we need to open a new terminal for
+# this command:
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
+#
+# The expected output is
+#
+# .. code-block:: bash
+#
+#   INFO:RPCTracker:bind to 0.0.0.0:9190
+
+#################################################################
+# Register devices to RPC Tracker
+# -----------------------------------
+# Now we can register our devices to the tracker. The first step is to
+# build the tvm runtime for the ARM devices.
+#
+# * For Linux:
+#   Follow this section :ref:`build-tvm-runtime-on-device` to build
+#   the tvm runtime on the device. Then register the device to the tracker by
+#
+#   .. code-block:: bash
+#
+#     python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399
+#
+#   (replace :code:`[HOST_IP]` with the IP address of your host machine)
+#
+# * For Android:
+#   Follow this `readme page `_ to
+#   install the tvm rpc apk on the android device. Make sure you can pass the android rpc test.
+#   Then your device is already registered. During tuning, you have to go to developer options
+#   and enable "Keep screen awake during charging", and charge your phone to keep it stable.
+#
+# After registering devices, we can confirm it by querying rpc_tracker
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190
+#
+# For example, if we have 2 Huawei mate10 pro, 11 Raspberry Pi 3B and 2 rk3399,
+# the output can be
+#
+# .. code-block:: bash
+#
+#    Queue Status
+#    ----------------------------------
+#    key          total  free  pending
+#    ----------------------------------
+#    mate10pro    2      2     0
+#    rk3399       2      2     0
+#    rpi3b        11     11    0
+#    ----------------------------------
+#
+# You can register multiple devices to the tracker to accelerate the measurement in tuning.
+
+###########################################
+# Set Tuning Options
+# ------------------
+# Before tuning, we should apply some configurations. Here we use an RK3399 board
+# as an example. In your setting, you should modify the target and device_key accordingly.
+# Set :code:`use_android` to True if you use an Android phone.
+
+#### DEVICE CONFIG ####
+
+# Replace "aarch64-linux-gnu" with the correct target of your board.
+# This target is used for cross compilation. You can query it by :code:`gcc -v` on your device.
+target = tvm.target.create('llvm -device=arm_cpu -target=aarch64-linux-gnu')
+
+# Also replace this with the device key in your tracker
+device_key = 'rk3399'
+
+# Set this to True if you use an android phone
+use_android = False
+
+#### TUNING OPTION ####
+network = 'resnet-18'
+log_file = "%s.%s.log" % (device_key, network)
+dtype = 'float32'
+
+tuning_option = {
+    'log_filename': log_file,
+
+    'tuner': 'xgb',
+    'n_trial': 2000,
+    'early_stopping': 800,
+
+    'measure_option': autotvm.measure_option(
+        builder=autotvm.LocalBuilder(
+            build_func='ndk' if use_android else 'default'),
+        runner=autotvm.RPCRunner(
+            device_key, host='localhost', port=9190,
+            number=5,
+            timeout=4,
+        ),
+    ),
+}
+
+####################################################################
+#
+# .. note:: How to set tuning options
+#
+#   In general, the default values provided here work well.
+#   If you have enough time budget, you can set :code:`n_trial`, :code:`early_stopping` larger,
+#   which makes the tuning run longer.
+#   If your device runs very slowly or your conv2d operators have many GFLOPs, consider
+#   setting a larger timeout.
+#
+#   If your model has depthwise convolution, you could consider setting
+#   :code:`try_spatial_pack_depthwise` to :code:`True`, which generally performs better
+#   than the default optimization. For example, on an ARM CPU A53 at 2.0GHz, we found it
+#   could boost the performance of depthwise convolution in the Mobilenet V1 model by about 1.6x.
+
+###################################################################
+# Begin Tuning
+# ------------
+# Now we can extract tuning tasks from the network and begin tuning.
+# Here, we provide a simple utility function to tune a list of tasks.
+# This function is just an initial implementation which tunes them in sequential order.
+# We will introduce a more sophisticated tuning scheduler in the future.
+
+# You can skip the implementation of this function for this tutorial.
+def tune_tasks(tasks,
+               measure_option,
+               tuner='xgb',
+               n_trial=1000,
+               early_stopping=None,
+               log_filename='tuning.log',
+               use_transfer_learning=True,
+               try_winograd=True,
+               try_spatial_pack_depthwise=False):
+    if try_winograd:
+        for i in range(len(tasks)):
+            try:  # try winograd template
+                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
+                                          tasks[i].target, tasks[i].target_host, 'winograd')
+                input_channel = tsk.workload[1][1]
+                if input_channel >= 64:
+                    tasks[i] = tsk
+            except Exception:
+                pass
+
+    # if we want to use spatial pack for depthwise convolution
+    if try_spatial_pack_depthwise:
+        tuner = 'xgb_knob'
+        for i in range(len(tasks)):
+            if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
+                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
+                                          tasks[i].target, tasks[i].target_host,
+                                          'contrib_spatial_pack')
+                tasks[i] = tsk
+
+    # create tmp log file
+    tmp_log_file = log_filename + ".tmp"
+    if os.path.exists(tmp_log_file):
+        os.remove(tmp_log_file)
+
+    for i, tsk in enumerate(reversed(tasks)):
+        prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
+
+        # create tuner
+        if tuner == 'xgb' or tuner == 'xgb-rank':
+            tuner_obj = XGBTuner(tsk, loss_type='rank')
+        elif tuner == 'xgb_knob':
+            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
+        elif tuner == 'ga':
+            tuner_obj = GATuner(tsk, pop_size=50)
+        elif tuner == 'random':
+            tuner_obj = RandomTuner(tsk)
+        elif tuner == 'gridsearch':
+            tuner_obj = GridSearchTuner(tsk)
+        else:
+            raise ValueError("Invalid tuner: " + tuner)
+
+        if use_transfer_learning:
+            if os.path.isfile(tmp_log_file):
+                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
+
+        # do tuning
+        n_trial = min(n_trial, len(tsk.config_space))
+        tuner_obj.tune(n_trial=n_trial,
+                       early_stopping=early_stopping,
+                       measure_option=measure_option,
+                       callbacks=[
+                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
+                           autotvm.callback.log_to_file(tmp_log_file)])
+
+    # pick best records to a cache file
+    autotvm.record.pick_best(tmp_log_file, log_filename)
+    os.remove(tmp_log_file)
+
+
+########################################################################
+# Finally, we launch tuning jobs and evaluate the end-to-end performance.
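+#
+# Before launching a long tuning run, it can be worth sanity-checking the RPC
+# setup by requesting a session manually, using the same helper that the code
+# below uses (a sketch; it assumes the tracker and device are running):
+#
+# .. code-block:: python
+#
+#   remote = autotvm.measure.request_remote(device_key, 'localhost', 9190,
+#                                           timeout=10000)
+#   print(remote.context(str(target), 0))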
+
+def tune_and_evaluate(tuning_opt):
+    # extract workloads from nnvm graph
+    print("Extract tasks...")
+    net, params, input_shape, out_shape = get_network(network, batch_size=1)
+    tasks = autotvm.task.extract_from_graph(net, target=target,
+                                            shape={'data': input_shape}, dtype=dtype,
+                                            symbols=(nnvm.sym.conv2d,))
+
+    # run tuning tasks
+    print("Tuning...")
+    tune_tasks(tasks, **tuning_opt)
+
+    # compile kernels with history best records
+    with autotvm.apply_history_best(log_file):
+        print("Compile...")
+        with nnvm.compiler.build_config(opt_level=3):
+            graph, lib, params = nnvm.compiler.build(
+                net, target=target, shape={'data': input_shape}, params=params, dtype=dtype)
+
+        # export library
+        tmp = tempdir()
+        if use_android:
+            from tvm.contrib import ndk
+            filename = "net.so"
+            lib.export_library(tmp.relpath(filename), ndk.create_shared)
+        else:
+            filename = "net.tar"
+            lib.export_library(tmp.relpath(filename))
+
+        # upload module to device
+        print("Upload...")
+        remote = autotvm.measure.request_remote(device_key, 'localhost', 9190,
+                                                timeout=10000)
+        remote.upload(tmp.relpath(filename))
+        rlib = remote.load_module(filename)
+
+        # upload parameters to device
+        ctx = remote.context(str(target), 0)
+        module = runtime.create(graph, rlib, ctx)
+        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
+        module.set_input('data', data_tvm)
+        module.set_input(**params)
+
+        # evaluate
+        print("Evaluate inference time cost...")
+        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
+        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
+        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+              (np.mean(prof_res), np.std(prof_res)))
+
+# We do not run the tuning in our webpage server since it takes too long.
+# Uncomment the following line to run it by yourself.
+
+# tune_and_evaluate(tuning_option)
+
+######################################################################
+# Sample Output
+# -------------
+# The tuning needs to compile many programs and extract features from them.
+# So a high-performance CPU is recommended.
+# One sample output is listed below.
+# It takes about 2 hours on a 32T AMD Ryzen Threadripper.
+#
+# .. code-block:: bash
+#
+#   Extract tasks...
+#   Tuning...
+#   [Task  1/12]  Current/Best: 22.37/ 52.19 GFLOPS | Progress: (544/1000) | 406.59 s Done.
+#   [Task  2/12]  Current/Best:  6.51/ 18.77 GFLOPS | Progress: (608/1000) | 325.05 s Done.
+#   [Task  3/12]  Current/Best:  4.67/ 24.87 GFLOPS | Progress: (480/1000) | 372.31 s Done.
+#   [Task  4/12]  Current/Best: 11.35/ 46.83 GFLOPS | Progress: (736/1000) | 602.39 s Done.
+#   [Task  5/12]  Current/Best:  1.01/ 19.80 GFLOPS | Progress: (448/1000) | 262.16 s Done.
+#   [Task  6/12]  Current/Best:  2.47/ 23.76 GFLOPS | Progress: (672/1000) | 563.85 s Done.
+#   [Task  7/12]  Current/Best: 14.57/ 33.97 GFLOPS | Progress: (544/1000) | 465.15 s Done.
+#   [Task  8/12]  Current/Best:  1.13/ 17.65 GFLOPS | Progress: (576/1000) | 365.08 s Done.
+#   [Task  9/12]  Current/Best: 14.45/ 22.66 GFLOPS | Progress: (928/1000) | 724.25 s Done.
+#   [Task 10/12]  Current/Best:  3.22/ 15.36 GFLOPS | Progress: (864/1000) | 564.27 s Done.
+#   [Task 11/12]  Current/Best: 11.03/ 32.23 GFLOPS | Progress: (736/1000) | 635.15 s Done.
+#   [Task 12/12]  Current/Best:  8.00/ 21.65 GFLOPS | Progress: (1000/1000) | 1111.81 s Done.
+#   Compile...
+#   Upload...
+#   Evaluate inference time cost...
+#   Mean inference time (std dev): 162.59 ms (0.06 ms)

+######################################################################
+#
+# .. note:: **Experiencing Difficulties?**
+#
+#   The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS",
+#   then there must be something wrong.
+#
+#   First, make sure you set the correct configuration of your device.
+#   Then, you can print debug information by adding these lines in the beginning
+#   of the script. It will print every measurement result, where you can find useful
+#   error messages.
+#
+#   .. code-block:: python
+#
+#      import logging
+#      logging.getLogger('autotvm').setLevel(logging.DEBUG)
+#
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai
diff --git a/nnvm/tutorials/tune_nnvm_cuda.py b/nnvm/tutorials/tune_nnvm_cuda.py
new file mode 100644
index 000000000000..be3f79992cb6
--- /dev/null
+++ b/nnvm/tutorials/tune_nnvm_cuda.py
@@ -0,0 +1,391 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Auto-tuning a convolutional network for NVIDIA GPU (NNVM)
+=========================================================
+**Author**: `Lianmin Zheng `_
+
+Auto-tuning for specific devices and workloads is critical for getting the
+best performance. This is a tutorial on how to tune a whole convolutional
+network for NVIDIA GPU.
+
+The operator implementation for NVIDIA GPU in TVM is written in template form.
+The template has many tunable knobs (tile factor, unrolling, etc).
+We will tune all convolution and depthwise convolution operators
+in the neural network. After tuning, we produce a log file which stores
+the best knob values for all required operators. When the tvm compiler compiles
+these operators, it will query this log file to get the best knob values.
+
+We also released pre-tuned parameters for some NVIDIA GPUs. You can go to
+`NVIDIA GPU Benchmark `_
+to see the results.
+"""
+
+######################################################################
+# Install dependencies
+# --------------------
+# To use the autotvm package in tvm, we need to install some extra dependencies.
+# (change "3" to "2" if you use python2):
+#
+# .. code-block:: bash
+#
+#   pip3 install --user psutil xgboost tornado
+#
+# To make tvm run faster during tuning, it is recommended to use cython
+# as the FFI of tvm. In the root directory of tvm, execute:
+#
+# .. code-block:: bash
+#
+#   pip3 install --user cython
+#   sudo make cython3
+#
+# Now return to python code. Import packages.
+
+import os
+
+import numpy as np
+
+import nnvm.testing
+import nnvm.compiler
+import tvm
+from tvm import autotvm
+from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
+from tvm.contrib.util import tempdir
+import tvm.contrib.graph_runtime as runtime
+
+#################################################################
+# Define Network
+# --------------
+# First we need to define the network in nnvm symbol API.
+# We can load some pre-defined networks from :code:`nnvm.testing`.
+# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
+# tutorials :ref:`tutorial-nnvm` for more details).
+
+def get_network(name, batch_size):
+    """Get the symbol definition and random weight of a network"""
+    input_shape = (batch_size, 3, 224, 224)
+    output_shape = (batch_size, 1000)
+
+    if "resnet" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif "vgg" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif name == 'mobilenet':
+        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
+    elif name == 'squeezenet_v1.1':
+        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
+    elif name == 'inception_v3':
+        input_shape = (1, 3, 299, 299)
+        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
+    elif name == 'custom':
+        # an example for custom network
+        from nnvm.testing import utils
+        net = nnvm.sym.Variable('data')
+        net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
+        net = nnvm.sym.flatten(net)
+        net = nnvm.sym.dense(net, units=1000)
+        net, params = utils.create_workload(net, batch_size, (3, 224, 224))
+    elif name == 'mxnet':
+        # an example for mxnet model
+        from mxnet.gluon.model_zoo.vision import get_model
+        block = get_model('resnet18_v1', pretrained=True)
+        net, params = nnvm.frontend.from_mxnet(block)
+        net = nnvm.sym.softmax(net)
+    else:
+        raise ValueError("Unsupported network: " + name)
+
+    return net, params, input_shape, output_shape
+
+###########################################
+# Set Tuning Options
+# ------------------
+# Before tuning, we apply some configurations.
+
+#### DEVICE CONFIG ####
+target = tvm.target.cuda()
+
+#### TUNING OPTION ####
+network = 'resnet-18'
+log_file = "%s.log" % network
+dtype = 'float32'
+
+tuning_option = {
+    'log_filename': log_file,
+
+    'tuner': 'xgb',
+    'n_trial': 2000,
+    'early_stopping': 600,
+
+    'measure_option': autotvm.measure_option(
+        builder=autotvm.LocalBuilder(timeout=10),
+        runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4, min_repeat_ms=150),
+    ),
+}
+
+####################################################################
+#
+# .. note:: How to set tuning options
+#
+#   In general, the default values provided here work well.
+#
+#   If you have a large time budget, you can set :code:`n_trial`, :code:`early_stopping` larger,
+#   which makes the tuning run longer.
+#
+#   If you have multiple devices, you can use all of them for measurement to
+#   accelerate the tuning process (see the `Scale up measurement` section below).
+#
+
+###################################################################
+# Begin Tuning
+# ------------
+# Now we can extract tuning tasks from the network and begin tuning.
+# Here, we provide a simple utility function to tune a list of tasks.
+# This function is just an initial implementation which tunes them in sequential order.
+# We will introduce a more sophisticated tuning scheduler in the future.
+
+# You can skip the implementation of this function for this tutorial.
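+# As a usage sketch, it is driven below (in ``tune_and_evaluate``) simply as:
+#
+# .. code-block:: python
+#
+#   tasks = autotvm.task.extract_from_graph(net, target=target,
+#                                           shape={'data': input_shape},
+#                                           dtype=dtype,
+#                                           symbols=(nnvm.sym.conv2d,))
+#   tune_tasks(tasks, **tuning_option)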
+def tune_tasks(tasks,
+               measure_option,
+               tuner='xgb',
+               n_trial=1000,
+               early_stopping=None,
+               log_filename='tuning.log',
+               use_transfer_learning=True,
+               try_winograd=True):
+    if try_winograd:
+        for i in range(len(tasks)):
+            try:  # try winograd template
+                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
+                                          tasks[i].target, tasks[i].target_host, 'winograd')
+                input_channel = tsk.workload[1][1]
+                if input_channel >= 64:
+                    tasks[i] = tsk
+            except Exception:
+                pass
+
+    # create tmp log file
+    tmp_log_file = log_filename + ".tmp"
+    if os.path.exists(tmp_log_file):
+        os.remove(tmp_log_file)
+
+    for i, tsk in enumerate(reversed(tasks)):
+        prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
+
+        # create tuner
+        if tuner == 'xgb' or tuner == 'xgb-rank':
+            tuner_obj = XGBTuner(tsk, loss_type='rank')
+        elif tuner == 'ga':
+            tuner_obj = GATuner(tsk, pop_size=100)
+        elif tuner == 'random':
+            tuner_obj = RandomTuner(tsk)
+        elif tuner == 'gridsearch':
+            tuner_obj = GridSearchTuner(tsk)
+        else:
+            raise ValueError("Invalid tuner: " + tuner)
+
+        if use_transfer_learning:
+            if os.path.isfile(tmp_log_file):
+                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
+
+        # do tuning
+        n_trial = min(n_trial, len(tsk.config_space))
+        tuner_obj.tune(n_trial=n_trial,
+                       early_stopping=early_stopping,
+                       measure_option=measure_option,
+                       callbacks=[
+                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
+                           autotvm.callback.log_to_file(tmp_log_file)])
+
+    # pick best records to a cache file
+    autotvm.record.pick_best(tmp_log_file, log_filename)
+    os.remove(tmp_log_file)
+
+
+########################################################################
+# Finally, we launch tuning jobs and evaluate the end-to-end performance.
+
+def tune_and_evaluate(tuning_opt):
+    # extract workloads from nnvm graph
+    print("Extract tasks...")
+    net, params, input_shape, out_shape = get_network(network, batch_size=1)
+    tasks = autotvm.task.extract_from_graph(net, target=target,
+                                            shape={'data': input_shape}, dtype=dtype,
+                                            symbols=(nnvm.sym.conv2d,))
+
+    # run tuning tasks
+    print("Tuning...")
+    tune_tasks(tasks, **tuning_opt)
+
+    # compile kernels with history best records
+    with autotvm.apply_history_best(log_file):
+        print("Compile...")
+        with nnvm.compiler.build_config(opt_level=3):
+            graph, lib, params = nnvm.compiler.build(
+                net, target=target, shape={'data': input_shape}, params=params, dtype=dtype)
+
+        # export library
+        tmp = tempdir()
+        filename = "net.tar"
+        lib.export_library(tmp.relpath(filename))
+
+        # load parameters
+        ctx = tvm.context(str(target), 0)
+        module = runtime.create(graph, lib, ctx)
+        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
+        module.set_input('data', data_tvm)
+        module.set_input(**params)
+
+        # evaluate
+        print("Evaluate inference time cost...")
+        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
+        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
+        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+              (np.mean(prof_res), np.std(prof_res)))
+
+# We do not run the tuning in our webpage server since it takes too long.
+# Uncomment the following line to run it by yourself.
+
+# tune_and_evaluate(tuning_option)
+
+######################################################################
+# Sample Output
+# -------------
+# The tuning needs to compile many programs and extract features from them.
+# So a high-performance CPU is recommended. One sample output is listed below.
+# It takes about 4 hours to get the following output on a 32T AMD Ryzen Threadripper.
+# The tuning target is NVIDIA 1080 Ti.
+# (You can see some errors during compilation. If the tuning is not stuck, it is okay.)
+#
+# .. code-block:: bash
+#
+#   Extract tasks...
+#   Tuning...
+#   [Task  1/12]  Current/Best:  541.83/3570.66 GFLOPS | Progress: (960/2000) | 1001.31 s Done.
+#   [Task  2/12]  Current/Best:    0.56/ 803.33 GFLOPS | Progress: (704/2000) | 608.08 s Done.
+#   [Task  3/12]  Current/Best:  103.69/1141.25 GFLOPS | Progress: (768/2000) | 702.13 s Done.
+#   [Task  4/12]  Current/Best: 2905.03/3925.15 GFLOPS | Progress: (864/2000) | 745.94 sterminate called without an active exception
+#   [Task  4/12]  Current/Best: 2789.36/3925.15 GFLOPS | Progress: (1056/2000) | 929.40 s Done.
+#   [Task  5/12]  Current/Best:   89.06/1076.24 GFLOPS | Progress: (704/2000) | 601.73 s Done.
+#   [Task  6/12]  Current/Best:   40.39/2129.02 GFLOPS | Progress: (1088/2000) | 1125.76 s Done.
+#   [Task  7/12]  Current/Best: 4090.53/5007.02 GFLOPS | Progress: (800/2000) | 903.90 s Done.
+#   [Task  8/12]  Current/Best:    4.78/1272.28 GFLOPS | Progress: (768/2000) | 749.14 s Done.
+#   [Task  9/12]  Current/Best: 1391.45/2325.08 GFLOPS | Progress: (992/2000) | 1084.87 s Done.
+#   [Task 10/12]  Current/Best: 1995.44/2383.59 GFLOPS | Progress: (864/2000) | 862.60 s Done.
+#   [Task 11/12]  Current/Best: 4093.94/4899.80 GFLOPS | Progress: (224/2000) | 240.92 sterminate called without an active exception
+#   [Task 11/12]  Current/Best: 3487.98/4909.91 GFLOPS | Progress: (480/2000) | 534.96 sterminate called without an active exception
+#   [Task 11/12]  Current/Best: 4636.84/4912.17 GFLOPS | Progress: (1184/2000) | 1381.16 sterminate called without an active exception
+#   [Task 11/12]  Current/Best:   50.12/4912.17 GFLOPS | Progress: (1344/2000) | 1602.81 s Done.
+#   [Task 12/12]  Current/Best: 3581.31/4286.30 GFLOPS | Progress: (736/2000) | 943.52 s Done.
+#   Compile...
+#   Evaluate inference time cost...
+#   Mean inference time (std dev): 1.07 ms (0.05 ms)
+#
+# As a reference baseline, the time cost of MXNet + TensorRT on resnet-18 is 1.30ms. So we are a little faster.

+######################################################################
+#
+# .. note:: **Experiencing Difficulties?**
+#
+#   The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS",
+#   then there must be something wrong.
+#
+#   First, make sure you set the correct configuration of your device.
+#   Then, you can print debug information by adding these lines in the beginning
+#   of the script. It will print every measurement result, where you can find useful
+#   error messages.
+#
+#   .. code-block:: python
+#
+#      import logging
+#      logging.getLogger('autotvm').setLevel(logging.DEBUG)
+#
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai


+#################################################################
+# Scale up measurement by using multiple devices
+# ----------------------------------------------
+#
+# If you have multiple devices, you can use all of them for measurement.
+# TVM uses the RPC Tracker to manage distributed devices.
+# The RPC Tracker is a centralized master node. We can register all devices to
+# the tracker. For example, if we have 10 GPU cards, we can register all of them
+# to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
+#
+# To start an RPC tracker, run this command on the host machine. The tracker is
+# required during the whole tuning process, so we need to open a new terminal for
+# this command:
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
+#
+# The expected output is
+#
+# .. code-block:: bash
+#
+#   INFO:RPCTracker:bind to 0.0.0.0:9190
+#
+# Then open another new terminal for the RPC server. We need to start one server
+# for each dedicated device. We use a string key to distinguish the types of devices.
+# You can pick a name you like.
+# (Note: For the rocm backend, there are some internal errors with the compiler,
+# so we need to add `--no-fork` to the argument list.)
+#
+# .. code-block:: bash
+#
+#     python -m tvm.exec.rpc_server --tracker=localhost:9190 --key=1080ti
+#
+# After registering devices, we can confirm it by querying rpc_tracker
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.query_rpc_tracker --host=localhost --port=9190
+#
+# For example, if we have four 1080ti, two titanx and one gfx900, the output can be
+#
+# .. code-block:: bash
+#
+#    Queue Status
+#    ----------------------------------
+#    key          total  free  pending
+#    ----------------------------------
+#    1080ti       4      4     0
+#    titanx       2      2     0
+#    gfx900       1      1     0
+#    ----------------------------------
+#
+# Finally, we need to change the tuning option to use RPCRunner. Use the code below
+# to replace the corresponding part above.

+tuning_option = {
+    'log_filename': log_file,
+
+    'tuner': 'xgb',
+    'n_trial': 2000,
+    'early_stopping': 600,
+
+    'measure_option': autotvm.measure_option(
+        builder=autotvm.LocalBuilder(timeout=10),
+        runner=autotvm.RPCRunner(
+            '1080ti',  # change the device key to your key
+            'localhost', 9190,
+            number=20, repeat=3, timeout=4, min_repeat_ms=150),
+    ),
+}
diff --git a/nnvm/tutorials/tune_nnvm_mobile_gpu.py b/nnvm/tutorials/tune_nnvm_mobile_gpu.py
new file mode 100644
index 000000000000..8946dc1833bd
--- /dev/null
+++ b/nnvm/tutorials/tune_nnvm_mobile_gpu.py
@@ -0,0 +1,416 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Auto-tuning a convolutional network for Mobile GPU (NNVM)
+=========================================================
+**Author**: `Lianmin Zheng `_
+
+Auto-tuning for a specific device is critical for getting the best
+performance. This is a tutorial on how to tune a whole convolutional
+network.
+
+The operator implementation for Mobile GPU in TVM is written in template form.
+The template has many tunable knobs (tile factor, vectorization, unrolling, etc).
+We will tune all convolution, depthwise convolution and dense operators
+in the neural network. After tuning, we produce a log file which stores
+the best knob values for all required operators. When the tvm compiler compiles
+these operators, it will query this log file to get the best knob values.
+
+We also released pre-tuned parameters for some ARM devices. You can go to
+`Mobile GPU Benchmark `_
+to see the results.
+"""
+
+######################################################################
+# Install dependencies
+# --------------------
+# To use the autotvm package in tvm, we need to install some extra dependencies.
+# (change "3" to "2" if you use python2):
+#
+# .. code-block:: bash
+#
+#   pip3 install --user psutil xgboost tornado
+#
+# To make tvm run faster during tuning, it is recommended to use cython
+# as the FFI of tvm. In the root directory of tvm, execute
+# (change "3" to "2" if you use python2):
+#
+# .. code-block:: bash
+#
+#   pip3 install --user cython
+#   sudo make cython3
+#
+# Now return to python code. Import packages.
+
+import os
+
+import numpy as np
+
+import nnvm.testing
+import nnvm.compiler
+import tvm
+from tvm import autotvm
+from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
+from tvm.contrib.util import tempdir
+import tvm.contrib.graph_runtime as runtime
+
+#################################################################
+# Define network
+# --------------
+# First we need to define the network in nnvm symbol API.
+# We can load some pre-defined networks from :code:`nnvm.testing`.
+# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
+# tutorials :ref:`tutorial-nnvm` for more details).
+
+def get_network(name, batch_size):
+    """Get the symbol definition and random weight of a network"""
+    input_shape = (batch_size, 3, 224, 224)
+    output_shape = (batch_size, 1000)
+
+    if "resnet" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif "vgg" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif name == 'mobilenet':
+        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
+    elif name == 'squeezenet_v1.1':
+        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
+    elif name == 'inception_v3':
+        input_shape = (1, 3, 299, 299)
+        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
+    elif name == 'custom':
+        # an example for custom network
+        from nnvm.testing import utils
+        net = nnvm.sym.Variable('data')
+        net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
+        net = nnvm.sym.flatten(net)
+        net = nnvm.sym.dense(net, units=1000)
+        net, params = utils.create_workload(net, batch_size, (3, 224, 224))
+    elif name == 'mxnet':
+        # an example for mxnet model
+        from mxnet.gluon.model_zoo.vision import get_model
+        block = get_model('resnet18_v1', pretrained=True)
+        net, params = nnvm.frontend.from_mxnet(block)
+        net = nnvm.sym.softmax(net)
+    else:
+        raise ValueError("Unsupported network: " + name)
+
+    return net, params, input_shape, output_shape
+
+
+#################################################################
+# Start RPC Tracker
+# -----------------
+# TVM uses an RPC session to communicate with ARM boards.
+# During tuning, the tuner will send the generated code to the board and
+# measure the speed of the code on the board.
+#
+# To scale up the tuning, TVM uses RPC Tracker to manage distributed devices.
+# The RPC Tracker is a centralized master node. We can register all devices to
+# the tracker. For example, if we have 10 phones, we can register all of them
+# to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
+#
+# To start an RPC tracker, run this command on the host machine. The tracker is
+# required during the whole tuning process, so we need to open a new terminal for
+# this command:
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
+#
+# The expected output is
+#
+# .. code-block:: bash
+#
+#   INFO:RPCTracker:bind to 0.0.0.0:9190
+
+#################################################################
+# Register devices to RPC Tracker
+# -----------------------------------
+# Now we can register our devices to the tracker. The first step is to
+# build the tvm runtime for the ARM devices.
+#
+# * For Linux:
+#   Follow this section :ref:`build-tvm-runtime-on-device` to build
+#   the tvm runtime on the device. Then register the device to the tracker by
+#
+#   .. code-block:: bash
+#
+#     python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399
+#
+#   (replace :code:`[HOST_IP]` with the IP address of your host machine)
+#
+# * For Android:
+#   Follow this `readme page `_ to
+#   install the tvm rpc apk on the android device. Make sure you can pass the android rpc test.
+#   Then your device is already registered. During tuning, you have to go to developer options
+#   and enable "Keep screen awake during charging", and charge your phone to keep it stable.
+#
+# After registering devices, we can confirm it by querying rpc_tracker
+#
+# .. code-block:: bash
+#
+#   python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190
+#
+# For example, if we have 2 Huawei mate10 pro, 11 Raspberry Pi 3B and 2 rk3399,
+# the output can be
+#
+# .. code-block:: bash
+#
+#    Queue Status
+#    ----------------------------------
+#    key          total  free  pending
+#    ----------------------------------
+#    mate10pro    2      2     0
+#    rk3399       2      2     0
+#    rpi3b        11     11    0
+#    ----------------------------------
+#
+# You can register multiple devices to the tracker to accelerate the measurement in tuning.
+
+###########################################
+# Set Tuning Options
+# ------------------
+# Before tuning, we should apply some configurations. Here we use an RK3399 board
+# as an example. In your setting, you should modify the target and device_key accordingly.
+# Set :code:`use_android` to True if you use an Android phone.
+
+#### DEVICE CONFIG ####
+
+target = tvm.target.create('opencl -device=mali')
+
+# Replace "aarch64-linux-gnu" with the correct target of your board.
+# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device.
+target_host = 'llvm -target=aarch64-linux-gnu'
+
+# Also replace this with the device key in your tracker
+device_key = 'rk3399'
+
+# Set this to True if you use an android phone
+use_android = False
+
+#### TUNING OPTION ####
+network = 'resnet-18'
+log_file = "%s.%s.log" % (device_key, network)
+dtype = 'float32'
+
+tuning_option = {
+    'log_filename': log_file,
+
+    'tuner': 'xgb',
+    'n_trial': 1000,
+    'early_stopping': 450,
+
+    'measure_option': autotvm.measure_option(
+        builder=autotvm.LocalBuilder(
+            build_func='ndk' if use_android else 'default'),
+        runner=autotvm.RPCRunner(
+            device_key, host='localhost', port=9190,
+            number=10,
+            timeout=5,
+        ),
+    ),
+}
+
+####################################################################
+#
+# .. note:: How to set tuning options
+#
+#   In general, the default values provided here work well.
+#   If you have enough time budget, you can set :code:`n_trial`, :code:`early_stopping` larger,
+#   which makes the tuning run longer.
+
+#   If your device runs very slowly or your conv2d operators have many GFLOPs, consider
+#   setting a larger timeout.
+#

+###################################################################
+# Begin Tuning
+# ------------
+# Now we can extract tuning tasks from the network and begin tuning.
+# Here, we provide a simple utility function to tune a list of tasks.
+# This function is just an initial implementation which tunes them in sequential order.
+# We will introduce a more sophisticated tuning scheduler in the future.

+# You can skip the implementation of this function for this tutorial.
+def tune_tasks(tasks,
+               measure_option,
+               tuner='xgb',
+               n_trial=1000,
+               early_stopping=None,
+               log_filename='tuning.log',
+               use_transfer_learning=True,
+               try_winograd=True):
+    if try_winograd:
+        for i in range(len(tasks)):
+            try:  # try winograd template
+                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
+                                          tasks[i].target, tasks[i].target_host, 'winograd')
+                tasks.append(tsk)
+            except Exception:
+                pass

+    # create tmp log file
+    tmp_log_file = log_filename + ".tmp"
+    if os.path.exists(tmp_log_file):
+        os.remove(tmp_log_file)

+    for i, tsk in enumerate(reversed(tasks)):
+        prefix = "[Task %2d/%2d] " % (i+1, len(tasks))

+        # create tuner
+        if tuner == 'xgb' or tuner == 'xgb-rank':
+            tuner_obj = XGBTuner(tsk, loss_type='rank')
+        elif tuner == 'ga':
+            tuner_obj = GATuner(tsk, pop_size=50)
+        elif tuner == 'random':
+            tuner_obj = RandomTuner(tsk)
+        elif tuner == 'gridsearch':
+            tuner_obj = GridSearchTuner(tsk)
+        else:
+            raise ValueError("Invalid tuner: " + tuner)

+        if use_transfer_learning:
+            if os.path.isfile(tmp_log_file):
+                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

+        # do tuning
+        n_trial = min(n_trial, len(tsk.config_space))
+        tuner_obj.tune(n_trial=n_trial,
+                       early_stopping=early_stopping,
+                       measure_option=measure_option,
+                       callbacks=[
+                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
+                           autotvm.callback.log_to_file(tmp_log_file)])

+    # pick best records to a cache file
+    autotvm.record.pick_best(tmp_log_file, log_filename)
+    os.remove(tmp_log_file)


+########################################################################
+# Finally, we launch tuning jobs and evaluate the end-to-end performance.
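+#
+# (Note that for Mali we pass both ``target`` and ``target_host`` to
+# ``nnvm.compiler.build`` in the function below: the kernels are compiled for
+# the OpenCL device, while the host-side glue code is cross-compiled for the
+# ARM CPU.)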
+
+def tune_and_evaluate(tuning_opt):
+    # extract workloads from nnvm graph
+    print("Extract tasks...")
+    net, params, input_shape, out_shape = get_network(network, batch_size=1)
+    tasks = autotvm.task.extract_from_graph(net, target=target, target_host=target_host,
+                                            shape={'data': input_shape}, dtype=dtype,
+                                            symbols=(nnvm.sym.conv2d, nnvm.sym.dense))
+
+    # run tuning tasks
+    print("Tuning...")
+    tune_tasks(tasks, **tuning_opt)
+
+    # compile kernels with history best records
+    with autotvm.apply_history_best(log_file):
+        print("Compile...")
+        with nnvm.compiler.build_config(opt_level=3):
+            graph, lib, params = nnvm.compiler.build(
+                net, target=target, target_host=target_host,
+                shape={'data': input_shape}, params=params, dtype=dtype)
+
+        # export library
+        tmp = tempdir()
+        if use_android:
+            from tvm.contrib import ndk
+            filename = "net.so"
+            lib.export_library(tmp.relpath(filename), ndk.create_shared)
+        else:
+            filename = "net.tar"
+            lib.export_library(tmp.relpath(filename))
+
+        # upload module to device
+        print("Upload...")
+        remote = autotvm.measure.request_remote(device_key, 'localhost', 9190,
+                                                timeout=10000)
+        remote.upload(tmp.relpath(filename))
+        rlib = remote.load_module(filename)
+
+        # upload parameters to device
+        ctx = remote.context(str(target), 0)
+        module = runtime.create(graph, rlib, ctx)
+        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
+        module.set_input('data', data_tvm)
+        module.set_input(**params)
+
+        # evaluate
+        print("Evaluate inference time cost...")
+        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
+        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
+        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+              (np.mean(prof_res), np.std(prof_res)))
+
+# We do not run the tuning in our webpage server since it takes too long.
+# Uncomment the following line to run it by yourself.
+
+# tune_and_evaluate(tuning_option)
+
+######################################################################
+# Sample Output
+# -------------
+# The tuning needs to compile many programs and extract features from them.
+# So a high-performance CPU is recommended.
+# One sample output is listed below. It takes about 3 hours on a 32T AMD Ryzen Threadripper.
+#
+# .. code-block:: bash
+#
+#   Extract tasks...
+#   Tuning...
+#   [Task  1/17]  Current/Best: 25.30/ 39.12 GFLOPS | Progress: (992/1000) | 751.22 s Done.
+#   [Task  2/17]  Current/Best: 40.70/ 45.50 GFLOPS | Progress: (736/1000) | 545.46 s Done.
+#   [Task  3/17]  Current/Best: 38.83/ 42.35 GFLOPS | Progress: (992/1000) | 1549.85 s Done.
+#   [Task  4/17]  Current/Best: 23.31/ 31.02 GFLOPS | Progress: (640/1000) | 1059.31 s Done.
+#   [Task  5/17]  Current/Best:  0.06/  2.34 GFLOPS | Progress: (544/1000) | 305.45 s Done.
+#   [Task  6/17]  Current/Best: 10.97/ 17.20 GFLOPS | Progress: (992/1000) | 1050.00 s Done.
+#   [Task  7/17]  Current/Best:  8.98/ 10.94 GFLOPS | Progress: (928/1000) | 421.36 s Done.
+#   [Task  8/17]  Current/Best:  4.48/ 14.86 GFLOPS | Progress: (704/1000) | 582.60 s Done.
+#   [Task  9/17]  Current/Best: 10.30/ 25.99 GFLOPS | Progress: (864/1000) | 899.85 s Done.
+#   [Task 10/17]  Current/Best: 11.73/ 12.52 GFLOPS | Progress: (608/1000) | 304.85 s Done.
+#   [Task 11/17]  Current/Best: 15.26/ 18.68 GFLOPS | Progress: (800/1000) | 747.52 s Done.
+#   [Task 12/17]  Current/Best: 17.48/ 26.71 GFLOPS | Progress: (1000/1000) | 1166.40 s Done.
+#   [Task 13/17]  Current/Best:  0.96/ 11.43 GFLOPS | Progress: (960/1000) | 611.65 s Done.
+#   [Task 14/17]  Current/Best: 17.88/ 20.22 GFLOPS | Progress: (672/1000) | 670.29 s Done.
+#   [Task 15/17]  Current/Best: 11.62/ 13.98 GFLOPS | Progress: (736/1000) | 449.25 s Done.
+#   [Task 16/17]  Current/Best: 19.90/ 23.83 GFLOPS | Progress: (608/1000) | 708.64 s Done.
+#   [Task 17/17]  Current/Best: 17.98/ 22.75 GFLOPS | Progress: (736/1000) | 1122.60 s Done.
+#   Compile...
+#   Upload...
+#   Evaluate inference time cost...
+#   Mean inference time (std dev): 128.05 ms (7.74 ms)
+#

+######################################################################
+#
+# .. note:: **Experiencing Difficulties?**
+#
+#   The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS",
+#   then there must be something wrong.
+#
+#   First, make sure you set the correct configuration of your device.
+#   Then, you can print debug information by adding these lines in the beginning
+#   of the script. It will print every measurement result, where you can find useful
+#   error messages.
+#
+#   .. code-block:: python
+#
+#      import logging
+#      logging.getLogger('autotvm').setLevel(logging.DEBUG)
+#
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai
diff --git a/nnvm/tutorials/tune_nnvm_x86.py b/nnvm/tutorials/tune_nnvm_x86.py
new file mode 100644
index 000000000000..b7426271f06b
--- /dev/null
+++ b/nnvm/tutorials/tune_nnvm_x86.py
@@ -0,0 +1,236 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Auto-tuning a convolutional network for x86 CPU (NNVM)
+======================================================
+**Author**: `Yao Wang `_
+
+This is a tutorial on how to tune convolutional neural networks
+for x86 CPUs.
+"""
+import os
+import numpy as np
+
+import nnvm.testing
+import nnvm.compiler
+import tvm
+from tvm import autotvm
+from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
+import tvm.contrib.graph_runtime as runtime
+
+#################################################################
+# Define network
+# --------------
+# First we need to define the network in nnvm symbol API.
+# We can load some pre-defined networks from :code:`nnvm.testing`.
+# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
+# tutorials :ref:`tutorial-nnvm` for more details).
+#
+# In this tutorial, we choose resnet-18 as the tuning example.
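+#
+# As a usage sketch, the helper below is invoked in ``tune_and_evaluate`` as:
+#
+# .. code-block:: python
+#
+#   net, params, data_shape, out_shape = get_network('resnet-18', batch_size=1)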
+
+def get_network(name, batch_size):
+    """Get the symbol definition and random weight of a network"""
+    input_shape = (batch_size, 3, 224, 224)
+    output_shape = (batch_size, 1000)
+
+    if "resnet" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif "vgg" in name:
+        n_layer = int(name.split('-')[1])
+        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
+    elif name == 'mobilenet':
+        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
+    elif name == 'squeezenet_v1.1':
+        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
+    elif name == 'inception_v3':
+        input_shape = (1, 3, 299, 299)
+        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
+    elif name == 'custom':
+        # an example for custom network
+        from nnvm.testing import utils
+        net = nnvm.sym.Variable('data')
+        net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
+        net = nnvm.sym.flatten(net)
+        net = nnvm.sym.dense(net, units=1000)
+        net, params = utils.create_workload(net, batch_size, (3, 224, 224))
+    elif name == 'mxnet':
+        # an example for mxnet model
+        from mxnet.gluon.model_zoo.vision import get_model
+        block = get_model('resnet18_v1', pretrained=True)
+        net, params = nnvm.frontend.from_mxnet(block)
+        net = nnvm.sym.softmax(net)
+    else:
+        raise ValueError("Unsupported network: " + name)
+
+    return net, params, input_shape, output_shape
+
+# Replace "llvm" with the correct target of your CPU.
+# For example, for an AWS EC2 c5 instance with Intel Xeon
+# Platinum 8000 series, the target should be "llvm -mcpu=skylake-avx512".
+# For an AWS EC2 c4 instance with Intel Xeon E5-2666 v3, it should be
+# "llvm -mcpu=core-avx2".
+target = "llvm"
+
+batch_size = 1
+dtype = "float32"
+model_name = "resnet-18"
+log_file = "%s.log" % model_name
+
+# Set the number of threads used for tuning based on the number of
+# physical CPU cores on your machine.
+num_threads = 1
+os.environ["TVM_NUM_THREADS"] = str(num_threads)
+
+
+#################################################################
+# Configure tensor tuning settings and create tasks
+# -------------------------------------------------
+# To get better kernel execution performance on x86 CPUs,
+# we need to change the data layout of convolution kernels from
+# "NCHW" to "NCHWc". To deal with this situation, we define the
+# conv2d_NCHWc operator in topi. We will tune this operator
+# instead of plain conv2d.
+#
+# We will use local mode for tuning configuration. RPC tracker
+# mode can be set up similarly to the approach in the
+# :ref:`tune_nnvm_arm` tutorial.
+
+tuning_option = {
+    'log_filename': log_file,
+    'tuner': 'random',
+    'early_stopping': None,
+
+    'measure_option': autotvm.measure_option(
+        builder=autotvm.LocalBuilder(),
+        runner=autotvm.LocalRunner(number=10, repeat=1,
+                                   min_repeat_ms=1000),
+    ),
+}
+
+# You can skip the implementation of this function for this tutorial.
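+# (Inside ``tune_kernels`` each extracted ``conv2d`` task is re-created against
+# the ``topi_x86_conv2d_NCHWc`` template, so the tuner searches the NCHWc
+# schedule space rather than the plain NCHW one.)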
+def tune_kernels(tasks, + measure_option, + tuner='gridsearch', + early_stopping=None, + log_filename='tuning.log'): + + for i, tsk in enumerate(tasks): + prefix = "[Task %2d/%2d] " % (i+1, len(tasks)) + + # converting conv2d tasks to conv2d_NCHWc tasks + op_name = tsk.workload[0] + if op_name == 'conv2d': + func_create = 'topi_x86_conv2d_NCHWc' + elif op_name == 'depthwise_conv2d_nchw': + func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw' + else: + raise ValueError("Tuning {} is not supported on x86".format(op_name)) + + task = autotvm.task.create(func_create, args=tsk.args, + target=target, template_key='direct') + task.workload = tsk.workload + + # create tuner + if tuner == 'xgb' or tuner == 'xgb-rank': + tuner_obj = XGBTuner(task, loss_type='rank') + elif tuner == 'ga': + tuner_obj = GATuner(task, pop_size=50) + elif tuner == 'random': + tuner_obj = RandomTuner(task) + elif tuner == 'gridsearch': + tuner_obj = GridSearchTuner(task) + else: + raise ValueError("Invalid tuner: " + tuner) + + # do tuning + n_trial=len(task.config_space) + tuner_obj.tune(n_trial=n_trial, + early_stopping=early_stopping, + measure_option=measure_option, + callbacks=[ + autotvm.callback.progress_bar(n_trial, prefix=prefix), + autotvm.callback.log_to_file(log_filename)]) + + +######################################################################## +# Finally, we launch tuning jobs and evaluate the end-to-end performance. + +def tune_and_evaluate(tuning_opt): + # extract workloads from nnvm graph + print("Extract tasks...") + net, params, data_shape, out_shape = get_network(model_name, batch_size) + tasks = autotvm.task.extract_from_graph(net, target=target, + shape={'data': data_shape}, dtype=dtype, + symbols=(nnvm.sym.conv2d,)) + + # run tuning tasks + print("Tuning...") + tune_kernels(tasks, **tuning_opt) + + # compile kernels with history best records + with autotvm.apply_history_best(log_file): + print("Compile...") + with nnvm.compiler.build_config(opt_level=3): + graph, lib, params = nnvm.compiler.build( + net, target=target, shape={'data': data_shape}, params=params, dtype=dtype) + + # upload parameters to device + ctx = tvm.cpu() + data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype)) + module = runtime.create(graph, lib, ctx) + module.set_input('data', data_tvm) + module.set_input(**params) + + # evaluate + print("Evaluate inference time cost...") + ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3) + prof_res = np.array(ftimer().results) * 1000 # convert to millisecond + print("Mean inference time (std dev): %.2f ms (%.2f ms)" % + (np.mean(prof_res), np.std(prof_res))) + +# We do not run the tuning in our webpage server since it takes too long. +# Uncomment the following line to run it by yourself. + +# tune_and_evaluate(tuning_option) + +###################################################################### +# Sample Output +# ------------- +# The tuning needs to compile many programs and extract feature from them. +# So a high performance CPU is recommended. +# One sample output is listed below. +# +# .. code-block:: bash +# +# Extract tasks... +# Tuning... +# [Task 1/12] Current/Best: 598.05/2497.63 GFLOPS | Progress: (252/252) | 1357.95 s Done. +# [Task 2/12] Current/Best: 522.63/2279.24 GFLOPS | Progress: (784/784) | 3989.60 s Done. +# [Task 3/12] Current/Best: 447.33/1927.69 GFLOPS | Progress: (784/784) | 3869.14 s Done. +# [Task 4/12] Current/Best: 481.11/1912.34 GFLOPS | Progress: (672/672) | 3274.25 s Done. 
+# [Task 5/12] Current/Best: 414.09/1598.45 GFLOPS | Progress: (672/672) | 2720.78 s Done. +# [Task 6/12] Current/Best: 508.96/2273.20 GFLOPS | Progress: (768/768) | 3718.75 s Done. +# [Task 7/12] Current/Best: 469.14/1955.79 GFLOPS | Progress: (576/576) | 2665.67 s Done. +# [Task 8/12] Current/Best: 230.91/1658.97 GFLOPS | Progress: (576/576) | 2435.01 s Done. +# [Task 9/12] Current/Best: 487.75/2295.19 GFLOPS | Progress: (648/648) | 3009.95 s Done. +# [Task 10/12] Current/Best: 182.33/1734.45 GFLOPS | Progress: (360/360) | 1755.06 s Done. +# [Task 11/12] Current/Best: 372.18/1745.15 GFLOPS | Progress: (360/360) | 1684.50 s Done. +# [Task 12/12] Current/Best: 215.34/2271.11 GFLOPS | Progress: (400/400) | 2128.74 s Done. +# Compile... +# Evaluate inference time cost... +# Mean inference time (std dev): 3.16 ms (0.03 ms) diff --git a/nnvm/tutorials/using_external_lib.py b/nnvm/tutorials/using_external_lib.py new file mode 100644 index 000000000000..cc52652ffa37 --- /dev/null +++ b/nnvm/tutorials/using_external_lib.py @@ -0,0 +1,234 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Using External Libraries in NNVM +================================ +**Author**: `Masahiro Masuda `_ + +This is a short tutorial on how to use external libraries such as cuDNN or cuBLAS with NNVM. + +NNVM uses TVM internally to generate target-specific code. For example, with the CUDA backend TVM generates CUDA kernels for all layers in the user-provided network. +But sometimes it is also helpful to incorporate external libraries developed by various vendors into NNVM. +Luckily, TVM has a mechanism to transparently call into these libraries. +For NNVM users, all we need to do is set a target string appropriately. + +Before we can use external libraries from NNVM, your TVM needs to be built with the libraries you want to use. +For example, to use cuDNN, the USE_CUDNN option in tvm/make/config.mk needs to be enabled, and cuDNN include and library directories need to be specified. + +To begin with, we import NNVM and TVM. +""" +import tvm +import numpy as np +from tvm.contrib import graph_runtime as runtime +import nnvm.symbol as sym +import nnvm.compiler +from nnvm.testing import utils + +###################################################################### +# Create a simple network +# ----------------------- +# Let's create a very simple network for demonstration. +# It consists of convolution, batch normalization, and ReLU activation.
+ +out_channels = 16 +data = sym.Variable(name="data") +simple_net = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channels, padding = (1, 1), use_bias=True) +simple_net = sym.batch_norm(data=simple_net) +simple_net = sym.relu(data=simple_net) + +batch_size = 1 +data_shape = (batch_size, 3, 224, 224) +net, params = utils.create_workload(simple_net, batch_size, data_shape[1:]) + +###################################################################### +# Build and run with cuda backend +# ------------------------------- +# We build and run this network with cuda backend, as usual. +# By setting the logging level to DEBUG, the result of NNVM graph compilation will be dumped as pseudo code. +import logging +logging.basicConfig(level=logging.DEBUG) # to dump TVM IR after fusion + +target = "cuda" +graph, lib, params = nnvm.compiler.build( + net, target, shape={"data": data_shape}, params=params) + +ctx = tvm.context(target, 0) +data = np.random.uniform(-1, 1, size=data_shape).astype("float32") +module = runtime.create(graph, lib, ctx) +module.set_input(**params) +module.set_input("data", data) +module.run() +out_shape = (batch_size, out_channels, 224, 224) +out = module.get_output(0, tvm.nd.empty(out_shape)) +out_cuda = out.asnumpy() + +###################################################################### +# The generated pseudo code should look something like below. +# Note how bias add, batch normalization, and ReLU activation are fused into the convolution kernel. +# TVM generates a single, fused kernel from this representation. +# +# .. code-block:: text +# +# produce compute { +# // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 112 +# // attr [input1.shared] storage_scope = "shared" +# allocate input1.shared[float32 * 16 * 3 * 3 * 3] +# // attr [compute] storage_scope = "local" +# allocate compute[float32 * 16 * 1 * 1 * 1 * 1] +# // attr [pad_temp.global.global.shared] storage_scope = "shared" +# allocate pad_temp.global.global.shared[float32 * 1 * 1 * 4 * 57 * 4] +# // attr [iter_var(threadIdx.x, Range(min=0, extent=448), threadIdx.x)] thread_extent = 448 +# produce compute { +# produce input1.shared { +# for (ax0, 0, 16) { +# if (likely((threadIdx.x < 27))) { +# input1.shared[(threadIdx.x + (ax0*27))] = input1[((((((blockIdx.x/112)*48) + (threadIdx.x/9))*9) + (threadIdx.x % 9)) + (ax0*27))] +# } +# } +# } +# compute[0] = 0.000000f +# compute[1] = 0.000000f +# compute[2] = 0.000000f +# compute[3] = 0.000000f +# compute[4] = 0.000000f +# compute[5] = 0.000000f +# compute[6] = 0.000000f +# compute[7] = 0.000000f +# compute[8] = 0.000000f +# compute[9] = 0.000000f +# compute[10] = 0.000000f +# compute[11] = 0.000000f +# compute[12] = 0.000000f +# compute[13] = 0.000000f +# compute[14] = 0.000000f +# compute[15] = 0.000000f +# for (rc, 0, 3) { +# produce pad_temp.global.global.shared { +# if (likely((threadIdx.x < 228))) { +# if (likely(((blockIdx.x*2) < (226 - (threadIdx.x/57))))) { +# pad_temp.global.global.shared[ramp((threadIdx.x*4), 1, 4)] = pad_temp[ramp(((((((blockIdx.x*2) + (threadIdx.x/57))*57) + (threadIdx.x % 57)) + (rc*12882))*4), 1, 4)] +# } +# } +# } +# for (ry, 0, 3) { +# for (rx, 0, 3) { +# compute[0] = (compute[0] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[((((rc*3) + ry)*3) + rx)])) +# compute[1] = (compute[1] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 27)])) +# compute[2] = 
(compute[2] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 54)])) +# compute[3] = (compute[3] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 81)])) +# compute[4] = (compute[4] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 108)])) +# compute[5] = (compute[5] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 135)])) +# compute[6] = (compute[6] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 162)])) +# compute[7] = (compute[7] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 189)])) +# compute[8] = (compute[8] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 216)])) +# compute[9] = (compute[9] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 243)])) +# compute[10] = (compute[10] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 270)])) +# compute[11] = (compute[11] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 297)])) +# compute[12] = (compute[12] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 324)])) +# compute[13] = (compute[13] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 351)])) +# compute[14] = (compute[14] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 378)])) +# compute[15] = (compute[15] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 405)])) +# } +# } +# } +# } +# compute[(((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224))] = max((((compute[0] + input2[((blockIdx.x/112)*16)])*input3[((blockIdx.x/112)*16)]) + input4[((blockIdx.x/112)*16)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 50176)] = max((((compute[1] + input2[(((blockIdx.x/112)*16) + 1)])*input3[(((blockIdx.x/112)*16) + 1)]) + input4[(((blockIdx.x/112)*16) + 1)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 100352)] = max((((compute[2] + input2[(((blockIdx.x/112)*16) + 2)])*input3[(((blockIdx.x/112)*16) + 2)]) + input4[(((blockIdx.x/112)*16) + 2)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 150528)] = max((((compute[3] + input2[(((blockIdx.x/112)*16) + 3)])*input3[(((blockIdx.x/112)*16) + 3)]) + input4[(((blockIdx.x/112)*16) + 3)]), 
0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 200704)] = max((((compute[4] + input2[(((blockIdx.x/112)*16) + 4)])*input3[(((blockIdx.x/112)*16) + 4)]) + input4[(((blockIdx.x/112)*16) + 4)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 250880)] = max((((compute[5] + input2[(((blockIdx.x/112)*16) + 5)])*input3[(((blockIdx.x/112)*16) + 5)]) + input4[(((blockIdx.x/112)*16) + 5)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 301056)] = max((((compute[6] + input2[(((blockIdx.x/112)*16) + 6)])*input3[(((blockIdx.x/112)*16) + 6)]) + input4[(((blockIdx.x/112)*16) + 6)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 351232)] = max((((compute[7] + input2[(((blockIdx.x/112)*16) + 7)])*input3[(((blockIdx.x/112)*16) + 7)]) + input4[(((blockIdx.x/112)*16) + 7)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 401408)] = max((((compute[8] + input2[(((blockIdx.x/112)*16) + 8)])*input3[(((blockIdx.x/112)*16) + 8)]) + input4[(((blockIdx.x/112)*16) + 8)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 451584)] = max((((compute[9] + input2[(((blockIdx.x/112)*16) + 9)])*input3[(((blockIdx.x/112)*16) + 9)]) + input4[(((blockIdx.x/112)*16) + 9)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 501760)] = max((((compute[10] + input2[(((blockIdx.x/112)*16) + 10)])*input3[(((blockIdx.x/112)*16) + 10)]) + input4[(((blockIdx.x/112)*16) + 10)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 551936)] = max((((compute[11] + input2[(((blockIdx.x/112)*16) + 11)])*input3[(((blockIdx.x/112)*16) + 11)]) + input4[(((blockIdx.x/112)*16) + 11)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 602112)] = max((((compute[12] + input2[(((blockIdx.x/112)*16) + 12)])*input3[(((blockIdx.x/112)*16) + 12)]) + input4[(((blockIdx.x/112)*16) + 12)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 652288)] = max((((compute[13] + input2[(((blockIdx.x/112)*16) + 13)])*input3[(((blockIdx.x/112)*16) + 13)]) + input4[(((blockIdx.x/112)*16) + 13)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 702464)] = max((((compute[14] + input2[(((blockIdx.x/112)*16) + 14)])*input3[(((blockIdx.x/112)*16) + 14)]) + input4[(((blockIdx.x/112)*16) + 14)]), 0.000000f) +# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 752640)] = max((((compute[15] + input2[(((blockIdx.x/112)*16) + 15)])*input3[(((blockIdx.x/112)*16) + 15)]) + input4[(((blockIdx.x/112)*16) + 15)]), 0.000000f) +# } +# + +###################################################################### +# Use cuDNN for a convolutional layer +# ----------------------------------- +# We can use cuDNN to replace convolution kernels with cuDNN ones. +# To do that, all we need to do is to append the option " -libs=cudnn" to the target string. 
+net, params = utils.create_workload(simple_net, batch_size, data_shape[1:]) +target = "cuda -libs=cudnn" # use cudnn for convolution +graph, lib, params = nnvm.compiler.build( + net, target, shape={"data": data_shape}, params=params) + +ctx = tvm.context(target, 0) +data = np.random.uniform(-1, 1, size=data_shape).astype("float32") +module = runtime.create(graph, lib, ctx) +module.set_input(**params) +module.set_input("data", data) +module.run() +out_shape = (batch_size, out_channels, 224, 224) +out = module.get_output(0, tvm.nd.empty(out_shape)) +out_cudnn = out.asnumpy() + +###################################################################### +# Note that if you use cuDNN, NNVM cannot fuse convolution with layers following it. +# This is because layer fusion happens at the level of TVM internal representation(IR). +# NNVM treats external libraries as black box, so there is no way to fuse them with TVM IR. +# +# The pseudo code below shows that cuDNN convolution + bias add + batch norm + ReLU turned into two stages of computation, one for cuDNN call and the other for the rest of operations. +# +# .. code-block:: text +# +# allocate y[float32 * 1 * 16 * 224 * 224] +# produce y { +# // attr [0] extern_scope = 0 +# tvm_call_packed("tvm.contrib.cudnn.conv2d.forward", 1, 0, 1, 1, 1, 1, 1, 1, 1, tvm_stack_make_array(input0, tvm_stack_make_shape(1, 3, 224, 224), 0, 4, 0.000000f, 0), tvm_stack_make_array(input1, tvm_stack_make_shape(16, 3, 3, 3), 0, 4, 0.000000f, 0), tvm_stack_make_array(y, tvm_stack_make_shape(1, 16, 224, 224), 0, 4, 0.000000f, 0)) +# } +# produce compute { +# // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 1568 +# // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512 +# compute[((((((blockIdx.x*512) + threadIdx.x)/50176) + ((((blockIdx.x*512) + threadIdx.x)/802816)*16))*50176) + ((((((blockIdx.x*512) + threadIdx.x)/224) % 224)*224) + (((blockIdx.x*64) + threadIdx.x) % 224)))] = max((((y[((((((blockIdx.x*512) + threadIdx.x)/50176) + ((((blockIdx.x*512) + threadIdx.x)/802816)*16))*50176) + ((((((blockIdx.x*512) + threadIdx.x)/224) % 224)*224) + (((blockIdx.x*64) + threadIdx.x) % 224)))] + input2[(((blockIdx.x*512) + threadIdx.x)/50176)])*input3[(((blockIdx.x*512) + threadIdx.x)/50176)]) + input4[(((blockIdx.x*512) + threadIdx.x)/50176)]), 0.000000f) +# } +# + +###################################################################### +# Verify the result +# ----------------- +# We can check that the results of two runs match. + +tvm.testing.assert_allclose(out_cuda, out_cudnn, rtol=1e-5) + +##################################################################### +# Conclusion +# ---------- +# This tutorial covered the usage of cuDNN with NNVM. +# We also have support for cuBLAS. If cuBLAS is enabled, it will be used inside a fully connected layer (nnvm.symbol.dense). +# To use cuBLAS, set a target string as "cuda -libs=cublas". +# You can use both cuDNN and cuBLAS with "cuda -libs=cudnn,cublas". +# +# For ROCm backend, we have support for MIOpen and rocBLAS. +# They can be enabled with target "rocm -libs=miopen,rocblas". +# +# Being able to use external libraries is great, but we need to keep in mind some cautions. +# +# First, the use of external libraries may restrict your usage of TVM and NNVM. +# For example, MIOpen only supports NCHW layout and fp32 data type at the moment, so you cannot use other layouts or data type in TVM. 
+# +# Second, and more importantly, external libraries restrict the possibility of operator fusion during graph compilation, as shown above. +# TVM and NNVM aim to achieve the best performance on a variety of hardware, with joint operator-level and graph-level optimization. +# To achieve this goal, we should continue developing better optimizations for TVM and NNVM, while using external libraries as a nice way to fall back to existing implementations when necessary. diff --git a/nnvm/tutorials/web/resnet.html b/nnvm/tutorials/web/resnet.html new file mode 100644 index 000000000000..13531a3809c0 --- /dev/null +++ b/nnvm/tutorials/web/resnet.html @@ -0,0 +1,204 @@ [204 lines of HTML for the "NNVM WebGL Test Page", reduced here to its recoverable text: the page title, an "Input Image:" selector, and a "Log:" panel; the markup itself did not survive extraction]
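To make the cuBLAS fallback described in the conclusion above concrete, here is a minimal sketch that is not part of the original tutorial; the input shape and the utils.create_workload call simply mirror the convolution example:

.. code-block:: python

    import numpy as np
    import tvm
    import nnvm.compiler
    import nnvm.symbol as sym
    from nnvm.testing import utils
    from tvm.contrib import graph_runtime as runtime

    # a single fully connected layer; with "-libs=cublas" its matmul is dispatched to cuBLAS
    data = sym.Variable(name="data")
    fc = sym.dense(data=sym.flatten(data=data), units=10)
    net, params = utils.create_workload(fc, batch_size=1, image_shape=(3, 224, 224))

    graph, lib, params = nnvm.compiler.build(
        net, "cuda -libs=cublas", shape={"data": (1, 3, 224, 224)}, params=params)

    module = runtime.create(graph, lib, tvm.gpu(0))
    module.set_input(**params)
    module.set_input("data", np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
    module.run()
    out = module.get_output(0, tvm.nd.empty((1, 10)))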
+ + + + + + + + + + + + + diff --git a/python/setup.py b/python/setup.py index bc53060f95cf..bc3390a63f28 100644 --- a/python/setup.py +++ b/python/setup.py @@ -96,7 +96,6 @@ def config_cython(): "../3rdparty/dmlc-core/include", "../3rdparty/dlpack/include", ], - extra_compile_args=["-std=c++11"], library_dirs=library_dirs, libraries=libraries, language="c++")) @@ -160,7 +159,7 @@ def get_package_data_files(): 'attrs', 'psutil', ], - extras_require={'test': ['pillow', + extras_require={'test': ['PIL', 'matplotlib'], 'extra_feature': ['tornado', 'psutil', diff --git a/python/tvm/_ffi/_ctypes/ndarray.py b/python/tvm/_ffi/_ctypes/ndarray.py index c572947c8d19..9367160b811b 100644 --- a/python/tvm/_ffi/_ctypes/ndarray.py +++ b/python/tvm/_ffi/_ctypes/ndarray.py @@ -20,7 +20,7 @@ import ctypes from ..base import _LIB, check_call, c_str -from ..runtime_ctypes import TVMArrayHandle +from ..runtime_ctypes import TVMArrayHandle, TVMNDArrayContainerHandle from .types import RETURN_SWITCH, C_TO_PY_ARG_SWITCH, _wrap_arg_func, _return_handle @@ -85,16 +85,6 @@ def __del__(self): def _tvm_handle(self): return ctypes.cast(self.handle, ctypes.c_void_p).value - def _copyto(self, target_nd): - """Internal function that implements copy to target ndarray.""" - check_call(_LIB.TVMArrayCopyFromTo(self.handle, target_nd.handle, None)) - return target_nd - - @property - def shape(self): - """Shape of this array""" - return tuple(self.handle.contents.shape[i] for i in range(self.handle.contents.ndim)) - def to_dlpack(self): """Produce an array from a DLPack Tensor without copying memory @@ -110,17 +100,12 @@ def to_dlpack(self): def _make_array(handle, is_view, is_container): global _TVM_ND_CLS handle = ctypes.cast(handle, TVMArrayHandle) - if is_container: - tindex = ctypes.c_uint() - check_call(_LIB.TVMArrayGetTypeIndex(handle, ctypes.byref(tindex))) - cls = _TVM_ND_CLS.get(tindex.value, _CLASS_NDARRAY) - else: - cls = _CLASS_NDARRAY - - ret = cls.__new__(cls) - ret.handle = handle - ret.is_view = is_view - return ret + fcreate = _CLASS_NDARRAY + if is_container and _TVM_ND_CLS: + array_type_info = ctypes.cast(handle, TVMNDArrayContainerHandle).array_type_info.value + if array_type_info > 0: + fcreate = _TVM_ND_CLS[array_type_info] + return fcreate(handle, is_view) _TVM_COMPATS = () @@ -134,9 +119,9 @@ def _reg_extension(cls, fcreate): _TVM_ND_CLS = {} -def _register_ndarray(index, cls): +def _reg_ndarray(cls, fcreate): global _TVM_ND_CLS - _TVM_ND_CLS[index] = cls + _TVM_ND_CLS[cls._array_type_code] = fcreate _CLASS_NDARRAY = None diff --git a/python/tvm/_ffi/_ctypes/object.py b/python/tvm/_ffi/_ctypes/object.py index b8b8aefea131..c3ae56822198 100644 --- a/python/tvm/_ffi/_ctypes/object.py +++ b/python/tvm/_ffi/_ctypes/object.py @@ -21,7 +21,7 @@ import ctypes from ..base import _LIB, check_call from .types import TypeCode, RETURN_SWITCH, C_TO_PY_ARG_SWITCH, _wrap_arg_func -from .ndarray import _register_ndarray, NDArrayBase +from ..node_generic import _set_class_node_base ObjectHandle = ctypes.c_void_p @@ -39,9 +39,6 @@ def _set_class_node(node_class): def _register_object(index, cls): """register object class""" - if issubclass(cls, NDArrayBase): - _register_ndarray(index, cls) - return OBJECT_TYPE[index] = cls @@ -94,3 +91,6 @@ def __init_handle_by_constructor__(self, fconstructor, *args): if not isinstance(handle, ObjectHandle): handle = ObjectHandle(handle) self.handle = handle + + +_set_class_node_base(ObjectBase) diff --git a/python/tvm/_ffi/_cython/base.pxi b/python/tvm/_ffi/_cython/base.pxi index 
7ccb6279fed0..4b7b2c88ffa5 100644 --- a/python/tvm/_ffi/_cython/base.pxi +++ b/python/tvm/_ffi/_cython/base.pxi @@ -19,7 +19,7 @@ from ..base import get_last_ffi_error from libcpp.vector cimport vector from cpython.version cimport PY_MAJOR_VERSION from cpython cimport pycapsule -from libc.stdint cimport int32_t, int64_t, uint64_t, uint32_t, uint8_t, uint16_t +from libc.stdint cimport int32_t, int64_t, uint64_t, uint8_t, uint16_t import ctypes cdef enum TVMTypeCode: @@ -78,11 +78,14 @@ ctypedef void* TVMRetValueHandle ctypedef void* TVMFunctionHandle ctypedef void* ObjectHandle -ctypedef struct TVMObject: - uint32_t type_index_ - int32_t ref_counter_ - void (*deleter_)(TVMObject* self) +ctypedef struct TVMNDArrayContainer: + DLTensor dl_tensor + void* manager_ctx + void (*deleter)(DLManagedTensor* self) + int32_t array_type_info + +ctypedef TVMNDArrayContainer* TVMNDArrayContainerHandle ctypedef int (*TVMPackedCFunc)( TVMValue* args, diff --git a/python/tvm/_ffi/_cython/ndarray.pxi b/python/tvm/_ffi/_cython/ndarray.pxi index 9fd3aa43841f..402c9de24ebc 100644 --- a/python/tvm/_ffi/_cython/ndarray.pxi +++ b/python/tvm/_ffi/_cython/ndarray.pxi @@ -68,11 +68,6 @@ cdef class NDArrayBase: def __set__(self, value): self._set_handle(value) - @property - def shape(self): - """Shape of this array""" - return tuple(self.chandle.shape[i] for i in range(self.chandle.ndim)) - def __init__(self, handle, is_view): self._set_handle(handle) self.c_is_view = is_view @@ -81,11 +76,6 @@ cdef class NDArrayBase: if self.c_is_view == 0: CALL(TVMArrayFree(self.chandle)) - def _copyto(self, target_nd): - """Internal function that implements copy to target ndarray.""" - CALL(TVMArrayCopyFromTo(self.chandle, (target_nd).chandle, NULL)) - return target_nd - def to_dlpack(self): """Produce an array from a DLPack Tensor without copying memory @@ -100,34 +90,17 @@ cdef class NDArrayBase: return pycapsule.PyCapsule_New(dltensor, _c_str_dltensor, _c_dlpack_deleter) -# Import limited object-related function from C++ side to improve the speed -# NOTE: can only use POD-C compatible object in FFI. 
-cdef extern from "tvm/runtime/ndarray.h" namespace "tvm::runtime": - cdef void* TVMArrayHandleToObjectHandle(DLTensorHandle handle) - - cdef c_make_array(void* chandle, is_view, is_container): global _TVM_ND_CLS - - if is_container: - tindex = ( - TVMArrayHandleToObjectHandle(chandle)).type_index_ - if tindex < len(_TVM_ND_CLS): - cls = _TVM_ND_CLS[tindex] - if cls is not None: - ret = cls.__new__(cls) - else: - ret = _CLASS_NDARRAY.__new__(_CLASS_NDARRAY) - else: - ret = _CLASS_NDARRAY.__new__(_CLASS_NDARRAY) - (ret).chandle = chandle - (ret).c_is_view = is_view - return ret - else: - ret = _CLASS_NDARRAY.__new__(_CLASS_NDARRAY) - (ret).chandle = chandle - (ret).c_is_view = is_view - return ret + cdef int32_t array_type_info + fcreate = _CLASS_NDARRAY + if is_container and len(_TVM_ND_CLS) > 0: + array_type_info = (chandle).array_type_info + if array_type_info > 0: + fcreate = _TVM_ND_CLS[array_type_info] + ret = fcreate(None, is_view) + (ret).chandle = chandle + return ret cdef _TVM_COMPATS = () @@ -140,16 +113,11 @@ def _reg_extension(cls, fcreate): if fcreate: _TVM_EXT_RET[cls._tvm_tcode] = fcreate -cdef list _TVM_ND_CLS = [] +cdef _TVM_ND_CLS = {} -cdef _register_ndarray(int index, object cls): - """register object class""" +def _reg_ndarray(cls, fcreate): global _TVM_ND_CLS - while len(_TVM_ND_CLS) <= index: - _TVM_ND_CLS.append(None) - - _TVM_ND_CLS[index] = cls - + _TVM_ND_CLS[cls._array_type_code] = fcreate def _make_array(handle, is_view, is_container): cdef unsigned long long ptr diff --git a/python/tvm/_ffi/_cython/object.pxi b/python/tvm/_ffi/_cython/object.pxi index 6d20723fd188..9561eab94ea2 100644 --- a/python/tvm/_ffi/_cython/object.pxi +++ b/python/tvm/_ffi/_cython/object.pxi @@ -16,15 +16,12 @@ # under the License. """Maps object type to its constructor""" -cdef list OBJECT_TYPE = [] +from ..node_generic import _set_class_node_base + +OBJECT_TYPE = [] def _register_object(int index, object cls): """register object class""" - if issubclass(cls, NDArrayBase): - _register_ndarray(index, cls) - return - - global OBJECT_TYPE while len(OBJECT_TYPE) <= index: OBJECT_TYPE.append(None) OBJECT_TYPE[index] = cls @@ -34,13 +31,14 @@ cdef inline object make_ret_object(void* chandle): global OBJECT_TYPE global _CLASS_NODE cdef unsigned tindex + cdef list object_type cdef object cls cdef object handle object_type = OBJECT_TYPE handle = ctypes_handle(chandle) CALL(TVMObjectGetTypeIndex(chandle, &tindex)) - if tindex < len(OBJECT_TYPE): - cls = OBJECT_TYPE[tindex] + if tindex < len(object_type): + cls = object_type[tindex] if cls is not None: obj = cls.__new__(cls) else: @@ -101,3 +99,6 @@ cdef class ObjectBase: (fconstructor).chandle, kObjectHandle, args, &chandle) self.chandle = chandle + + +_set_class_node_base(ObjectBase) diff --git a/python/tvm/_ffi/base.py b/python/tvm/_ffi/base.py index 716091e96ed0..c61c5c445442 100644 --- a/python/tvm/_ffi/base.py +++ b/python/tvm/_ffi/base.py @@ -35,13 +35,8 @@ # this function is needed for python3 # to convert ctypes.char_p .value back to python str if sys.platform == "win32": - def _py_str(x): - try: - return x.decode('utf-8') - except UnicodeDecodeError: - encoding = 'cp' + str(ctypes.cdll.kernel32.GetACP()) - return x.decode(encoding) - py_str = _py_str + encoding = 'cp' + str(ctypes.cdll.kernel32.GetACP()) + py_str = lambda x: x.decode(encoding) else: py_str = lambda x: x.decode('utf-8') else: @@ -61,7 +56,7 @@ def _load_lib(): # version number __version__ = libinfo.__version__ -# library instance +# library instance of nnvm _LIB, 
_LIB_NAME = _load_lib() # Whether we are runtime only diff --git a/python/tvm/_ffi/function.py b/python/tvm/_ffi/function.py index 23d95ebbf66b..60e7aeb9aec5 100644 --- a/python/tvm/_ffi/function.py +++ b/python/tvm/_ffi/function.py @@ -22,7 +22,6 @@ import sys import ctypes from .base import _LIB, check_call, py_str, c_str, string_types, _FFI_MODE -from .node_generic import _set_class_objects IMPORT_EXCEPT = RuntimeError if _FFI_MODE == "cython" else ImportError @@ -33,21 +32,15 @@ if sys.version_info >= (3, 0): from ._cy3.core import _set_class_function, _set_class_module from ._cy3.core import FunctionBase as _FunctionBase - from ._cy3.core import NDArrayBase as _NDArrayBase - from ._cy3.core import ObjectBase as _ObjectBase from ._cy3.core import convert_to_tvm_func else: from ._cy2.core import _set_class_function, _set_class_module from ._cy2.core import FunctionBase as _FunctionBase - from ._cy2.core import NDArrayBase as _NDArrayBase - from ._cy2.core import ObjectBase as _ObjectBase from ._cy2.core import convert_to_tvm_func except IMPORT_EXCEPT: # pylint: disable=wrong-import-position from ._ctypes.function import _set_class_function, _set_class_module from ._ctypes.function import FunctionBase as _FunctionBase - from ._ctypes.ndarray import NDArrayBase as _NDArrayBase - from ._ctypes.object import ObjectBase as _ObjectBase from ._ctypes.function import convert_to_tvm_func FunctionHandle = ctypes.c_void_p @@ -89,9 +82,6 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.TVMModFree(self.handle)) - def __hash__(self): - return ctypes.cast(self.handle, ctypes.c_void_p).value - @property def entry_func(self): """Get the entry function @@ -332,4 +322,3 @@ def _init_api_prefix(module_name, prefix): setattr(target_module, ff.__name__, ff) _set_class_function(Function) -_set_class_objects((_ObjectBase, _NDArrayBase, ModuleBase)) diff --git a/python/tvm/_ffi/ndarray.py b/python/tvm/_ffi/ndarray.py index 650f01dd5409..da0783e10410 100644 --- a/python/tvm/_ffi/ndarray.py +++ b/python/tvm/_ffi/ndarray.py @@ -35,16 +35,16 @@ if sys.version_info >= (3, 0): from ._cy3.core import _set_class_ndarray, _make_array, _from_dlpack from ._cy3.core import NDArrayBase as _NDArrayBase - from ._cy3.core import _reg_extension + from ._cy3.core import _reg_extension, _reg_ndarray else: from ._cy2.core import _set_class_ndarray, _make_array, _from_dlpack from ._cy2.core import NDArrayBase as _NDArrayBase - from ._cy2.core import _reg_extension + from ._cy2.core import _reg_extension, _reg_ndarray except IMPORT_EXCEPT: # pylint: disable=wrong-import-position from ._ctypes.ndarray import _set_class_ndarray, _make_array, _from_dlpack from ._ctypes.ndarray import NDArrayBase as _NDArrayBase - from ._ctypes.ndarray import _reg_extension + from ._ctypes.ndarray import _reg_extension, _reg_ndarray def context(dev_type, dev_id=0): @@ -157,6 +157,10 @@ def from_dlpack(dltensor): class NDArrayBase(_NDArrayBase): """A simple Device/CPU Array object in runtime.""" + @property + def shape(self): + """Shape of this array""" + return tuple(self.handle.contents.shape[i] for i in range(self.handle.contents.ndim)) @property def dtype(self): @@ -236,7 +240,6 @@ def copyfrom(self, source_array): except: raise TypeError('array must be an array_like data,' + 'type %s is not supported' % str(type(source_array))) - t = TVMType(self.dtype) shape, dtype = self.shape, self.dtype if t.lanes > 1: @@ -291,12 +294,28 @@ def copyto(self, target): target : NDArray The target array to be copied, must have same shape as this 
array. """ + if isinstance(target, TVMContext): + target = empty(self.shape, self.dtype, target) if isinstance(target, NDArrayBase): - return self._copyto(target) - elif isinstance(target, TVMContext): - res = empty(self.shape, self.dtype, target) - return self._copyto(res) - raise ValueError("Unsupported target type %s" % str(type(target))) + check_call(_LIB.TVMArrayCopyFromTo( + self.handle, target.handle, None)) + else: + raise ValueError("Unsupported target type %s" % str(type(target))) + return target + + +def free_extension_handle(handle, type_code): + """Free C++ extension type handle + + Parameters + ---------- + handle : ctypes.c_void_p + The handle to the extension type. + + type_code : int + The type code + """ + check_call(_LIB.TVMExtTypeFree(handle, ctypes.c_int(type_code))) def register_extension(cls, fcreate=None): @@ -348,8 +367,13 @@ def __init__(self): def _tvm_handle(self): return self.handle.value """ - assert hasattr(cls, "_tvm_tcode") - if fcreate and cls._tvm_tcode < TypeCode.EXT_BEGIN: - raise ValueError("Cannot register create when extension tcode is same as buildin") - _reg_extension(cls, fcreate) + if issubclass(cls, _NDArrayBase): + assert fcreate is not None + assert hasattr(cls, "_array_type_code") + _reg_ndarray(cls, fcreate) + else: + assert hasattr(cls, "_tvm_tcode") + if fcreate and cls._tvm_tcode < TypeCode.EXT_BEGIN: + raise ValueError("Cannot register create when extension tcode is same as buildin") + _reg_extension(cls, fcreate) return cls diff --git a/python/tvm/_ffi/node_generic.py b/python/tvm/_ffi/node_generic.py index 8ee7fc5f2b5b..e89812685eb2 100644 --- a/python/tvm/_ffi/node_generic.py +++ b/python/tvm/_ffi/node_generic.py @@ -23,11 +23,11 @@ from .base import string_types # Node base class -_CLASS_OBJECTS = None +_CLASS_NODE_BASE = None -def _set_class_objects(cls): - global _CLASS_OBJECTS - _CLASS_OBJECTS = cls +def _set_class_node_base(cls): + global _CLASS_NODE_BASE + _CLASS_NODE_BASE = cls def _scalar_type_inference(value): @@ -67,7 +67,7 @@ def convert_to_node(value): node : Node The corresponding node value.
""" - if isinstance(value, _CLASS_OBJECTS): + if isinstance(value, _CLASS_NODE_BASE): return value if isinstance(value, bool): return const(value, 'uint1x1') @@ -81,7 +81,7 @@ def convert_to_node(value): if isinstance(value, dict): vlist = [] for item in value.items(): - if (not isinstance(item[0], _CLASS_OBJECTS) and + if (not isinstance(item[0], _CLASS_NODE_BASE) and not isinstance(item[0], string_types)): raise ValueError("key of map must already been a container type") vlist.append(item[0]) diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index a7947dbc38a2..2dbb67dfbf73 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -271,3 +271,12 @@ class TVMArray(ctypes.Structure): ("byte_offset", ctypes.c_uint64)] TVMArrayHandle = ctypes.POINTER(TVMArray) + +class TVMNDArrayContainer(ctypes.Structure): + """TVM NDArray::Container""" + _fields_ = [("dl_tensor", TVMArray), + ("manager_ctx", ctypes.c_void_p), + ("deleter", ctypes.c_void_p), + ("array_type_info", ctypes.c_int32)] + +TVMNDArrayContainerHandle = ctypes.POINTER(TVMNDArrayContainer) diff --git a/python/tvm/api.py b/python/tvm/api.py index ef121bc880b2..f0261be37e41 100644 --- a/python/tvm/api.py +++ b/python/tvm/api.py @@ -179,7 +179,7 @@ def var(name="tindex", dtype=int32): name : str The name - dtype : str + dtype : int The data type Returns diff --git a/python/tvm/autotvm/database.py b/python/tvm/autotvm/database.py index 07f3766acb1d..f820c1234832 100644 --- a/python/tvm/autotvm/database.py +++ b/python/tvm/autotvm/database.py @@ -156,7 +156,7 @@ def filter(self, func): Examples -------- get records for a target - >>> db.filter(lambda inp, results: "cuda" in inp.target.keys) + >>> db.filter(lambda inp, resulst: "cuda" in inp.target.keys) get records with errors >>> db.filter(lambda inp, results: any(r.error_no != 0 for r in results)) """ diff --git a/python/tvm/autotvm/record.py b/python/tvm/autotvm/record.py index fbf4a08f7b0c..14efb7bd9239 100644 --- a/python/tvm/autotvm/record.py +++ b/python/tvm/autotvm/record.py @@ -183,13 +183,7 @@ def load_from_file(filename): """ for row in open(filename): if row and not row.startswith('#'): - inp, res = decode(row) - # Avoid loading the record with an empty config. The TOPI schedule with no entities - # will result in an empty entity map (e.g., depthwise_conv2d_nchw on x86). - # Using an empty config will cause problems when applying alter op like NCHW to NCHWc. - if not inp.config._entity_map: - continue - yield (inp, res) + yield decode(row) def split_workload(in_file, clean=True): diff --git a/python/tvm/autotvm/task/__init__.py b/python/tvm/autotvm/task/__init__.py index f249f6bacb90..0a0e6e1e8ac7 100644 --- a/python/tvm/autotvm/task/__init__.py +++ b/python/tvm/autotvm/task/__init__.py @@ -30,4 +30,5 @@ from .topi_integration import register_topi_compute, register_topi_schedule, \ TaskExtractEnv +from .nnvm_integration import extract_from_graph, extract_from_multiple_graph from .relay_integration import extract_from_program, extract_from_multiple_program diff --git a/python/tvm/autotvm/task/nnvm_integration.py b/python/tvm/autotvm/task/nnvm_integration.py new file mode 100644 index 000000000000..9161822d173c --- /dev/null +++ b/python/tvm/autotvm/task/nnvm_integration.py @@ -0,0 +1,200 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +# pylint: disable=unused-variable,invalid-name +""" +Decorator and utilities for the integration with TOPI and NNVM + +""" +import threading +import warnings +import logging + + +from .task import create +from .topi_integration import TaskExtractEnv + +logger = logging.getLogger('autotvm') + + +def extract_from_graph(graph, shape, dtype, target, symbols, params=None, target_host=None): + """ Extract tuning tasks from an nnvm graph. + + This function collects tuning tasks by building the graph + and tracing all the calls to topi. + + Parameters + ---------- + graph : Graph + The graph to tune + shape : dict of str to tuple + The input shape to the graph + dtype : str or dict of str to str + The input types to the graph + target: tvm.target.Target + The compilation target + symbols : Array of nnvm.symbol + Array of nnvm symbols to be tuned + params : dict of str to NDArray + The parameter dictionary. + target_host: tvm.target.Target + The host compilation target + + Returns + ------- + task: Array of autotvm.task.Task + collected tasks + """ + import nnvm.compiler + import nnvm + import topi + + env = TaskExtractEnv.get() + + # NOTE: To add more symbols, you only need to change the following lists + # nnvm symbol -> topi compute + SYMBOL2TOPI = { + nnvm.sym.conv2d: [topi.nn.conv2d, topi.nn.depthwise_conv2d_nchw, + topi.nn.group_conv2d_nchw], + nnvm.sym.conv2d_transpose: [topi.nn.conv2d_transpose_nchw], + nnvm.sym.dense: [topi.nn.dense], + } + + topi_funcs = [] + for sym_name in symbols: + if sym_name in SYMBOL2TOPI: + topi_funcs.extend(SYMBOL2TOPI[sym_name]) + else: + warnings.warn("Symbol %s is not tunable, ignored" % sym_name) + + # run compiler to collect all TOPI calls during compilation + env.reset(topi_funcs) + with env: + # disable logger temporarily + old_state = logger.disabled + logger.disabled = True + + nnvm.compiler.engine.clear_cache() + # wrap build call in thread to avoid multiprocessing problems + build_thread = threading.Thread(target=nnvm.compiler.build, + args=(graph, + target, + shape, + dtype, + params, + target_host)) + build_thread.start() + build_thread.join() + + logger.disabled = old_state + + # create tasks for target + tasks = [] + for task_name, args in env.get_tasks(): + try: + tsk = create(task_name, args, + target=target, target_host=target_host, + template_key='direct') + tasks.append(tsk) + except topi.InvalidShapeError: + print("[Warning] Invalid shape during AutoTVM task creation") + + return tasks + + +def extract_from_multiple_graph(graphs, shapes, dtypes, target, symbols, params, target_host=None): + """ Extract tuning tasks from multiple nnvm graphs.
+ + This function is the multiple graph version of extract_from_graph + + Parameters + ---------- + graphs : List of Graph + The list of graphs to tune + shapes : List of dict of str to tuple + The input shapes to the graphs + dtypes : List of str or dict of str to str + The input types to the graphs + target: tvm.target.Target + The compilation target + symbols : Array of nnvm.symbol + Array of nnvm symbols to be tuned + params : dict of str to NDArray + The parameter dictionary. + target_host: tvm.target.Target + The host compilation target + + Returns + ------- + task: Array of autotvm.task.Task + collected tasks + """ + import nnvm.compiler + import nnvm + import topi + + env = TaskExtractEnv.get() + + #NOTE: To add more symbols, you only need to change the following lists + #nnvm symbol -> topi compute + SYMBOL2TOPI = { + nnvm.sym.conv2d: [topi.nn.conv2d, topi.nn.depthwise_conv2d_nchw, + topi.nn.group_conv2d_nchw], + nnvm.sym.conv2d_transpose: [topi.nn.conv2d_transpose_nchw], + nnvm.sym.dense: [topi.nn.dense], + } + + topi_funcs = [] + for sym_name in symbols: + if sym_name in SYMBOL2TOPI: + topi_funcs.extend(SYMBOL2TOPI[sym_name]) + else: + warnings.warn("Symbol %s is not tunable, ignored" % sym_name) + + # run compiler to collect all TOPI calls during compilation + env.reset(topi_funcs) + with env: + # disable logger temporarily + old_state = logger.disabled + logger.disabled = True + + for graph, shape, dtype in zip(graphs, shapes, dtypes): + nnvm.compiler.engine.clear_cache() + # wrap build call in thread to avoid multiprocessing problems + build_thread = threading.Thread(target=nnvm.compiler.build, + args=(graph, + target, + shape, + dtype, + params, + target_host)) + build_thread.start() + build_thread.join() + + logger.disabled = old_state + + # create tasks for target + tasks = [] + for task_name, args in env.get_tasks(): + try: + tsk = create(task_name, args, + target=target, target_host=target_host, + template_key='direct') + tasks.append(tsk) + except topi.InvalidShapeError: + print("[Warning] Invalid shape during AutoTVM task creation") + + return tasks diff --git a/python/tvm/autotvm/task/relay_integration.py b/python/tvm/autotvm/task/relay_integration.py index 4a407714b414..b65c5d428e4b 100644 --- a/python/tvm/autotvm/task/relay_integration.py +++ b/python/tvm/autotvm/task/relay_integration.py @@ -128,7 +128,6 @@ def extract_from_multiple_program(funcs, params, ops, target, target_host=None, tvm.relay.op.nn.dense: [topi.nn.dense], tvm.relay.op.nn.batch_matmul: [topi.nn.batch_matmul], tvm.relay.op.nn.deformable_conv2d: [topi.nn.deformable_conv2d_nchw], - tvm.relay.op.nn.conv1d_transpose: [topi.nn.conv1d_transpose_ncw], } topi_funcs = [] diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py index 8b3ba35e92ab..7bfc313de6e9 100644 --- a/python/tvm/autotvm/task/topi_integration.py +++ b/python/tvm/autotvm/task/topi_integration.py @@ -69,9 +69,9 @@ def deserialize_args(args): return ret -# Task extractor for relay program +# Task extractor for nnvm graph, relay program class TaskExtractEnv: - """Global environment for extracting tuning tasks from graph""" + """Global environment for extracting tuning tasks from nnvm graph""" current = None registered = None @@ -92,7 +92,6 @@ def __init__(self, allow_duplicate=False): topi.nn.bitserial_conv2d_nhwc: "topi_nn_bitserial_conv2d_nhwc", topi.nn.bitserial_dense: "topi_nn_bitserial_dense", topi.nn.deformable_conv2d_nchw: "topi_nn_deformable_conv2d_nchw", -
topi.nn.conv1d_transpose_ncw: "topi_nn_conv1d_transpose_ncw", } self.topi_to_schedule = { @@ -110,7 +109,6 @@ def __init__(self, allow_duplicate=False): topi.nn.bitserial_conv2d_nhwc: [topi.generic.schedule_bitserial_conv2d_nhwc], topi.nn.bitserial_dense: [topi.generic.schedule_bitserial_dense], topi.nn.deformable_conv2d_nchw: [topi.generic.schedule_deformable_conv2d_nchw], - topi.nn.conv1d_transpose_ncw: [topi.generic.schedule_conv1d_transpose_ncw], } # function reflection for tracing @@ -127,7 +125,6 @@ def __init__(self, allow_duplicate=False): topi.nn.bitserial_conv2d_nhwc: lambda x: setattr(topi.nn, 'bitserial_conv2d_nhwc', x), topi.nn.bitserial_dense: lambda x: setattr(topi.nn, 'bitserial_dense', x), topi.nn.deformable_conv2d_nchw: lambda x: setattr(topi.nn, 'deformable_conv2d_nchw', x), - topi.nn.conv1d_transpose_ncw: lambda x: setattr(topi.nn, 'conv1d_transpose_ncw', x), } self.allow_duplicate = allow_duplicate @@ -182,15 +179,12 @@ def _topi_nn_conv2d(*args, **kwargs): args = deserialize_args(args) A, W = args[:2] layout = args[-2] + assert layout == 'NCHW' or layout == 'HWCN', "only support NCHW/HWCN currently" C = topi.nn.conv2d(*args, **kwargs) if layout == 'NCHW': s = topi.generic.schedule_conv2d_nchw([C]) - elif layout == 'HWCN': - s = topi.generic.schedule_conv2d_hwcn([C]) - elif layout == 'NHWC': - s = topi.generic.schedule_conv2d_nhwc([C]) else: - raise ValueError("Unsupported layout {}".format(layout)) + s = topi.generic.schedule_conv2d_hwcn([C]) return s, [A, W, C] @register("topi_nn_depthwise_conv2d_nchw") @@ -220,15 +214,6 @@ def _topi_nn_conv2d_transpose_nchw(*args, **kwargs): s = topi.generic.schedule_conv2d_transpose_nchw([C]) return s, [A, W, C] - @register("topi_nn_conv1d_transpose_ncw") - def _topi_nn_conv1d_transpose_ncw(*args, **kwargs): - assert not kwargs, "Do not support kwargs in template function call" - args = deserialize_args(args) - A, W = args[:2] - C = topi.nn.conv1d_transpose_ncw(*args, **kwargs) - s = topi.generic.schedule_conv1d_transpose_ncw([C]) - return s, [A, W, C] - @register("topi_nn_dense") def _topi_nn_dense(*args, **kwargs): assert not kwargs, "Do not support kwargs in template function call" @@ -313,7 +298,7 @@ def get_tasks(self): Returns ------- tasks: List of tuple(name, args) - A list of tasks extracted from the graph + A list of tasks extracted from the nnvm graph """ return self.task_collection diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index ebfe6ee3a354..95e9acb23dff 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -18,7 +18,8 @@ TopHub: Tensor Operator Hub To get the best performance, we typically need auto-tuning for the specific devices. TVM releases pre-tuned parameters in TopHub for some common networks and hardware targets. -TVM will download these parameters for you when you call relay.build. +TVM will download these parameters for you when you call +nnvm.compiler.build_module or relay.build. 
""" # pylint: disable=invalid-name @@ -223,7 +224,7 @@ def load_reference_log(backend, model, workload_name, template_key): if model == inp.target.model: find = True break - # if device model is not find, use the device model with the most tuned workloads + # if device model is not find, use the device model with the most tuned worklaods if not find and counts: model = max(counts.items(), key=lambda k: k[1])[0] diff --git a/python/tvm/autotvm/tuner/xgboost_cost_model.py b/python/tvm/autotvm/tuner/xgboost_cost_model.py index 34f4c03e224b..265365144639 100644 --- a/python/tvm/autotvm/tuner/xgboost_cost_model.py +++ b/python/tvm/autotvm/tuner/xgboost_cost_model.py @@ -51,7 +51,7 @@ class XGBoostCostModel(CostModel): 'itervar' is more accurate but 'knob' is much faster. There are some constraints on 'itervar', if you meet problems with feature extraction when using 'itervar', - you can switch to 'knob'. + you can swith to 'knob'. For cross-shape tuning (e.g. many convolutions with different shapes), 'itervar' and 'curve' has better transferability, diff --git a/python/tvm/autotvm/tuner/xgboost_tuner.py b/python/tvm/autotvm/tuner/xgboost_tuner.py index 2ebea86d8e3e..a7498c3b6309 100644 --- a/python/tvm/autotvm/tuner/xgboost_tuner.py +++ b/python/tvm/autotvm/tuner/xgboost_tuner.py @@ -40,7 +40,7 @@ class XGBTuner(ModelBasedTuner): 'itervar' is more accurate but 'knob' is much faster. There are some constraints on 'itervar', if you meet problems with feature extraction when using 'itervar', - you can switch to 'knob'. + you can swith to 'knob'. For cross-shape tuning (e.g. many convolutions with different shapes), 'itervar' and 'curve' has better transferability, diff --git a/python/tvm/contrib/cublaslt.py b/python/tvm/contrib/cublaslt.py deleted file mode 100644 index 5470fd0b4c18..000000000000 --- a/python/tvm/contrib/cublaslt.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""External function interface to cuBLASlt libraries.""" -from __future__ import absolute_import as _abs - -from .. import api as _api -from .. import intrin as _intrin - -def matmul(lhs, rhs, transa=False, transb=False, n=0, m=0, dtype=None): - """Create an extern op that compute matrix mult of A and rhs with cuBLAS - - Parameters - ---------- - lhs : Tensor - The left matrix operand - rhs : Tensor - The right matrix operand - transa : bool - Whether transpose lhs - transb : bool - Whether transpose rhs - - Returns - ------- - C : Tensor - The result tensor. 
- """ - if n == 0: - n = lhs.shape[1] if transa else lhs.shape[0] - if m == 0: - m = rhs.shape[0] if transb else rhs.shape[1] - dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( - (n, m), [lhs, rhs], - lambda ins, outs: _intrin.call_packed( - "tvm.contrib.cublaslt.matmul", - ins[0], ins[1], outs[0], transa, transb), dtype=dtype, name="C") diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py index 3fc0d1574b8c..8ee99d7561bd 100644 --- a/python/tvm/contrib/debugger/debug_result.py +++ b/python/tvm/contrib/debugger/debug_result.py @@ -40,7 +40,7 @@ class DebugResult(object): Parameters ---------- graph_json : str - The graph to be deployed in json format output by graph compiler. Each operator (tvm_op) + The graph to be deployed in json format output by nnvm graph. Each operator (tvm_op) in the graph will have a one to one mapping with the symbol in libmod which is used to construct a "PackedFunc" . @@ -57,12 +57,12 @@ def __init__(self, graph_json, dump_path): self.dump_graph_json(graph_json) def _parse_graph(self, graph_json): - """Parse and extract the JSON graph and update the nodes, shapes and dltype. + """Parse and extract the NNVM graph and update the nodes, shapes and dltype. Parameters ---------- graph_json : str or graph class - The graph to be deployed in json format output by JSON graph. + The graph to be deployed in json format output by nnvm graph. """ json_obj = json.loads(graph_json) self._nodes_list = json_obj['nodes'] @@ -197,7 +197,7 @@ def dump_graph_json(self, graph): Parameters ---------- graph : json format - json formatted JSON graph contain list of each node's + json formatted NNVM graph contain list of each node's name, shape and type. """ graph_dump_file_name = GRAPH_DUMP_FILE_NAME diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py index 7d150c7c3d34..c71cbd2b0c2d 100644 --- a/python/tvm/contrib/debugger/debug_runtime.py +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -35,7 +35,7 @@ def create(graph_json_str, libmod, ctx, dump_root=None): Parameters ---------- graph_json_str : str or graph class - The graph to be deployed in json format output by graph compiler. + The graph to be deployed in json format output by nnvm graph. The graph can only contain one operator(tvm_op) that points to the name of PackedFunc in the libmod. @@ -85,7 +85,7 @@ class GraphModuleDebug(graph_runtime.GraphModule): Parameters ---------- module : Module - The internal tvm module that holds the actual graph functions. + The interal tvm module that holds the actual graph functions. ctx : TVMContext The context this module is under. 
@@ -188,7 +188,7 @@ def _run_debug(self): out_tensor = array(out_tensor) self.debug_datum._output_tensor_list.append(out_tensor) - def debug_get_output(self, node, out=None): + def debug_get_output(self, node, out): """Run graph up to node and get the output to out Parameters @@ -199,11 +199,12 @@ def debug_get_output(self, node, out=None): out : NDArray The output array container """ + ret = None if isinstance(node, str): output_tensors = self.debug_datum.get_output_tensors() try: - out = output_tensors[node] - except KeyError: + ret = output_tensors[node] + except: node_list = output_tensors.keys() raise RuntimeError( "Node " @@ -214,10 +215,10 @@ def debug_get_output(self, node, out=None): ) elif isinstance(node, int): output_tensors = self.debug_datum._output_tensor_list - out = output_tensors[node] + ret = output_tensors[node] else: raise RuntimeError("Require node index or name only.") - return out + return ret def run(self, **input_dict): """Run forward execution of the graph with debug @@ -243,6 +244,7 @@ def run_individual(self, number, repeat=1, min_repeat_ms=0): ret = self._run_individual(number, repeat, min_repeat_ms) return ret.strip(",").split(",") if ret else [] + def exit(self): """Exits the dump folder and all its contents""" self._remove_dump_root() diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_runtime.py index 2c945d2fca95..f4ee2f7db28d 100644 --- a/python/tvm/contrib/graph_runtime.py +++ b/python/tvm/contrib/graph_runtime.py @@ -22,13 +22,12 @@ from .._ffi.runtime_ctypes import TVMContext from ..rpc import base as rpc_base - def create(graph_json_str, libmod, ctx): """Create a runtime executor module given a graph and module. Parameters ---------- graph_json_str : str or graph class - The graph to be deployed in json format output by json graph. + The graph to be deployed in json format output by nnvm graph. The graph can only contain one operator(tvm_op) that points to the name of PackedFunc in the libmod. libmod : tvm.Module @@ -58,7 +57,6 @@ def create(graph_json_str, libmod, ctx): return GraphModule(fcreate(graph_json_str, libmod, *device_type_id)) - def get_device_ctx(libmod, ctx): """Parse and validate all the device context(s). Parameters @@ -114,12 +112,12 @@ class GraphModule(object): Parameters ---------- module : Module - The internal tvm module that holds the actual graph functions. + The interal tvm module that holds the actual graph functions. Attributes ---------- module : Module - The internal tvm module that holds the actual graph functions. + The interal tvm module that holds the actual graph functions. """ def __init__(self, module): @@ -144,7 +142,7 @@ def set_input(self, key=None, value=None, **params): The input key params : dict of str to NDArray - Additional arguments + Additonal arguments """ if key is not None: self._get_input(key).copyfrom(value) @@ -213,7 +211,7 @@ def get_output(self, index, out=None): return self._get_output(index) def debug_get_output(self, node, out): - """Run graph up to node and get the output to out + """Run graph upto node and get the output to out Parameters ---------- diff --git a/python/tvm/contrib/verilog.py b/python/tvm/contrib/verilog.py new file mode 100644 index 000000000000..30fa6ce2c7dd --- /dev/null +++ b/python/tvm/contrib/verilog.py @@ -0,0 +1,316 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Verilog simulator modules.""" +from __future__ import absolute_import + +import subprocess +import sys +import os +import ctypes + +from .. import _api_internal +from .._ffi.base import string_types +from .._ffi.node import NodeBase, register_node +from .._ffi.function import register_func +from . import util + +@register_node +class VPISession(NodeBase): + """Verilog session""" + def __init__(self, handle): + super(VPISession, self).__init__(handle) + self.proc = None + self.execpath = None + self.yield_callbacks = [] + + def __del__(self): + self.proc.kill() + try: + super(VPISession, self).__del__() + except AttributeError: + pass + + def arg(self, index): + """Get handle passed to host session. + + Parameters + ---------- + index : int + The index value. + + Returns + ------- + handle : VPIHandle + The handle + """ + return _api_internal._vpi_SessGetArg(self, index) + + def __getitem__(self, name): + if not isinstance(name, string_types): + raise ValueError("have to be string types") + return _api_internal._vpi_SessGetHandleByName(self, name) + + def __getattr__(self, name): + return _api_internal._vpi_SessGetHandleByName(self, name) + + def yield_until_next_cycle(self): + """Yield until next posedge""" + for f in self.yield_callbacks: + f() + return _api_internal._vpi_SessYield(self) + + def shutdown(self): + """Shutdown the simulator""" + return _api_internal._vpi_SessShutdown(self) + + +@register_node +class VPIHandle(NodeBase): + """Handle to a verilog variable.""" + def __init__(self, handle): + super(VPIHandle, self).__init__(handle) + self._name = None + self._size = None + + def get_int(self): + """Get integer value from handle. + + Returns + ------- + value : int + """ + return _api_internal._vpi_HandleGetInt(self) + + def put_int(self, value): + """Put integer value to handle. 
+ + Parameters + ---------- + value : int + The value to put + """ + return _api_internal._vpi_HandlePutInt(self, value) + + @property + def name(self): + if self._name is None: + self._name = _api_internal._vpi_HandleGetName(self) + return self._name + + @property + def size(self): + if self._size is None: + self._size = _api_internal._vpi_HandleGetSize(self) + return self._size + + def __getitem__(self, name): + if not isinstance(name, string_types): + raise ValueError("have to be string types") + return _api_internal._vpi_HandleGetHandleByName(self, name) + + def __getattr__(self, name): + return _api_internal._vpi_HandleGetHandleByName(self, name) + + +def _find_vpi_path(): + curr_path = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) + api_path = os.path.join(curr_path, '../../../lib/') + vpi_path = [curr_path, api_path] + vpi_path = [os.path.join(p, 'tvm_vpi.vpi') for p in vpi_path] + vpi_found = [p for p in vpi_path if os.path.exists(p) and os.path.isfile(p)] + if vpi_found: + return os.path.dirname(vpi_found[0]) + raise ValueError("Cannot find tvm_vpi.vpi, make sure you did `make verilog`") + +def search_path(): + """Get the search directories.""" + curr_path = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) + ver_path = [os.path.join(curr_path, '../../../verilog/')] + ver_path += [os.path.join(curr_path, '../../../tests/verilog/unittest/')] + ver_path += [os.path.join(curr_path, '../../../tests/verilog/integration/')] + return ver_path + + +def find_file(file_name): + """Find file in the search directories. + + Parameters + ---------- + file_name : str + The file name + + Returns + ------- + file_name : str + The absolute path to the file, raise Error if cannot find it. + """ + ver_path = search_path() + flist = [os.path.join(p, file_name) for p in ver_path] + found = [p for p in flist if os.path.exists(p) and os.path.isfile(p)] + if not found: + raise ValueError("Cannot find %s in %s" % (file_name, flist)) + return found[0] + + +def compile_file(file_name, file_target, options=None): + """Compile verilog via iverilog + + Parameters + ---------- + file_name : str or list of str + The verilog source file(s). + + file_target : str + The target file. + """ + cmd = ["iverilog"] + for path in search_path(): + cmd += ["-I%s" % path] + + cmd += ["-o", file_target] + if options: + cmd += options + + if isinstance(file_name, string_types): + file_name = [file_name] + cmd += file_name + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + (out, _) = proc.communicate() + + if proc.returncode != 0: + raise ValueError("Compilation error:\n%s" % out) + + +def session(file_names, codes=None): + """Create a new iverilog session by compiling the files. + + Parameters + ---------- + file_names : str or list of str + The name of the file + + codes : str or list of str + The code in str. + + Returns + ------- + sess : VPISession + The created session.
+ """ + if isinstance(file_names, string_types): + file_names = [file_names] + path = util.tempdir() + + if codes: + if isinstance(codes, (list, tuple)): + codes = '\n'.join(codes) + fcode = path.relpath("temp_code.v") + with open(fcode, "w") as out_file: + out_file.write(codes) + file_names.append(fcode) + + for name in file_names: + if not os.path.exists(name): + raise ValueError("Cannot find file %s" % name) + + target = path.relpath(os.path.basename(file_names[0].rsplit(".", 1)[0])) + compile_file(file_names, target) + vpi_path = _find_vpi_path() + + cmd = ["vvp"] + cmd += ["-M", vpi_path] + cmd += ["-m", "tvm_vpi"] + cmd += [target] + env = os.environ.copy() + + read_device, write_host = os.pipe() + read_host, write_device = os.pipe() + + if sys.platform == "win32": + import msvcrt + env['TVM_DREAD_PIPE'] = str(msvcrt.get_osfhandle(read_device)) + env['TVM_DWRITE_PIPE'] = str(msvcrt.get_osfhandle(write_device)) + read_host = msvcrt.get_osfhandle(read_host) + write_host = msvcrt.get_osfhandle(write_host) + else: + env['TVM_DREAD_PIPE'] = str(read_device) + env['TVM_DWRITE_PIPE'] = str(write_device) + + env['TVM_HREAD_PIPE'] = str(read_host) + env['TVM_HWRITE_PIPE'] = str(write_host) + + try: + # close_fds does not work well for all python3 + # Use pass_fds instead. + # pylint: disable=unexpected-keyword-arg + pass_fds = (read_device, write_device, read_host, write_host) + proc = subprocess.Popen(cmd, pass_fds=pass_fds, env=env) + except TypeError: + # This is effective for python2 + proc = subprocess.Popen(cmd, close_fds=False, env=env) + + # close device side pipe + os.close(read_device) + os.close(write_device) + + sess = _api_internal._vpi_SessMake(read_host, write_host) + sess.proc = proc + sess.execpath = path + return sess + + +@register_func +def tvm_callback_verilog_simulator(code, *args): + """Callback by TVM runtime to invoke verilog simulator + + Parameters + ---------- + code : str + The verilog code to be simulated + + args : list + Additional arguments to be set. 
+ """ + libs = [ + find_file("tvm_vpi_mmap.v") + ] + sess = session(libs, code) + for i, value in enumerate(args): + vpi_h = sess.main["tvm_arg%d" % i] + if isinstance(value, ctypes.c_void_p): + int_value = int(value.value) + elif isinstance(value, int): + int_value = value + else: + raise ValueError( + "Do not know how to handle value type %s" % type(value)) + vpi_h.put_int(int_value) + + rst = sess.main.rst + done = sess.main.done + # start driving + rst.put_int(1) + sess.yield_until_next_cycle() + rst.put_int(0) + sess.yield_until_next_cycle() + while not done.get_int(): + sess.yield_until_next_cycle() + sess.yield_until_next_cycle() + sess.shutdown() diff --git a/python/tvm/module.py b/python/tvm/module.py index 163ad3bc8822..fb350a2d131e 100644 --- a/python/tvm/module.py +++ b/python/tvm/module.py @@ -136,27 +136,21 @@ def export_library(self, self.save(file_name) return - modules = self._collect_dso_modules() + if not (self.type_key == "llvm" or self.type_key == "c"): + raise ValueError("Module[%s]: Only llvm and c support export shared" % self.type_key) temp = _util.tempdir() - files = [] - is_system_lib = False - has_c_module = False - for index, module in enumerate(modules): - if fcompile is not None and hasattr(fcompile, "object_format"): - object_format = fcompile.object_format + if fcompile is not None and hasattr(fcompile, "object_format"): + object_format = fcompile.object_format + else: + if self.type_key == "llvm": + object_format = "o" else: - if module.type_key == "llvm": - object_format = "o" - else: - assert module.type_key == "c" - object_format = "cc" - has_c_module = True - path_obj = temp.relpath("lib" + str(index) + "." + object_format) - module.save(path_obj) - files.append(path_obj) - is_system_lib = (module.type_key == "llvm" and - module.get_function("__tvm_is_system_module")()) - + assert self.type_key == "c" + object_format = "cc" + path_obj = temp.relpath("lib." + object_format) + self.save(path_obj) + files = [path_obj] + is_system_lib = self.type_key == "llvm" and self.get_function("__tvm_is_system_module")() if self.imported_modules: path_cc = temp.relpath("devc.cc") with open(path_cc, "w") as f: @@ -167,15 +161,13 @@ def export_library(self, fcompile = _tar.tar else: fcompile = _cc.create_shared - - if has_c_module: + if self.type_key == "c": options = [] if "options" in kwargs: opts = kwargs["options"] options = opts if isinstance(opts, (list, tuple)) else [opts] opts = options + ["-I" + path for path in find_include_path()] kwargs.update({'options': opts}) - fcompile(file_name, files, **kwargs) def time_evaluator(self, func_name, ctx, number=10, repeat=1, min_repeat_ms=0): @@ -236,25 +228,6 @@ def evaluator(*args): except NameError: raise NameError("time_evaluate is only supported when RPC is enabled") - def _collect_dso_modules(self): - """Helper function to collect dso modules, then return it.""" - visited, stack, dso_modules = set(), [], [] - # append root module - visited.add(self) - stack.append(self) - while stack: - module = stack.pop() - if module._dso_exportable(): - dso_modules.append(module) - for m in module.imported_modules: - if m not in visited: - visited.add(m) - stack.append(m) - return dso_modules - - def _dso_exportable(self): - return self.type_key == "llvm" or self.type_key == "c" - def system_lib(): """Get system-wide library module singleton. 
diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py index b19db6627ac6..f9c7cc6c5403 100644 --- a/python/tvm/ndarray.py +++ b/python/tvm/ndarray.py @@ -27,11 +27,8 @@ from ._ffi.ndarray import TVMContext, TVMType, NDArrayBase from ._ffi.ndarray import context, empty, from_dlpack from ._ffi.ndarray import _set_class_ndarray -from ._ffi.ndarray import register_extension -from ._ffi.object import register_object +from ._ffi.ndarray import register_extension, free_extension_handle - -@register_object class NDArray(NDArrayBase): """Lightweight NDArray class of TVM runtime. diff --git a/python/tvm/relay/_parser.py b/python/tvm/relay/_parser.py index 45822c56ede2..71e5bfaadff0 100644 --- a/python/tvm/relay/_parser.py +++ b/python/tvm/relay/_parser.py @@ -135,15 +135,12 @@ def __call__(self, args, attrs, type_args): "nn.dense": op.nn.dense, "nn.bias_add": op.nn.bias_add, "nn.max_pool2d": op.nn.max_pool2d, - "nn.max_pool3d": op.nn.max_pool3d, "nn.global_max_pool2d": op.nn.global_max_pool2d, "nn.avg_pool2d": op.nn.avg_pool2d, - "nn.avg_pool3d": op.nn.avg_pool3d, "nn.global_avg_pool2d": op.nn.global_avg_pool2d, "nn.softmax": op.nn.softmax, "reshape": op.reshape, "nn.conv2d_transpose": op.nn.conv2d_transpose, - "nn.conv1d_transpose": op.nn.conv1d_transpose, "concatenate": op.concatenate, "nn.dropout": op.nn.dropout_raw, "zeros": op.zeros, diff --git a/python/tvm/relay/frontend/keras.py b/python/tvm/relay/frontend/keras.py index f0468e7c232b..57ee227694db 100644 --- a/python/tvm/relay/frontend/keras.py +++ b/python/tvm/relay/frontend/keras.py @@ -362,7 +362,7 @@ def _convert_flatten(inexpr, keras_layer, _): def _convert_pooling(inexpr, keras_layer, etab): _check_data_format(keras_layer) pool_type = type(keras_layer).__name__ - # global pool in keras = global pool + flatten in relay + # global pool in keras = global pool + flatten in nnvm/relay if pool_type == 'GlobalMaxPooling2D': return _convert_flatten(_op.nn.global_max_pool2d(inexpr), keras_layer, etab) if pool_type == 'GlobalAveragePooling2D': diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index a1a357883a83..abef45d498a1 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -207,23 +207,29 @@ def _mx_conv1d_transpose(inputs, attrs): if data_layout != "NCW": raise tvm.error.OpAttributeInvalid( 'Only "NCW" data layout is supported for 1D Convolution') + data_layout = "NCHW" channel_axis = 1 - kernel_layout = "OIW" + kernel_layout = "OIHW" + new_attrs = {} new_attrs["channels"] = attrs.get_int("num_filter") - new_attrs["kernel_size"] = attrs.get_int_tuple("kernel") - new_attrs["strides"] = attrs.get_int_tuple("stride", (1,)) - new_attrs["output_padding"] = attrs.get_int_tuple("adj", (0,)) - new_attrs["padding"] = attrs.get_int_tuple("pad", (0,)) - new_attrs["dilation"] = attrs.get_int_tuple("dilate", (1,)) + new_attrs["kernel_size"] = (1,) + attrs.get_int_tuple("kernel") + new_attrs["strides"] = (1,) + attrs.get_int_tuple("stride", (1,)) + new_attrs["output_padding"] = (0,) + attrs.get_int_tuple("adj", (0,)) + new_attrs["padding"] = (0,) + attrs.get_int_tuple("pad", (0,)) + new_attrs["dilation"] = (1,) + attrs.get_int_tuple("dilate", (1,)) new_attrs["groups"] = attrs.get_int("num_group", 1) new_attrs["data_layout"] = data_layout new_attrs["kernel_layout"] = kernel_layout use_bias = not attrs.get_bool("no_bias", True) - res = _op.nn.conv1d_transpose(inputs[0], inputs[1], **new_attrs) + data = _op.expand_dims(inputs[0], axis=2) + kernel = _op.expand_dims(inputs[1], 
axis=2) + res = _op.nn.conv2d_transpose(data, kernel, **new_attrs) + if use_bias: assert len(inputs) == 3 res = _op.nn.bias_add(res, inputs[2], axis=channel_axis) + res = _op.squeeze(res, axis=[2]) return res diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index c7764db729ee..3d90d15e1916 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -66,17 +66,6 @@ def revert_caffe2_pad(pads): return pads -def get_pad_pair(input1d, kernel1d, stride1d): - """infer pad size""" - if input1d % stride1d == 0: - pad = max(kernel1d - stride1d, 0) - else: - pad = max(kernel1d - (input1d % stride1d), 0) - pad_before = pad // 2 - pad_after = pad - pad_before - return [pad_before, pad_after] - - def onnx_storage_order2layout(storage_order): """converter of onnx storage order parameter to tvm storage order format""" if storage_order not in (0, 1): @@ -213,37 +202,14 @@ class Conv(OnnxOpConverter): @classmethod def _impl_v1(cls, inputs, attr, params): - # infer pads for auto_pad - if 'auto_pad' in attr: - attr['auto_pad'] = attr['auto_pad'].decode('utf-8') - if attr['auto_pad'] in ('SAME_UPPER', 'SAME_LOWER'): - input_shape = infer_shape(inputs[0]) - in_h, in_w = input_shape[2], input_shape[3] - stride_h, stride_w = attr['strides'] - kernel_h, kernel_w = attr['kernel_shape'] - dilation_h, dilation_w = attr['dilations'] - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - pad_v = get_pad_pair(in_h, dilated_kernel_h, stride_h) - pad_h = get_pad_pair(in_w, dilated_kernel_w, stride_w) - attr['pads'] = (pad_v[0], pad_h[0], pad_v[1], pad_h[1]) - elif attr['auto_pad'] == 'VALID': - attr['pads'] = (0, 0) - elif attr['auto_pad'] == 'NOTSET': - pass - else: - msg = 'Value {} in attribute "auto_pad" of operator Conv is invalid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(attr['auto_pad'])) - attr.pop('auto_pad') - - out = AttrCvt( - op_name=dimension_picker('conv'), - transforms={ - 'kernel_shape': 'kernel_size', - 'dilations': ('dilation', (0, 0)), - 'pads': ('padding', (0, 0), revert_caffe2_pad), - 'group': ('groups', 1)}, - custom_check=dimension_constraint())(inputs[:2], attr, params) + out = AttrCvt(op_name=dimension_picker('conv'), + transforms={ + 'kernel_shape': 'kernel_size', + 'dilations': ('dilation', (0, 0)), + 'pads': ('padding', (0, 0), revert_caffe2_pad), + 'group': ('groups', 1)}, + ignores=['auto_pad'], + custom_check=dimension_constraint())(inputs[:2], attr, params) use_bias = len(inputs) == 3 if use_bias: out = _op.nn.bias_add(out, inputs[2]) @@ -260,29 +226,6 @@ def _impl_v1(cls, inputs, attr, params): attr['channels'] = channels groups = attr.pop('group') attr['groups'] = groups - # infer pads for auto_pad - if 'auto_pad' in attr: - attr['auto_pad'] = attr['auto_pad'].decode('utf-8') - if attr['auto_pad'] in ('SAME_UPPER', 'SAME_LOWER'): - input_shape = infer_shape(inputs[0]) - in_h, in_w = input_shape[2], input_shape[3] - stride_h, stride_w = attr['strides'] - kernel_h, kernel_w = attr['kernel_shape'] - dilation_h, dilation_w = attr['dilations'] - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - pad_v = get_pad_pair(in_h, dilated_kernel_h, stride_h) - pad_h = get_pad_pair(in_w, dilated_kernel_w, stride_w) - attr['pads'] = (pad_v[0], pad_h[0], pad_v[1], pad_h[1]) - elif attr['auto_pad'] == 'VALID': - attr['pads'] = (0, 0) - elif attr['auto_pad'] == 'NOTSET': - pass - else: - msg = 'Value {} in attribute "auto_pad" of operator Conv is invalid.' - raise tvm.error.OpAttributeInvalid(msg.format(attr['auto_pad'])) - attr.pop('auto_pad') - out = AttrCvt( op_name=dimension_picker('conv', '_transpose'), transforms={ @@ -520,8 +463,7 @@ def _impl_v1(cls, inputs, attr, params): @classmethod def _impl_v5(cls, inputs, attr, params): if get_name(inputs[1]) in params: - # pop shape out of parameters since it wont be needed later. - shape = tuple(params.pop(inputs[1].name_hint).asnumpy()) + shape = tuple(params[inputs[1].name_hint].asnumpy()) out = _op.reshape(inputs[0], shape) else: data, shape = inputs @@ -540,7 +482,34 @@ def _impl_v11(cls, inputs, attr, params): block_size = int(attr['blocksize']) mode = attr.get("mode", "DCR") - return _op.nn.depth_to_space(inputs[0], block_size, mode=mode) + + # handle NCHW layout + indata = infer_value_simulated(inputs[0], params) + in_n, in_c, in_h, in_w = indata.shape + + # reshape to proper output + new_c = int(in_c / (block_size * block_size)) + new_h = in_h * block_size + new_w = in_w * block_size + newshape = (in_n, new_c, new_h, new_w) + + if mode == "DCR": + # expand input to larger dimension. + expanded = _op.reshape(inputs[0], + newshape=(in_n, block_size, block_size, new_c, in_h, in_w)) + # reorder to expand spatial blocks. + transposed = _op.transpose(expanded, axes=(0, 3, 4, 1, 5, 2)) + + else: # CRD mode + # expand input to larger dimension. + expanded = _op.reshape(inputs[0], + newshape=(in_n, new_c, block_size, block_size, in_h, in_w)) + # reorder to expand spatial blocks. 
+ transposed = _op.transpose(expanded, axes=(0, 1, 4, 2, 5, 3)) + + return AttrCvt(op_name="reshape", + extras={'newshape': newshape}, + ignores=['mode', 'blocksize'])([transposed], attr) class SpaceToDepth(OnnxOpConverter): @@ -551,7 +520,26 @@ class SpaceToDepth(OnnxOpConverter): def _impl_v1(cls, inputs, attr, params): block_size = int(attr['blocksize']) - return _op.nn.space_to_depth(inputs[0], block_size) + + # handle NCHW layout + indata = infer_value_simulated(inputs[0], params) + in_n, in_c, in_h, in_w = indata.shape + + # reshape to proper output + new_c = in_c * (block_size * block_size) + new_h = int(in_h / block_size) + new_w = int(in_w / block_size) + newshape = (in_n, new_c, new_h, new_w) + + # expand input to larger dimension. + expanded = _op.reshape(inputs[0], + newshape=(in_n, in_c, new_h, block_size, new_w, block_size)) + # reorder to expand spatial blocks. + transposed = _op.transpose(expanded, axes=(0, 3, 5, 1, 2, 4)) + + return AttrCvt(op_name="reshape", + extras={'newshape': newshape}, + ignores=['blocksize'])([transposed], attr) class Concat(OnnxOpConverter): @@ -698,7 +686,8 @@ class Shape(OnnxOpConverter): @classmethod def _impl_v1(cls, inputs, attr, params): - return _op.shape_of(inputs[0], "int64") + # TODO(@jroesch): use shape_of once it has been fixed) + return _op.shape_of(inputs[0]) class Cast(OnnxOpConverter): """ Operator converter for Cast. @@ -1091,52 +1080,6 @@ class Or(Elemwise): def _impl_v7(cls, inputs, attr, params): return _op.logical_or(inputs[0], inputs[1]) -class Expand(OnnxOpConverter): - """ Operator converter for Expand. - """ - @classmethod - def _impl_v8(cls, inputs, attr, params): - in_shape = np.array(infer_shape(inputs[0])).astype('int32') - if get_name(inputs[1]) in params: - shape = params[inputs[1].name_hint].asnumpy().astype('int32') - else: - shape = infer_value_simulated(inputs[1], params).asnumpy().astype('int32') - - # Currently 'op.broadcast_to' expect the rank of the given 'shape' - # (the 2nd input) is always higher than that of the given 'input' (the 1st input) - # However, ONNX Expand supports multi-directional broadcasting, which allows - # above pattern and also some extent of 'shape' can be smaller than the corresponding - # extent of 'input'. In this case, the extent of 'shape' must be 1. - # https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md - # In above cases, we cannot directorly apply 'op.broadcast_to' instead of 'expand' - # so, here we solved this problem by expanding the given 'shape' itself. - def expand_shape(in_shape, shape): - """ A function expands the shape when the rank is lower than that of the given - intput. Also it replaces the extent of the shape with the corresponding extent - of the intput when it is 1. - """ - - # here we flip the shapes because this can be more simply written - # when the innermost dimension is located at the index 0. - in_shape = np.flip(in_shape, axis=0) - shape = np.flip(shape, axis=0) - - if in_shape.size < shape.size: - for i in range(shape.size): - if i < in_shape.size and in_shape[i] > shape[i]: - shape[i] = in_shape[i] - else: - for i in range(in_shape.size): - if i >= shape.size: - np.append(shape, in_shape[i]) - elif shape[i] == 1: - shape[i] = in_shape[i] - - new_shape = np.flip(shape, axis=0) - return new_shape - - shape = expand_shape(in_shape, shape) - return _op.broadcast_to(inputs[0], shape=tuple(shape)) # compatible operators that do NOT require any conversion. 
_identity_list = [] @@ -1244,7 +1187,6 @@ def _get_convert_map(opset): # defs/tensor 'Cast': Cast.get_converter(opset), 'Reshape': Reshape.get_converter(opset), - 'Expand': Expand.get_converter(opset), 'Concat': Concat.get_converter(opset), 'Split': Split.get_converter(opset), 'Slice': Slice.get_converter(opset), @@ -1368,6 +1310,8 @@ def from_onnx(self, graph, opset): self._num_param += 1 # We should convert scalar integers to int32, to normalize. array = self._parse_array(t_proto) + if len(array.shape) == 0 and array.dtype == 'int64': + array = _nd.array(array.asnumpy().astype('int32')) self._params[node.output[0]] = array self._nodes[node.output[0]] = new_var( node.output[0], diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index f748fe828bfd..460a14699a77 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -122,70 +122,6 @@ def _impl(inputs, attr, params): return get_relay_op(name)(*inputs) return _impl -def _pool3d(name): - def _impl(inputs, attr, params): - attr['data_format'] = attr['data_format'].decode("utf-8") - flip_layout = False - - input_shape = attr['_input_shapes'][inputs[0]] - - if attr['data_format'] == 'NDHWC': - attr['kernel_shape'] = (attr['ksize'][1], attr['ksize'][2], attr['ksize'][3]) - attr['strides'] = (attr['strides'][1], attr['strides'][2], attr['strides'][3]) - elif attr['data_format'] == 'NCDHW': - attr['kernel_shape'] = (attr['ksize'][2], attr['ksize'][3], attr['ksize'][4]) - attr['strides'] = (attr['strides'][2], attr['strides'][3], attr['strides'][4]) - else: - msg = 'Value {} of attribute "data_format" of operator Pooling ' \ - 'is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(attr['data_format'])) - if attr['data_format'] == "NDHWC": - input_shape = [attr['_input_shapes'][inputs[0]][i] for i in (0, 4, 1, 2, 3)] - inputs[0] = _op.transpose(inputs[0], axes=(0, 4, 1, 2, 3)) - attr['data_format'] = "NCDHW" - attr['_input_shapes'][inputs[0]] = input_shape - flip_layout = True - - attr['padding'] = attr['padding'].decode("utf-8") - - if attr['padding'] == 'VALID': - attr['padding'] = [0, 0, 0, 0, 0, 0] - elif attr['padding'] == 'SAME': - stride_d, stride_h, stride_w = attr['strides'] - kernel_d, kernel_h, kernel_w = attr['kernel_shape'] - if attr['data_format'] == 'NDHWC': - in_d = input_shape[1] - in_h = input_shape[2] - in_w = input_shape[3] - else: - in_d = input_shape[2] - in_h = input_shape[3] - in_w = input_shape[4] - pad_d = _get_pad_pair(in_d, kernel_d, stride_d) - pad_v = _get_pad_pair(in_h, kernel_h, stride_h) - pad_h = _get_pad_pair(in_w, kernel_w, stride_w) - - attr['padding'] = [pad_d[0], pad_v[0], pad_h[0], pad_d[1], pad_v[1], pad_h[1]] - else: - msg = 'Value {} in attribute "padding" of operator Pooling is ' \ - 'not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(attr['padding'])) - - if name == "avg_pool": - attr['count_include_pad'] = False - attr['ceil_mode'] = False - out = AttrCvt( - op_name=name, - transforms={ - 'kernel_shape': 'pool_size', - 'data_format': 'layout'}, - ignores=['ksize'])(inputs, attr) - if flip_layout: - out = _op.transpose(out, axes=(0, 2, 3, 4, 1)) - return out - - return _impl - def _pooling(name): def _impl(inputs, attr, params): @@ -269,12 +205,6 @@ def _impl(inputs, attr, params): attr['strides'][1], attr['strides'][2], attr['strides'][3] = \ attr['strides'][3], attr['strides'][1], attr['strides'][2] attr['data_format'] = 'NCHW' - - if opname == 'conv_transpose' and len(attr['_output_shapes']) > 0: - tmp_shape = attr['_output_shapes'][0] - tmp_shape = [tmp_shape[ii] for ii in (0, 3, 1, 2)] - attr['_output_shapes'][0] = tmp_shape - flip_layout = True inputs_data = inputs[0] if opname != 'conv_transpose' else inputs[2] @@ -351,17 +281,12 @@ def _impl(inputs, attr, params): elif attr['padding'] == 'SAME': stride_h, stride_w = attr['strides'] kernel_h, kernel_w = attr['kernel_shape'] - - pdata_shape = input_shape - if opname == 'conv_transpose' and len(attr['_output_shapes']) > 0: - pdata_shape = attr['_output_shapes'][0] - if attr['data_format'] == 'NHWC': - in_h = pdata_shape[1] - in_w = pdata_shape[2] + in_h = input_shape[1] + in_w = input_shape[2] else: - in_h = pdata_shape[2] - in_w = pdata_shape[3] + in_h = input_shape[2] + in_w = input_shape[3] dilation_h = attr['dilations'][0] dilation_w = attr['dilations'][1] @@ -370,23 +295,21 @@ def _impl(inputs, attr, params): pad_v = _get_pad_pair(in_h, dilated_kernel_h, stride_h) pad_h = _get_pad_pair(in_w, dilated_kernel_w, stride_w) - if opname != 'conv_transpose': - if attr['data_format'] == 'NHWC': - inputs_data = _op.nn.pad(data=inputs_data, - pad_width=((0, 0), - (pad_v[0], pad_v[1]), - (pad_h[0], pad_h[1]), - (0, 0))) - else: - inputs_data = _op.nn.pad(data=inputs_data, - pad_width=((0, 0), - (0, 0), - (pad_v[0], pad_v[1]), - (pad_h[0], pad_h[1]))) - attr['padding'] = [0, 0] + if attr['data_format'] == 'NHWC': + inputs_data = _op.nn.pad(data=inputs_data, + pad_width=((0, 0), + (pad_v[0], pad_v[1]), + (pad_h[0], pad_h[1]), + (0, 0))) else: - attr['padding'] = [pad_v[0], pad_h[0], pad_v[1], pad_h[1]] + inputs_data = _op.nn.pad(data=inputs_data, + pad_width=((0, 0), + (0, 0), + (pad_v[0], pad_v[1]), + (pad_h[0], pad_h[1]))) + + attr['padding'] = [0, 0] else: msg = 'Value {} in attribute "padding" of operator Conv is not ' \ @@ -741,18 +664,76 @@ def _impl(inputs, attr, params): def _depth_to_space(): def _impl(inputs, attr, params): + # Need to handle data layouts differently. + input_shape = attr['_input_shapes'][inputs[0]] block_size = int(attr['block_size']) - layout = attr['data_format'].decode("utf-8") - return _op.nn.depth_to_space(inputs[0], block_size, layout) + if attr['data_format'].decode("utf-8") == 'NHWC': + in_n, in_h, in_w, in_c = input_shape + new_c = int(in_c / (block_size * block_size)) + + # First expand input to larger dimension. + expanded = _op.reshape( + inputs[0], newshape=(in_n, in_h, in_w, block_size, block_size, new_c)) + # Now reorder to expand spatial blocks. + transposed = _op.transpose(expanded, axes=(0, 1, 3, 2, 4, 5)) + # Finally reshape to proper output. 
+ new_h = in_h * block_size + new_w = in_w * block_size + newshape = (in_n, new_h, new_w, new_c) + + else: # Handle NCHW layout + in_n, in_c, in_h, in_w = input_shape + new_c = int(in_c / (block_size * block_size)) + + expanded = _op.reshape( + inputs[0], newshape=(in_n, block_size, block_size, new_c, in_h, in_w)) + transposed = _op.transpose(expanded, axes=(0, 3, 4, 1, 5, 2)) + new_h = in_h * block_size + new_w = in_w * block_size + newshape = (in_n, new_c, new_h, new_w) + + return AttrCvt( + op_name="reshape", + extras={'newshape': newshape}, + ignores=['data_format', 'block_size'])([transposed], attr) return _impl def _space_to_depth(): def _impl(inputs, attr, params): + # Need to handle data layouts differently. + input_shape = attr['_input_shapes'][inputs[0]] block_size = int(attr['block_size']) - layout = attr['data_format'].decode("utf-8") - return _op.nn.space_to_depth(inputs[0], block_size, layout) + if attr['data_format'].decode("utf-8") == 'NHWC': + in_n, in_h, in_w, in_c = input_shape + new_h = int(in_h / block_size) + new_w = int(in_w / block_size) + + # First expand input to larger dimension. + expanded = _op.reshape( + inputs[0], newshape=(in_n, new_h, block_size, new_w, block_size, in_c)) + # Now reorder to expand spatial blocks. + transposed = _op.transpose(expanded, axes=(0, 1, 3, 2, 4, 5)) + # Finally reshape to proper output. + new_c = in_c * block_size * block_size + newshape = (in_n, new_h, new_w, new_c) + + else: # Handle NCHW layout + in_n, in_c, in_h, in_w = input_shape + new_h = int(in_h / block_size) + new_w = int(in_w / block_size) + + expanded = _op.reshape( + inputs[0], newshape=(in_n, in_c, new_h, block_size, new_w, block_size)) + transposed = _op.transpose(expanded, axes=(0, 3, 5, 1, 2, 4)) + new_c = int(in_c * block_size * block_size) + newshape = (in_n, new_c, new_h, new_w) + + return AttrCvt( + op_name="reshape", + extras={'newshape': newshape}, + ignores=['data_format', 'block_size'])([transposed], attr) return _impl @@ -1428,7 +1409,6 @@ def _impl(inputs, attr, params): 'ArgMin' : _argx(_op.argmin, 'argmin'), 'Assert' : _assert(), 'AvgPool' : _pooling('avg_pool'), - 'AvgPool3D' : _pool3d('avg_pool3d'), 'BatchMatMul' : _batch_matmul(), 'BatchMatMulV2' : _batch_matmul(), 'BatchNormWithGlobalNormalization' : _batch_norm(), @@ -1480,7 +1460,6 @@ def _impl(inputs, attr, params): 'MatMul' : _matmul(), 'Max' : _reduce('max'), 'MaxPool' : _pooling('max_pool'), - 'MaxPool3D' : _pool3d('max_pool3d'), 'Maximum' : _elemwise('maximum'), 'Mean' : _mean(), 'Min' : _reduce('min'), diff --git a/python/tvm/relay/op/_tensor_grad.py b/python/tvm/relay/op/_tensor_grad.py index 944e51e636f5..d55cad7c7a2d 100644 --- a/python/tvm/relay/op/_tensor_grad.py +++ b/python/tvm/relay/op/_tensor_grad.py @@ -379,9 +379,9 @@ def log_softmax_grad(orig, grad): @register_gradient("nn.bias_add") def bias_add_grad(orig, grad): """Returns gradient of bias_add""" - data = orig.args[0] + data, bias = orig.args return [collapse_sum_like(grad, data), - _sum(grad, orig.attrs.axis, keepdims=False, exclude=True)] + collapse_sum_like(grad, bias)] @register_gradient("nn.dense") diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 322325819fba..cd8a1311eaba 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -251,47 +251,6 @@ def legalize_conv2d(attrs, inputs, types): """ return topi.nn.conv2d_legalize(attrs, inputs, types) - -@reg.register_convert_op_layout("nn.conv2d") -def convert_conv2d(attrs, inputs, tinfos, desired_layout): - """Convert 
Layout pass registration for conv2d op. - - Parameters - ---------- - attrs : tvm.attrs.Attrs - Attributes of current convolution - inputs : list of tvm.relay.Expr - The args of the Relay expr to be legalized - tinfos : list of types - List of input and output types - desired_layout : str - The desired layout - - Returns - ------- - result : tvm.relay.Expr - The transformed expr - """ - - from tvm import relay - data_layout = attrs['data_layout'] - kernel_layout = attrs['kernel_layout'] - data, weight = inputs - assert desired_layout == 'NCHW', \ - "Currently only transformation to NCHW layout is supported." - if desired_layout == 'NCHW': - new_attrs = dict(attrs) - new_attrs['data_layout'] = desired_layout - new_attrs['kernel_layout'] = 'OIHW' - - if data_layout == 'NHWC' and kernel_layout == 'HWIO': - # Convert (NHWC, HWIO) to (NCHW, OIHW) - return relay.nn.conv2d(data, weight, **new_attrs) - if data_layout == 'NHWC' and kernel_layout == 'HWOI': - # Convert (NHWC, HWOI) to (NCHW, OIHW). Depthwise conv2d. - return relay.nn.conv2d(data, weight, **new_attrs) - return None - reg.register_pattern("nn.conv2d", OpPattern.OUT_ELEMWISE_FUSABLE) @@ -389,37 +348,6 @@ def legalize_conv2d_transpose(attrs, inputs, types): reg.register_pattern("nn.conv2d_transpose", OpPattern.OUT_ELEMWISE_FUSABLE) -# conv1d_transpose -@reg.register_compute("nn.conv1d_transpose") -def compute_conv1d_transpose(attrs, inputs, out_dtype, target): - """Compute definition of conv1d_transpose""" - padding = get_const_tuple(attrs.padding) - strides = get_const_tuple(attrs.strides) - dilation = get_const_tuple(attrs.dilation) - groups = attrs.groups - layout = attrs.data_layout - out_dtype = attrs.out_dtype - out_dtype = (inputs[0].dtype if out_dtype in ("same", "") - else out_dtype) - assert layout == "NCW", "conv1d_transpose ncw only supported" - assert dilation == (1,), "conv1d_transpose dilation is not supported" - assert groups == 1, "conv1d_transpose groups == 1 only supported" - out = topi.nn.conv1d_transpose_ncw( - inputs[0], inputs[1], strides, padding, out_dtype) - output_padding = get_const_tuple(attrs.output_padding) - out = topi.nn.pad(out, - [0, 0, 0], [0, 0, output_padding[0]]) - return [out] - - -@reg.register_schedule("nn.conv1d_transpose") -def schedule_conv1d_transpose(attrs, outs, target): - """Schedule definition of conv1d_transpose""" - with target: - return topi.generic.schedule_conv1d_transpose_ncw(outs) - -reg.register_pattern("nn.conv1d_transpose", OpPattern.OUT_ELEMWISE_FUSABLE) - # bias_add reg.register_schedule("nn.bias_add", schedule_injective) reg.register_pattern("nn.bias_add", OpPattern.BROADCAST) @@ -437,18 +365,6 @@ def schedule_max_pool2d(attrs, outs, target): reg.register_pattern("nn.max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) -# max_pool3d -@reg.register_schedule("nn.max_pool3d") -def schedule_max_pool3d(attrs, outs, target): - """Schedule definition of max_pool3d""" - layout = attrs.layout - with target: - return topi.generic.schedule_pool(outs, layout) - - -reg.register_pattern("nn.max_pool3d", OpPattern.OUT_ELEMWISE_FUSABLE) - - # avg_pool2d @reg.register_schedule("nn.avg_pool2d") def schedule_avg_pool2d(attrs, outs, target): @@ -457,19 +373,8 @@ def schedule_avg_pool2d(attrs, outs, target): with target: return topi.generic.schedule_pool(outs, layout) -reg.register_pattern("nn.avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) - -# avg_pool3d -@reg.register_schedule("nn.avg_pool3d") -def schedule_avg_pool3d(attrs, outs, target): - """Schedule definition of avg_pool3d""" - layout = 
attrs.layout - with target: - return topi.generic.schedule_pool(outs, layout) - - -reg.register_pattern("nn.avg_pool3d", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("nn.avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) # max_pool2d_grad @@ -582,25 +487,6 @@ def compute_upsampling(attrs, inputs, out_dtype, target): align_corners = attrs.align_corners return [topi.nn.upsampling(inputs[0], scale_h, scale_w, layout, method, align_corners)] -# upsampling3d -reg.register_schedule("nn.upsampling3d", reg.schedule_injective) - -def schedule_upsampling3d(_, outs, target): - """Schedule definition of upsampling3d""" - with target: - return topi.generic.schedule_injective(outs) - -@reg.register_compute("nn.upsampling3d") -def compute_upsampling3d(attrs, inputs, out_dtype, target): - scale_d = attrs.scale_d - scale_h = attrs.scale_h - scale_w = attrs.scale_w - layout = attrs.layout - method = attrs.method - coordinate_transformation_mode = attrs.coordinate_transformation_mode - return [topi.nn.upsampling3d(inputs[0], scale_d, scale_h, scale_w, layout, method,\ - coordinate_transformation_mode)] - # pad reg.register_schedule("nn.pad", schedule_broadcast) @@ -964,28 +850,6 @@ def compute_cross_entropy_with_logits(attrs, inputs, out_dtype, target): x, y = inputs return [-topi.sum(x * y) / x.shape[0]] - -@reg.register_compute("nn.depth_to_space") -def compute_depth_to_space(attrs, inputs, out_dtype, target): - block_size = attrs.block_size - layout = attrs.layout - mode = attrs.mode - return [topi.nn.depth_to_space(inputs[0], block_size, layout=layout, mode=mode)] - -reg.register_schedule("nn.depth_to_space", schedule_injective) -reg.register_pattern("nn.depth_to_space", OpPattern.INJECTIVE) - - -@reg.register_compute("nn.space_to_depth") -def compute_space_to_depth(attrs, inputs, out_dtype, target): - block_size = attrs.block_size - layout = attrs.layout - return [topi.nn.space_to_depth(inputs[0], block_size, layout=layout)] - -reg.register_schedule("nn.space_to_depth", schedule_injective) -reg.register_pattern("nn.space_to_depth", OpPattern.INJECTIVE) - - # shape func @script def _conv2d_NCHWc_shape_func(dshape, kshape, strides, padding, dilation, oc_bn): diff --git a/python/tvm/relay/op/nn/nn.py b/python/tvm/relay/op/nn/nn.py index ec360af6dd48..5e1c6a8c2616 100644 --- a/python/tvm/relay/op/nn/nn.py +++ b/python/tvm/relay/op/nn/nn.py @@ -257,72 +257,6 @@ def conv2d_transpose(data, kernel_layout, out_layout, output_padding, out_dtype) -def conv1d_transpose(data, - weight, - strides=(1,), - padding=(0,), - dilation=(1,), - groups=1, - channels=None, - kernel_size=None, - data_layout="NCW", - kernel_layout="OIW", - out_layout="", - output_padding=(0,), - out_dtype=""): - """One dimensional transposed convolution operator. - - Parameters - ---------- - data : tvm.relay.Expr - The input data to the operator. - - weight : tvm.relay.Expr - The weight expressions. - - strides : Tuple[int], optional - The strides of convolution. - - padding : Tuple[int], optional - The padding of convolution on both sides of inputs. - - dilation : Tuple[int], optional - Specifies the dilation rate to be used for dilated convolution. - - channels : int, optional - Number of output channels of this convolution. - - kernel_size : tuple of int, optional - The spatial of the convolution kernel. - - groups : int, optional - Number of groups for grouped convolution. - - data_layout : str, optional - Layout of the input. - - kernel_layout : str, optional - Layout of the weight. 
- - out_layout : Optional[str] - Layout of the output, by default, out_layout is the same as data_layout - - output_padding : Tuple[int], optional - Additional zero-padding to be added to one side of the output. - - out_dtype : str, optional - Specifies the output data type for mixed precision conv2d. - - Returns - ------- - result : tvm.relay.Expr - The computed result. - """ - return _make.conv1d_transpose(data, weight, strides, padding, dilation, - groups, channels, kernel_size, data_layout, - kernel_layout, out_layout, output_padding, out_dtype) - - def softmax(data, axis=-1): r"""Computes softmax. @@ -425,51 +359,6 @@ def max_pool2d(data, return _make.max_pool2d(data, pool_size, strides, padding, layout, ceil_mode) -def max_pool3d(data, - pool_size=(1, 1, 1), - strides=(1, 1, 1), - padding=(0, 0, 0), - layout="NCDHW", - ceil_mode=False): - r"""3D maximum pooling operator. - - This operator takes data as input and does 3D max value calculation - with in pool_size sized window by striding defined by stride. - - - In the default case, where the data_layout is `NCDHW` - a data Tensor with shape `(batch_size, channels, depth, height, width)`, - to produce an output Tensor. - - The ceil_mode is used to take ceil or floor while computing out shape. - count_include_pad indicates including or excluding padded input values in computation. - This operator accepts data layout specification. - - Parameters - ---------- - data : tvm.relay.Expr - The input data to the operator. - - strides : tuple of int, optional - The strides of pooling. - - padding : tuple of int, optional - The padding for pooling. - - layout : str, optional - Layout of the input. - - ceil_mode : bool, optional - To enable or disable ceil while pooling. - - Returns - ------- - result : tvm.relay.Expr - The computed result. - """ - return _make.max_pool3d(data, pool_size, strides, padding, - layout, ceil_mode) - def avg_pool2d(data, pool_size=(1, 1), strides=(1, 1), @@ -527,55 +416,6 @@ def avg_pool2d(data, return _make.avg_pool2d(data, pool_size, strides, padding, layout, ceil_mode, count_include_pad) -def avg_pool3d(data, - pool_size=(1, 1, 1), - strides=(1, 1, 1), - padding=(0, 0, 0), - layout="NCDHW", - ceil_mode=False, - count_include_pad=False): - r"""3D average pooling operator. - - This operator takes data as input and does 3D average value calculation - with in pool_size sized window by striding defined by stride - - - In the default case, where the data_layout is `NCDHW` - a data Tensor with shape `(batch_size, channels, depthm height, width)`, - to produce an output Tensor. - - The ceil_mode is used to take ceil or floor while computing out shape. - count_include_pad indicates including or excluding padded input values in computation. - This operator accepts data layout specification. - - Parameters - ---------- - data : tvm.relay.Expr - The input data to the operator. - - strides : tuple of int, optional - The strides of pooling. - - padding : tuple of int, optional - The padding for pooling. - - layout : str, optional - Layout of the input. - - ceil_mode : bool, optional - To enable or disable ceil while pooling. - - count_include_pad : bool, optional - To include padding to compute the average. - - Returns - ------- - result : tvm.relay.Expr - The computed result. 
- """ - return _make.avg_pool3d(data, pool_size, strides, padding, - layout, ceil_mode, count_include_pad) - def max_pool2d_grad(out_grad, data, pool_size=(1, 1), @@ -771,58 +611,6 @@ def upsampling(data, return _make.upsampling(data, scale_h, scale_w, layout, method, align_corners) -def upsampling3d(data, - scale_d=1, - scale_h=1, - scale_w=1, - layout="NCDHW", - method="nearest_neighbor", - coordinate_transformation_mode="half_pixel"): - """3D Upsampling. - - This operator takes data as input and does 3D scaling to the given scale factor. - In the default case, where the data_layout is `NCDHW` - with data of shape (n, c, d, h, w) - out will have a shape (n, c, d*scale_d, h*scale_h, w*scale_w) - - method indicates the algorithm to be used while calculating the out value - and method can be one of ("trilinear", "nearest_neighbor") - - Parameters - ---------- - data : tvm.relay.Expr - The input data to the operator. - - scale_d : tvm.relay.Expr - The scale factor for depth upsampling. - - scale_h : tvm.relay.Expr - The scale factor for height upsampling. - - scale_w : tvm.relay.Expr - The scale factor for width upsampling. - - layout : str, optional - Layout of the input. - - method : str, optional - Scale method to used [nearest_neighbor, trilinear]. - - coordinate_transformation_mode: string, optional - Describes how to transform the coordinate in the resized tensor - to the coordinate in the original tensor. - Refer to the ONNX Resize operator specification for details. - Available options are "half_pixel", "align_corners" and "asymmetric". - - Returns - ------- - result : tvm.relay.Expr - The computed result. - """ - return _make.upsampling3d(data, scale_d, scale_h, scale_w, layout, method, - coordinate_transformation_mode) - - def batch_flatten(data): """BatchFlatten. @@ -2131,53 +1919,3 @@ def cross_entropy_with_logits(predictions, targets): The computed result. """ return _make.cross_entropy_with_logits(predictions, targets) - - -def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): - """Convert channels into spatial blocks. - - Parameters - ---------- - data : tvm.relay.Expr - Input data with channels divisible by block_size**2 - - block_size : int - Size of blocks to convert channels into. - - layout : string - One of NCHW or NHWC, indicates channel axis. - - mode : string - One of DCR or CDR, indicates which order channels - are accessed in. - - Returns - ------- - result : tvm.relay.Expr - Tensor with shape [in_batch, in_channel / block_size * block_size, - in_height * block_size, in_width * block_size] - """ - return _make.depth_to_space(data, block_size, layout, mode) - - -def space_to_depth(data, block_size, layout='NCHW'): - """Convert spatial blocks into channels. - - Parameters - ---------- - data : tvm.relay.Expr - Input data with spatial dimensions divisible by block_size - - block_size : int - Size of blocks to decompose into channels. - - layout : string - One of NCHW or NHWC, indicates channel axis. 
- - Returns - ------- - result : tvm.relay.Expr - Tensor with shape [in_batch, in_channel * block_size * block_size, - in_height / block_size, in_width / block_size] - """ - return _make.space_to_depth(data, block_size, layout) diff --git a/python/tvm/relay/op/op.py b/python/tvm/relay/op/op.py index 382f667b86a9..355496e42b48 100644 --- a/python/tvm/relay/op/op.py +++ b/python/tvm/relay/op/op.py @@ -196,23 +196,6 @@ def register_alter_op_layout(op_name, alter_layout=None, level=10): return register(op_name, "FTVMAlterOpLayout", alter_layout, level) -def register_convert_op_layout(op_name, convert_layout=None, level=10): - """Register convert op layout function for an op - - Parameters - ---------- - op_name : str - The name of the operator - - convert_layout: function (attrs: Attrs, inputs: List[Expr]) -> new_expr: Expr - The function for changing the layout or replacing the operator - - level : int - The priority level - """ - return register(op_name, "FTVMConvertOpLayout", convert_layout, level) - - def register_legalize(op_name, legal_op=None, level=10): """Register legal transformation function for an op diff --git a/python/tvm/relay/op/op_attrs.py b/python/tvm/relay/op/op_attrs.py index e0887e5ff872..35b2c053f8cf 100644 --- a/python/tvm/relay/op/op_attrs.py +++ b/python/tvm/relay/op/op_attrs.py @@ -63,10 +63,6 @@ class FIFOBufferAttrs(Attrs): class UpSamplingAttrs(Attrs): """Attributes for nn.upsampling""" -@register_relay_attr_node -class UpSampling3DAttrs(Attrs): - """Attributes for nn.upsampling3d""" - @register_relay_attr_node class PadAttrs(Attrs): """Attributes for nn.pad""" @@ -275,16 +271,6 @@ class AvgPool2DAttrs(Attrs): """Attributes used in avg_pool2d operators""" -@register_relay_attr_node -class MaxPool3DAttrs(Attrs): - """Attributes used in max_pool3d operators""" - - -@register_relay_attr_node -class AvgPool3DAttrs(Attrs): - """Attributes used in avg_pool3d operators""" - - @register_relay_attr_node class BitPackAttrs(Attrs): """Attributes used in bitpack operator""" @@ -303,8 +289,3 @@ class BinaryDenseAttrs(Attrs): @register_relay_attr_node class Conv2DTransposeAttrs(Attrs): """Attributes used in Transposed Conv2D operators""" - - -@register_relay_attr_node -class SubPixelAttrs(Attrs): - """Attributes used in depth to space and space to depth operators""" diff --git a/python/tvm/relay/quantize/_calibrate.py b/python/tvm/relay/quantize/_calibrate.py index 21254fa61e8e..aae50519b132 100644 --- a/python/tvm/relay/quantize/_calibrate.py +++ b/python/tvm/relay/quantize/_calibrate.py @@ -53,18 +53,11 @@ def collect_stats(mod, dataset): logging.info("collecting statistics for calibration...") func = mod['main'] func = _quantize.CreateStatsCollector(func) - - if tvm.target.current_target(): - target = tvm.target.current_target() - ctx = tvm.context(target.target_name) - else: - target = 'llvm' - ctx = tvm.context(target) - + target = tvm.target.current_target() or 'llvm' with _transform.build_config(opt_level=3): graph, lib, params = _build_module.build(func, target=target) outputs = [] - runtime = graph_runtime.create(graph, lib, ctx) + runtime = graph_runtime.create(graph, lib, tvm.context(target)) runtime.set_input(**params) num_outputs = runtime.get_num_outputs() diff --git a/python/tvm/relay/testing/tf.py b/python/tvm/relay/testing/tf.py index e3d6e7df0b98..79d0d8257953 100644 --- a/python/tvm/relay/testing/tf.py +++ b/python/tvm/relay/testing/tf.py @@ -28,12 +28,8 @@ # Tensorflow imports import tensorflow as tf from tensorflow.core.framework import graph_pb2 -from 
tvm.contrib.download import download_testdata -try: - tf_compat_v1 = tf.compat.v1 -except ImportError: - tf_compat_v1 = tf +from tvm.contrib.download import download_testdata ###################################################################### # Some helper functions @@ -84,7 +80,7 @@ def AddShapesToGraphDef(session, out_node): """ - graph_def = tf_compat_v1.graph_util.convert_variables_to_constants( + graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( session, session.graph.as_graph_def(add_shapes=True), [out_node], @@ -116,13 +112,13 @@ def load(self, label_lookup_path, uid_lookup_path): dict from integer node ID to human-readable string. """ - if not tf_compat_v1.gfile.Exists(uid_lookup_path): + if not tf.compat.v1.io.gfile.exists(uid_lookup_path): tf.logging.fatal('File does not exist %s', uid_lookup_path) - if not tf_compat_v1.gfile.Exists(label_lookup_path): + if not tf.compat.v1.io.gfile.exists(label_lookup_path): tf.logging.fatal('File does not exist %s', label_lookup_path) # Loads mapping from string UID to human-readable string - proto_as_ascii_lines = tf_compat_v1.gfile.GFile(uid_lookup_path).readlines() + proto_as_ascii_lines = tf.compat.v1.gfile.GFile(uid_lookup_path).readlines() uid_to_human = {} p = re.compile(r'[n\d]*[ \S,]*') for line in proto_as_ascii_lines: @@ -133,7 +129,7 @@ def load(self, label_lookup_path, uid_lookup_path): # Loads mapping from string UID to integer node ID. node_id_to_uid = {} - proto_as_ascii = tf_compat_v1.gfile.GFile(label_lookup_path).readlines() + proto_as_ascii = tf.compat.v1.gfile.GFile(label_lookup_path).readlines() for line in proto_as_ascii: if line.startswith(' target_class:'): target_class = int(line.split(': ')[1]) @@ -213,7 +209,7 @@ def get_workload(model_path, model_sub_path=None): path_model = download_testdata(model_url, model_path, module='tf') # Creates graph from saved graph_def.pb. - with tf_compat_v1.gfile.FastGFile(path_model, 'rb') as f: + with tf.compat.v1.gfile.FastGFile(path_model, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) graph = tf.import_graph_def(graph_def, name='') @@ -303,7 +299,7 @@ def _create_ptb_vocabulary(data_dir): file_name = 'ptb.train.txt' def _read_words(filename): """Read the data for creating vocabulary""" - with tf_compat_v1.gfile.GFile(filename, "r") as f: + with tf.compat.v1.gfile.GFile(filename, "r") as f: return f.read().encode("utf-8").decode("utf-8").replace("\n", "").split() def _build_vocab(filename): diff --git a/python/tvm/relay/transform.py b/python/tvm/relay/transform.py index 1f91272769b4..540c1f5b79cd 100644 --- a/python/tvm/relay/transform.py +++ b/python/tvm/relay/transform.py @@ -460,34 +460,6 @@ def AlterOpLayout(): return _transform.AlterOpLayout() -def ConvertLayout(desired_layout): - """ Given a dest layout, this pass transforms the expr such that most of the ops input data - layout is changed to the dest layout. In ideal situation, there are only 2 layout transforms, - one at the start and one at the end. - - This pass is not a part of relay.build and is expected to be called between framework-relay - parser and relay.build call. This is very helpful for hardware backends that support/prefer only - type of data layout. - - RFC - https://discuss.tvm.ai/t/layout-conversion-pass/4009 - - This pass uses most of the AlterOpLayout and InferCorrectLayout infrastructure. We can define - new layouts for conv2d ops for now. Most of the other operators try to adapt to their input - layout using the InferCorrectLayout infrastructure. 
- - Parameters - ---------- - desired_layout : str - The desired layout for the transformed expr. - - Returns - ------- - pass: FunctionPass - The pass. - """ - return _transform.ConvertLayout(desired_layout) - - def Legalize(legalize_map_attr_name="FTVMLegalize"): """Legalizes an expression with another expression. This pass can be used to replace an expr with another expr for target diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 8467f6a92ea8..02e2c7c67c99 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -22,7 +22,7 @@ members = [ "runtime", "runtime/tests/test_tvm_basic", "runtime/tests/test_tvm_dso", - "runtime/tests/test_nn", + "runtime/tests/test_nnvm", "frontend", "frontend/tests/basics", "frontend/tests/callback", diff --git a/rust/frontend/Cargo.toml b/rust/frontend/Cargo.toml index 3f99188a40f5..c6b56800ef59 100644 --- a/rust/frontend/Cargo.toml +++ b/rust/frontend/Cargo.toml @@ -23,7 +23,7 @@ description = "Rust frontend support for TVM" repository = "https://github.com/apache/incubator-tvm" homepage = "https://github.com/apache/incubator-tvm" readme = "README.md" -keywords = ["rust", "tvm"] +keywords = ["rust", "tvm", "nnvm"] categories = ["api-bindings", "science"] authors = ["TVM Contributors"] edition = "2018" diff --git a/rust/frontend/README.md b/rust/frontend/README.md index c61ba847c1f2..b77a4bd156ef 100644 --- a/rust/frontend/README.md +++ b/rust/frontend/README.md @@ -35,12 +35,14 @@ Here's a Python snippet for downloading and building a pretrained Resnet18 via A ```python block = get_model('resnet18_v1', pretrained=True) - -sym, params = relay.frontend.from_mxnet(block, shape_dict) + +sym, params = nnvm.frontend.from_mxnet(block) +# add the softmax layer for prediction +net = nnvm.sym.softmax(sym) # compile the model -with relay.build_config(opt_level=opt_level): - graph, lib, params = relay.build( - net, target, params=params) +with nnvm.compiler.build_config(opt_level=opt_level): + graph, lib, params = nnvm.compiler.build( + net, target, shape={"data": data_shape}, params=params) # same the model artifacts lib.save(os.path.join(target_dir, "deploy_lib.o")) cc.create_shared(os.path.join(target_dir, "deploy_lib.so"), @@ -49,7 +51,7 @@ cc.create_shared(os.path.join(target_dir, "deploy_lib.so"), with open(os.path.join(target_dir, "deploy_graph.json"), "w") as fo: fo.write(graph.json()) with open(os.path.join(target_dir,"deploy_param.params"), "wb") as fo: - fo.write(relay.save_param_dict(params)) + fo.write(nnvm.compiler.save_param_dict(params)) ``` Now, we need to input the artifacts to create and run the *Graph Runtime* to detect our input cat image @@ -111,7 +113,7 @@ and the model correctly predicts the input image as **tiger cat**. Please follow TVM [installations](https://docs.tvm.ai/install/index.html), `export TVM_HOME=/path/to/tvm` and add `libtvm_runtime` to your `LD_LIBRARY_PATH`. -*Note:* To run the end-to-end examples and tests, `tvm` and `topi` need to be added to your `PYTHONPATH` or it's automatic via an Anaconda environment when it is installed individually. +*Note:* To run the end-to-end examples and tests, `tvm`, `nnvm` and `topi` need to be added to your `PYTHONPATH` or it's automatic via an Anaconda environment when it is installed individually. 
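As a counterpart to the build-and-save snippet above, a sketch of loading the saved artifacts back with the Python graph runtime before handing them to the Rust frontend; the paths follow the snippet and the input shape is illustrative:

```python
import numpy as np
import tvm
from tvm.contrib import graph_runtime

lib = tvm.module.load("deploy_lib.so")
graph_json = open("deploy_graph.json").read()
params_bytes = bytearray(open("deploy_param.params", "rb").read())

module = graph_runtime.create(graph_json, lib, tvm.cpu(0))
module.load_params(params_bytes)
module.set_input("data", np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
module.run()
top1 = module.get_output(0).asnumpy().argmax()
```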
## Supported TVM Functionalities diff --git a/rust/frontend/examples/resnet/README.md b/rust/frontend/examples/resnet/README.md index 29274743a1ce..3ce4a778e4bd 100644 --- a/rust/frontend/examples/resnet/README.md +++ b/rust/frontend/examples/resnet/README.md @@ -18,11 +18,11 @@ ## Resnet example This end-to-end example shows how to: -* build `Resnet 18` with `tvm` from Python +* build `Resnet 18` with `tvm` and `nnvm` from Python * use the provided Rust frontend API to test for an input image -To run the example with pretrained resnet weights, first `tvm` and `mxnet` must be installed for the python build. To install mxnet for cpu, run `pip install mxnet` -and to install `tvm` with `llvm` follow the [TVM installation guide](https://docs.tvm.ai/install/index.html). +To run the example with pretrained resnet weights, first `tvm`, `nnvm` and `mxnet` must be installed for the python build. To install mxnet for cpu, run `pip install mxnet` +and to install `tvm` and `nnvm` with `llvm` follow the [TVM installation guide](https://docs.tvm.ai/install/index.html). * **Build the example**: `cargo build diff --git a/rust/runtime/Cargo.toml b/rust/runtime/Cargo.toml index f0d24595f0a1..34acc77899e9 100644 --- a/rust/runtime/Cargo.toml +++ b/rust/runtime/Cargo.toml @@ -22,7 +22,7 @@ license = "Apache-2.0" description = "A static TVM runtime" repository = "https://github.com/apache/incubator-tvm" readme = "README.md" -keywords = ["tvm"] +keywords = ["tvm", "nnvm"] categories = ["api-bindings", "science"] authors = ["TVM Contributors"] edition = "2018" diff --git a/rust/runtime/src/graph.rs b/rust/runtime/src/graph.rs index 42b9458223a6..cacd7a38a97f 100644 --- a/rust/runtime/src/graph.rs +++ b/rust/runtime/src/graph.rs @@ -440,7 +440,7 @@ named!( ) ); -/// Loads a param dict saved using `relay.save_param_dict`. +/// Loads a param dict saved using `nnvm.compiler.save_param_dict`. pub fn load_param_dict(bytes: &[u8]) -> Result, GraphFormatError> { if let Ok((remaining_bytes, param_dict)) = parse_param_dict(bytes) { if remaining_bytes.len() == 0 { diff --git a/rust/runtime/src/threading.rs b/rust/runtime/src/threading.rs index f05faf73566c..3f25309741ec 100644 --- a/rust/runtime/src/threading.rs +++ b/rust/runtime/src/threading.rs @@ -296,7 +296,7 @@ pub(crate) fn sgx_join_threads() { ocall_packed!("__sgx_thread_group_join__", 0); } -// @see issue 988 for information on why this function is used. +// @see https://github.com/apache/incubator-tvm/issues/988 for information on why this function is used. #[no_mangle] pub extern "C" fn TVMBackendParallelBarrier(_task_id: usize, penv: *const TVMParallelGroupEnv) { let barrier: &Arc = unsafe { &*((*penv).sync_handle as *const Arc) }; diff --git a/rust/runtime/tests/build_model.py b/rust/runtime/tests/build_model.py index e3da95f24fd8..bed3c0aa2da8 100755 --- a/rust/runtime/tests/build_model.py +++ b/rust/runtime/tests/build_model.py @@ -16,37 +16,56 @@ # specific language governing permissions and limitations # under the License. 
-"""Builds a simple graph for testing.""" +"""Builds a simple NNVM graph for testing.""" from os import path as osp +import nnvm +from nnvm import sym +from nnvm.compiler import graph_util +from nnvm.testing import init import numpy as np import tvm -from tvm import relay -from tvm.relay import testing CWD = osp.dirname(osp.abspath(osp.expanduser(__file__))) + def _get_model(dshape): - data = relay.var('data', shape=dshape) - fc = relay.nn.dense(data, relay.var("dense_weight"), units=dshape[-1]*2) - fc = relay.nn.bias_add(data, relay.var("dense_bias")) - left, right = relay.split(fc, indices_or_sections=2, axis=1) - one = relay.const(1, dtype="float32") - return relay.Tuple([(left + one), (right - one), fc]) + data = sym.Variable('data', shape=dshape) + fc1 = sym.dense(data, units=dshape[-1]*2, use_bias=True) + left, right = sym.split(fc1, indices_or_sections=2, axis=1) + return sym.Group(((left + 1), (right - 1))) + +def _init_params(graph, input_shapes, initializer=init.Xavier(), seed=10): + if isinstance(graph, sym.Symbol): + graph = nnvm.graph.create(graph) + ishapes, _ = graph_util.infer_shape(graph, **input_shapes) + param_shapes = dict(zip(graph.index.input_names, ishapes)) + np.random.seed(seed) + params = {} + for param, shape in param_shapes.items(): + if param in {'data', 'label'} or not shape: + continue + init_value = np.empty(shape).astype('float32') + initializer(param, init_value) + params[param] = tvm.nd.array(init_value) + return params def main(): dshape = (32, 16) net = _get_model(dshape) - mod, params = testing.create_workload(net) - graph, lib, params = relay.build( - mod, 'llvm', params=params) + ishape_dict = {'data': dshape} + params = _init_params(net, ishape_dict) + graph, lib, params = nnvm.compiler.build(net, 'llvm', + shape=ishape_dict, + params=params, + dtype='float32') with open(osp.join(CWD, 'graph.json'), 'w') as f_resnet: - f_resnet.write(graph) + f_resnet.write(graph.json()) with open(osp.join(CWD, 'graph.params'), 'wb') as f_params: - f_params.write(relay.save_param_dict(params)) + f_params.write(nnvm.compiler.save_param_dict(params)) if __name__ == '__main__': main() diff --git a/rust/runtime/tests/test_nn/src/build_test_graph.py b/rust/runtime/tests/test_nn/src/build_test_graph.py deleted file mode 100755 index dd7621b921f7..000000000000 --- a/rust/runtime/tests/test_nn/src/build_test_graph.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Builds a simple graph for testing.""" - -from os import path as osp -import sys - -import numpy as np -import tvm -from tvm import relay -from tvm.relay import testing - - -def _get_model(dshape): - data = relay.var('data', shape=dshape) - fc = relay.nn.dense(data, relay.var("dense_weight"), units=dshape[-1]*2) - fc = relay.nn.bias_add(data, relay.var("dense_bias")) - left, right = relay.split(fc, indices_or_sections=2, axis=1) - one = relay.const(1, dtype="float32") - return relay.Tuple([(left + one), (right - one), fc]) - -def main(): - dshape = (4, 8) - net = _get_model(dshape) - mod, params = testing.create_workload(net) - graph, lib, params = relay.build( - mod, 'llvm --system-lib', params=params) - - out_dir = sys.argv[1] - lib.save(osp.join(sys.argv[1], 'graph.o')) - with open(osp.join(out_dir, 'graph.json'), 'w') as f_resnet: - f_resnet.write(graph) - - with open(osp.join(out_dir, 'graph.params'), 'wb') as f_params: - f_params.write(relay.save_param_dict(params)) - -if __name__ == '__main__': - main() diff --git a/rust/runtime/tests/test_nn/Cargo.toml b/rust/runtime/tests/test_nnvm/Cargo.toml similarity index 98% rename from rust/runtime/tests/test_nn/Cargo.toml rename to rust/runtime/tests/test_nnvm/Cargo.toml index afd218817104..93fdef4f61c3 100644 --- a/rust/runtime/tests/test_nn/Cargo.toml +++ b/rust/runtime/tests/test_nnvm/Cargo.toml @@ -16,7 +16,7 @@ # under the License. [package] -name = "test-nn" +name = "test-nnvm" version = "0.0.0" license = "Apache-2.0" authors = ["TVM Contributors"] diff --git a/rust/runtime/tests/test_nn/build.rs b/rust/runtime/tests/test_nnvm/build.rs similarity index 100% rename from rust/runtime/tests/test_nn/build.rs rename to rust/runtime/tests/test_nnvm/build.rs diff --git a/rust/runtime/tests/test_nnvm/src/build_test_graph.py b/rust/runtime/tests/test_nnvm/src/build_test_graph.py new file mode 100755 index 000000000000..69ec6d24dbef --- /dev/null +++ b/rust/runtime/tests/test_nnvm/src/build_test_graph.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Builds a simple NNVM graph for testing.""" + +from os import path as osp +import sys + +import nnvm +from nnvm import sym +from nnvm.compiler import graph_util +from nnvm.testing import init +import numpy as np +import tvm + + +def _get_model(dshape): + data = sym.Variable('data', shape=dshape) + fc = sym.dense(data, units=dshape[-1]*2, use_bias=True) + left, right = sym.split(fc, indices_or_sections=2, axis=1) + return sym.Group(((left + 1), (right - 1), fc)) + + +def _init_params(graph, input_shapes, initializer=init.Xavier(), seed=10): + if isinstance(graph, sym.Symbol): + graph = nnvm.graph.create(graph) + + ishapes, _ = graph_util.infer_shape(graph, **input_shapes) + param_shapes = dict(zip(graph.index.input_names, ishapes)) + np.random.seed(seed) + params = {} + for param, shape in param_shapes.items(): + if param in {'data', 'label'} or not shape: + continue + + init_value = np.arange(np.product(shape), 0, -1).reshape(*shape).astype('float32') + if param.endswith('_bias'): + params[param] = tvm.nd.array(init_value) + continue + + init_value = np.empty(shape).astype('float32') + initializer(param, init_value) + # init_value /= init_value.sum() + 1e-10 + params[param] = tvm.nd.array(init_value) + + return params + +def main(): + dshape = (4, 8) + net = _get_model(dshape) + ishape_dict = {'data': dshape} + params = _init_params(net, ishape_dict) + graph, lib, params = nnvm.compiler.build(net, 'llvm --system-lib', + shape=ishape_dict, + params=params, + dtype='float32') + + out_dir = sys.argv[1] + lib.save(osp.join(sys.argv[1], 'graph.o')) + with open(osp.join(out_dir, 'graph.json'), 'w') as f_resnet: + f_resnet.write(graph.json()) + + with open(osp.join(out_dir, 'graph.params'), 'wb') as f_params: + f_params.write(nnvm.compiler.save_param_dict(params)) + +if __name__ == '__main__': + main() diff --git a/rust/runtime/tests/test_nn/src/main.rs b/rust/runtime/tests/test_nnvm/src/main.rs similarity index 100% rename from rust/runtime/tests/test_nn/src/main.rs rename to rust/runtime/tests/test_nnvm/src/main.rs diff --git a/src/api/api_ir.cc b/src/api/api_ir.cc index 03f37b171782..9312c5532302 100644 --- a/src/api/api_ir.cc +++ b/src/api/api_ir.cc @@ -30,7 +30,7 @@ namespace tvm { namespace ir { TVM_REGISTER_API("_Var") -.set_body_typed([](std::string s, DataType t) { +.set_body_typed([](std::string s, Type t) { return Variable::make(t, s); }); @@ -75,7 +75,7 @@ TVM_REGISTER_API("make.For") TVM_REGISTER_API("make.Load") .set_body([](TVMArgs args, TVMRetValue *ret) { - DataType t = args[0]; + Type t = args[0]; if (args.size() == 3) { *ret = Load::make(t, args[1], args[2], const_true(t.lanes())); } else { @@ -87,7 +87,7 @@ TVM_REGISTER_API("make.Store") .set_body([](TVMArgs args, TVMRetValue *ret) { Expr value = args[1]; if (args.size() == 3) { - *ret = Store::make(args[0], value, args[2], const_true(value.dtype().lanes())); + *ret = Store::make(args[0], value, args[2], const_true(value.type().lanes())); } else { *ret = Store::make(args[0], value, args[2], args[3]); } @@ -97,8 +97,8 @@ TVM_REGISTER_API("make.Realize") .set_body_typed(Realize::make); TVM_REGISTER_API("make.Call") -.set_body_typed, int, FunctionRef, int)>([]( - DataType type, std::string name, +.set_body_typed, int, FunctionRef, int)>([]( + Type type, std::string name, Array args, int call_type, FunctionRef func, int value_index ) { @@ -166,8 +166,8 @@ TVM_REGISTER_API("make.Block") // has default args TVM_REGISTER_API("make.Allocate") - .set_body_typed, Expr, Stmt)>([]( - VarExpr buffer_var, DataType type, Array extents, 
Expr condition, Stmt body + .set_body_typed, Expr, Stmt)>([]( + VarExpr buffer_var, Type type, Array extents, Expr condition, Stmt body ){ return Allocate::make(buffer_var, type, extents, condition, body); }); diff --git a/src/api/api_lang.cc b/src/api/api_lang.cc index 8a74fe5cdb7d..f3d6c5f6ab62 100644 --- a/src/api/api_lang.cc +++ b/src/api/api_lang.cc @@ -35,10 +35,10 @@ namespace tvm { TVM_REGISTER_API("_min_value") -.set_body_typed(min_value); +.set_body_method(&DataType::min); TVM_REGISTER_API("_max_value") -.set_body_typed(max_value); +.set_body_method(&DataType::max); TVM_REGISTER_API("_const") .set_body([](TVMArgs args, TVMRetValue* ret) { @@ -67,7 +67,7 @@ TVM_REGISTER_API("_Array") } auto node = make_node(); node->data = std::move(data); - *ret = Array(node); + *ret = runtime::ObjectRef(node); }); TVM_REGISTER_API("_ArrayGetItem") @@ -100,28 +100,28 @@ TVM_REGISTER_API("_Map") for (int i = 0; i < args.num_args; i += 2) { CHECK(args[i].type_code() == kStr) << "key of str map need to be str"; - CHECK(args[i + 1].IsObjectRef()) + CHECK(args[i + 1].type_code() == kObjectHandle) << "value of the map to be NodeRef"; data.emplace(std::make_pair(args[i].operator std::string(), args[i + 1].operator ObjectRef())); } auto node = make_node(); node->data = std::move(data); - *ret = Map(node); + *ret = node; } else { // Container node. MapNode::ContainerType data; for (int i = 0; i < args.num_args; i += 2) { - CHECK(args[i].IsObjectRef()) - << "key of str map need to be object"; - CHECK(args[i + 1].IsObjectRef()) + CHECK(args[i].type_code() == kObjectHandle) + << "key of str map need to be str"; + CHECK(args[i + 1].type_code() == kObjectHandle) << "value of map to be NodeRef"; data.emplace(std::make_pair(args[i].operator ObjectRef(), args[i + 1].operator ObjectRef())); } auto node = make_node(); node->data = std::move(data); - *ret = Map(node); + *ret = node; } }); @@ -191,7 +191,7 @@ TVM_REGISTER_API("_MapItems") rkvs->data.push_back(kv.first); rkvs->data.push_back(kv.second); } - *ret = Array(rkvs); + *ret = rkvs; } else { auto* n = static_cast(ptr); auto rkvs = make_node(); @@ -199,7 +199,7 @@ TVM_REGISTER_API("_MapItems") rkvs->data.push_back(ir::StringImm::make(kv.first)); rkvs->data.push_back(kv.second); } - *ret = Array(rkvs); + *ret = rkvs; } }); @@ -287,8 +287,8 @@ TVM_REGISTER_API("_TensorHash") }); TVM_REGISTER_API("_Placeholder") -.set_body_typed, DataType, std::string)>([]( - Array shape, DataType dtype, std::string name +.set_body_typed, Type, std::string)>([]( + Array shape, Type dtype, std::string name ) { return placeholder(shape, dtype, name); }); diff --git a/src/api/api_pass.cc b/src/api/api_pass.cc index c62cc8ad16a0..4210788d52b5 100644 --- a/src/api/api_pass.cc +++ b/src/api/api_pass.cc @@ -159,7 +159,9 @@ REGISTER_PASS(InjectPrefetch); REGISTER_PASS(InjectDoubleBuffer); REGISTER_PASS(LoopPartition); REGISTER_PASS(RemoveNoOp); +REGISTER_PASS(SplitPipeline); REGISTER_PASS(LiftAttrScope); +REGISTER_PASS(NarrowChannelAccess); REGISTER_PASS(LowerThreadAllreduce); REGISTER_PASS(LowerWarpMemory); REGISTER_PASS(RemapThreadAxis); diff --git a/src/arithmetic/bound_deducer.cc b/src/arithmetic/bound_deducer.cc index 19f045241915..31fedcc72cde 100644 --- a/src/arithmetic/bound_deducer.cc +++ b/src/arithmetic/bound_deducer.cc @@ -132,7 +132,7 @@ class BoundDeducer: public IRVisitor { Expr target_var = left ? 
op->a : op->b; SignType sign_operand; - if (operand.dtype().is_uint()) { + if (operand.type().is_uint()) { sign_operand = kPositive; } else { sign_operand = expr_map_[operand].sign_type(); diff --git a/src/arithmetic/canonical_simplify.cc b/src/arithmetic/canonical_simplify.cc index 022dd8e94dbb..1b576a645824 100644 --- a/src/arithmetic/canonical_simplify.cc +++ b/src/arithmetic/canonical_simplify.cc @@ -115,7 +115,7 @@ class SplitExprNode : public CanonicalExprNode { Expr NormalizeWithScale(int64_t sscale) const { Expr res = this->index; - DataType dtype = this->dtype; + Type dtype = this->type; if (this->scale == 0) { return make_const(dtype, 0); } @@ -190,9 +190,9 @@ class SumExprNode : public CanonicalExprNode { Expr Normalize() const final { // quick path 1. if (this->args.size() == 0) { - return make_const(this->dtype, this->base); + return make_const(this->type, this->base); } - return Normalize_(this->dtype, + return Normalize_(this->type, SimplifySplitExprs(args), base); } @@ -379,7 +379,7 @@ class SumExprNode : public CanonicalExprNode { std::stable_sort(args.begin(), args.end(), fcompare); return args; } - static Expr Normalize_(DataType dtype, + static Expr Normalize_(Type dtype, const std::vector& args, int64_t base) { // Positive scales first @@ -508,7 +508,7 @@ class CanonicalSimplifier::Impl : public RewriteSimplifier::Impl { expr = op->Normalize(); } NodePtr n = make_node(); - n->dtype = expr.dtype(); + n->type = expr.type(); n->index = std::move(expr); n->div_mode = kTruncDiv; return SplitExpr(n); @@ -545,7 +545,7 @@ class CanonicalSimplifier::Impl : public RewriteSimplifier::Impl { return GetRef(op); } NodePtr n = make_node(); - n->dtype = expr.dtype(); + n->type = expr.type(); if (const auto* op = expr.as()) { n->base = op->value; return SumExpr(n); @@ -560,7 +560,7 @@ class CanonicalSimplifier::Impl : public RewriteSimplifier::Impl { Expr CanonicalSimplifier::Impl:: Mutate_(const Add* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } // normalize @@ -586,7 +586,7 @@ Mutate_(const Add* op, const Expr& self) { Expr CanonicalSimplifier::Impl:: Mutate_(const Sub* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } // normalize @@ -613,7 +613,7 @@ Mutate_(const Sub* op, const Expr& self) { Expr CanonicalSimplifier::Impl:: Mutate_(const Mul* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } // normalize @@ -657,8 +657,8 @@ SeparateDivisibleParts(const SumExprNode* psum, SumExpr* out_non_divisible) { auto divisible = make_node(); auto non_divisible = make_node(); - divisible->dtype = psum->dtype; - non_divisible->dtype = psum->dtype; + divisible->type = psum->type; + non_divisible->type = psum->type; if (psum->base % coeff == 0) { divisible->base = psum->base; @@ -698,11 +698,11 @@ SplitDivConst(SplitExpr lhs, int64_t cval, DivMode div_mode) { return lhs; } else if (lhs->upper_factor <= (lhs->lower_factor * scaled_cval)) { // (x % c1) / c2 => 0 when c2 >= c1 - return ToSplitExpr(make_zero(lhs.dtype())); + return ToSplitExpr(make_zero(lhs.type())); } else { // move the upper_factor modular into index. 
lhs.CopyOnWrite()->index = - ModImpl(lhs->index, make_const(lhs.dtype(), lhs->upper_factor), div_mode); + ModImpl(lhs->index, make_const(lhs.type(), lhs->upper_factor), div_mode); lhs.CopyOnWrite()->upper_factor = SplitExprNode::kPosInf; lhs.CopyOnWrite()->scale = 1; lhs.CopyOnWrite()->lower_factor *= scaled_cval; @@ -720,7 +720,7 @@ SplitDivConst(SplitExpr lhs, int64_t cval, DivMode div_mode) { Expr CanonicalSimplifier::Impl:: Mutate_(const Div* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } @@ -764,7 +764,7 @@ Mutate_(const Div* op, const Expr& self) { // if a >= 0 && a < cval, then result == 0 auto cbound = analyzer_->const_int_bound(Normalize(a)); if (cbound->min_value >= 0 && cbound->max_value < cval) { - return make_zero(a.dtype()); + return make_zero(a.type()); } } return SplitDivConst(ToSplitExpr(std::move(a)), cval, kTruncDiv); @@ -781,7 +781,7 @@ Mutate_(const Div* op, const Expr& self) { Expr CanonicalSimplifier::Impl:: Mutate_(const FloorDiv* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } Expr a = this->CanonicalMutate(op->a); @@ -820,7 +820,7 @@ Mutate_(const FloorDiv* op, const Expr& self) { // if a >= 0 && a < cval, then result == 0 auto cbound = analyzer_->const_int_bound(Normalize(a)); if (cbound->min_value >= 0 && cbound->max_value < cval) { - return make_zero(a.dtype()); + return make_zero(a.type()); } } return SplitDivConst(ToSplitExpr(std::move(a)), cval, kFloorDiv); @@ -859,7 +859,7 @@ SplitModConst(SplitExpr lhs, int64_t cval, DivMode div_mode) { if (new_upper_factor < lhs->upper_factor && lhs->upper_factor != SplitExprNode::kPosInf) { auto updated = ToSplitExpr(Mutate(ModImpl( - lhs->index, make_const(lhs.dtype(), new_upper_factor), div_mode))); + lhs->index, make_const(lhs.type(), new_upper_factor), div_mode))); // re-apply the lower_factor if (lhs->lower_factor != 1) { return SplitDivConst(updated, lhs->lower_factor, div_mode); @@ -887,7 +887,7 @@ SplitModConst(SplitExpr lhs, int64_t cval, DivMode div_mode) { Expr CanonicalSimplifier::Impl:: Mutate_(const Mod* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } // normalize @@ -906,7 +906,7 @@ Mutate_(const Mod* op, const Expr& self) { SumExpr lhs, extra; SeparateDivisibleParts(psum, cval, &lhs, &extra); if (extra->IsZero()) { - return make_zero(a.dtype()); + return make_zero(a.type()); } // both lhs and extra are non-negative if (analyzer_->CanProveGreaterEqual(lhs->Normalize(), 0) && @@ -957,7 +957,7 @@ Mutate_(const Mod* op, const Expr& self) { Expr CanonicalSimplifier::Impl:: Mutate_(const FloorMod* op, const Expr& self) { - if (!IsIndexType(op->dtype)) { + if (!IsIndexType(op->type)) { return Rewriter::Mutate_(op, self); } // normalize diff --git a/src/arithmetic/compute_expr.h b/src/arithmetic/compute_expr.h index 806587ab75aa..4b001cfb8610 100644 --- a/src/arithmetic/compute_expr.h +++ b/src/arithmetic/compute_expr.h @@ -56,7 +56,7 @@ inline Expr ComputeReduce( const Array& values, Expr empty_value); inline bool GetConst(Expr e, int64_t* out) { - if (e.dtype().is_vector()) return false; + if (e.type().is_vector()) return false; const int64_t* v = as_const_int(e); if (v) { *out = *v; return true; diff --git a/src/arithmetic/const_fold.h b/src/arithmetic/const_fold.h index 93bf708a113f..86f1927f2abe 100644 --- a/src/arithmetic/const_fold.h +++ b/src/arithmetic/const_fold.h @@ 
-70,7 +70,7 @@ inline Expr TryConstFold(Expr a); * \param type The type to represent index. * \return the checked result. */ -inline bool IsIndexType(const DataType& type) { +inline bool IsIndexType(const Type& type) { return type.is_int() && type.lanes() == 1 && (type.bits() == 32 || type.bits() == 64); } @@ -92,8 +92,8 @@ inline bool IsIndexType(const DataType& type) { using ir::UIntImm; \ const IntImm* pa = a.as(); \ const IntImm* pb = b.as(); \ - const DataType& ta = a.dtype(); \ - const DataType& tb = b.dtype(); \ + const Type& ta = a.type(); \ + const Type& tb = b.type(); \ if (arith::IsIndexType(ta) && arith::IsIndexType(tb)) { \ BODY; \ } \ @@ -103,7 +103,7 @@ inline bool IsIndexType(const DataType& type) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, pa->value + pb->value); if (pa && pa->value == 0) return b; if (pb && pb->value == 0) return a; @@ -117,7 +117,7 @@ inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, pa->value - pb->value); if (pb && pb->value == 0) return a; if (fa && fb) return FloatImm::make(rtype, fa->value - fb->value); @@ -129,7 +129,7 @@ inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, pa->value * pb->value); if (pa) { if (pa->value == 1) return b; @@ -155,7 +155,7 @@ inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) { // because division and mod can have different modes // NOTE: this assumes trunc div.
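As a reader aid (not part of the patch): the `TryConstFold` specializations touched above fold arithmetic on two constant operands at expression-construction time, while one-constant identities such as `x + 0` are left to the rewrite simplifier. A minimal sketch of both behaviors from Python, assuming the pre-unified-IR API of this branch (`tvm.var`, `tvm.const`, `tvm.arith.Analyzer`):

```python
import tvm

x = tvm.var("x", dtype="int32")

# Two constant operands fold eagerly via TryConstFold<Mul>:
# this should print a single IntImm rather than a Mul node.
print(tvm.const(6, "int32") * tvm.const(7, "int32"))

# One-constant identities go through the rewrite simplifier instead.
analyzer = tvm.arith.Analyzer()
print(analyzer.rewrite_simplify(x + 0))        # -> x
print(analyzer.rewrite_simplify((x * 4) / 2))  # -> x*2 (exact under trunc div)
```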
@@ -184,7 +184,7 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) { return IntImm::make(rtype, pa->value % pb->value); } @@ -202,7 +202,7 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) { CHECK_NE(pb->value, 0) << "Divide by zero"; return IntImm::make(rtype, arith::floordiv(pa->value, pb->value)); @@ -229,7 +229,7 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) { return IntImm::make(rtype, arith::floormod(pa->value, pb->value)); } @@ -247,7 +247,7 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, std::min(pa->value, pb->value)); if (fa && fb) return FloatImm::make(rtype, std::min(fa->value, fb->value)); }); @@ -258,7 +258,7 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, std::max(pa->value, pb->value)); if (fa && fb) return FloatImm::make(rtype, std::max(fa->value, fb->value)); }); @@ -269,8 +269,8 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - if (pa && pb) return UIntImm::make(DataType::UInt(1), pa->value > pb->value); - if (fa && fb) return UIntImm::make(DataType::UInt(1), fa->value > fb->value); + if (pa && pb) return UIntImm::make(UInt(1), pa->value > pb->value); + if (fa && fb) return UIntImm::make(UInt(1), fa->value > fb->value); }); return Expr(); } @@ -278,8 +278,8 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - if (pa && pb) return UIntImm::make(DataType::UInt(1), pa->value >= pb->value); - if (fa && fb) return UIntImm::make(DataType::UInt(1), fa->value >= fb->value); + if (pa && pb) return UIntImm::make(UInt(1), pa->value >= pb->value); + if (fa && fb) return UIntImm::make(UInt(1), fa->value >= fb->value); }); return Expr(); } @@ -287,8 +287,8 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - if (pa && pb) return UIntImm::make(DataType::UInt(1), pa->value < pb->value); - if (fa && fb) return UIntImm::make(DataType::UInt(1), fa->value < fb->value); + if (pa && pb) return UIntImm::make(UInt(1), pa->value < pb->value); + if (fa && fb) return UIntImm::make(UInt(1), fa->value < fb->value); }); return Expr(); } @@ -296,8 +296,8 @@ inline Expr TryConstFold(Expr a, Expr b) { template<> inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - if (pa && pb) return UIntImm::make(DataType::UInt(1), pa->value <= pb->value); - if (fa && fb) return UIntImm::make(DataType::UInt(1), fa->value <= fb->value); + if (pa && pb) return UIntImm::make(UInt(1), pa->value <= pb->value); + if (fa && fb) return UIntImm::make(UInt(1), fa->value <= fb->value); }); return Expr(); } @@ 
-305,8 +305,8 @@ inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - if (pa && pb) return UIntImm::make(DataType::UInt(1), pa->value == pb->value); - if (fa && fb) return UIntImm::make(DataType::UInt(1), fa->value == fb->value); + if (pa && pb) return UIntImm::make(UInt(1), pa->value == pb->value); + if (fa && fb) return UIntImm::make(UInt(1), fa->value == fb->value); }); return Expr(); } @@ -314,8 +314,8 @@ inline Expr TryConstFold(Expr a, Expr b) { TVM_ARITH_CONST_PROPAGATION({ - if (pa && pb) return UIntImm::make(DataType::UInt(1), pa->value != pb->value); - if (fa && fb) return UIntImm::make(DataType::UInt(1), fa->value != fb->value); + if (pa && pb) return UIntImm::make(UInt(1), pa->value != pb->value); + if (fa && fb) return UIntImm::make(UInt(1), fa->value != fb->value); }); return Expr(); } @@ -349,7 +349,7 @@ inline Expr TryConstFold(Expr a) { using ir::UIntImm; const UIntImm* pa = a.as(); if (pa) { - return UIntImm::make(DataType::UInt(1), !(pa->value)); + return UIntImm::make(UInt(1), !(pa->value)); } return Expr(); } diff --git a/src/arithmetic/const_int_bound.cc b/src/arithmetic/const_int_bound.cc index c0519107d5b8..6e119695a8c8 100644 --- a/src/arithmetic/const_int_bound.cc +++ b/src/arithmetic/const_int_bound.cc @@ -125,7 +125,7 @@ class ConstIntBoundAnalyzer::Impl : // Override visitor behaviors Entry VisitExprDefault_(const Node* op) final { return Everything( - static_cast(op)->dtype); + static_cast(op)->type); } Entry VisitExpr(const Expr& expr) final { @@ -142,7 +142,7 @@ class ConstIntBoundAnalyzer::Impl : Entry VisitExpr_(const Cast* op) final { Entry a = VisitExpr(op->value); - Entry b = Everything(op->dtype); + Entry b = Everything(op->type); return Intersect(a, b); } @@ -154,7 +154,7 @@ class ConstIntBoundAnalyzer::Impl : if (op->value <= static_cast(kPosInf)) { return MakeBound(op->value, op->value); } else { - return Everything(op->dtype); + return Everything(op->type); } } @@ -211,7 +211,7 @@ class ConstIntBoundAnalyzer::Impl : CHECK(!b.is_const(0)) << "mod by zero"; // mod by negative value is rare, // and we just use the simplest rule. - return Everything(op->dtype); + return Everything(op->type); } } @@ -242,7 +242,7 @@ class ConstIntBoundAnalyzer::Impl : CHECK(!b.is_const(0)) << "floormod by zero"; // mod by negative value is rare, // and we just use the simplest rule. - return Everything(op->dtype); + return Everything(op->type); } } @@ -278,7 +278,7 @@ class ConstIntBoundAnalyzer::Impl : } else if (op->is_intrinsic(Call::bitwise_and)) { return VisitBitwiseAnd(op); } else { - return Everything(op->dtype); + return Everything(op->type); } } @@ -288,7 +288,7 @@ class ConstIntBoundAnalyzer::Impl : if (it != var_map_.end()) { return it->second; } else { - return Everything(op->dtype); + return Everything(op->type); } } @@ -311,7 +311,7 @@ class ConstIntBoundAnalyzer::Impl : if (a.min_value >= 0) { return MakeBound(0, a.max_value); } - return Everything(op->dtype); + return Everything(op->type); } } @@ -466,7 +466,7 @@ class ConstIntBoundAnalyzer::Impl : * \param dtype The data type. * \return Bound that represents everything dtype can represent.
*/ - static Entry Everything(DataType dtype) { + static Entry Everything(Type dtype) { if (!dtype.is_int() && !dtype.is_uint()) { return MakeBound(kNegInf, kPosInf); } diff --git a/src/arithmetic/detect_linear_equation.cc b/src/arithmetic/detect_linear_equation.cc index cf37545502ba..8c7f4f2bb738 100644 --- a/src/arithmetic/detect_linear_equation.cc +++ b/src/arithmetic/detect_linear_equation.cc @@ -53,10 +53,10 @@ class LinearEqDetector *ret = VisitExpr(e, e); if (fail_) return false; if (!ret->base.defined()) { - ret->base = make_zero(var_.dtype()); + ret->base = make_zero(var_.type()); } if (!ret->coeff.defined()) { - ret->coeff = make_zero(var_.dtype()); + ret->coeff = make_zero(var_.type()); } return true; } @@ -100,7 +100,7 @@ class LinearEqDetector LinearEqEntry VisitExpr_(const Variable* op, const Expr& e) final { LinearEqEntry ret; if (op == var_.get()) { - ret.coeff = make_const(op->dtype, 1); + ret.coeff = make_const(op->type, 1); } else { ret.base = e; } @@ -190,16 +190,16 @@ bool DetectClipBound( // canonical form: exp >= 0 Expr canonical; if (const LT* op = cond.as()) { - if (!op->a.dtype().is_int()) return false; - canonical = op->b - op->a - make_const(op->a.dtype(), 1); + if (!op->a.type().is_int()) return false; + canonical = op->b - op->a - make_const(op->a.type(), 1); } else if (const LE* op = cond.as()) { - if (!op->a.dtype().is_int()) return false; + if (!op->a.type().is_int()) return false; canonical = op->b - op->a; } else if (const GT* op = cond.as()) { - if (!op->a.dtype().is_int()) return false; - canonical = op->a - op->b - make_const(op->a.dtype(), 1); + if (!op->a.type().is_int()) return false; + canonical = op->a - op->b - make_const(op->a.type(), 1); } else if (const GE* op = cond.as()) { - if (!op->a.dtype().is_int()) return false; + if (!op->a.type().is_int()) return false; canonical = op->a - op->b; } else { return false; diff --git a/src/arithmetic/domain_touched.cc b/src/arithmetic/domain_touched.cc index 947f0050c6cb..c28346ed2e33 100644 --- a/src/arithmetic/domain_touched.cc +++ b/src/arithmetic/domain_touched.cc @@ -72,7 +72,7 @@ class FuncTouchedDomain final : public IRVisitor { const IterVarNode* thread_axis = op->node.as(); CHECK(thread_axis); const Variable* var = thread_axis->var.get(); - dom_map_[var] = IntSet::range(Range(make_zero(op->value.dtype()), op->value)); + dom_map_[var] = IntSet::range(Range(make_zero(op->value.type()), op->value)); IRVisitor::Visit_(op); dom_map_.erase(var); } else { diff --git a/src/arithmetic/int_set.cc b/src/arithmetic/int_set.cc index e4f2042a19d7..9f8effb6c612 100644 --- a/src/arithmetic/int_set.cc +++ b/src/arithmetic/int_set.cc @@ -33,8 +33,8 @@ namespace tvm { namespace arith { -Expr SymbolicLimits::pos_inf_ = Var("pos_inf", DataType::Handle()); -Expr SymbolicLimits::neg_inf_ = Var("neg_inf", DataType::Handle()); +Expr SymbolicLimits::pos_inf_ = Var("pos_inf", Handle()); +Expr SymbolicLimits::neg_inf_ = Var("neg_inf", Handle()); IntervalSet::IntervalSet(Expr min_value, Expr max_value) { auto node = make_node(); @@ -54,8 +54,8 @@ TVM_REGISTER_API("arith._make_IntervalSet") IntervalSet Intersect(Analyzer* analyzer, IntervalSet a, IntervalSet b) { Expr max_value = min(a->max_value, b->max_value); Expr min_value = max(a->min_value, b->min_value); - if ((max_value.dtype().is_int() || max_value.dtype().is_uint()) && - (min_value.dtype().is_int() || min_value.dtype().is_uint()) && + if ((max_value.type().is_int() || max_value.type().is_uint()) && + (min_value.type().is_int() || min_value.type().is_uint()) && 
analyzer->CanProveGreaterEqual(min_value - max_value, 1)) { return IntervalSet::Empty(); } else { @@ -105,8 +105,8 @@ inline IntervalSet Combine(Analyzer* analyzer, return IntervalSet::SinglePoint(res); } if (is_logical_op::value) { - return IntervalSet(make_const(a->min_value.dtype(), 0), - make_const(a->min_value.dtype(), 1)); + return IntervalSet(make_const(a->min_value.type(), 0), + make_const(a->min_value.type(), 1)); } if (a->IsEmpty()) return a; if (b->IsEmpty()) return b; @@ -177,7 +177,7 @@ inline IntervalSet Combine(Analyzer* analyzer, return IntervalSet(min_value, max_value); } else if (a->HasUpperBound() && a->HasLowerBound()) { using ir::Select; - Expr sign = b->min_value >= make_zero(b->min_value.dtype().element_of()); + Expr sign = b->min_value >= make_zero(b->min_value.type().element_of()); Expr e1 = a->min_value * b->min_value; Expr e2 = a->max_value * b->min_value; return IntervalSet(Select::make(sign, e1, e2), Select::make(sign, e2, e1)); @@ -212,7 +212,7 @@ inline IntervalSet Combine(Analyzer* analyzer, return IntervalSet(min_value, max_value); } else if (a->HasUpperBound() && a->HasLowerBound()) { using ir::Select; - Expr sign = b->min_value >= make_zero(b->min_value.dtype().element_of()); + Expr sign = b->min_value >= make_zero(b->min_value.type().element_of()); Expr e1 = a->min_value / b->min_value; Expr e2 = a->max_value / b->min_value; return IntervalSet(Select::make(sign, e1, e2), Select::make(sign, e2, e1)); @@ -242,7 +242,7 @@ inline IntervalSet Combine(Analyzer* analyzer, // is the case of our application. // TODO(tqchen): add bound constraints for a. if (analyzer->CanProveGreaterEqual(divisor, 0)) { - return IntervalSet(make_zero(divisor.dtype()), divisor - 1); + return IntervalSet(make_zero(divisor.type()), divisor - 1); } else { Expr bound = abs(divisor) - 1; return IntervalSet(-bound, bound); @@ -278,7 +278,7 @@ inline IntervalSet Combine(Analyzer* analyzer, return IntervalSet(min_value, max_value); } else if (a->HasUpperBound() && a->HasLowerBound()) { using ir::Select; - Expr sign = b->min_value >= make_zero(b->min_value.dtype().element_of()); + Expr sign = b->min_value >= make_zero(b->min_value.type().element_of()); Expr e1 = floordiv(a->min_value, b->min_value); Expr e2 = floordiv(a->max_value, b->min_value); return IntervalSet(Select::make(sign, e1, e2), Select::make(sign, e2, e1)); @@ -304,7 +304,7 @@ inline IntervalSet Combine(Analyzer* analyzer, LOG(FATAL) << "Modular by zero in CombineInterval Mod"; } if (analyzer->CanProveGreaterEqual(divisor, 0)) { - return IntervalSet(make_zero(divisor.dtype()), divisor - 1); + return IntervalSet(make_zero(divisor.type()), divisor - 1); } else { Expr bound = abs(divisor) - 1; return IntervalSet(-bound, bound); @@ -476,7 +476,7 @@ class IntervalSetEvaluator : IntervalSet base = Eval(op->base); PVar stride; if (stride.Match(op->stride)) { - DataType t = op->base.dtype(); + Type t = op->base.type(); int64_t vstride = stride.Eval()->value; if (vstride> 0) { return Combine( diff --git a/src/arithmetic/ir_mutator_with_analyzer.cc b/src/arithmetic/ir_mutator_with_analyzer.cc index 0d4b8f26b18b..cda9d585ace1 100644 --- a/src/arithmetic/ir_mutator_with_analyzer.cc +++ b/src/arithmetic/ir_mutator_with_analyzer.cc @@ -140,7 +140,7 @@ Mutate_(const Call* op, const Expr& self) { false_value.same_as(op->args[2])) { return self; } else { - return Call::make(op->dtype, op->name, + return Call::make(op->type, op->name, {cond, true_value, false_value}, op->call_type); } diff --git a/src/arithmetic/pattern_match.h 
b/src/arithmetic/pattern_match.h index fd07a377e955..f7d5483cf6de 100644 --- a/src/arithmetic/pattern_match.h +++ b/src/arithmetic/pattern_match.h @@ -291,7 +291,7 @@ class PConstWithTypeLike : } Expr Eval() const { - return make_const(ref_.Eval().dtype(), value_); + return make_const(ref_.Eval().type(), value_); } private: @@ -474,7 +474,7 @@ class PCastExpr : bool Match_(const NodeRef& node) const { if (const ir::Cast* ptr = node.as()) { - if (!dtype_.Match_(ptr->dtype)) return false; + if (!dtype_.Match_(ptr->type)) return false; if (!value_.Match_(ptr->value)) return false; return true; } else { @@ -730,7 +730,7 @@ class PCallExpr : #define TVM_PATTERN_BINARY_INTRIN(FuncName, OpName, IntrinStr) \ struct OpName { \ static Expr Eval(Array args) { \ - return ir::Call::make(args[0].dtype(), kName, args, \ + return ir::Call::make(args[0].type(), kName, args, \ ir::Call::PureIntrinsic); \ } \ static constexpr const char* kName = IntrinStr; \ @@ -751,7 +751,7 @@ TVM_PATTERN_BINARY_INTRIN(operator^, PBitwiseXorOp, "bitwise_xor"); #define TVM_PATTERN_UNARY_INTRIN(FuncName, OpName, IntrinStr) \ struct OpName { \ static Expr Eval(Array args) { \ - return ir::Call::make(args[0].dtype(), kName, args, \ + return ir::Call::make(args[0].type(), kName, args, \ ir::Call::PureIntrinsic); \ } \ static constexpr const char* kName = IntrinStr; \ @@ -768,7 +768,7 @@ TVM_PATTERN_UNARY_INTRIN(operator~, PBitwiseNotOp, "bitwise_not"); struct PIfThenElseOp { static Expr Eval(Array args) { return ir::Call::make( - args[1].dtype(), kName, args, + args[1].type(), kName, args, ir::Call::PureIntrinsic); } static constexpr const char* kName = "tvm_if_then_else"; diff --git a/src/arithmetic/rewrite_simplify.cc b/src/arithmetic/rewrite_simplify.cc index 235306cc7bf8..b26f8335055a 100644 --- a/src/arithmetic/rewrite_simplify.cc +++ b/src/arithmetic/rewrite_simplify.cc @@ -129,7 +129,7 @@ Mutate_(const Add* op, const Expr& self) { // Pattern var for lanes in broadcast and ramp PVar lanes; // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(ramp(b1, s1, lanes) + ramp(b2, s2, lanes), ramp(b1 + b2, s1 + s2, lanes)); TVM_TRY_REWRITE(ramp(b1, s1, lanes) + broadcast(x, lanes), @@ -140,7 +140,7 @@ Mutate_(const Add* op, const Expr& self) { broadcast(x + y, lanes)); } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // Index rules // cancelation rules TVM_TRY_REWRITE((x - y) + y, x); @@ -244,7 +244,7 @@ Mutate_(const Sub* op, const Expr& self) { // Pattern var for lanes in broadcast and ramp PVar lanes; // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(ramp(b1, s1, lanes) - ramp(b2, s2, lanes), ramp(b1 - b2, s1 - s2, lanes)); TVM_TRY_REWRITE(ramp(b1, s1, lanes) - broadcast(x, lanes), @@ -255,7 +255,7 @@ Mutate_(const Sub* op, const Expr& self) { broadcast(x - y, lanes)); } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // Index rules // cancelation rules TVM_TRY_REWRITE((x + y) - y, x); @@ -443,7 +443,7 @@ Mutate_(const Mul* op, const Expr& self) { // Pattern var for lanes in broadcast and ramp PVar lanes; // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(broadcast(x, lanes) * broadcast(y, lanes), broadcast(x * y, lanes)); TVM_TRY_REWRITE(ramp(b1, s1, lanes) * broadcast(x, lanes), @@ -452,7 +452,7 @@ Mutate_(const Mul* op, const Expr& self) { ramp(b1 * x, s1 * x, lanes)); } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // constant simplification rule 
TVM_TRY_REWRITE((x + c1) * c2, x * c2 + c1 * c2); TVM_TRY_REWRITE((x * c1) * c2, x * (c1 * c2)); @@ -484,12 +484,12 @@ Mutate_(const Div* op, const Expr& self) { // x / 2.0 = x * 0.5 if (const FloatImm* ptr = op->b.as()) { - CHECK(op->dtype.is_float()); - return op->a * make_const(op->b.dtype(), 1.0 / ptr->value); + CHECK(op->type.is_float()); + return op->a * make_const(op->b.type(), 1.0 / ptr->value); } // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { // NOTE: use div as the pattern also works for float. TVM_TRY_REWRITE(div(broadcast(x, lanes), broadcast(y, lanes)), broadcast(div(x, y), lanes)); @@ -512,7 +512,7 @@ Mutate_(const Div* op, const Expr& self) { } } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // Be aware of the division rules: // We adopt the C default: division uses truncation instead of floordiv. // This means most rules need to check non-negativeness of the operands. @@ -524,7 +524,7 @@ Mutate_(const Div* op, const Expr& self) { if (truncdiv(c1, c2).Match(ret)) { int64_t c1val = c1.Eval()->value; int64_t c2val = c2.Eval()->value; - return make_const(op->dtype, truncdiv(c1val, c2val)); + return make_const(op->type, truncdiv(c1val, c2val)); } // while it is always true for trunc div @@ -706,7 +706,7 @@ Mutate_(const Mod* op, const Expr& self) { PVar lanes; // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(truncmod(broadcast(x, lanes), broadcast(y, lanes)), broadcast(truncmod(x, y), lanes)); @@ -734,7 +734,7 @@ Mutate_(const Mod* op, const Expr& self) { } } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // Be aware of the division rules: // We adopt the C default: division uses truncation instead of floordiv. // This means most rules need to check non-negativeness of the operands. @@ -762,10 +762,9 @@ Mutate_(const Mod* op, const Expr& self) { // canonicalization: x % c == x % (-c) for truncated division // NOTE: trunc div required - TVM_TRY_RECURSIVE_REWRITE_IF( - truncmod(x, c1), - truncmod(x, PConst(make_const(op->dtype, -c1.Eval()->value))), - c1.Eval()->value < 0); + TVM_TRY_RECURSIVE_REWRITE_IF(truncmod(x, c1), + truncmod(x, PConst(make_const(op->type, -c1.Eval()->value))), + c1.Eval()->value < 0); // try modular analysis if (truncmod(x, c1).Match(ret)) { @@ -795,7 +794,7 @@ Mutate_(const FloorDiv* op, const Expr& self) { PVar lanes; // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(floordiv(broadcast(x, lanes), broadcast(y, lanes)), broadcast(floordiv(x, y), lanes)); // ramp // bcast @@ -815,7 +814,7 @@ Mutate_(const FloorDiv* op, const Expr& self) { } } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // Be aware of the division rules: this is floor division.
TVM_TRY_REWRITE_IF(floordiv(floordiv(x, c1), c2), floordiv(x, c1 * c2), c1.Eval()->value > 0 && c2.Eval()->value > 0); @@ -940,7 +939,7 @@ Mutate_(const FloorMod* op, const Expr& self) { PVar lanes; // Vector rules - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(floormod(broadcast(x, lanes), broadcast(y, lanes)), broadcast(floormod(x, y), lanes)); @@ -965,7 +964,7 @@ Mutate_(const FloorMod* op, const Expr& self) { } } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { // Be-aware of the division rules: we use floordiv/floormod here TVM_TRY_REWRITE_IF(floormod(x * c1, c2), ZeroWithTypeLike(x), c2.Eval()->value != 0 && @@ -1009,13 +1008,13 @@ Mutate_(const Min* op, const Expr& self) { PVar lanes; // vector rule - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(min(broadcast(x, lanes), broadcast(y, lanes)), broadcast(min(x, y), lanes)); TVM_TRY_REWRITE(min(min(x, broadcast(y, lanes)), broadcast(z, lanes)), min(x, broadcast(min(y, z), lanes))); } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { TVM_TRY_REWRITE(min(x, x), x); // constant int bound @@ -1194,13 +1193,13 @@ Mutate_(const Max* op, const Expr& self) { PVar lanes; // vector rule - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(max(broadcast(x, lanes), broadcast(y, lanes)), broadcast(max(x, y), lanes)); TVM_TRY_REWRITE(max(max(x, broadcast(y, lanes)), broadcast(z, lanes)), max(x, broadcast(max(y, z), lanes))); } - if (IsIndexType(op->dtype)) { + if (IsIndexType(op->type)) { TVM_TRY_REWRITE(max(x, x), x); // constant int bound @@ -1367,17 +1366,17 @@ Mutate_(const EQ* op, const Expr& self) { PVar lanes; // vector rule - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(broadcast(x, lanes) == broadcast(y, lanes), broadcast(x == y, lanes)); } - if (IsIndexType(op->a.dtype())) { + if (IsIndexType(op->a.type())) { CompareResult result = TryCompare(op->a - op->b, 0); if (result == kEQ) { - return make_const(op->dtype, true); + return make_const(op->type, true); } else if (result == kNE || result == kGT || result == kLT) { - return make_const(op->dtype, false); + return make_const(op->type, false); } TVM_TRY_REWRITE(x - c1 == 0, x == c1); TVM_TRY_REWRITE(c1 - x == 0, x == c1); @@ -1421,20 +1420,20 @@ Mutate_(const LT* op, const Expr& self) { PVar lanes; // vector rule - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(broadcast(x, lanes) < broadcast(y, lanes), broadcast(x < y, lanes)); TVM_TRY_REWRITE(ramp(x, s1, lanes) < ramp(y, s1, lanes), broadcast(x < y, lanes)); } - if (IsIndexType(op->a.dtype())) { + if (IsIndexType(op->a.type())) { CompareResult result = TryCompare(op->a - op->b, 0); if (result == kLT) { - return make_const(op->dtype, true); + return make_const(op->type, true); } if (result == kEQ || result == kGT || result == kGE) { - return make_const(op->dtype, false); + return make_const(op->type, false); } TVM_TRY_REWRITE(x + y < x + z, y < z); @@ -1572,7 +1571,7 @@ Mutate_(const Not* op, const Expr& self) { // Pattern var to match any expression PVar x, y; PVar lanes; - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(!broadcast(x, lanes), broadcast(!x, lanes)); } @@ -1601,12 +1600,12 @@ Mutate_(const And* op, const Expr& self) { PVar c1, c2; PVar lanes; - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(broadcast(x, lanes) && broadcast(y, lanes), broadcast(x && y, lanes)); } - auto cfalse = 
PConst(make_const(op->dtype, false)); + auto cfalse = PConst(make_const(op->type, false)); TVM_TRY_REWRITE(x == y && x != y, cfalse); TVM_TRY_REWRITE(x != y && x == y, cfalse); TVM_TRY_REWRITE(x && !x, cfalse); @@ -1650,12 +1649,12 @@ Mutate_(const Or* op, const Expr& self) { PVar c1, c2; PVar lanes; - if (op->dtype.lanes() != 1) { + if (op->type.lanes() != 1) { TVM_TRY_REWRITE(broadcast(x, lanes) || broadcast(y, lanes), broadcast(x || y, lanes)); } - auto ctrue = PConst(make_const(op->dtype, true)); + auto ctrue = PConst(make_const(op->type, true)); TVM_TRY_REWRITE(x == y || x != y, ctrue); TVM_TRY_REWRITE(x != y || x == y, ctrue); @@ -1721,7 +1720,7 @@ Mutate_(const Call* op, const Expr& self) { for (const auto& constraint : literal_constraints_) { // Cases such as for (i, 0, bound) {if (likely(iter_var < bound)) { .. } } if (Equal(constraint, op->args[0])) { - return make_const(op->dtype, true); + return make_const(op->type, true); } } } @@ -1742,7 +1741,7 @@ Expr RewriteSimplifier::Impl:: Mutate_(const Cast* op, const Expr& self) { Expr ret = IRMutator::Mutate_(op, self); op = ret.as(); - return cast(op->dtype, op->value); + return cast(op->type, op->value); } Expr RewriteSimplifier::Impl:: diff --git a/src/autotvm/touch_extractor.cc b/src/autotvm/touch_extractor.cc index f66a724595c6..101d8f1aa57f 100644 --- a/src/autotvm/touch_extractor.cc +++ b/src/autotvm/touch_extractor.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -255,10 +255,10 @@ void GetItervarFeature(Stmt stmt, bool take_log, Array > > *re feature_row.push_back(Array{std::string("_itervar_"), var}); Array attr{std::string("_attr_"), - FloatImm::make(DataType::Float(32), trans(fea.length)), - IntImm::make(DataType::Int(32), fea.nest_level), - FloatImm::make(DataType::Float(32), trans(fea.topdown_product)), - FloatImm::make(DataType::Float(32), trans(fea.bottomup_product)), + FloatImm::make(Float(32), trans(fea.length)), + IntImm::make(Int(32), fea.nest_level), + FloatImm::make(Float(32), trans(fea.topdown_product)), + FloatImm::make(Float(32), trans(fea.bottomup_product)), }; // one hot annotation for (int i = 0; i < kNum; i++) { @@ -268,9 +268,9 @@ void GetItervarFeature(Stmt stmt, bool take_log, Array > > *re // arithmetic feature_row.push_back(Array{std::string("_arith_"), - FloatImm::make(DataType::Float(32), trans(fea.add_ct)), - FloatImm::make(DataType::Float(32), trans(fea.mul_ct)), - FloatImm::make(DataType::Float(32), trans(fea.div_ct)), + FloatImm::make(Float(32), trans(fea.add_ct)), + FloatImm::make(Float(32), trans(fea.mul_ct)), + FloatImm::make(Float(32), trans(fea.div_ct)), }); // touch map @@ -282,12 +282,12 @@ void GetItervarFeature(Stmt stmt, bool take_log, Array > > *re for (auto k : bufs) { TouchPattern &v = fea.touch_feature[k]; feature_row.push_back(Array{k, - FloatImm::make(DataType::Float(32), trans(v.stride)), - FloatImm::make(DataType::Float(32), trans(v.mod)), - FloatImm::make(DataType::Float(32), trans(v.count)), - FloatImm::make(DataType::Float(32), trans(v.reuse)), - FloatImm::make(DataType::Float(32), trans(v.thread_count)), - FloatImm::make(DataType::Float(32), trans(v.thread_reuse)), + FloatImm::make(Float(32), 
trans(v.stride)), + FloatImm::make(Float(32), trans(v.mod)), + FloatImm::make(Float(32), trans(v.count)), + FloatImm::make(Float(32), trans(v.reuse)), + FloatImm::make(Float(32), trans(v.thread_count)), + FloatImm::make(Float(32), trans(v.thread_reuse)), }); } diff --git a/src/autotvm/touch_extractor.h b/src/autotvm/touch_extractor.h index 1028b0144e12..e6690641edc6 100644 --- a/src/autotvm/touch_extractor.h +++ b/src/autotvm/touch_extractor.h @@ -91,31 +91,31 @@ class TouchExtractor : public FeatureVisitor { // arithmetic stats void Visit_(const Add *op) { - if (op->dtype.is_float()) + if (op->type.is_float()) itervar_map[itervar_stack_.back()].add_ct++; IRVisitor::Visit_(op); } void Visit_(const Sub *op) { - if (op->dtype.is_float()) + if (op->type.is_float()) itervar_map[itervar_stack_.back()].add_ct++; IRVisitor::Visit_(op); } void Visit_(const Mul *op) { - if (op->dtype.is_float()) + if (op->type.is_float()) itervar_map[itervar_stack_.back()].mul_ct++; IRVisitor::Visit_(op); } void Visit_(const Div *op) { - if (op->dtype.is_float()) + if (op->type.is_float()) itervar_map[itervar_stack_.back()].div_ct++; IRVisitor::Visit_(op); } void Visit_(const Mod *op) { - if (op->dtype.is_float()) + if (op->type.is_float()) itervar_map[itervar_stack_.back()].div_ct++; IRVisitor::Visit_(op); } diff --git a/src/codegen/build_common.h b/src/codegen/build_common.h index b2c895348a46..8a21aeea7eee 100644 --- a/src/codegen/build_common.h +++ b/src/codegen/build_common.h @@ -39,7 +39,7 @@ ExtractFuncInfo(const Array& funcs) { for (LoweredFunc f : funcs) { runtime::FunctionInfo info; for (size_t i = 0; i < f->args.size(); ++i) { - info.arg_types.push_back(f->args[i].dtype()); + info.arg_types.push_back(Type2TVMType(f->args[i].type())); } for (size_t i = 0; i < f->thread_axis.size(); ++i) { info.thread_axis_tags.push_back(f->thread_axis[i]->thread_tag); diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index ca25731cafef..80fd57af66f9 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -309,10 +309,6 @@ Target intel_graphics(const std::vector& options) { Target stackvm(const std::vector& options) { return CreateTarget("stackvm", options); } - -Target ext_dev(const std::vector& options) { - return CreateTarget("ext_dev", options); -} } // namespace target bool LLVMEnabled() { @@ -334,12 +330,12 @@ Target DefaultTargetHost(Target target) { } Buffer BufferWithOffsetAlignment(Array shape, - DataType dtype, + Type dtype, std::string name, int data_alignment, int offset_factor, bool compact) { - auto data = Var(name, DataType::Handle()); + auto data = Var(name, Handle()); bool has_any = false; if (!compact) { for (const auto& it : shape) { @@ -353,7 +349,7 @@ Buffer BufferWithOffsetAlignment(Array shape, Expr elem_offset; if (offset_factor != 0) { - elem_offset = Var(name + "_elem_offset", shape[0].dtype()); + elem_offset = Var(name + "_elem_offset", shape[0].type()); } else { elem_offset = Expr(); } diff --git a/src/codegen/codegen.cc b/src/codegen/codegen.cc index 60b12dc6e553..ded8fcebf57c 100644 --- a/src/codegen/codegen.cc +++ b/src/codegen/codegen.cc @@ -28,10 +28,7 @@ #include #include #include -#include -#include -#include -#include +#include namespace tvm { namespace codegen { @@ -61,111 +58,19 @@ runtime::Module Build(const Array& funcs, return m; } -/*! 
\brief Helper class to serialize module */ -class ModuleSerializer { - public: - explicit ModuleSerializer(runtime::Module mod) : mod_(mod) { - Init(); - } - - void SerializeModule(dmlc::Stream* stream) { - // Only have one DSO module and it is in the root, then - // we will not produce import_tree_. - bool has_import_tree = true; - if (DSOExportable(mod_.operator->()) && mod_->imports().empty()) { - has_import_tree = false; - } - uint64_t sz = 0; - if (has_import_tree) { - // we will append one key for _import_tree - // The layout is the same as before: binary_size, key, logic, key, logic... - sz = mod_vec_.size() + 1; - } else { - // Keep the old behaviour - sz = mod_->imports().size(); - } - stream->Write(sz); - - for (auto m : mod_vec_) { - std::string mod_type_key = m->type_key(); - if (!DSOExportable(m)) { - stream->Write(mod_type_key); - m->SaveToBinary(stream); - } else if (has_import_tree) { - mod_type_key = "_lib"; - stream->Write(mod_type_key); - } - } - - // Write _import_tree key if we have - if (has_import_tree) { - std::string import_key = "_import_tree"; - stream->Write(import_key); - stream->Write(import_tree_row_ptr_); - stream->Write(import_tree_child_indices_); - } - } - - private: - void Init() { - CreateModuleIndex(); - CreateImportTree(); - } - - // invariance: root module is always at location 0. - // The module order is collected via DFS - void CreateModuleIndex() { - std::unordered_set visited {mod_.operator->()}; - std::vector stack {mod_.operator->()}; - uint64_t module_index = 0; - - while (!stack.empty()) { - runtime::ModuleNode* n = stack.back(); - stack.pop_back(); - mod2index_[n] = module_index++; - mod_vec_.emplace_back(n); - for (runtime::Module m : n->imports()) { - runtime::ModuleNode* next = m.operator->(); - if (visited.count(next) == 0) { - visited.insert(next); - stack.push_back(next); - } - } - } - } - - void CreateImportTree() { - for (auto m : mod_vec_) { - for (runtime::Module im : m->imports()) { - uint64_t mod_index = mod2index_[im.operator->()]; - import_tree_child_indices_.push_back(mod_index); - } - import_tree_row_ptr_.push_back(import_tree_child_indices_.size()); - } - } - - bool DSOExportable(const runtime::ModuleNode* mod) { - return !std::strcmp(mod->type_key(), "llvm") || - !std::strcmp(mod->type_key(), "c"); - } - - runtime::Module mod_; - // construct module to index - std::unordered_map mod2index_; - // index -> module - std::vector mod_vec_; - std::vector import_tree_row_ptr_ {0}; - std::vector import_tree_child_indices_; -}; - std::string PackImportsToC(const runtime::Module& mod, bool system_lib) { std::string bin; dmlc::MemoryStringStream ms(&bin); dmlc::Stream* stream = &ms; - - ModuleSerializer module_serializer(mod); - module_serializer.SerializeModule(stream); - + uint64_t sz = static_cast(mod->imports().size()); + stream->Write(sz); + for (runtime::Module im : mod->imports()) { + CHECK_EQ(im->imports().size(), 0U) + << "Only support simply one-level hierarchy"; + std::string tkey = im->type_key(); + stream->Write(tkey); + im->SaveToBinary(stream); + } // translate to C program std::ostringstream os; os << "#ifdef _WIN32\n" diff --git a/src/codegen/codegen_c.cc b/src/codegen/codegen_c.cc index 4b95e2caf1aa..eab542dd3e08 100644 --- a/src/codegen/codegen_c.cc +++ b/src/codegen/codegen_c.cc @@ -79,7 +79,7 @@ void CodeGenC::AddFunction(LoweredFunc f) { ReserveKeywordsAsUnique(); // add to alloc buffer type. 
for (const auto & kv : f->handle_data_type) { - RegisterHandleType(kv.first.get(), kv.second.dtype()); + RegisterHandleType(kv.first.get(), kv.second.type()); } this->stream << "void " << f->name << "("; @@ -87,7 +87,7 @@ void CodeGenC::AddFunction(LoweredFunc f) { Var v = f->args[i]; std::string vid = AllocVarID(v.get()); if (i != 0) stream << ", "; - if (v.dtype().is_handle()) { + if (v.type().is_handle()) { auto it = alloc_storage_scope_.find(v.get()); if (it != alloc_storage_scope_.end()) PrintStorageScope(it->second, stream); @@ -104,7 +104,7 @@ void CodeGenC::AddFunction(LoweredFunc f) { stream << ' ' << restrict_keyword_; } } else { - PrintType(v.dtype(), stream); + PrintType(v.type(), stream); } stream << ' ' << vid; } @@ -125,14 +125,14 @@ void CodeGenC::PrintExpr(const Expr& n, std::ostream& os) { // NOLINT(*) if (print_ssa_form_) { std::ostringstream temp; VisitExpr(n, temp); - os << SSAGetID(temp.str(), n.dtype()); + os << SSAGetID(temp.str(), n.type()); } else { VisitExpr(n, os); } } void CodeGenC::PrintSSAAssign( - const std::string& target, const std::string& src, DataType t) { + const std::string& target, const std::string& src, Type t) { PrintType(t, stream); stream << ' ' << target << " = "; if (src.length() > 3 && @@ -146,7 +146,7 @@ void CodeGenC::PrintSSAAssign( // Print a reference expression to a buffer. std::string CodeGenC::GetBufferRef( - DataType t, const Variable* buffer, Expr index) { + Type t, const Variable* buffer, Expr index) { std::ostringstream os; std::string vid = GetVarID(buffer); std::string scope; @@ -213,7 +213,7 @@ std::string CodeGenC::GetBufferRef( // Print a reference expression to a buffer. std::string CodeGenC::GetStructRef( - DataType t, const Expr& buffer, const Expr& index, int kind) { + Type t, const Expr& buffer, const Expr& index, int kind) { if (kind < intrinsic::kArrKindBound_) { std::ostringstream os; os << "(((TVMArray*)"; @@ -265,13 +265,13 @@ std::string CodeGenC::GetStructRef( } -bool CodeGenC::HandleTypeMatch(const Variable* buf_var, DataType t) const { +bool CodeGenC::HandleTypeMatch(const Variable* buf_var, Type t) const { auto it = handle_data_type_.find(buf_var); if (it == handle_data_type_.end()) return false; return it->second == t; } -void CodeGenC::RegisterHandleType(const Variable* buf_var, DataType t) { +void CodeGenC::RegisterHandleType(const Variable* buf_var, Type t) { auto it = handle_data_type_.find(buf_var); if (it == handle_data_type_.end()) { handle_data_type_[buf_var] = t; @@ -282,13 +282,13 @@ void CodeGenC::RegisterHandleType(const Variable* buf_var, DataType t) { } void CodeGenC::PrintVecElemLoad(const std::string& vec, - DataType t, int i, + Type t, int i, std::ostream& os) { // NOLINT(*) os << vec << ".s" << std::hex << i << std::dec; } void CodeGenC::PrintVecElemStore(const std::string& vec, - DataType t, int i, + Type t, int i, const std::string& value) { this->PrintIndent(); stream << vec << ".s" << std::hex << i @@ -296,19 +296,19 @@ void CodeGenC::PrintVecElemStore(const std::string& vec, } std::string CodeGenC::GetVecLoad( - DataType t, const Variable* buffer, Expr base) { + Type t, const Variable* buffer, Expr base) { return GetBufferRef(t, buffer, base); } void CodeGenC::PrintVecStore(const Variable* buffer, - DataType t, Expr base, + Type t, Expr base, const std::string& value) { std::string ref = GetBufferRef(t, buffer, base); this->PrintIndent(); stream << ref << " = " << value << ";\n"; } -std::string CodeGenC::CastFromTo(std::string value, DataType from, DataType target) { +std::string 
   if (from == target) return value;
   std::ostringstream os;
   os << "((";
@@ -328,7 +328,7 @@ void CodeGenC::PrintStorageScope(const std::string& scope, std::ostream& os) { // NOLINT(*)
   CHECK_EQ(scope, "global");
 }
 
-void CodeGenC::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
+void CodeGenC::PrintType(Type t, std::ostream& os) {  // NOLINT(*)
   CHECK_EQ(t.lanes(), 1)
       << "do not yet support vector types";
   if (t.is_handle()) {
@@ -360,48 +360,48 @@ void CodeGenC::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
 
 inline void PrintConst(const IntImm* op, std::ostream& os, CodeGenC* p) {  // NOLINT(*)
-  if (op->dtype == DataType::Int(32)) {
+  if (op->type == Int(32)) {
     std::ostringstream temp;
     temp << op->value;
     p->MarkConst(temp.str());
     os << temp.str();
   } else {
     os << "(";
-    p->PrintType(op->dtype, os);
+    p->PrintType(op->type, os);
     os << ")" << op->value;
   }
 }
 
 inline void PrintConst(const UIntImm* op, std::ostream& os, CodeGenC* p) {  // NOLINT(*)
-  if (op->dtype == DataType::UInt(32)) {
+  if (op->type == UInt(32)) {
     std::ostringstream temp;
     temp << op->value << "U";
     p->MarkConst(temp.str());
     os << temp.str();
   } else {
     os << "(";
-    p->PrintType(op->dtype, os);
+    p->PrintType(op->type, os);
     os << ")" << op->value;
   }
 }
 
 inline void PrintConst(const FloatImm* op, std::ostream& os, CodeGenC* p) {  // NOLINT(*)
-  switch (op->dtype.bits()) {
+  switch (op->type.bits()) {
     case 64: case 32: {
       std::ostringstream temp;
       temp << std::scientific << op->value;
-      if (op->dtype.bits() == 32) temp << 'f';
+      if (op->type.bits() == 32) temp << 'f';
       p->MarkConst(temp.str());
       os << temp.str();
       break;
     }
     case 16: {
       os << '(';
-      p->PrintType(op->dtype, os);
+      p->PrintType(op->type, os);
       os << ')' << std::scientific << op->value << 'f';
       break;
     }
-    default: LOG(FATAL) << "Bad bit-width for float: " << op->dtype << "\n";
+    default: LOG(FATAL) << "Bad bit-width for float: " << op->type << "\n";
   }
 }
 
@@ -423,7 +423,7 @@ inline void PrintBinaryExpr(const T* op,
                             const char *opstr,
                             std::ostream& os,  // NOLINT(*)
                             CodeGenC* p) {
-  if (op->dtype.lanes() == 1) {
+  if (op->type.lanes() == 1) {
     if (isalpha(opstr[0])) {
       os << opstr << '(';
       p->PrintExpr(op->a, os);
@@ -438,7 +438,7 @@ inline void PrintBinaryExpr(const T* op,
       os << ')';
     }
   } else {
-    p->PrintVecBinaryOp(opstr, op->dtype, op->a, op->b, os);
+    p->PrintVecBinaryOp(opstr, op->type, op->a, op->b, os);
   }
 }
 
@@ -446,7 +446,7 @@ inline void PrintBinaryIntrinsic(const Call* op,
                                  const char *opstr,
                                  std::ostream& os,  // NOLINT(*)
                                  CodeGenC* p) {
-  if (op->dtype.lanes() == 1) {
+  if (op->type.lanes() == 1) {
     CHECK_EQ(op->args.size(), 2U);
     os << '(';
     p->PrintExpr(op->args[0], os);
@@ -454,13 +454,13 @@ inline void PrintBinaryIntrinsic(const Call* op,
     p->PrintExpr(op->args[1], os);
     os << ')';
   } else {
-    p->PrintVecBinaryOp(opstr, op->dtype, op->args[0], op->args[1], os);
+    p->PrintVecBinaryOp(opstr, op->type, op->args[0], op->args[1], os);
   }
 }
 
 void CodeGenC::VisitExpr_(const Cast *op, std::ostream& os) {  // NOLINT(*)
   std::stringstream value;
   this->PrintExpr(op->value, value);
-  os << CastFromTo(value.str(), op->value.dtype(), op->dtype);
+  os << CastFromTo(value.str(), op->value.type(), op->type);
 }
 void CodeGenC::VisitExpr_(const Variable *op, std::ostream& os) {  // NOLINT(*)
   os << GetVarID(op);
@@ -553,7 +553,7 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
     const Load *l = op->args[0].as<Load>();
     CHECK(op->args.size() == 1 && l);
     os << "((";
-    this->PrintType(l->dtype.element_of(), os);
+    this->PrintType(l->type.element_of(), os);
     os << " *)" << this->GetVarID(l->buffer_var.get())
        << " + ";
     this->PrintExpr(l->index, os);
@@ -561,7 +561,7 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
   } else if (op->is_intrinsic(intrinsic::tvm_struct_get)) {
     CHECK_EQ(op->args.size(), 3U);
     os << GetStructRef(
-        op->dtype, op->args[0], op->args[1],
+        op->type, op->args[0], op->args[1],
         op->args[2].as<IntImm>()->value);
   } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) {
     CHECK_EQ(op->args.size(), 1U);
@@ -571,7 +571,7 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
   } else if (op->is_intrinsic(Call::reinterpret)) {
     // generate (*( TYPE *)(&(ARG)))
     os << "(*(";
-    this->PrintType(op->dtype, os);
+    this->PrintType(op->type, os);
     os << " *)(&(";
     this->PrintExpr(op->args[0], os);
     os << ")))";
@@ -585,7 +585,7 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
     if (op->call_type == Call::Intrinsic ||
         op->call_type == Call::PureIntrinsic) {
       LOG(FATAL) << "Unresolved intrinsic " << op->name
-                 << " with return type " << op->dtype;
+                 << " with return type " << op->type;
     } else {
       LOG(FATAL) << "Unresolved call type " << op->call_type;
     }
@@ -593,7 +593,7 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
 }
 
 void CodeGenC::PrintVecBinaryOp(
-    const std::string& op, DataType t,
+    const std::string& op, Type t,
     Expr lhs, Expr rhs, std::ostream& os) {  // NOLINT(*)
   if (isalpha(op[0])) {
     os << op << "(";
@@ -611,17 +611,17 @@ void CodeGenC::PrintVecBinaryOp(
 }
 
 void CodeGenC::VisitExpr_(const Load* op, std::ostream& os) {  // NOLINT(*)
-  int lanes = op->dtype.lanes();
+  int lanes = op->type.lanes();
   // delcare type.
-  if (op->dtype.lanes() == 1) {
-    std::string ref = GetBufferRef(op->dtype, op->buffer_var.get(), op->index);
+  if (op->type.lanes() == 1) {
+    std::string ref = GetBufferRef(op->type, op->buffer_var.get(), op->index);
     os << ref;
   } else {
     CHECK(is_one(op->predicate))
        << "predicated load is not supported";
     Expr base;
-    if (GetRamp1Base(op->index, op->dtype.lanes(), &base)) {
-      std::string ref = GetVecLoad(op->dtype, op->buffer_var.get(), base);
+    if (GetRamp1Base(op->index, op->type.lanes(), &base)) {
+      std::string ref = GetVecLoad(op->type, op->buffer_var.get(), base);
       os << ref;
     } else {
       // The assignment below introduces side-effect, and the resulting value cannot
@@ -631,16 +631,16 @@ void CodeGenC::VisitExpr_(const Load* op, std::ostream& os) {  // NOLINT(*)
       // load seperately.
       std::string svalue = GetUniqueName("_");
       this->PrintIndent();
-      this->PrintType(op->dtype, stream);
+      this->PrintType(op->type, stream);
       stream << ' ' << svalue << ";\n";
-      std::string sindex = SSAGetID(PrintExpr(op->index), op->index.dtype());
+      std::string sindex = SSAGetID(PrintExpr(op->index), op->index.type());
       std::string vid = GetVarID(op->buffer_var.get());
-      DataType elem_type = op->dtype.element_of();
+      Type elem_type = op->type.element_of();
       for (int i = 0; i < lanes; ++i) {
         std::ostringstream value_temp;
         if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) {
           value_temp << "((";
-          if (op->buffer_var.get()->dtype.is_handle()) {
+          if (op->buffer_var.get()->type.is_handle()) {
             auto it = alloc_storage_scope_.find(op->buffer_var.get());
             if (it != alloc_storage_scope_.end()) {
               PrintStorageScope(it->second, value_temp);
@@ -653,9 +653,9 @@ void CodeGenC::VisitExpr_(const Load* op, std::ostream& os) {  // NOLINT(*)
           value_temp << vid;
         }
         value_temp << '[';
-        PrintVecElemLoad(sindex, op->index.dtype(), i, value_temp);
+        PrintVecElemLoad(sindex, op->index.type(), i, value_temp);
         value_temp << ']';
-        PrintVecElemStore(svalue, op->dtype, i, value_temp.str());
+        PrintVecElemStore(svalue, op->type, i, value_temp.str());
       }
       os << svalue;
       EndScope(vec_scope);
@@ -664,7 +664,7 @@ void CodeGenC::VisitExpr_(const Load* op, std::ostream& os) {  // NOLINT(*)
 }
 
 void CodeGenC::VisitStmt_(const Store* op) {
-  DataType t = op->value.dtype();
+  Type t = op->value.type();
   if (t.lanes() == 1) {
     std::string value = this->PrintExpr(op->value);
     std::string ref = this->GetBufferRef(t, op->buffer_var.get(), op->index);
@@ -683,15 +683,15 @@ void CodeGenC::VisitStmt_(const Store* op) {
     int vec_scope = BeginScope();
 
     // store elements seperately
-    std::string index = SSAGetID(PrintExpr(op->index), op->index.dtype());
-    std::string value = SSAGetID(PrintExpr(op->value), op->value.dtype());
+    std::string index = SSAGetID(PrintExpr(op->index), op->index.type());
+    std::string value = SSAGetID(PrintExpr(op->value), op->value.type());
    std::string vid = GetVarID(op->buffer_var.get());
     for (int i = 0; i < t.lanes(); ++i) {
       this->PrintIndent();
-      DataType elem_type = t.element_of();
+      Type elem_type = t.element_of();
       if (!HandleTypeMatch(op->buffer_var.get(), elem_type)) {
         stream << "((";
-        if (op->buffer_var.get()->dtype.is_handle()) {
+        if (op->buffer_var.get()->type.is_handle()) {
           auto it = alloc_storage_scope_.find(op->buffer_var.get());
           if (it != alloc_storage_scope_.end()) {
             PrintStorageScope(it->second, stream);
@@ -704,9 +704,9 @@ void CodeGenC::VisitStmt_(const Store* op) {
         stream << vid;
       }
       stream << '[';
-      PrintVecElemLoad(index, op->index.dtype(), i, stream);
+      PrintVecElemLoad(index, op->index.type(), i, stream);
       stream << "] = ";
-      PrintVecElemLoad(value, op->value.dtype(), i, stream);
+      PrintVecElemLoad(value, op->value.type(), i, stream);
       stream << ";\n";
     }
     EndScope(vec_scope);
@@ -723,7 +723,7 @@ void CodeGenC::VisitExpr_(const Let* op, std::ostream& os) {  // NOLINT(*)
 
 void CodeGenC::VisitExpr_(const Ramp* op, std::ostream& os) {  // NOLINT(*)
   // constraint of current logic
-  CHECK_EQ(op->base.dtype(), DataType::Int(32));
+  CHECK_EQ(op->base.type(), Int(32));
   os << "((int" << op->lanes << ")(";
   for (int i = 0; i < op->lanes; i++) {
     os << "(" << PrintExpr(op->base) << ")" << "+(" << PrintExpr(op->stride) << "*" << i <<")";
@@ -758,7 +758,7 @@ void CodeGenC::VisitStmt_(const LetStmt* op) {
     var_idmap_[op->var.get()] = value;
   } else {
     PrintIndent();
-    if (op->var.dtype() == DataType::Handle() &&
+    if (op->var.type() == Handle() &&
         handle_data_type_.count(op->var.get())) {
       PrintType(handle_data_type_.at(op->var.get()), stream);
       stream << "* "
@@ -767,7 +767,7 @@ void CodeGenC::VisitStmt_(const LetStmt* op) {
       PrintType(handle_data_type_.at(op->var.get()), stream);
       stream << "*)" << value << ";\n";
     } else {
-      PrintType(op->var.dtype(), this->stream);
+      PrintType(op->var.type(), this->stream);
       this->stream << ' '
                    << AllocVarID(op->var.get())
                    << " = " << value << ";\n";
@@ -784,7 +784,7 @@ void CodeGenC::VisitStmt_(const Allocate* op) {
     CHECK_EQ(op->free_function, "nop");
     std::string new_data = PrintExpr(op->new_expr);
     this->PrintIndent();
-    PrintType(op->dtype, stream);
+    PrintType(op->type, stream);
     stream << "* "<< vid << '=' << new_data << ";\n";
   } else {
     this->PrintIndent();
@@ -795,11 +795,11 @@ void CodeGenC::VisitStmt_(const Allocate* op) {
       std::string scope = alloc_storage_scope_.at(buffer);
       PrintStorageScope(scope, stream);
       stream << ' ';
-      PrintType(op->dtype, stream);
+      PrintType(op->type, stream);
       stream << ' '<< vid << '['
              << constant_size << "];\n";
     }
-    RegisterHandleType(op->buffer_var.get(), op->dtype);
+    RegisterHandleType(op->buffer_var.get(), op->type);
     this->PrintStmt(op->body);
 }
 
@@ -841,7 +841,7 @@ void CodeGenC::VisitStmt_(const For* op) {
   std::string vid = AllocVarID(op->loop_var.get());
   CHECK(is_zero(op->min));
   stream << "for (";
-  PrintType(op->loop_var.dtype(), stream);
+  PrintType(op->loop_var.type(), stream);
   stream << ' ' << vid << " = 0; "
          << vid << " < " << extent
          << "; ++" << vid << ") {\n";
@@ -890,7 +890,7 @@ void CodeGenC::VisitStmt_(const Evaluate *op) {
     CHECK_EQ(call->args.size(), 4);
     std::string value = PrintExpr(call->args[3]);
     std::string ref = GetStructRef(
-        call->args[3].dtype(),
+        call->args[3].type(),
         call->args[0], call->args[1],
         call->args[2].as<IntImm>()->value);
diff --git a/src/codegen/codegen_c.h b/src/codegen/codegen_c.h
index b8d357051998..8701cda1e14c 100644
--- a/src/codegen/codegen_c.h
+++ b/src/codegen/codegen_c.h
@@ -147,7 +147,7 @@ class CodeGenC :
    * \param t The type representation.
    * \param os The stream to print the ctype into
    */
-  virtual void PrintType(DataType t, std::ostream& os);  // NOLINT(*)
+  virtual void PrintType(Type t, std::ostream& os);  // NOLINT(*)
   /*!
    * \brief Print expr representing the thread tag
    * \param IterVar iv The thread index to be binded;
@@ -157,51 +157,51 @@ class CodeGenC :
   virtual void PrintStorageSync(const Call* op);  // NOLINT(*)
   // Binary vector op.
   virtual void PrintVecBinaryOp(
-      const std::string&op, DataType op_type,
+      const std::string&op, Type op_type,
       Expr lhs, Expr rhs, std::ostream& os);  // NOLINT(*)
   // print vector load
-  virtual std::string GetVecLoad(DataType t, const Variable* buffer, Expr base);
+  virtual std::string GetVecLoad(Type t, const Variable* buffer, Expr base);
   // print vector store
   virtual void PrintVecStore(const Variable* buffer,
-                             DataType t, Expr base,
+                             Type t, Expr base,
                              const std::string& value);  // NOLINT(*)
   // print load of single element
   virtual void PrintVecElemLoad(
-      const std::string& vec, DataType t, int i, std::ostream& os);  // NOLINT(*)
+      const std::string& vec, Type t, int i, std::ostream& os);  // NOLINT(*)
   // print store of single element.
   virtual void PrintVecElemStore(
-      const std::string& vec, DataType t, int i, const std::string& value);
+      const std::string& vec, Type t, int i, const std::string& value);
   // Get a cast type from to
-  virtual std::string CastFromTo(std::string value, DataType from, DataType target);
+  virtual std::string CastFromTo(std::string value, Type from, Type target);
 
  protected:
   // Print reference to struct location
   std::string GetStructRef(
-      DataType t, const Expr& buffer, const Expr& index, int kind);
+      Type t, const Expr& buffer, const Expr& index, int kind);
   // print reference to a buffer as type t in index.
   virtual std::string GetBufferRef(
-      DataType t, const Variable* buffer, Expr index);
+      Type t, const Variable* buffer, Expr index);
   /*!
    * \brief If buffer is allocated as type t.
    * \param buf_var The buffer variable.
    * \param t The type to be checked.
    */
-  bool HandleTypeMatch(const Variable* buf_var, DataType t) const;
+  bool HandleTypeMatch(const Variable* buf_var, Type t) const;
   /*!
    * \brief Register the data type of buf_var
    * \param buf_var The buffer variable.
    * \param t The type to be checked.
   */
-  void RegisterHandleType(const Variable* buf_var, DataType t);
+  void RegisterHandleType(const Variable* buf_var, Type t);
   // override
   void PrintSSAAssign(
-      const std::string& target, const std::string& src, DataType t) final;
+      const std::string& target, const std::string& src, Type t) final;
   /*! \brief restrict keyword */
   std::string restrict_keyword_{""};
   /*! \brief the storage scope of allocation */
   std::unordered_map<const Variable*, std::string> alloc_storage_scope_;
   /*! \brief the data type of allocated buffers */
-  std::unordered_map<const Variable*, DataType> handle_data_type_;
+  std::unordered_map<const Variable*, Type> handle_data_type_;
 
   /*! \brief reserves common C keywords */
   void ReserveKeywordsAsUnique();
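// [Editor's illustration, not part of the patch] Every hunk in these codegen
// files swaps tvm::DataType back to the older Type alias; both carry the same
// (code, bits, lanes) triple, only the constructor spelling changes (Float(32)
// versus DataType::Float(32)). A self-contained sketch of that triple;
// MiniType and the free constructors are hypothetical shorthand, not TVM API.
#include <cassert>
#include <cstdint>

struct MiniType {
  uint8_t code;    // 0 = int, 1 = uint, 2 = float (mirrors DLDataTypeCode)
  uint8_t bits;    // width of one lane
  uint16_t lanes;  // vector width, 1 for scalars
};

// Old-style free constructors, as in the pre-rename API surface.
inline MiniType Int(int bits, int lanes = 1) {
  return {0, static_cast<uint8_t>(bits), static_cast<uint16_t>(lanes)};
}
inline MiniType Float(int bits, int lanes = 1) {
  return {2, static_cast<uint8_t>(bits), static_cast<uint16_t>(lanes)};
}

int main() {
  MiniType t = Float(32, 4);  // post-rename spelling: DataType::Float(32, 4)
  assert(t.bits == 32 && t.lanes == 4);
  MiniType i = Int(32);
  assert(i.code == 0 && i.lanes == 1);
  return 0;
}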
diff --git a/src/codegen/codegen_c_host.cc b/src/codegen/codegen_c_host.cc
index f2c54c2700c9..9c099a425fd6 100644
--- a/src/codegen/codegen_c_host.cc
+++ b/src/codegen/codegen_c_host.cc
@@ -48,7 +48,7 @@ void CodeGenCHost::AddFunction(LoweredFunc f) {
   ReserveKeywordsAsUnique();
   // add to alloc buffer type.
   for (const auto & kv : f->handle_data_type) {
-    RegisterHandleType(kv.first.get(), kv.second.dtype());
+    RegisterHandleType(kv.first.get(), kv.second.type());
   }
 
   this->stream << "#ifdef __cplusplus\n";
@@ -59,7 +59,7 @@ void CodeGenCHost::AddFunction(LoweredFunc f) {
     Var v = f->args[i];
     std::string vid = AllocVarID(v.get());
     if (i != 0) stream << ", ";
-    if (v.dtype().is_handle()) {
+    if (v.type().is_handle()) {
       auto it = alloc_storage_scope_.find(v.get());
       if (it != alloc_storage_scope_.end()) {
         PrintStorageScope(it->second, stream);
@@ -77,7 +77,7 @@ void CodeGenCHost::AddFunction(LoweredFunc f) {
         stream << ' ' << restrict_keyword_;
       }
     } else {
-      PrintType(v.dtype(), stream);
+      PrintType(v.type(), stream);
     }
     stream << ' ' << vid;
   }
@@ -96,14 +96,14 @@ std::string CodeGenCHost::Finish() {
   return CodeGenC::Finish();
 }
 
-void CodeGenCHost::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
+void CodeGenCHost::PrintType(Type t, std::ostream& os) {  // NOLINT(*)
   int lanes = t.lanes();
   if (t.is_handle()) {
     CHECK_EQ(lanes, 1)
        << "does not support vector types";
     os << "void*"; return;
   }
-  if (t == DataType::Bool()) {
+  if (t == Bool()) {
     os << "bool"; return;
   }
   bool fail = false;
@@ -145,7 +145,7 @@ void CodeGenCHost::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
 
 void CodeGenCHost::VisitExpr_(const Broadcast* op, std::ostream& os) {   // NOLINT(*)
   std::string v = PrintExpr(op->value);
   os << "((";
-  PrintType(op->dtype, os);
+  PrintType(op->type, os);
   os << ")(";
   for (int i = 0; i < op->lanes; ++i) {
     if (i != 0) os << ", ";
@@ -268,10 +268,10 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op,
                                                std::ostream& os) {  // NOLINT(*)
   std::ostringstream temp_a;
   VisitExpr(op->a, temp_a);
-  std::string a_id = SSAGetID(temp_a.str(), op->a.dtype());
+  std::string a_id = SSAGetID(temp_a.str(), op->a.type());
   std::ostringstream temp_b;
   VisitExpr(op->b, temp_b);
-  std::string b_id = SSAGetID(temp_b.str(), op->b.dtype());
+  std::string b_id = SSAGetID(temp_b.str(), op->b.type());
 
   os << "((" << a_id << ") " << compare << " (" << b_id << ") "
      << "? (" << a_id << ") : (" << b_id << "))";
(" << a_id << ") : (" << b_id << "))"; diff --git a/src/codegen/codegen_c_host.h b/src/codegen/codegen_c_host.h index 44f838536627..80e359c33ce0 100644 --- a/src/codegen/codegen_c_host.h +++ b/src/codegen/codegen_c_host.h @@ -39,7 +39,7 @@ class CodeGenCHost final : public CodeGenC { void AddFunction(LoweredFunc f); std::string Finish(); - void PrintType(DataType t, std::ostream& os) final; // NOLINT(*) + void PrintType(Type t, std::ostream& os) final; // NOLINT(*) // overload visitor functions void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*) diff --git a/src/codegen/codegen_cuda.cc b/src/codegen/codegen_cuda.cc index 06b542a66323..6656fa07740d 100644 --- a/src/codegen/codegen_cuda.cc +++ b/src/codegen/codegen_cuda.cc @@ -105,10 +105,10 @@ void CodeGenCUDA::VisitStmt_(const ir::For* op) { void CodeGenCUDA::BindThreadIndex(const IterVar& iv) { CHECK(!var_idmap_.count(iv->var.get())); var_idmap_[iv->var.get()] = - CastFromTo(iv->thread_tag, DataType::UInt(32), iv->var.dtype()); + CastFromTo(iv->thread_tag, UInt(32), iv->var.type()); } -void CodeGenCUDA::PrintType(DataType t, std::ostream& os) { // NOLINT(*) +void CodeGenCUDA::PrintType(Type t, std::ostream& os) { // NOLINT(*) int lanes = t.lanes(); if (t.is_handle()) { CHECK_EQ(lanes, 1) @@ -137,7 +137,7 @@ void CodeGenCUDA::PrintType(DataType t, std::ostream& os) { // NOLINT(*) if (!fail && (lanes >= 2 && lanes <= 4)) { os << lanes; return; } - } else if (t == DataType::Bool()) { + } else if (t == Bool()) { os << "bool"; return; } else if (t.is_uint() || t.is_int()) { if (t.is_uint()) { @@ -199,7 +199,7 @@ void CodeGenCUDA::PrintType(DataType t, std::ostream& os) { // NOLINT(*) } void CodeGenCUDA::PrintVecBinaryOp( - const std::string&op, DataType t, + const std::string&op, Type t, Expr lhs, Expr rhs, std::ostream& os) { // NOLINT(*) // unpacking operations. int lanes = t.lanes(); @@ -210,8 +210,8 @@ void CodeGenCUDA::PrintVecBinaryOp( int vec_scope = BeginScope(); // default: unpack into individual ops. - std::string vlhs = SSAGetID(PrintExpr(lhs), lhs.dtype()); - std::string vrhs = SSAGetID(PrintExpr(rhs), rhs.dtype()); + std::string vlhs = SSAGetID(PrintExpr(lhs), lhs.type()); + std::string vrhs = SSAGetID(PrintExpr(rhs), rhs.type()); std::string sret = GetUniqueName("_"); { // delcare type. 
diff --git a/src/codegen/codegen_cuda.cc b/src/codegen/codegen_cuda.cc
index 06b542a66323..6656fa07740d 100644
--- a/src/codegen/codegen_cuda.cc
+++ b/src/codegen/codegen_cuda.cc
@@ -105,10 +105,10 @@ void CodeGenCUDA::VisitStmt_(const ir::For* op) {
 void CodeGenCUDA::BindThreadIndex(const IterVar& iv) {
   CHECK(!var_idmap_.count(iv->var.get()));
   var_idmap_[iv->var.get()] =
-      CastFromTo(iv->thread_tag, DataType::UInt(32), iv->var.dtype());
+      CastFromTo(iv->thread_tag, UInt(32), iv->var.type());
 }
 
-void CodeGenCUDA::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
+void CodeGenCUDA::PrintType(Type t, std::ostream& os) {  // NOLINT(*)
   int lanes = t.lanes();
   if (t.is_handle()) {
     CHECK_EQ(lanes, 1)
@@ -137,7 +137,7 @@ void CodeGenCUDA::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
       if (!fail && (lanes >= 2 && lanes <= 4)) {
         os << lanes; return;
       }
-  } else if (t == DataType::Bool()) {
+  } else if (t == Bool()) {
     os << "bool"; return;
   } else if (t.is_uint() || t.is_int()) {
     if (t.is_uint()) {
@@ -199,7 +199,7 @@ void CodeGenCUDA::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
 }
 
 void CodeGenCUDA::PrintVecBinaryOp(
-    const std::string&op, DataType t,
+    const std::string&op, Type t,
     Expr lhs, Expr rhs, std::ostream& os) {  // NOLINT(*)
   // unpacking operations.
   int lanes = t.lanes();
@@ -210,8 +210,8 @@ void CodeGenCUDA::PrintVecBinaryOp(
     int vec_scope = BeginScope();
 
     // default: unpack into individual ops.
-    std::string vlhs = SSAGetID(PrintExpr(lhs), lhs.dtype());
-    std::string vrhs = SSAGetID(PrintExpr(rhs), rhs.dtype());
+    std::string vlhs = SSAGetID(PrintExpr(lhs), lhs.type());
+    std::string vrhs = SSAGetID(PrintExpr(rhs), rhs.type());
     std::string sret = GetUniqueName("_");
     {
       // delcare type.
@@ -223,15 +223,15 @@ void CodeGenCUDA::PrintVecBinaryOp(
       std::ostringstream value_temp;
       if (isalpha(op[0])) {
         value_temp << op << "(";
-        PrintVecElemLoad(vlhs, lhs.dtype(), i, value_temp);
+        PrintVecElemLoad(vlhs, lhs.type(), i, value_temp);
         value_temp << ", ";
-        PrintVecElemLoad(vrhs, rhs.dtype(), i, value_temp);
+        PrintVecElemLoad(vrhs, rhs.type(), i, value_temp);
         value_temp << ")";
       } else {
         value_temp << "(";
-        PrintVecElemLoad(vlhs, lhs.dtype(), i, value_temp);
+        PrintVecElemLoad(vlhs, lhs.type(), i, value_temp);
         value_temp << op;
-        PrintVecElemLoad(vrhs, rhs.dtype(), i, value_temp);
+        PrintVecElemLoad(vrhs, rhs.type(), i, value_temp);
         value_temp << ")";
       }
       PrintVecElemStore(sret, t, i, value_temp.str());
@@ -242,7 +242,7 @@ void CodeGenCUDA::PrintVecBinaryOp(
 }
 
 void CodeGenCUDA::PrintVecElemLoad(
-    const std::string& vec, DataType t, int i, std::ostream& os) {  // NOLINT(*)
+    const std::string& vec, Type t, int i, std::ostream& os) {  // NOLINT(*)
   static const char access[] = {'x', 'y', 'z', 'w'};
   CHECK(i >= 0 && i < 4);
   if (t.is_int() && t.bits() == 8) {
@@ -253,7 +253,7 @@ void CodeGenCUDA::PrintVecElemLoad(
 }
 
 void CodeGenCUDA::PrintVecElemStore(
-    const std::string& vec, DataType t, int i, const std::string& value) {
+    const std::string& vec, Type t, int i, const std::string& value) {
   this->PrintIndent();
   static const char access[] = {'x', 'y', 'z', 'w'};
   CHECK(i >= 0 && i < 4);
@@ -390,7 +390,7 @@ void CodeGenCUDA::VisitStmt_(const Allocate* op) {
     CHECK_EQ(op->free_function, "nop");
     std::string new_data = PrintExpr(op->new_expr);
     this->PrintIndent();
-    PrintType(op->dtype, stream);
+    PrintType(op->type, stream);
     stream << "* "<< vid << '=' << new_data << ";\n";
   } else {
     this->PrintIndent();
@@ -401,27 +401,23 @@ void CodeGenCUDA::VisitStmt_(const Allocate* op) {
     std::string scope = alloc_storage_scope_.at(buffer);
     if (scope.find("wmma.") == 0) {
       if (scope == "wmma.matrix_a" || scope == "wmma.matrix_b") {
-        CHECK(op->dtype == DataType::Float(16) ||
-              op->dtype == DataType::Int(8) ||
-              op->dtype == DataType::UInt(8))
+        CHECK(op->type == Float(16) || op->type == Int(8) || op->type == UInt(8))
           << "Matrix_a and matrix_b only support half or char or unsigned char type for now";
       } else {
-        CHECK(op->dtype == DataType::Float(16) ||
-              op->dtype == DataType::Float(32) ||
-              op->dtype == DataType::Int(32))
+        CHECK(op->type == Float(16) || op->type == Float(32) || op->type == Int(32))
           << "Accumulator only support half, float and int type for now";
       }
       constant_size = GetWmmaFragmentSize(scope, buffer, constant_size);
-      PrintWmmaScope(scope, op->dtype, buffer, stream);
+      PrintWmmaScope(scope, op->type, buffer, stream);
     } else {
       PrintStorageScope(scope, stream);
       stream << ' ';
-      PrintType(op->dtype, stream);
+      PrintType(op->type, stream);
     }
     stream << ' '<< vid << '['
           << constant_size << "];\n";
  }
-  RegisterHandleType(op->buffer_var.get(), op->dtype);
+  RegisterHandleType(op->buffer_var.get(), op->type);
   this->PrintStmt(op->body);
 }
 
@@ -453,7 +449,7 @@ void CodeGenCUDA::VisitExpr_(const Ramp* op, std::ostream& os) {
 }
 
 void CodeGenCUDA::VisitExpr_(const Broadcast* op, std::ostream& os) {   // NOLINT(*)
-  if (op->dtype.is_int() && op->dtype.bits() == 8 && op->lanes == 4) {
+  if (op->type.is_int() && op->type.bits() == 8 && op->lanes == 4) {
     // make_int8x4
     const int64_t *p = as_const_int(op->value);
     CHECK(p);
@@ -465,7 +461,7 @@ void CodeGenCUDA::VisitExpr_(const Broadcast* op, std::ostream& os) {   // NOLINT(*)
 
   std::string v = PrintExpr(op->value);
   os << "make_";
-  PrintType(op->dtype, os);
+  PrintType(op->type, os);
   os << '(';
   for (int i = 0; i < op->lanes; ++i) {
     if (i != 0) os << ", ";
@@ -477,11 +473,11 @@ void CodeGenCUDA::VisitExpr_(const Broadcast* op, std::ostream& os) {   // NOLINT(*)
 void CodeGenCUDA::VisitExpr_(const Shuffle* op, std::ostream &os) {
   std::vector<std::string> to_shuffle(op->vectors.size());
   for (int i = 0, e = op->vectors.size(); i < e; ++i) {
-    CHECK(op->vectors[i].dtype().lanes() == 1) << "Only scalars can be shuffled in CUDA!";
+    CHECK(op->vectors[i].type().lanes() == 1) << "Only scalars can be shuffled in CUDA!";
     to_shuffle[i] = PrintExpr(op->vectors[i]);
   }
   os << "make_";
-  PrintType(op->dtype, os);
+  PrintType(op->type, os);
   os << '(';
   for (int i = 0, e = op->indices.size(); i < e; ++i) {
     const int64_t *val = as_const_int(op->indices[i]);
@@ -493,21 +489,21 @@ void CodeGenCUDA::VisitExpr_(const Shuffle* op, std::ostream &os) {
 }
 
 inline void PrintConst(const FloatImm* op, std::ostream& os, CodeGenCUDA* p) {  // NOLINT(*)
-  switch (op->dtype.bits()) {
+  switch (op->type.bits()) {
     case 64: case 32: {
      std::ostringstream temp;
       if (std::isinf(op->value)) {
         if (op->value < 0) {
           temp << "-";
         }
-        temp << ((op->dtype.bits() == 32) ? "CUDART_INF_F" : "CUDART_INF");
+        temp << ((op->type.bits() == 32) ? "CUDART_INF_F" : "CUDART_INF");
         p->need_math_constants_h_ = true;
       } else if (std::isnan(op->value)) {
-        temp << ((op->dtype.bits() == 32) ? "CUDART_NAN_F" : "CUDART_NAN");
+        temp << ((op->type.bits() == 32) ? "CUDART_NAN_F" : "CUDART_NAN");
         p->need_math_constants_h_ = true;
       } else {
         temp << std::scientific << op->value;
-        if (op->dtype.bits() == 32) temp << 'f';
+        if (op->type.bits() == 32) temp << 'f';
       }
       p->MarkConst(temp.str());
       os << temp.str();
@@ -518,7 +514,7 @@ inline void PrintConst(const FloatImm* op, std::ostream& os, CodeGenCUDA* p) {  // NOLINT(*)
       os << '(' << std::scientific << op->value << 'f' << ')';
       break;
     }
-    default: LOG(FATAL) << "Bad bit-width for float: " << op->dtype << "\n";
+    default: LOG(FATAL) << "Bad bit-width for float: " << op->type << "\n";
   }
 }
 
@@ -527,7 +523,7 @@ void CodeGenCUDA::VisitExpr_(const FloatImm *op, std::ostream& os) {  // NOLINT(*)
   PrintConst(op, os, this);
 }
 
-void CodeGenCUDA::PrintWmmaScope(const std::string &scope, DataType t,
+void CodeGenCUDA::PrintWmmaScope(const std::string &scope, Type t,
                                  const Variable* variable, std::ostream &os) {
   std::stringstream type;
   PrintType(t, type);
diff --git a/src/codegen/codegen_cuda.h b/src/codegen/codegen_cuda.h
index 74d6fba35fc7..efb300415b56 100644
--- a/src/codegen/codegen_cuda.h
+++ b/src/codegen/codegen_cuda.h
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -47,13 +47,13 @@ class CodeGenCUDA final : public CodeGenC {
   void PrintStorageSync(const Call* op) final;
   void PrintStorageScope(const std::string& scope, std::ostream& os) final;  // NOLINT(*)
   void PrintVecBinaryOp(
-      const std::string&op, DataType t,
+      const std::string&op, Type t,
       Expr lhs, Expr rhs, std::ostream& os) final;  // NOLINT(*)
-  void PrintType(DataType t, std::ostream& os) final;  // NOLINT(*)
+  void PrintType(Type t, std::ostream& os) final;  // NOLINT(*)
   void PrintVecElemLoad(
-      const std::string& vec, DataType t, int i, std::ostream& os) final;  // NOLINT(*)
+      const std::string& vec, Type t, int i, std::ostream& os) final;  // NOLINT(*)
   void PrintVecElemStore(
-      const std::string& vec, DataType t, int i, const std::string& value) final;
+      const std::string& vec, Type t, int i, const std::string& value) final;
   void BindThreadIndex(const IterVar& iv) final;  // NOLINT(*)
   // overload visitor
   void VisitExpr_(const Ramp* op, std::ostream& os) final;  // NOLINT(*)
@@ -84,10 +84,8 @@ class CodeGenCUDA final : public CodeGenC {
   std::unordered_map<const Variable*, std::string> fragment_shapes;
   std::unordered_map<const Variable*, std::string> fragment_layouts;
   friend void PrintConst(const FloatImm* op, std::ostream& os, CodeGenCUDA* p);
-  void PrintWmmaScope(
-      const std::string& scope, DataType t, const Variable* variable, std::ostream& os);
-  int32_t GetWmmaFragmentSize(
-      const std::string &scope, const Variable* variable, int32_t size);
+  void PrintWmmaScope(const std::string& scope, Type t, const Variable* variable, std::ostream& os);
+  int32_t GetWmmaFragmentSize(const std::string &scope, const Variable* variable, int32_t size);
 };
 
 }  // namespace codegen
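// [Editor's illustration, not part of the patch] CodeGenCUDA::PrintVecBinaryOp
// above lowers a vector operation into per-lane scalar ops through the
// .x/.y/.z/.w accessors that PrintVecElemLoad/Store spell out. A standalone
// sketch of that unpacking over a hypothetical float4-like struct.
#include <iostream>

struct F4 { float x, y, z, w; };

static F4 VecAdd(F4 a, F4 b) {
  F4 r;
  r.x = a.x + b.x;  // lane 0
  r.y = a.y + b.y;  // lane 1
  r.z = a.z + b.z;  // lane 2
  r.w = a.w + b.w;  // lane 3
  return r;
}

int main() {
  F4 r = VecAdd({1, 2, 3, 4}, {10, 20, 30, 40});
  std::cout << r.x << " " << r.y << " " << r.z << " " << r.w << "\n";
  return 0;
}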
this->stream << "kernel void " << f->name << "(\n"; @@ -65,7 +65,7 @@ void CodeGenMetal::AddFunction(LoweredFunc f) { size_t num_buffer = 0; for (size_t i = 0; i < f->args.size(); ++i, ++num_buffer) { Var v = f->args[i]; - if (!v.dtype().is_handle()) break; + if (!v.type().is_handle()) break; stream << " "; std::string vid = AllocVarID(v.get()); auto it = alloc_storage_scope_.find(v.get()); @@ -76,7 +76,7 @@ void CodeGenMetal::AddFunction(LoweredFunc f) { PrintType(handle_data_type_.at(v.get()), stream); stream << "*"; } else { - PrintType(v.dtype(), stream); + PrintType(v.type(), stream); } stream << ' ' << vid << " [[ buffer(" << i << ") ]],\n"; @@ -92,19 +92,19 @@ void CodeGenMetal::AddFunction(LoweredFunc f) { decl_stream << "struct " << arg_buf_type << " {\n"; for (size_t i = num_buffer; i < f->args.size(); ++i) { Var v = f->args[i]; - CHECK(!v.dtype().is_handle()); + CHECK(!v.type().is_handle()); std::string vid = AllocVarID(v.get()); std::ostringstream vref; - if (v.dtype().bits() == 32) { + if (v.type().bits() == 32) { decl_stream << " "; - PrintType(v.dtype(), decl_stream); + PrintType(v.type(), decl_stream); decl_stream << " " << vid << ";\n"; vref << varg << "." << vid; } else { // For non 32bit type, ref through arg union. decl_stream << " __TVMArgUnion " << vid << ";\n"; vref << varg << "." << vid << ".v_"; - PrintType(v.dtype(), vref); + PrintType(v.type(), vref); } var_idmap_[v.get()] = vref.str(); } @@ -121,10 +121,10 @@ void CodeGenMetal::AddFunction(LoweredFunc f) { if (work_dim != 0) { // use ushort by default for now stream << " "; - PrintType(DataType::UInt(thread_index_bits_, work_dim), stream); + PrintType(UInt(thread_index_bits_, work_dim), stream); stream << " blockIdx [[threadgroup_position_in_grid]],\n"; stream << " "; - PrintType(DataType::UInt(thread_index_bits_, work_dim), stream); + PrintType(UInt(thread_index_bits_, work_dim), stream); stream << " threadIdx [[thread_position_in_threadgroup]]\n"; } // bind thread axis @@ -135,7 +135,7 @@ void CodeGenMetal::AddFunction(LoweredFunc f) { vname = vname.substr(0, iv->thread_tag.length() - 2); } var_idmap_[iv->var.get()] = - CastFromTo(vname, DataType::UInt(thread_index_bits_), iv->var.dtype()); + CastFromTo(vname, UInt(thread_index_bits_), iv->var.type()); } // the function scope. 
stream << ") {\n"; @@ -149,17 +149,17 @@ void CodeGenMetal::AddFunction(LoweredFunc f) { void CodeGenMetal::BindThreadIndex(const IterVar& iv) { CHECK(!var_idmap_.count(iv->var.get())); var_idmap_[iv->var.get()] = - CastFromTo(iv->thread_tag, DataType::UInt(thread_index_bits_), iv->var.dtype()); + CastFromTo(iv->thread_tag, UInt(thread_index_bits_), iv->var.type()); } -void CodeGenMetal::PrintType(DataType t, std::ostream& os) { // NOLINT(*) +void CodeGenMetal::PrintType(Type t, std::ostream& os) { // NOLINT(*) int lanes = t.lanes(); if (t.is_handle()) { CHECK_EQ(lanes, 1) << "do not yet support vector types"; os << "void*"; return; } - if (t == DataType::Bool()) { + if (t == Bool()) { os << "bool"; return; } bool fail = false; @@ -210,13 +210,13 @@ void CodeGenMetal::PrintStorageSync(const Call* op) { } void CodeGenMetal::PrintVecElemLoad(const std::string& vec, - DataType t, int i, + Type t, int i, std::ostream& os) { // NOLINT(*) os << vec << "[" << i << "]"; } void CodeGenMetal::PrintVecElemStore(const std::string& vec, - DataType t, int i, + Type t, int i, const std::string& value) { this->PrintIndent(); stream << vec << "[" << i << "]" @@ -236,7 +236,7 @@ void CodeGenMetal::PrintStorageScope( void CodeGenMetal::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*) std::string v = PrintExpr(op->value); - PrintType(op->dtype, os); + PrintType(op->type, os); os << "("; for (int i = 0; i < op->lanes; ++i) { if (i != 0) os << ", "; @@ -249,7 +249,7 @@ void CodeGenMetal::VisitExpr_(const Call* op, std::ostream& os) { // NOLINT(*) if (op->is_intrinsic(Call::reinterpret)) { // generate as_type(ARG) os << "(as_type<"; - this->PrintType(op->dtype, os); + this->PrintType(op->type, os); os << ">("; this->PrintExpr(op->args[0], os); os << "))"; diff --git a/src/codegen/codegen_metal.h b/src/codegen/codegen_metal.h index 728e3e07a916..c009cd1e9169 100644 --- a/src/codegen/codegen_metal.h +++ b/src/codegen/codegen_metal.h @@ -41,14 +41,14 @@ class CodeGenMetal final : public CodeGenC { void InitFuncState(LoweredFunc f) final; void PrintStorageScope(const std::string& scope, std::ostream& os) final; // NOLINT(*) void PrintStorageSync(const Call* op) final; // NOLINT(*) - void PrintType(DataType t, std::ostream& os) final; // NOLINT(*) + void PrintType(Type t, std::ostream& os) final; // NOLINT(*) void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) // print load of single element void PrintVecElemLoad( - const std::string& vec, DataType t, int i, std::ostream& os) final; // NOLINT(*) + const std::string& vec, Type t, int i, std::ostream& os) final; // NOLINT(*) // print store of single element. void PrintVecElemStore( - const std::string& vec, DataType t, int i, const std::string& value) final; + const std::string& vec, Type t, int i, const std::string& value) final; // overload visitor void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*) diff --git a/src/codegen/codegen_opencl.cc b/src/codegen/codegen_opencl.cc index ae434197400f..49dccb173ed3 100644 --- a/src/codegen/codegen_opencl.cc +++ b/src/codegen/codegen_opencl.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -39,7 +39,7 @@ CodeGenOpenCL::CodeGenOpenCL() {
 void CodeGenOpenCL::InitFuncState(LoweredFunc f) {
   CodeGenC::InitFuncState(f);
   for (Var arg : f->args) {
-    if (arg.dtype().is_handle()) {
+    if (arg.type().is_handle()) {
       alloc_storage_scope_[arg.get()] = "global";
     }
   }
@@ -89,17 +89,17 @@ void CodeGenOpenCL::BindThreadIndex(const IterVar& iv) {
     os << "get_group_id(" << ts.dim_index << ")";
   }
   var_idmap_[iv->var.get()] =
-      CastFromTo(os.str(), DataType::UInt(64), iv->var.dtype());
+      CastFromTo(os.str(), UInt(64), iv->var.type());
 }
 
-void CodeGenOpenCL::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
+void CodeGenOpenCL::PrintType(Type t, std::ostream& os) {  // NOLINT(*)
   int lanes = t.lanes();
   if (t.is_handle()) {
     CHECK_EQ(lanes, 1)
         << "do not yet support vector types";
     os << "void*"; return;
   }
-  if (t == DataType::Bool()) {
+  if (t == Bool()) {
     os << "bool"; return;
   }
   bool fail = false;
@@ -144,7 +144,7 @@ void CodeGenOpenCL::PrintType(DataType t, std::ostream& os) {  // NOLINT(*)
   LOG(FATAL) << "Cannot convert type " << t << " to OpenCL type";
 }
 
-void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, DataType t,
+void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, Type t,
                                  Expr base, std::ostream& os) {  // NOLINT(*)
   if (!HandleTypeMatch(buffer, t.element_of())) {
     os << '(';
@@ -160,7 +160,7 @@ void CodeGenOpenCL::PrintVecAddr(const Variable* buffer, DataType t,
   PrintExpr(base, os);
 }
 std::string CodeGenOpenCL::GetVecLoad(
-    DataType t, const Variable* buffer, Expr base) {
+    Type t, const Variable* buffer, Expr base) {
   std::ostringstream os;
   os << "vload" << t.lanes() << "(0, ";
   PrintVecAddr(buffer, t, base, os);
@@ -169,7 +169,7 @@ std::string CodeGenOpenCL::GetVecLoad(
 }
 
 void CodeGenOpenCL::PrintVecStore(const Variable* buffer,
-                                  DataType t, Expr base,
+                                  Type t, Expr base,
                                   const std::string& value) {
   this->PrintIndent();
   stream << "vstore" << t.lanes() << "(" << value << ", 0, ";
@@ -199,7 +199,7 @@ void CodeGenOpenCL::PrintStorageScope(
   }
 }
 
-std::string CodeGenOpenCL::CastFromTo(std::string value, DataType from, DataType target) {
+std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) {
   if (from == target) return value;
   std::ostringstream os;
   if (target.lanes() == 1) {
@@ -218,7 +218,7 @@ std::string CodeGenOpenCL::CastFromTo(std::string value, DataType from, DataType target) {
 void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) {   // NOLINT(*)
   std::string v = PrintExpr(op->value);
   os << "((";
-  PrintType(op->dtype, os);
+  PrintType(op->type, os);
   os << ")(";
   for (int i = 0; i < op->lanes; ++i) {
     if (i != 0) os << ", ";
@@ -232,7 +232,7 @@ void CodeGenOpenCL::VisitExpr_(const Call *op, std::ostream& os) {  // NOLINT(*)
    * add a cast */
  if (op->is_intrinsic(intrinsic::tvm_if_then_else)) {
     os << "(";
-    PrintType(op->args[2].dtype(), os);
+    PrintType(op->args[2].type(), os);
     os << ")";
   }
   CodeGenC::VisitExpr_(op, os);
@@ -242,7 +242,7 @@ void CodeGenOpenCL::VisitExpr_(const Select* op, std::ostream& os) {  // NOLINT(*)
   /* Return type of ternary expression is not always same as its sub-expressions,
    * add a cast */
   os << "(";
-  PrintType(op->true_value.dtype(), os);
+  PrintType(op->true_value.type(), os);
   os << ")";
   CodeGenC::VisitExpr_(op, os);
 }
diff --git a/src/codegen/codegen_opencl.h b/src/codegen/codegen_opencl.h
index 36324eb431ae..32f4501276e7 100644
--- a/src/codegen/codegen_opencl.h
+++ b/src/codegen/codegen_opencl.h
@@ -43,16 +43,16 @@ class CodeGenOpenCL final : public CodeGenC {
   void BindThreadIndex(const IterVar& iv) final;  // NOLINT(*)
   void PrintStorageScope(const std::string& scope, std::ostream& os) final;  // NOLINT(*)
   void PrintStorageSync(const Call* op) final;  // NOLINT(*)
-  void PrintType(DataType t, std::ostream& os) final;  // NOLINT(*)
-  std::string GetVecLoad(DataType t, const Variable* buffer,
+  void PrintType(Type t, std::ostream& os) final;  // NOLINT(*)
+  std::string GetVecLoad(Type t, const Variable* buffer,
                          Expr base) final;
   void PrintVecStore(const Variable* buffer,
-                     DataType t, Expr base,
+                     Type t, Expr base,
                      const std::string& value) final;  // NOLINT(*)
   // the address of load/store
-  void PrintVecAddr(const Variable* buffer, DataType t,
+  void PrintVecAddr(const Variable* buffer, Type t,
                     Expr base, std::ostream& os);  // NOLINT(*)
-  std::string CastFromTo(std::string value, DataType from, DataType target);  // NOLINT(*)
+  std::string CastFromTo(std::string value, Type from, Type target);  // NOLINT(*)
   // overload visitor
   void VisitExpr_(const Broadcast* op, std::ostream& os) final;  // NOLINT(*)
diff --git a/src/codegen/codegen_opengl.cc b/src/codegen/codegen_opengl.cc
index db14be3b395e..52e04db12480 100644
--- a/src/codegen/codegen_opengl.cc
+++ b/src/codegen/codegen_opengl.cc
@@ -59,7 +59,7 @@ void CodeGenOpenGL::AddFunction(LoweredFunc f) {
   GetUniqueName("_");
   // add to alloc buffer type.
   for (const auto& kv : f->handle_data_type) {
-    RegisterHandleType(kv.first.get(), kv.second.dtype());
+    RegisterHandleType(kv.first.get(), kv.second.type());
   }
 
   // Allocate argument names. Store in `var_idmap_`.
@@ -93,7 +93,7 @@ void CodeGenOpenGL::AddFunction(LoweredFunc f) {
       auto type_it = this->handle_data_type_.find(arg.get());
       CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type.";
-      DLDataType type = type_it->second;
+      auto type = Type2TVMType(type_it->second);
       CHECK_EQ(type.lanes, 1) << "Vector type not supported.";
 
       switch (type.code) {
@@ -129,7 +129,7 @@ void CodeGenOpenGL::AddFunction(LoweredFunc f) {
       // Format: "uniform {type} {name};"
       auto arg_name = GetVarID(arg.get());
-      auto type = arg.get()->dtype;
+      auto type = arg.get()->type;
 
       this->decl_stream << "uniform ";
       PrintType(type, this->decl_stream);
@@ -207,7 +207,7 @@ std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) {
 
 // Print a reference expression to a buffer.
 // Format: texelFetch(buffer, index, 0).r
 std::string CodeGenOpenGL::GetBufferRef(
-    DataType t, const Variable* buffer, Expr index) {
+    Type t, const Variable* buffer, Expr index) {
   CHECK_EQ(t.lanes(), 1) << "Vector type not supported.";
   CHECK(HandleTypeMatch(buffer, t)) << "Type mismatch not supported.";
@@ -221,7 +221,7 @@ std::string CodeGenOpenGL::GetBufferRef(
   }
 }
 
-void CodeGenOpenGL::PrintType(DataType t, std::ostream& os) {
+void CodeGenOpenGL::PrintType(Type t, std::ostream& os) {
   switch (t.code()) {
     case kDLInt:
       CHECK_EQ(t.bits(), 32) << "Only support 32-bit int.";
@@ -243,17 +243,17 @@ void CodeGenOpenGL::PrintType(DataType t, std::ostream& os) {
 
 // Codegen for immediate values
 void CodeGenOpenGL::VisitExpr_(const IntImm* op, std::ostream& os) {
-  CHECK_EQ(op->dtype, DataType::Int(32)) << "GLSL 3.0 only supports 32-bit ints.";
+  CHECK_EQ(op->type, Int(32)) << "GLSL 3.0 only supports 32-bit ints.";
   CodeGenC::VisitExpr_(op, os);
 }
 
 void CodeGenOpenGL::VisitExpr_(const UIntImm* op, std::ostream& os) {
-  CHECK_EQ(op->dtype, DataType::UInt(32)) << "GLSL 3.0 only supports 32-bit uints.";
+  CHECK_EQ(op->type, UInt(32)) << "GLSL 3.0 only supports 32-bit uints.";
   CodeGenC::VisitExpr_(op, os);
 }
 
 void CodeGenOpenGL::VisitExpr_(const FloatImm* op, std::ostream& os) {
-  CHECK_EQ(op->dtype, DataType::Float(32)) << "GLSL 3.0 only supports 32-bit floats.";
+  CHECK_EQ(op->type, Float(32)) << "GLSL 3.0 only supports 32-bit floats.";
   CodeGenC::VisitExpr_(op, os);
 }
 
@@ -273,7 +273,7 @@ void CodeGenOpenGL::VisitStmt_(const Evaluate* op) {
   auto value = call->args[1];
 
   // Doesn't support store to vector.
-  auto type = value.dtype();
+  auto type = value.type();
   CHECK_EQ(type.lanes(), 1)
     << "Vectorized store not implemented, type = " << type;
diff --git a/src/codegen/codegen_opengl.h b/src/codegen/codegen_opengl.h
index 46e87a8165c1..d18052f5f46c 100644
--- a/src/codegen/codegen_opengl.h
+++ b/src/codegen/codegen_opengl.h
@@ -45,8 +45,8 @@ class CodeGenOpenGL final : public CodeGenC {
   void BindThreadIndex(const IterVar& iv) final;
   void VisitStmt_(const Store* op) final;
   std::string TexelFetch(const Variable* buffer, Expr index);
-  std::string GetBufferRef(DataType t, const Variable* buffer, Expr index) final;
-  void PrintType(DataType t, std::ostream& os) final;  // NOLINT(*)
+  std::string GetBufferRef(Type t, const Variable* buffer, Expr index) final;
+  void PrintType(Type t, std::ostream& os) final;  // NOLINT(*)
 
   // Codegen for immediate values
   void VisitExpr_(const IntImm* op, std::ostream& os) final;  // NOLINT(*)
diff --git a/src/codegen/codegen_source_base.cc b/src/codegen/codegen_source_base.cc
index 7c4ed5b91c8b..9a9f525d40f1 100644
--- a/src/codegen/codegen_source_base.cc
+++ b/src/codegen/codegen_source_base.cc
@@ -52,7 +52,7 @@ std::string CodeGenSourceBase::GetUniqueName(std::string prefix) {
   return prefix;
 }
 
-std::string CodeGenSourceBase::SSAGetID(std::string src, DataType t) {
+std::string CodeGenSourceBase::SSAGetID(std::string src, Type t) {
   if (name_alloc_map_.count(src)) return src;
   auto it = ssa_assign_map_.find(src);
   if (it != ssa_assign_map_.end()) {
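// [Editor's illustration, not part of the patch] SSAGetID above hands back one
// id per distinct source expression, so repeated subexpressions are emitted
// once. A minimal sketch of that memoisation; the single map and the "_N"
// naming are simplified stand-ins for CodeGenSourceBase's bookkeeping.
#include <iostream>
#include <string>
#include <unordered_map>

static std::unordered_map<std::string, std::string> ssa_assign_map;
static int id_counter = 0;

static std::string SSAGetID(const std::string& src) {
  auto it = ssa_assign_map.find(src);
  if (it != ssa_assign_map.end()) return it->second;  // already bound
  std::string id = "_" + std::to_string(id_counter++);
  ssa_assign_map[src] = id;
  return id;
}

int main() {
  std::cout << SSAGetID("a + b") << "\n";  // _0
  std::cout << SSAGetID("a + b") << "\n";  // _0 again: shared subexpression
  std::cout << SSAGetID("a * b") << "\n";  // _1
  return 0;
}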
diff --git a/src/codegen/codegen_source_base.h b/src/codegen/codegen_source_base.h
index 7fd0eef98a90..e0608c6afbde 100644
--- a/src/codegen/codegen_source_base.h
+++ b/src/codegen/codegen_source_base.h
@@ -79,7 +79,7 @@ class CodeGenSourceBase {
    * \param src The source expression
    * \param t The type of the expression.
    */
-  std::string SSAGetID(std::string src, DataType t);
+  std::string SSAGetID(std::string src, Type t);
   /*!
    * \brief get a unique name with the corresponding prefix
    * \param prefix The prefix of the name
@@ -103,7 +103,7 @@ class CodeGenSourceBase {
    * \param t The type of target.
    */
   virtual void PrintSSAAssign(
-      const std::string& target, const std::string& src, DataType t) = 0;
+      const std::string& target, const std::string& src, Type t) = 0;
 
   /*! \brief the declaration stream */
   std::ostringstream decl_stream;
diff --git a/src/codegen/codegen_vhls.cc b/src/codegen/codegen_vhls.cc
index 40550d9f9916..84329f90ddfc 100644
--- a/src/codegen/codegen_vhls.cc
+++ b/src/codegen/codegen_vhls.cc
@@ -37,7 +37,7 @@ void CodeGenVivadoHLS::Init(bool output_ssa) {
   this->stream << "#include <ap_int.h>\n\n";
 }
 
-void CodeGenVivadoHLS::PrintType(DataType t, std::ostream& os) {
+void CodeGenVivadoHLS::PrintType(Type t, std::ostream& os) {
   if (t.is_uint()) {
     switch (t.bits()) {
      case 8:
@@ -78,7 +78,7 @@ void CodeGenVivadoHLS::PreFunctionBody(LoweredFunc f) {
   for (size_t i = 0; i < f->args.size(); ++i) {
     Var v = f->args[i];
     std::string vid = GetVarID(v.get());
-    if (v.dtype().is_handle()) {
+    if (v.type().is_handle()) {
       this->stream << "#pragma HLS INTERFACE m_axi port=" << vid << " offset=slave bundle=gmem\n";
     }
     this->stream << "#pragma HLS INTERFACE s_axilite port=" << vid << " bundle=control\n";
@@ -100,8 +100,8 @@ inline void PrintBinaryExpr(const T* op,
 
 void CodeGenVivadoHLS::VisitExpr_(const Min *op, std::ostream& os) {  // NOLINT(*)
   const char *opstr = "std::min";
-  if (op->dtype.is_float()) {
-    switch (op->dtype.bits()) {
+  if (op->type.is_float()) {
+    switch (op->type.bits()) {
       case 32:
         opstr = "fminf"; break;
       case 64:
@@ -114,8 +114,8 @@ void CodeGenVivadoHLS::VisitExpr_(const Min *op, std::ostream& os) {  // NOLINT(*)
 
 void CodeGenVivadoHLS::VisitExpr_(const Max *op, std::ostream& os) {  // NOLINT(*)
   const char *opstr = "std::max";
-  if (op->dtype.is_float()) {
-    switch (op->dtype.bits()) {
+  if (op->type.is_float()) {
+    switch (op->type.bits()) {
       case 32:
         opstr = "fmaxf"; break;
       case 64:
diff --git a/src/codegen/codegen_vhls.h b/src/codegen/codegen_vhls.h
index e678edb05198..4ec7b105385d 100644
--- a/src/codegen/codegen_vhls.h
+++ b/src/codegen/codegen_vhls.h
@@ -35,7 +35,7 @@ namespace codegen {
 class CodeGenVivadoHLS final : public CodeGenC {
  public:
  void Init(bool output_ssa);
-  void PrintType(DataType t, std::ostream& os);
+  void PrintType(Type t, std::ostream& os);
   void AddFunction(LoweredFunc f);
   void PreFunctionBody(LoweredFunc f);
   void VisitExpr_(const Min *op, std::ostream& os);
diff --git a/src/codegen/intrin_rule.cc b/src/codegen/intrin_rule.cc
index 219b485387d5..f765c0095ce1 100644
--- a/src/codegen/intrin_rule.cc
+++ b/src/codegen/intrin_rule.cc
@@ -57,7 +57,7 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.rsqrt")
     const Call* call = e.as<Call>();
     CHECK(call != nullptr);
 
-    auto one = make_const(call->args[0].dtype(), 1);
+    auto one = make_const(call->args[0].type(), 1);
     *rv = one / sqrt(call->args[0]);
   });
 
@@ -70,7 +70,7 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.sigmoid")
     const Call* call = e.as<Call>();
     CHECK(call != nullptr);
 
-    auto one = make_const(call->args[0].dtype(), 1);
+    auto one = make_const(call->args[0].type(), 1);
     *rv = one / (one + exp(-call->args[0]));
   });
 
diff --git a/src/codegen/intrin_rule.h b/src/codegen/intrin_rule.h
index 581387da69cf..9f3bd793dd39 100644
--- a/src/codegen/intrin_rule.h
+++ b/src/codegen/intrin_rule.h
@@ -6,9 +6,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -37,10 +37,10 @@ using namespace ir;
 
 // Add float suffix to the intrinsics
 struct FloatSuffix {
-  std::string operator()(DataType t, std::string name) const {
-    if (t == DataType::Float(32)) {
+  std::string operator()(Type t, std::string name) const {
+    if (t == Float(32)) {
       return name + 'f';
-    } else if (t == DataType::Float(64)) {
+    } else if (t == Float(64)) {
       return name;
     } else {
       return "";
@@ -50,7 +50,7 @@ struct FloatSuffix {
 
 // Return the intrinsic name
 struct Direct {
-  std::string operator()(DataType t, std::string name) const {
+  std::string operator()(Type t, std::string name) const {
     return name;
   }
 };
@@ -61,10 +61,10 @@ inline void DispatchExtern(const TVMArgs& args, TVMRetValue* rv) {
   Expr e = args[0];
   const Call* call = e.as<Call>();
   CHECK(call != nullptr);
-  std::string name = T()(call->dtype, call->name);
+  std::string name = T()(call->type, call->name);
   if (name.length() != 0) {
     *rv = Call::make(
-        call->dtype, name, call->args, Call::PureExtern);
+        call->type, name, call->args, Call::PureExtern);
   } else {
     *rv = e;
   }
diff --git a/src/codegen/intrin_rule_cuda.cc b/src/codegen/intrin_rule_cuda.cc
index 3f6bc7ba1d06..4fed20fce51d 100644
--- a/src/codegen/intrin_rule_cuda.cc
+++ b/src/codegen/intrin_rule_cuda.cc
@@ -28,7 +28,7 @@ namespace codegen {
 namespace intrin {
 // Add float suffix to the intrinsics, CUDA fast math.
 struct CUDAMath {
-  std::string operator()(DataType t, std::string name) const {
+  std::string operator()(Type t, std::string name) const {
     if (t.lanes() == 1) {
       if (t.is_float()) {
         switch (t.bits()) {
@@ -44,7 +44,7 @@ struct CUDAMath {
 };
 
 struct CUDAFastMath : public CUDAMath {
-  std::string operator()(DataType t, std::string name) const {
+  std::string operator()(Type t, std::string name) const {
     if (t.lanes() == 1 && t.is_float() && t.bits() == 32) {
       return "__" + name + 'f';
     } else {
@@ -55,7 +55,7 @@ struct CUDAFastMath : public CUDAMath {
 };
 
 struct CUDAPopcount {
-  std::string operator()(DataType t, std::string name) const {
+  std::string operator()(Type t, std::string name) const {
     if (t.lanes() == 1 && t.is_uint()) {
       switch (t.bits()) {
         case 32: return "__popc";
@@ -68,7 +68,7 @@ struct CUDAPopcount {
 };
 
 struct CUDAShuffle {
-  std::string operator()(DataType t, std::string name) const {
+  std::string operator()(Type t, std::string name) const {
     return "__shfl";
   }
 };
diff --git a/src/codegen/intrin_rule_opencl.cc b/src/codegen/intrin_rule_opencl.cc
index 4b1d4033c16f..246747cc361d 100644
--- a/src/codegen/intrin_rule_opencl.cc
+++ b/src/codegen/intrin_rule_opencl.cc
@@ -66,7 +66,7 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.fmod")
 // There is no warp shuffle instruction in standard OpenCL
 // When shuffle is used, we assume it is intel's shuffle extension
 struct IntelShuffle {
-  std::string operator()(DataType t, std::string name) const {
+  std::string operator()(Type t, std::string name) const {
     return "intel_sub_group_shuffle";
   }
 };
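// [Editor's illustration, not part of the patch] The intrinsic tables above
// all follow one pattern: a small functor maps (type, name) to an extern
// symbol, and an empty string means "no rule, keep the original call". A
// standalone sketch with a FloatSuffix-like policy; MiniType and Dispatch are
// hypothetical shorthand for the TVM types involved.
#include <iostream>
#include <string>

struct MiniType { int bits; bool is_float; };

struct FloatSuffixPolicy {
  std::string operator()(MiniType t, const std::string& name) const {
    if (t.is_float && t.bits == 32) return name + "f";  // expf, sqrtf, ...
    if (t.is_float && t.bits == 64) return name;        // exp, sqrt, ...
    return "";                                          // no rule applies
  }
};

template <typename Policy>
std::string Dispatch(MiniType t, const std::string& name) {
  std::string s = Policy()(t, name);
  return s.empty() ? name + " (unlowered)" : s;
}

int main() {
  std::cout << Dispatch<FloatSuffixPolicy>({32, true}, "exp") << "\n";   // expf
  std::cout << Dispatch<FloatSuffixPolicy>({64, true}, "exp") << "\n";   // exp
  std::cout << Dispatch<FloatSuffixPolicy>({32, false}, "exp") << "\n";  // unlowered
  return 0;
}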
diff --git a/src/codegen/llvm/codegen_amdgpu.cc b/src/codegen/llvm/codegen_amdgpu.cc
index f57a3ca869ef..491a304983c6 100644
--- a/src/codegen/llvm/codegen_amdgpu.cc
+++ b/src/codegen/llvm/codegen_amdgpu.cc
@@ -82,7 +82,7 @@ class CodeGenAMDGPU : public CodeGenLLVM {
           << "Can only handle constant size stack allocation in GPU";
       StorageInfo& info = alloc_storage_info_[op->buffer_var.get()];
       if (constant_size % 4 == 0 && info.alignment == 0) {
-        info.alignment = GetTempAllocaAlignment(op->dtype, constant_size);
+        info.alignment = GetTempAllocaAlignment(op->type, constant_size);
       }
       // maximum necessary alignment in the AMD devices
       if (info.alignment > 16) {
@@ -93,7 +93,7 @@ class CodeGenAMDGPU : public CodeGenLLVM {
       // TODO(tqchen): for higher version of LLVM, local address space can be set.
       llvm::AllocaInst* alloca = WithFunctionEntry([&]() {
           return builder_->CreateAlloca(
-              LLVMType(op->dtype), ConstInt32(constant_size));
+              LLVMType(op->type), ConstInt32(constant_size));
         });
       if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) {
 #if TVM_LLVM_VERSION >= 100
@@ -108,7 +108,7 @@ class CodeGenAMDGPU : public CodeGenLLVM {
           << "Can only allocate shared or local memory inside kernel";
       // Shared memory: address space  == 3
       const unsigned shared_address_space = 3;
-      llvm::Type* type = llvm::ArrayType::get(LLVMType(op->dtype), constant_size);
+      llvm::Type* type = llvm::ArrayType::get(LLVMType(op->type), constant_size);
      // Allocate shared memory in global, address_space = 3
       llvm::GlobalVariable *global = new llvm::GlobalVariable(
           *module_, type, false, llvm::GlobalValue::PrivateLinkage, 0, ".shared",
@@ -122,7 +122,7 @@ class CodeGenAMDGPU : public CodeGenLLVM {
     }
   }
   buf = builder_->CreatePointerCast(
-      buf, LLVMType(op->dtype)->getPointerTo(
+      buf, LLVMType(op->type)->getPointerTo(
           buf->getType()->getPointerAddressSpace()));
   CHECK(!var_map_.count(op->buffer_var.get()));
   var_map_[op->buffer_var.get()] = buf;
diff --git a/src/codegen/llvm/codegen_arm.cc b/src/codegen/llvm/codegen_arm.cc
index 4c092dfe377a..9b21455605c3 100644
--- a/src/codegen/llvm/codegen_arm.cc
+++ b/src/codegen/llvm/codegen_arm.cc
@@ -6,9 +6,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -61,14 +61,14 @@ Expr CodeGenARM::ARMPopcount(const Call *call) {
   ::llvm::Intrinsic::ID vpaddlu_id = ::llvm::Intrinsic::arm_neon_vpaddlu;
 
   // Fallback to default llvm lowering rule if input type not a full vector or half vector length
-  int total_size = call->dtype.bits() * call->dtype.lanes();
-  if (!call->dtype.is_vector() || call->dtype.bits() == 8 ||
+  int total_size = call->type.bits() * call->type.lanes();
+  if (!call->type.is_vector() || call->type.bits() == 8 ||
       (total_size != 128 && total_size != 64)) {
     Array<Expr> vcnt_args;
-    vcnt_args.push_back(ir::UIntImm::make(DataType::UInt(32), ctpop_id));
-    vcnt_args.push_back(ir::UIntImm::make(DataType::UInt(32), 1));
+    vcnt_args.push_back(ir::UIntImm::make(UInt(32), ctpop_id));
+    vcnt_args.push_back(ir::UIntImm::make(UInt(32), 1));
     vcnt_args.push_back(e);
-    return ir::Call::make(call->dtype, "llvm_intrin", vcnt_args, Call::PureIntrinsic);
+    return ir::Call::make(call->type, "llvm_intrin", vcnt_args, Call::PureIntrinsic);
   }
 
   // Popcount lowering rule:
@@ -77,12 +77,9 @@ Expr CodeGenARM::ARMPopcount(const Call *call) {
   // to return back to original input type
 
   // Dvisions are always divisible (number of bits = 64 or 128)
-  DataType uint8_type = DataType(
-      e.dtype().code(), 8, e.dtype().bits() * e.dtype().lanes() / 8);
-  DataType uint16_type = DataType(
-      uint8_type.code(), 16, uint8_type.bits() * uint8_type.lanes() / 16);
-  DataType uint32_type = DataType(
-      uint16_type.code(), 32, uint8_type.bits() * uint8_type.lanes() / 32);
+  Type uint8_type = Type(e.type().code(), 8, e.type().bits() * e.type().lanes() / 8);
+  Type uint16_type = Type(uint8_type.code(), 16, uint8_type.bits() * uint8_type.lanes() / 16);
+  Type uint32_type = Type(uint16_type.code(), 32, uint8_type.bits() * uint8_type.lanes() / 32);
 
   // Interpret input as vector of 8bit values
   Expr input8 = reinterpret(uint8_type, e);
@@ -90,37 +87,37 @@ Expr CodeGenARM::ARMPopcount(const Call *call) {
   const Call* c0 = input8.as<Call>();
   CHECK(c0 != nullptr);
   Array<Expr> vcnt8_args;
-  vcnt8_args.push_back(ir::UIntImm::make(DataType::UInt(32), ctpop_id));
-  vcnt8_args.push_back(ir::UIntImm::make(DataType::UInt(32), 1));
+  vcnt8_args.push_back(ir::UIntImm::make(UInt(32), ctpop_id));
+  vcnt8_args.push_back(ir::UIntImm::make(UInt(32), 1));
   vcnt8_args.push_back(input8);
   Expr vcnt8 = ir::Call::make(uint8_type, "llvm_intrin", vcnt8_args, Call::PureIntrinsic);
 
   // Accumulation 8->16bit
   Array<Expr> vcnt16_args;
-  vcnt16_args.push_back(ir::UIntImm::make(DataType::UInt(32), vpaddlu_id));
-  vcnt16_args.push_back(ir::UIntImm::make(DataType::UInt(32), 1));
+  vcnt16_args.push_back(ir::UIntImm::make(UInt(32), vpaddlu_id));
+  vcnt16_args.push_back(ir::UIntImm::make(UInt(32), 1));
   vcnt16_args.push_back(vcnt8);
   Expr vcnt16 = ir::Call::make(uint16_type, "llvm_intrin", vcnt16_args, Call::PureIntrinsic);
-  if (call->dtype.bits() == 16) {
+  if (call->type.bits() == 16) {
     return vcnt16;
   }
 
   // Accumulation 16->32bit
   Array<Expr> vcnt32_args;
-  vcnt32_args.push_back(ir::UIntImm::make(DataType::UInt(32), vpaddlu_id));
-  vcnt32_args.push_back(ir::UIntImm::make(DataType::UInt(32), 1));
+  vcnt32_args.push_back(ir::UIntImm::make(UInt(32), vpaddlu_id));
+  vcnt32_args.push_back(ir::UIntImm::make(UInt(32), 1));
   vcnt32_args.push_back(vcnt16);
   Expr vcnt32 = ir::Call::make(uint32_type, "llvm_intrin", vcnt32_args, Call::PureIntrinsic);
-  if (call->dtype.bits() == 32) {
+  if (call->type.bits() == 32) {
     return vcnt32;
   }
 
   // Accumulation 32->64bit
   Array<Expr> vcnt64_args;
-  vcnt64_args.push_back(ir::UIntImm::make(DataType::UInt(32), vpaddlu_id));
-  vcnt64_args.push_back(ir::UIntImm::make(DataType::UInt(32), 1));
+  vcnt64_args.push_back(ir::UIntImm::make(UInt(32), vpaddlu_id));
+  vcnt64_args.push_back(ir::UIntImm::make(UInt(32), 1));
   vcnt64_args.push_back(vcnt32);
-  return ir::Call::make(call->dtype, "llvm_intrin", vcnt64_args, Call::PureIntrinsic);
+  return ir::Call::make(call->type, "llvm_intrin", vcnt64_args, Call::PureIntrinsic);
 }
 
 TVM_REGISTER_GLOBAL("tvm.codegen.llvm.target_arm")
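// [Editor's illustration, not part of the patch] ARMPopcount above lowers
// popcount as per-byte counts (NEON vcnt) followed by pairwise widening adds
// (vpaddlu), accumulating 8->16->32->64 bits. A scalar re-enactment of that
// chain on one uint64_t, so the widening steps can be followed in isolation.
#include <cstdint>
#include <iostream>

int main() {
  uint64_t x = 0xF0F0F0F0F0F0F0F0ULL;  // 4 set bits per byte, 32 total
  // Step 1: byte-wise popcounts, the vcnt analogue.
  uint64_t c = 0;
  for (int i = 0; i < 8; ++i) {
    uint8_t b = (x >> (8 * i)) & 0xFF;
    uint8_t n = 0;
    while (b) { n += b & 1; b >>= 1; }
    c |= static_cast<uint64_t>(n) << (8 * i);
  }
  // Steps 2-4: pairwise widening accumulation, the vpaddlu analogue.
  c = (c & 0x00FF00FF00FF00FFULL) + ((c >> 8)  & 0x00FF00FF00FF00FFULL);  // 8->16
  c = (c & 0x0000FFFF0000FFFFULL) + ((c >> 16) & 0x0000FFFF0000FFFFULL);  // 16->32
  c = (c & 0x00000000FFFFFFFFULL) + (c >> 32);                            // 32->64
  std::cout << "popcount = " << c << "\n";  // prints 32
  return 0;
}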
#if TVM_LLVM_VERSION >= 50 fcompute->addParamAttr(idx, llvm::Attribute::NoAlias); @@ -532,8 +532,8 @@ void CodeGenCPU::CreateParallelLaunch(const Stmt& body, int num_task) { UnpackClosureData(cdata, vfields, &new_vmap); // setup parallel env ParallelEnv par_env; - par_env.task_id = Var("task_id", DataType::Int(32)); - par_env.num_task = Var("num_task", DataType::Int(32)); + par_env.task_id = Var("task_id", Int(32)); + par_env.num_task = Var("num_task", Int(32)); new_vmap[par_env.task_id.get()] = task_id; new_vmap[par_env.num_task.get()] = builder_->CreateLoad( builder_->CreateInBoundsGEP( @@ -670,7 +670,7 @@ llvm::Value* CodeGenCPU::GetPackedFuncHandle(const std::string& fname) { llvm::BasicBlock * CodeGenCPU::MakeCallPacked(const Array &args, llvm::Value **rvalue, - llvm::Value **ret_tcode, const DataType &r_type, + llvm::Value **ret_tcode, const Type &r_type, const int64_t begin, const int64_t end) { using llvm::BasicBlock; std::string func_name = args[0].as()->value; @@ -684,15 +684,15 @@ CodeGenCPU::MakeCallPacked(const Array &args, llvm::Value **rvalue, builder_->CreatePointerCast(stack_value, t_tvm_value_->getPointerTo()), ConstInt32(begin)); llvm::Value *arg_tcode = - CreateBufferPtr(DataType::Int(32), stack_tcode, ConstInt32(begin)); + CreateBufferPtr(Int(32), stack_tcode, ConstInt32(begin)); llvm::Value *ret_value = builder_->CreateInBoundsGEP( builder_->CreatePointerCast(stack_value, t_tvm_value_->getPointerTo()), ConstInt32(end)); - *ret_tcode = CreateBufferPtr(DataType::Int(32), stack_tcode, ConstInt32(end)); + *ret_tcode = CreateBufferPtr(Int(32), stack_tcode, ConstInt32(end)); BasicBlock *end_block = CheckCallSuccess(builder_->CreateCall( RuntimeTVMFuncCall(), {handle, arg_value, arg_tcode, ConstInt32(nargs), ret_value, *ret_tcode})); - DataType r_api_type = ir::APIType(r_type); + Type r_api_type = ir::APIType(r_type); *rvalue = builder_->CreateAlignedLoad( builder_->CreatePointerCast(ret_value, LLVMType(r_api_type)->getPointerTo()), @@ -705,7 +705,7 @@ llvm::Value *CodeGenCPU::CreateCallPacked(const Call *op) { CHECK_EQ(op->args.size(), 5U); llvm::Value *rvalue = nullptr; llvm::Value *ret_tcode = nullptr; - MakeCallPacked(op->args, &rvalue, &ret_tcode, op->dtype, + MakeCallPacked(op->args, &rvalue, &ret_tcode, op->type, op->args[3].as()->value, op->args[4].as()->value); return rvalue; @@ -717,7 +717,7 @@ llvm::Value *CodeGenCPU::CreateCallTracePacked(const Call *op) { llvm::Value *rvalue = nullptr; llvm::Value *ret_tcode = nullptr; BasicBlock *end_block = MakeCallPacked( - op->args, &rvalue, &ret_tcode, op->dtype, op->args[3].as()->value, + op->args, &rvalue, &ret_tcode, op->type, op->args[3].as()->value, op->args[4].as()->value); // Get traced value. 
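// ---------------------------------------------------------------------------
// For reference, the sequence MakeCallPacked emits corresponds to this
// host-side C++ against the public TVM C API (tvm/runtime/c_runtime_api.h).
// A minimal sketch assuming a hypothetical packed function taking and
// returning float64; the begin/end indices in the codegen select the same
// value/type-code stack slots that are filled in here.
#include <tvm/runtime/c_runtime_api.h>

double call_packed_f64(TVMFunctionHandle f, double a, double b) {
  TVMValue values[2];
  int tcodes[2];
  values[0].v_float64 = a; tcodes[0] = kDLFloat;  // arg slots [begin, end)
  values[1].v_float64 = b; tcodes[1] = kDLFloat;
  TVMValue ret;
  int ret_tcode;
  // CheckCallSuccess in the codegen corresponds to this nonzero check.
  if (TVMFuncCall(f, values, tcodes, 2, &ret, &ret_tcode) != 0) return 0.0;
  return ret.v_float64;  // the aligned load through APIType(r_type)
}
// ---------------------------------------------------------------------------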
llvm::Value *traced_value = MakeValue(op->args[5]); @@ -800,7 +800,7 @@ llvm::Value* CodeGenCPU::CreateIntrinsic(const Call* op) { CHECK_EQ(op->args.size(), 3U); int kind = op->args[2].as()->value; llvm::Value* ref = this->CreateStructRefPtr( - op->dtype, MakeValue(op->args[0]), + op->type, MakeValue(op->args[0]), MakeValue(op->args[1]), kind); if (kind == intrinsic::kArrAddr) { return builder_->CreatePointerCast(ref, t_void_p_); @@ -812,7 +812,7 @@ llvm::Value* CodeGenCPU::CreateIntrinsic(const Call* op) { int kind = op->args[2].as()->value; llvm::Value* value = MakeValue(op->args[3]); llvm::Value* ref = this->CreateStructRefPtr( - op->args[3].dtype(), MakeValue(op->args[0]), + op->args[3].type(), MakeValue(op->args[0]), MakeValue(op->args[1]), kind); CHECK(kind != intrinsic::kArrAddr); if (value->getType()->isPointerTy()) { @@ -922,7 +922,7 @@ void CodeGenCPU::VisitStmt_(const For* op) { CHECK(parallel_env_.task_id.defined()); CHECK(parallel_env_.num_task.defined()); CHECK(parallel_env_.penv != nullptr); - DataType t = op->extent.dtype(); + Type t = op->extent.type(); Expr num_task = cast(t, parallel_env_.num_task); Expr task_id = cast(t, parallel_env_.task_id); CHECK(!parallel_env_.in_parallel_loop) diff --git a/src/codegen/llvm/codegen_cpu.h b/src/codegen/llvm/codegen_cpu.h index b9e127557e1a..52e6f6c6ef90 100644 --- a/src/codegen/llvm/codegen_cpu.h +++ b/src/codegen/llvm/codegen_cpu.h @@ -96,14 +96,14 @@ class CodeGenCPU : public CodeGenLLVM { llvm::Value* CreateStaticHandle(); llvm::Value* GetPackedFuncHandle(const std::string& str); llvm::Value* PackClosureData(const Array& fields, uint64_t *num_bytes); - llvm::Value* CreateStructRefPtr(DataType t, llvm::Value* buffer, llvm::Value* index, int kind); + llvm::Value* CreateStructRefPtr(Type t, llvm::Value* buffer, llvm::Value* index, int kind); void UnpackClosureData(llvm::Value*cdata, const Array& fields, std::unordered_map* vmap); // Make packed call. llvm::BasicBlock *MakeCallPacked(const Array &args, llvm::Value **rvalue, - llvm::Value **ret_tcode, const DataType &r_type, + llvm::Value **ret_tcode, const Type &r_type, const int64_t begin, const int64_t end); // create call into tvm packed function. 
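// ---------------------------------------------------------------------------
// The CreateStructRefPtr changes above resolve tvm_struct_get/set "kind"
// codes into field addresses of the runtime array struct. A plain C++
// rendering of the idea (a sketch only: the real kind codes live in the
// intrinsic:: namespace, and this local enum is hypothetical):
#include <dlpack/dlpack.h>

enum MiniArrKind { kData, kNDim, kShape, kByteOffset };

void* struct_field_ptr(DLTensor* buf, long index, int kind) {
  DLTensor* t = buf + index;           // buf[index], as the emitted GEP does
  switch (kind) {
    case kData:       return &t->data;
    case kNDim:       return &t->ndim;
    case kShape:      return &t->shape;
    case kByteOffset: return &t->byte_offset;
    default:          return nullptr;  // remaining kinds elided
  }
}
// ---------------------------------------------------------------------------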
llvm::Value* CreateCallPacked(const Call* op); diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc index 94ad8b76c9c9..2cff88b0bbf4 100644 --- a/src/codegen/llvm/codegen_llvm.cc +++ b/src/codegen/llvm/codegen_llvm.cc @@ -115,11 +115,11 @@ void CodeGenLLVM::AddFunctionInternal(const LoweredFunc& f, bool ret_void) { std::vector arg_types; is_restricted_ = f->is_restricted; for (Var arg : f->args) { - DataType t = arg.dtype(); + Type t = arg.type(); if (t.is_handle()) { auto it = f->handle_data_type.find(arg); if (it != f->handle_data_type.end()) { - arg_types.push_back(LLVMType((*it).second.dtype()) + arg_types.push_back(LLVMType((*it).second.type()) ->getPointerTo(GetGlobalAddressSpace())); } else { arg_types.push_back(t_int8_->getPointerTo(GetGlobalAddressSpace())); @@ -128,7 +128,7 @@ void CodeGenLLVM::AddFunctionInternal(const LoweredFunc& f, bool ret_void) { alias_var_set_.insert(arg.get()); } } else { - arg_types.push_back(LLVMType(arg.dtype())); + arg_types.push_back(LLVMType(arg.type())); } } llvm::FunctionType* ftype = llvm::FunctionType::get( @@ -147,7 +147,7 @@ void CodeGenLLVM::AddFunctionInternal(const LoweredFunc& f, bool ret_void) { const Var& var = f->args[i]; var_map_[var.get()] = v; if (is_restricted_) { - if (var.dtype().is_handle() && !alias_var_set_.count(var.get())) { + if (var.type().is_handle() && !alias_var_set_.count(var.get())) { // set non alias. #if TVM_LLVM_VERSION >= 50 function_->addParamAttr(i, llvm::Attribute::NoAlias); @@ -302,7 +302,7 @@ unsigned CodeGenLLVM::GetGlobalAddressSpace() { return 0; } -llvm::Type* CodeGenLLVM::LLVMType(const DataType& t) const { +llvm::Type* CodeGenLLVM::LLVMType(const Type& t) const { if (t.is_handle()) { CHECK_EQ(t.lanes(), 1); return t_void_p_; @@ -335,7 +335,7 @@ llvm::Type* CodeGenLLVM::LLVMType(const DataType& t) const { void CodeGenLLVM::AddAliasInfo(llvm::Instruction* inst, const Variable* buffer, Expr index, - DataType type) { + Type type) { if (alias_var_set_.count(buffer) != 0) { // Mark all possibly aliased pointer as same type. 
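// ---------------------------------------------------------------------------
// Context for the alias handling above: once TBAA metadata (or the NoAlias
// parameter attribute) tells LLVM two pointers cannot overlap, loads and
// stores may be reordered and vectorized. The same contract written directly
// in C++ -- a sketch using the GCC/Clang restrict spelling, unrelated to any
// concrete TVM kernel:
void scale(float* __restrict__ out, const float* __restrict__ in, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = 2.0f * in[i];  // no out/in overlap to re-check per iteration
}
// ---------------------------------------------------------------------------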
    llvm::MDNode* meta = md_tbaa_alias_set_;
@@ -387,7 +387,7 @@ void CodeGenLLVM::AddAliasInfo(llvm::Instruction* inst,
       md_builder_->createTBAAStructTagNode(meta, meta, 0));
 }
 
-void CodeGenLLVM::GetAlignment(DataType t,
+void CodeGenLLVM::GetAlignment(Type t,
                                const Variable* buf_var,
                                const Expr& index,
                                int* p_alignment,
@@ -474,7 +474,7 @@ llvm::Value* CodeGenLLVM::CreateVecFlip(llvm::Value* vec) {
 }
 
 llvm::Value* CodeGenLLVM::CreateVecPad(llvm::Value* vec, int target_lanes) {
-  llvm::Value* mask = llvm::UndefValue::get(LLVMType(DataType::Int(32, target_lanes)));
+  llvm::Value* mask = llvm::UndefValue::get(LLVMType(Int(32, target_lanes)));
   int num_elems = static_cast<int>(vec->getType()->getVectorNumElements());
   if (num_elems == target_lanes) return vec;
   CHECK_LT(num_elems, target_lanes);
@@ -542,19 +542,19 @@ void CodeGenLLVM::CreateSerialFor(llvm::Value* begin,
   loop_value->addIncoming(begin, pre_block);
   CHECK(!var_map_.count(loop_var.get()));
   var_map_[loop_var.get()] = loop_value;
-  builder_->CreateCondBr(CreateLT(loop_var.dtype(), loop_value, end),
+  builder_->CreateCondBr(CreateLT(loop_var.type(), loop_value, end),
                          for_body, for_end, md_very_likely_branch_);
   builder_->SetInsertPoint(for_body);
   this->VisitStmt(body);
   var_map_.erase(loop_var.get());
-  llvm::Value* loop_next = CreateAdd(loop_var.dtype(), loop_value, stride);
+  llvm::Value* loop_next = CreateAdd(loop_var.type(), loop_value, stride);
   loop_value->addIncoming(loop_next, builder_->GetInsertBlock());
   builder_->CreateBr(for_begin);
   builder_->SetInsertPoint(for_end);
 }
 
 // cast operator
-llvm::Value* CodeGenLLVM::CreateCast(DataType from, DataType to, llvm::Value* value) {
+llvm::Value* CodeGenLLVM::CreateCast(Type from, Type to, llvm::Value* value) {
   llvm::Type * target = LLVMType(to);
   if (value->getType() == target) return value;
   if (to.is_handle()) {
@@ -609,7 +609,7 @@ llvm::Value* CodeGenLLVM::GetConstString(const std::string& str) {
 }
 
 llvm::Value* CodeGenLLVM::CreateBufferPtr(
-    DataType t, llvm::Value* buffer, llvm::Value* index) {
+    Type t, llvm::Value* buffer, llvm::Value* index) {
   CHECK_EQ(t.lanes(), 1);
   llvm::PointerType* btype = llvm::dyn_cast<llvm::PointerType>(buffer->getType());
   CHECK(btype != nullptr);
@@ -622,7 +622,7 @@ llvm::Value* CodeGenLLVM::CreateBufferPtr(
 }
 
 llvm::Value* CodeGenLLVM::CreateBufferVecPtr(
-    DataType t, llvm::Value* buffer, llvm::Value* index) {
+    Type t, llvm::Value* buffer, llvm::Value* index) {
   CHECK_GT(t.lanes(), 1);
   llvm::PointerType* btype = llvm::dyn_cast<llvm::PointerType>(buffer->getType());
   CHECK(btype != nullptr);
@@ -647,7 +647,7 @@ llvm::Value* CodeGenLLVM::CreateCallExtern(const Call* op) {
     arg_type.push_back(arg_value.back()->getType());
   }
   llvm::FunctionType* ftype = llvm::FunctionType::get(
-      LLVMType(op->dtype), arg_type, false);
+      LLVMType(op->type), arg_type, false);
   llvm::Function* f = module_->getFunction(op->name);
   if (f == nullptr) {
     f = llvm::Function::Create(
@@ -674,7 +674,7 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) {
       sig_type.push_back(arg_value.back()->getType());
     }
   }
-  llvm::Type *return_type = LLVMType(op->dtype);
+  llvm::Type *return_type = LLVMType(op->type);
   if (sig_type.size() > 0 && return_type != sig_type[0]) {
     sig_type.insert(sig_type.begin(), return_type);
   }
@@ -692,7 +692,7 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) {
   } else if (op->is_intrinsic(Call::shift_left)) {
     return builder_->CreateShl(MakeValue(op->args[0]), MakeValue(op->args[1]));
   } else if (op->is_intrinsic(Call::shift_right)) {
-    if (op->args[0].dtype().is_int()) {
+    if (op->args[0].type().is_int()) {
return builder_->CreateAShr(MakeValue(op->args[0]), MakeValue(op->args[1])); } else { return builder_->CreateLShr(MakeValue(op->args[0]), MakeValue(op->args[1])); @@ -707,13 +707,13 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { unsigned addrspace; if (!r) { ptr = CreateBufferPtr( - l->dtype, MakeValue(l->buffer_var), MakeValue(l->index)); + l->type, MakeValue(l->buffer_var), MakeValue(l->index)); addrspace = llvm::dyn_cast( ptr->getType())->getAddressSpace(); } else { - Expr index = r->base / make_const(DataType::Int(32), r->lanes); + Expr index = r->base / make_const(Int(32), r->lanes); ptr = CreateBufferVecPtr( - l->dtype, MakeValue(l->buffer_var), MakeValue(index)); + l->type, MakeValue(l->buffer_var), MakeValue(index)); addrspace = llvm::dyn_cast( ptr->getType())->getAddressSpace(); } @@ -723,7 +723,7 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { } else if (op->is_intrinsic(intrinsic::tvm_handle_is_null)) { return builder_->CreateIsNull(MakeValue(op->args[0])); } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { - CHECK_EQ(op->args[0].dtype().lanes(), 1) + CHECK_EQ(op->args[0].type().lanes(), 1) << "if_then_else can only take scalar condition"; using llvm::BasicBlock; BasicBlock* then_block = BasicBlock::Create( @@ -747,7 +747,7 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { value->addIncoming(else_value, else_value_block); return value; } else if (op->is_intrinsic(Call::reinterpret)) { - llvm::Type * target = LLVMType(op->dtype); + llvm::Type * target = LLVMType(op->type); return builder_->CreateBitCast(MakeValue(op->args[0]), target); } else if (op->is_intrinsic(Call::isnan)) { // TODO(hgt312): set fast math flag @@ -779,13 +779,13 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { void CodeGenLLVM::Scalarize(const Expr& e, std::function f) { if (const Ramp* ramp = e.as()) { - for (int i = 0; i < ramp->dtype.lanes(); ++i) { + for (int i = 0; i < ramp->type.lanes(); ++i) { Expr offset = ramp->base + (ramp->stride * i); f(i, MakeValue(offset)); } } else { llvm::Value* value = MakeValue(e); - for (int i = 0; i < e.dtype().lanes(); ++i) { + for (int i = 0; i < e.type().lanes(); ++i) { f(i, builder_->CreateExtractElement(value, i)); } } @@ -798,18 +798,18 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Variable* op) { } llvm::Value* CodeGenLLVM::VisitExpr_(const Cast* op) { - return CreateCast(op->value.dtype(), op->dtype, MakeValue(op->value)); + return CreateCast(op->value.type(), op->type, MakeValue(op->value)); } llvm::Value* CodeGenLLVM::VisitExpr_(const IntImm* op) { - return llvm::ConstantInt::getSigned(LLVMType(op->dtype), op->value); + return llvm::ConstantInt::getSigned(LLVMType(op->type), op->value); } llvm::Value* CodeGenLLVM::VisitExpr_(const UIntImm* op) { - return llvm::ConstantInt::get(LLVMType(op->dtype), op->value); + return llvm::ConstantInt::get(LLVMType(op->type), op->value); } llvm::Value* CodeGenLLVM::VisitExpr_(const FloatImm* op) { - return llvm::ConstantFP::get(LLVMType(op->dtype), op->value); + return llvm::ConstantFP::get(LLVMType(op->type), op->value); } llvm::Value* CodeGenLLVM::VisitExpr_(const StringImm* op) { @@ -818,7 +818,7 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const StringImm* op) { #define DEFINE_CODEGEN_BINARY_OP(Op) \ llvm::Value* CodeGenLLVM::Create ## Op( \ - DataType t, llvm::Value* a, llvm::Value *b) { \ + Type t, llvm::Value* a, llvm::Value *b) { \ if (t.is_int()) { \ if (t.bits() >= 32) { \ return builder_->CreateNSW ## Op (a, b); \ @@ -837,7 +837,7 @@ llvm::Value* 
CodeGenLLVM::VisitExpr_(const StringImm* op) { } \ } \ llvm::Value* CodeGenLLVM::VisitExpr_(const Op* op) { \ - return Create ## Op(op->dtype, MakeValue(op->a), MakeValue(op->b)); \ + return Create ## Op(op->type, MakeValue(op->a), MakeValue(op->b)); \ } DEFINE_CODEGEN_BINARY_OP(Add); @@ -846,7 +846,7 @@ DEFINE_CODEGEN_BINARY_OP(Mul); #define DEFINE_CODEGEN_CMP_OP(Op) \ llvm::Value* CodeGenLLVM::Create ## Op( \ - DataType t, llvm::Value* a, llvm::Value* b) { \ + Type t, llvm::Value* a, llvm::Value* b) { \ if (t.is_int()) { \ return builder_->CreateICmpS ## Op (a, b); \ } else if (t.is_uint()) { \ @@ -857,7 +857,7 @@ DEFINE_CODEGEN_BINARY_OP(Mul); } \ } \ llvm::Value* CodeGenLLVM::VisitExpr_(const Op* op) { \ - return Create ## Op(op->a.dtype(), MakeValue(op->a), MakeValue(op->b)); \ + return Create ## Op(op->a.type(), MakeValue(op->a), MakeValue(op->b)); \ } DEFINE_CODEGEN_CMP_OP(LT); @@ -868,12 +868,12 @@ DEFINE_CODEGEN_CMP_OP(GE); llvm::Value* CodeGenLLVM::VisitExpr_(const Div* op) { llvm::Value* a = MakeValue(op->a); llvm::Value* b = MakeValue(op->b); - if (op->dtype.is_int()) { + if (op->type.is_int()) { return builder_->CreateSDiv(a, b); - } else if (op->dtype.is_uint()) { + } else if (op->type.is_uint()) { return builder_->CreateUDiv(a, b); } else { - CHECK(op->dtype.is_float()); + CHECK(op->type.is_float()); return builder_->CreateFDiv(a, b); } } @@ -881,12 +881,12 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Div* op) { llvm::Value* CodeGenLLVM::VisitExpr_(const Mod* op) { llvm::Value* a = MakeValue(op->a); llvm::Value* b = MakeValue(op->b); - if (op->dtype.is_int()) { + if (op->type.is_int()) { return builder_->CreateSRem(a, b); - } else if (op->dtype.is_uint()) { + } else if (op->type.is_uint()) { return builder_->CreateURem(a, b); } else { - CHECK(op->dtype.is_float()); + CHECK(op->type.is_float()); return builder_->CreateFRem(a, b); } } @@ -894,19 +894,19 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Mod* op) { llvm::Value* CodeGenLLVM::VisitExpr_(const Min* op) { llvm::Value* a = MakeValue(op->a); llvm::Value* b = MakeValue(op->b); - return builder_->CreateSelect(CreateLT(op->a.dtype(), a, b), a, b); + return builder_->CreateSelect(CreateLT(op->a.type(), a, b), a, b); } llvm::Value* CodeGenLLVM::VisitExpr_(const Max* op) { llvm::Value* a = MakeValue(op->a); llvm::Value* b = MakeValue(op->b); - return builder_->CreateSelect(CreateGT(op->a.dtype(), a, b), a, b); + return builder_->CreateSelect(CreateGT(op->a.type(), a, b), a, b); } llvm::Value* CodeGenLLVM::VisitExpr_(const EQ* op) { llvm::Value* a = MakeValue(op->a); llvm::Value* b = MakeValue(op->b); - if (op->a.dtype().is_int() || op->a.dtype().is_uint()) { + if (op->a.type().is_int() || op->a.type().is_uint()) { return builder_->CreateICmpEQ(a, b); } else { return builder_->CreateFCmpOEQ(a, b); @@ -916,7 +916,7 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const EQ* op) { llvm::Value* CodeGenLLVM::VisitExpr_(const NE* op) { llvm::Value* a = MakeValue(op->a); llvm::Value* b = MakeValue(op->b); - if (op->a.dtype().is_int() || op->a.dtype().is_uint()) { + if (op->a.type().is_int() || op->a.type().is_uint()) { return builder_->CreateICmpNE(a, b); } else { return builder_->CreateFCmpONE(a, b); @@ -950,7 +950,7 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Let* op) { } llvm::Value* CodeGenLLVM::VisitExpr_(const Load* op) { - DataType t = op->dtype; + Type t = op->type; bool is_volatile = volatile_buf_.count(op->buffer_var.get()); llvm::Value* buffer = MakeValue(op->buffer_var); llvm::Value* index = MakeValue(op->index); @@ -1010,10 
+1010,10 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Call* op) { } llvm::Value* CodeGenLLVM::VisitExpr_(const Ramp* op) { - llvm::Value* vec = llvm::UndefValue::get(LLVMType(op->dtype)); + llvm::Value* vec = llvm::UndefValue::get(LLVMType(op->type)); for (int i = 0; i < op->lanes; ++i) { vec = builder_->CreateInsertElement( - vec, MakeValue(op->base + op->stride * make_const(op->stride.dtype(), i)), + vec, MakeValue(op->base + op->stride * make_const(op->stride.type(), i)), ConstInt32(i)); } return vec; @@ -1024,7 +1024,7 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Shuffle* op) { int total_lanes = 0; for (int i = 0, e = op->vectors.size(); i < e; ++i) { vecs[i] = VisitExpr(op->vectors[i]); - total_lanes += op->vectors[i].dtype().lanes(); + total_lanes += op->vectors[i].type().lanes(); } llvm::Value* v0 = CreateVecConcat(vecs); std::vector idx(op->indices.size()); @@ -1045,7 +1045,7 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Broadcast* op) { void CodeGenLLVM::VisitStmt_(const Store* op) { CHECK(is_one(op->predicate)); - DataType t = op->value.dtype(); + Type t = op->value.type(); bool is_volatile = volatile_buf_.count(op->buffer_var.get()); llvm::Value* buffer = MakeValue(op->buffer_var); llvm::Value* index = MakeValue(op->index); @@ -1056,7 +1056,7 @@ void CodeGenLLVM::VisitStmt_(const Store* op) { GetAlignment(t, op->buffer_var.get(), op->index, &alignment, &native_bits); llvm::Value* ptr = CreateBufferPtr(t, buffer, index); llvm::StoreInst* store = builder_->CreateAlignedStore(value, ptr, alignment, is_volatile); - AddAliasInfo(store, op->buffer_var.get(), op->index, op->value.dtype()); + AddAliasInfo(store, op->buffer_var.get(), op->index, op->value.type()); return; } else { // vector store @@ -1071,7 +1071,7 @@ void CodeGenLLVM::VisitStmt_(const Store* op) { t.element_of(), buffer, MakeValue(ramp->base)); ptr = builder_->CreatePointerCast(ptr, LLVMType(t)->getPointerTo(addrspace)); llvm::StoreInst* store = builder_->CreateAlignedStore(value, ptr, alignment, is_volatile); - AddAliasInfo(store, op->buffer_var.get(), op->index, op->value.dtype()); + AddAliasInfo(store, op->buffer_var.get(), op->index, op->value.type()); return; } } @@ -1084,7 +1084,7 @@ void CodeGenLLVM::VisitStmt_(const Store* op) { llvm::StoreInst* store = builder_->CreateAlignedStore( builder_->CreateExtractElement(value, i), ptr, basic_align, is_volatile); - AddAliasInfo(store, op->buffer_var.get(), Expr(), op->value.dtype()); + AddAliasInfo(store, op->buffer_var.get(), Expr(), op->value.type()); }; this->Scalarize(op->index, f); } @@ -1142,7 +1142,7 @@ void CodeGenLLVM::VisitStmt_(const Allocate* op) { << "Can only handle constant size stack allocation"; StorageInfo& info = alloc_storage_info_[op->buffer_var.get()]; if (constant_size % 4 == 0 && info.alignment == 0) { - info.alignment = GetTempAllocaAlignment(op->dtype, constant_size); + info.alignment = GetTempAllocaAlignment(op->type, constant_size); } // maximum necessary alignment in the NV devices if (info.alignment > 16) { @@ -1150,7 +1150,7 @@ void CodeGenLLVM::VisitStmt_(const Allocate* op) { } llvm::AllocaInst* alloca = WithFunctionEntry([&]() { return builder_->CreateAlloca( - LLVMType(op->dtype), ConstInt32(constant_size)); + LLVMType(op->type), ConstInt32(constant_size)); }); if (alloca->getAlignment() < static_cast(info.alignment)) { #if TVM_LLVM_VERSION >= 100 @@ -1163,7 +1163,7 @@ void CodeGenLLVM::VisitStmt_(const Allocate* op) { buf = alloca; } buf = builder_->CreatePointerCast( - buf, LLVMType(op->dtype)->getPointerTo( + buf, 
LLVMType(op->type)->getPointerTo(
           buf->getType()->getPointerAddressSpace()));
   CHECK(!var_map_.count(op->buffer_var.get()));
   var_map_[op->buffer_var.get()] = buf;
@@ -1204,7 +1204,7 @@ void CodeGenLLVM::VisitStmt_(const AssertStmt* op) {
 
 void CodeGenLLVM::VisitStmt_(const LetStmt* op) {
   CHECK(!var_map_.count(op->var.get()));
-  if (op->var.dtype().is_handle()) {
+  if (op->var.type().is_handle()) {
     if (!is_restricted_) {
       alias_var_set_.insert(op->var.get());
     }
diff --git a/src/codegen/llvm/codegen_llvm.h b/src/codegen/llvm/codegen_llvm.h
index 08c836adf9d0..b7d091b3921b 100644
--- a/src/codegen/llvm/codegen_llvm.h
+++ b/src/codegen/llvm/codegen_llvm.h
@@ -206,12 +206,12 @@ class CodeGenLLVM :
    * \param t The original type.
    * \return LLVM type of t
    */
-  llvm::Type* LLVMType(const DataType& t) const;
+  llvm::Type* LLVMType(const Type& t) const;
   // initialize the function state.
   void InitFuncState();
   // Get alignment given index.
   void GetAlignment(
-      DataType t, const Variable* buf_var, const Expr& index,
+      Type t, const Variable* buf_var, const Expr& index,
       int* p_alignment, int* p_native_bits);
   // Get constant string
   llvm::Value* GetConstString(const std::string& str);
@@ -221,19 +221,19 @@ class CodeGenLLVM :
   // handle module import
   void HandleImport(const std::string& code);
   // cast operator
-  llvm::Value* CreateCast(DataType from, DataType to, llvm::Value* value);
+  llvm::Value* CreateCast(Type from, Type to, llvm::Value* value);
   // comparison op
   llvm::Value* GetVarValue(const Variable* v) const;
-  llvm::Value* CreateLT(DataType t, llvm::Value* a, llvm::Value* b);
-  llvm::Value* CreateLE(DataType t, llvm::Value* a, llvm::Value* b);
-  llvm::Value* CreateGT(DataType t, llvm::Value* a, llvm::Value* b);
-  llvm::Value* CreateGE(DataType t, llvm::Value* a, llvm::Value* b);
-  llvm::Value* CreateAdd(DataType t, llvm::Value* a, llvm::Value* b);
-  llvm::Value* CreateSub(DataType t, llvm::Value* a, llvm::Value* b);
-  llvm::Value* CreateMul(DataType t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateLT(Type t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateLE(Type t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateGT(Type t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateGE(Type t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateAdd(Type t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateSub(Type t, llvm::Value* a, llvm::Value* b);
+  llvm::Value* CreateMul(Type t, llvm::Value* a, llvm::Value* b);
   llvm::Value* CreateBroadcast(llvm::Value* value, int lanes);
-  llvm::Value* CreateBufferPtr(DataType t, llvm::Value* buffer, llvm::Value* index);
-  llvm::Value* CreateBufferVecPtr(DataType t, llvm::Value* buffer, llvm::Value* index);
+  llvm::Value* CreateBufferPtr(Type t, llvm::Value* buffer, llvm::Value* index);
+  llvm::Value* CreateBufferVecPtr(Type t, llvm::Value* buffer, llvm::Value* index);
   // Vector concatenation.
   llvm::Value* CreateVecSlice(llvm::Value* vec, int begin, int extent);
   llvm::Value* CreateVecFlip(llvm::Value* vec);
@@ -245,7 +245,7 @@ class CodeGenLLVM :
                        llvm::Value* stride, const VarExpr& loop_var, const Stmt& body);
   // add alias information.
-  void AddAliasInfo(llvm::Instruction* load, const Variable* buffer, Expr index, DataType type);
+  void AddAliasInfo(llvm::Instruction* load, const Variable* buffer, Expr index, Type type);
   // The IRBuilder.
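// ---------------------------------------------------------------------------
// The CreateCast helper declared above chooses an LLVM cast opcode from the
// source/destination scalar types. A decision-table sketch (the enum, the
// free function, and its signature are illustrative, not the member's real
// interface):
enum MiniCastOp { kTrunc, kSExt, kZExt, kFPTrunc, kFPExt,
                  kSIToFP, kUIToFP, kFPToSI, kFPToUI, kNoOp };

MiniCastOp pick_cast(bool from_float, bool from_signed, int from_bits,
                     bool to_float, bool to_signed, int to_bits) {
  if (from_float && to_float) {
    if (from_bits == to_bits) return kNoOp;
    return from_bits > to_bits ? kFPTrunc : kFPExt;
  }
  if (from_float)           return to_signed ? kFPToSI : kFPToUI;
  if (to_float)             return from_signed ? kSIToFP : kUIToFP;
  if (from_bits == to_bits) return kNoOp;              // same-width integers
  if (from_bits > to_bits)  return kTrunc;             // narrowing
  return from_signed ? kSExt : kZExt;                  // widening, by sign
}
// ---------------------------------------------------------------------------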
using IRBuilder = llvm::IRBuilder; // The current function diff --git a/src/codegen/llvm/codegen_nvptx.cc b/src/codegen/llvm/codegen_nvptx.cc index 372408c5e666..b6bc6ef952fd 100644 --- a/src/codegen/llvm/codegen_nvptx.cc +++ b/src/codegen/llvm/codegen_nvptx.cc @@ -58,7 +58,7 @@ class CodeGenNVPTX : public CodeGenLLVM { << "Can only handle constant size stack allocation in GPU"; StorageInfo& info = alloc_storage_info_[op->buffer_var.get()]; if (constant_size % 4 == 0 && info.alignment == 0) { - info.alignment = GetTempAllocaAlignment(op->dtype, constant_size); + info.alignment = GetTempAllocaAlignment(op->type, constant_size); } // maximum necessary alignment in the NV devices if (info.alignment > 16) { @@ -69,7 +69,7 @@ class CodeGenNVPTX : public CodeGenLLVM { // TODO(tqchen): for higher version of LLVM, local address space can be set. llvm::AllocaInst* alloca = WithFunctionEntry([&]() { return builder_->CreateAlloca( - LLVMType(op->dtype), ConstInt32(constant_size)); + LLVMType(op->type), ConstInt32(constant_size)); }); if (alloca->getAlignment() < static_cast(info.alignment)) { #if TVM_LLVM_VERSION >= 100 @@ -84,7 +84,7 @@ class CodeGenNVPTX : public CodeGenLLVM { << "Can only allocate shared or local memory inside kernel"; // Shared memory: address space == 3 const unsigned shared_address_space = 3; - llvm::Type* type = llvm::ArrayType::get(LLVMType(op->dtype), constant_size); + llvm::Type* type = llvm::ArrayType::get(LLVMType(op->type), constant_size); // Allocate shared memory in global, address_space = 3 llvm::GlobalVariable *global = new llvm::GlobalVariable( *module_, type, false, llvm::GlobalValue::PrivateLinkage, 0, ".shared", @@ -98,7 +98,7 @@ class CodeGenNVPTX : public CodeGenLLVM { } } buf = builder_->CreatePointerCast( - buf, LLVMType(op->dtype)->getPointerTo( + buf, LLVMType(op->type)->getPointerTo( buf->getType()->getPointerAddressSpace())); CHECK(!var_map_.count(op->buffer_var.get())); var_map_[op->buffer_var.get()] = buf; diff --git a/src/codegen/llvm/codegen_x86_64.cc b/src/codegen/llvm/codegen_x86_64.cc index 5d72b56df376..804d9b2f1b37 100644 --- a/src/codegen/llvm/codegen_x86_64.cc +++ b/src/codegen/llvm/codegen_x86_64.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -74,8 +74,8 @@ llvm::Value* CodeGenX86_64::VisitExpr_(const Cast* op) { // LLVM does not automatically generate the correct instruction sequences for // half -> float conversion (i.e. using AVX2/AVX-512 vectorized variants of // vcvtph2ps), so we explicitly generate them ourselves. 
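// ---------------------------------------------------------------------------
// Continuing the comment above: for the 8-lane case the sequence the x86-64
// backend generates amounts to the F16C intrinsic below. A sketch only
// (compile with -mf16c; the wrapper name is ours):
#include <immintrin.h>

static inline __m256 half8_to_float8(__m128i half_bits) {
  return _mm256_cvtph_ps(half_bits);  // vcvtph2ps: 8 x fp16 -> 8 x fp32
}
// ---------------------------------------------------------------------------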
- const auto from = op->value.dtype(); - const auto to = op->dtype; + const auto from = op->value.type(); + const auto to = op->type; if (from.is_float() && to.is_float() && from.bits() == 16 && to.bits() == 32) { CHECK_EQ(from.lanes(), to.lanes()); CHECK_NOTNULL(target_machine_); @@ -85,25 +85,21 @@ llvm::Value* CodeGenX86_64::VisitExpr_(const Cast* op) { if (from.lanes() >= 16 && has_avx512) { return CallVectorIntrin( - ::llvm::Intrinsic::x86_avx512_mask_vcvtph2ps_512, 16, - LLVMType(DataType::Float(32, from.lanes())), + ::llvm::Intrinsic::x86_avx512_mask_vcvtph2ps_512, 16, LLVMType(Float(32, from.lanes())), { - MakeValue(ir::Call::make( - DataType::Int(16, from.lanes()), ir::Call::reinterpret, {op->value}, - ir::Call::PureIntrinsic)), - MakeValue( - ir::Broadcast::make(ir::FloatImm::make(DataType::Float(32), 0), from.lanes())), - /*mask=*/MakeValue(ir::IntImm::make(DataType::Int(16), -1)), - /*rounding-mode=*/MakeValue(ir::IntImm::make(DataType::Int(32), 4)), + MakeValue(ir::Call::make(Int(16, from.lanes()), ir::Call::reinterpret, {op->value}, + ir::Call::PureIntrinsic)), + MakeValue(ir::Broadcast::make(ir::FloatImm::make(Float(32), 0), from.lanes())), + /*mask=*/MakeValue(ir::IntImm::make(Int(16), -1)), + /*rounding-mode=*/MakeValue(ir::IntImm::make(Int(32), 4)), }); } if (from.lanes() >= 8 && has_f16c) { return CallVectorIntrin( - ::llvm::Intrinsic::x86_vcvtph2ps_256, 8, LLVMType(DataType::Float(32, from.lanes())), - {MakeValue(ir::Call::make( - DataType::Int(16, from.lanes()), ir::Call::reinterpret, {op->value}, - ir::Call::PureIntrinsic))}); + ::llvm::Intrinsic::x86_vcvtph2ps_256, 8, LLVMType(Float(32, from.lanes())), + {MakeValue(ir::Call::make(Int(16, from.lanes()), ir::Call::reinterpret, {op->value}, + ir::Call::PureIntrinsic))}); } } diff --git a/src/codegen/llvm/intrin_rule_llvm.cc b/src/codegen/llvm/intrin_rule_llvm.cc index da07ff324b20..fd28d7e4594a 100644 --- a/src/codegen/llvm/intrin_rule_llvm.cc +++ b/src/codegen/llvm/intrin_rule_llvm.cc @@ -67,19 +67,19 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.tanh") const ir::Call* call = e.as(); CHECK(call != nullptr); const Expr& x = call->args[0]; - Expr one = make_const(x.dtype(), 1); - Expr two = make_const(x.dtype(), 2); - Expr neg_two = make_const(x.dtype(), -2); + Expr one = make_const(x.type(), 1); + Expr two = make_const(x.type(), 2); + Expr neg_two = make_const(x.type(), -2); Expr exp_neg2x = ir::Call::make( - x.dtype(), "exp", {neg_two * x}, ir::Call::PureIntrinsic); + x.type(), "exp", {neg_two * x}, ir::Call::PureIntrinsic); Expr exp_pos2x = ir::Call::make( - x.dtype(), "exp", {two * x}, ir::Call::PureIntrinsic); + x.type(), "exp", {two * x}, ir::Call::PureIntrinsic); Expr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); Expr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); *rv = ir::Select::make( - x >= make_zero(x.dtype()), tanh_pos, tanh_neg); + x >= make_zero(x.type()), tanh_pos, tanh_neg); }); TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.pow") diff --git a/src/codegen/llvm/intrin_rule_llvm.h b/src/codegen/llvm/intrin_rule_llvm.h index 7863a3dd7a96..c0b5241e8876 100644 --- a/src/codegen/llvm/intrin_rule_llvm.h +++ b/src/codegen/llvm/intrin_rule_llvm.h @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -41,14 +41,14 @@ inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { CHECK(call != nullptr); Array cargs; // intrin id. - cargs.push_back(ir::UIntImm::make(DataType::UInt(32), id)); - cargs.push_back(ir::UIntImm::make(DataType::UInt(32), num_signature)); + cargs.push_back(ir::UIntImm::make(UInt(32), id)); + cargs.push_back(ir::UIntImm::make(UInt(32), num_signature)); for (Expr arg : call->args) { cargs.push_back(arg); } *rv = ir::Call::make( - call->dtype, "llvm_intrin", cargs, ir::Call::PureIntrinsic); + call->type, "llvm_intrin", cargs, ir::Call::PureIntrinsic); } template @@ -58,13 +58,13 @@ inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { CHECK(call != nullptr); Array cargs; // intrin id. - cargs.push_back(ir::UIntImm::make(DataType::UInt(32), id)); - cargs.push_back(ir::UIntImm::make(DataType::UInt(32), num_signature)); + cargs.push_back(ir::UIntImm::make(UInt(32), id)); + cargs.push_back(ir::UIntImm::make(UInt(32), num_signature)); for (Expr arg : call->args) { cargs.push_back(arg); } *rv = ir::Call::make( - call->dtype, "llvm_intrin", cargs, ir::Call::Intrinsic); + call->type, "llvm_intrin", cargs, ir::Call::Intrinsic); } } // namespace codegen diff --git a/src/codegen/llvm/intrin_rule_nvptx.cc b/src/codegen/llvm/intrin_rule_nvptx.cc index 862d06b73a5f..4718cf78062e 100644 --- a/src/codegen/llvm/intrin_rule_nvptx.cc +++ b/src/codegen/llvm/intrin_rule_nvptx.cc @@ -35,11 +35,11 @@ inline void DispatchExternLibDevice(const TVMArgs& args, TVMRetValue* rv) { using namespace ir; const Call* call = e.as(); CHECK(call != nullptr); - CHECK(call->dtype.bits() == 32 || call->dtype.bits() == 64) << "Only support float32 or float64."; + CHECK(call->type.bits() == 32 || call->type.bits() == 64) << "Only support float32 or float64."; std::ostringstream intrinsic_name; intrinsic_name << "__nv_" << call->name; - if (call->dtype.bits() == 32) intrinsic_name << "f"; - *rv = Call::make(call->dtype, intrinsic_name.str(), call->args, + if (call->type.bits() == 32) intrinsic_name << "f"; + *rv = Call::make(call->type, intrinsic_name.str(), call->args, Call::PureExtern); } diff --git a/src/codegen/llvm/intrin_rule_rocm.cc b/src/codegen/llvm/intrin_rule_rocm.cc index 22b324545825..5ad5261c81bf 100644 --- a/src/codegen/llvm/intrin_rule_rocm.cc +++ b/src/codegen/llvm/intrin_rule_rocm.cc @@ -36,8 +36,8 @@ inline void DispatchExternOCML(const TVMArgs& args, TVMRetValue* rv) { const Call* call = e.as(); CHECK(call != nullptr); std::ostringstream intrinsic_name; - intrinsic_name << "__ocml_" << call->name << "_f" << call->dtype.bits(); - *rv = Call::make(call->dtype, intrinsic_name.str(), call->args, + intrinsic_name << "__ocml_" << call->name << "_f" << call->type.bits(); + *rv = Call::make(call->type, intrinsic_name.str(), call->args, Call::PureExtern); } diff --git a/src/codegen/llvm/llvm_common.h b/src/codegen/llvm/llvm_common.h index 5ec8bb3f2a9c..c16229f5f10a 100644 --- a/src/codegen/llvm/llvm_common.h +++ b/src/codegen/llvm/llvm_common.h @@ -33,12 +33,6 @@ #include #include -#if TVM_LLVM_VERSION >= 100 -#include -#include -#include -#include -#endif #include #include #include @@ -50,6 +44,7 @@ #include #include #include +#include #include #include diff --git 
a/src/codegen/spirv/codegen_spirv.cc b/src/codegen/spirv/codegen_spirv.cc index 7800e47319e0..be2b6cc668eb 100644 --- a/src/codegen/spirv/codegen_spirv.cc +++ b/src/codegen/spirv/codegen_spirv.cc @@ -37,11 +37,11 @@ std::vector CodeGenSPIRV::BuildFunction(const LoweredFunc& f) { std::vector pod_args; uint32_t num_buffer = 0; for (Var arg : f->args) { - DataType t = arg.dtype(); + Type t = arg.type(); if (t.is_handle()) { auto it = f->handle_data_type.find(arg); if (it != f->handle_data_type.end()) { - DataType value_type = (*it).second.dtype(); + Type value_type = (*it).second.type(); spirv::Value arg_value = builder_->BufferArgument( builder_->GetSType(value_type), 0, num_buffer); storage_info_[arg.get()].UpdateContentType(value_type); @@ -61,7 +61,7 @@ std::vector CodeGenSPIRV::BuildFunction(const LoweredFunc& f) { if (pod_args.size() != 0) { std::vector value_types; for (size_t i = 0; i < pod_args.size(); ++i) { - value_types.push_back(builder_->GetSType(pod_args[i].dtype())); + value_types.push_back(builder_->GetSType(pod_args[i].type())); } spirv::Value ptr = builder_->DeclarePushConstant(value_types); for (size_t i = 0; i < pod_args.size(); ++i) { @@ -103,7 +103,7 @@ spirv::Value CodeGenSPIRV::GetThreadIndex( } else { v = builder_->GetWorkgroupID(ts.dim_index); } - return builder_->Cast(builder_->GetSType(iv->var.dtype()), v); + return builder_->Cast(builder_->GetSType(iv->var.type()), v); } spirv::Value CodeGenSPIRV::CreateStorageSync(const Call* op) { @@ -112,7 +112,7 @@ spirv::Value CodeGenSPIRV::CreateStorageSync(const Call* op) { if (sync == "warp") { return value; } else if (sync == "shared") { - auto type_int = builder_->GetSType(DataType::Int(32)); + auto type_int = builder_->GetSType(Int(32)); builder_->MakeInst( spv::OpControlBarrier, builder_->IntImm(type_int, static_cast(spv::ScopeWorkgroup)), @@ -133,15 +133,15 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Variable* op) { } spirv::Value CodeGenSPIRV::VisitExpr_(const IntImm* op) { - return builder_->IntImm(builder_->GetSType(op->dtype), op->value); + return builder_->IntImm(builder_->GetSType(op->type), op->value); } spirv::Value CodeGenSPIRV::VisitExpr_(const UIntImm* op) { - return builder_->UIntImm(builder_->GetSType(op->dtype), op->value); + return builder_->UIntImm(builder_->GetSType(op->type), op->value); } spirv::Value CodeGenSPIRV::VisitExpr_(const FloatImm* op) { - return builder_->FloatImm(builder_->GetSType(op->dtype), op->value); + return builder_->FloatImm(builder_->GetSType(op->type), op->value); } spirv::Value CodeGenSPIRV::VisitExpr_(const StringImm* op) { @@ -150,7 +150,7 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const StringImm* op) { } spirv::Value CodeGenSPIRV::VisitExpr_(const Cast* op) { - return builder_->Cast(builder_->GetSType(op->dtype), MakeValue(op->value)); + return builder_->Cast(builder_->GetSType(op->type), MakeValue(op->value)); } spirv::Value CodeGenSPIRV::VisitExpr_(const Add* op) { @@ -248,7 +248,7 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Call* op) { values.push_back(MakeValue(op->args[i])); } return builder_->CallGLSL450( - builder_->GetSType(op->dtype), inst_id, values); + builder_->GetSType(op->type), inst_id, values); } else if (op->is_intrinsic(Call::bitwise_and)) { CHECK_EQ(op->args.size(), 2U); spirv::Value a = MakeValue(op->args[0]); @@ -277,13 +277,13 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Call* op) { CHECK_EQ(op->args.size(), 2U); spirv::Value a = MakeValue(op->args[0]); spirv::Value b = MakeValue(op->args[1]); - if (op->args[0].dtype().is_int()) { + if 
(op->args[0].type().is_int()) { return builder_->MakeValue(spv::OpShiftRightArithmetic, a.stype, a, b); } else { return builder_->MakeValue(spv::OpShiftRightLogical, a.stype, a, b); } } else if (op->is_intrinsic(Call::reinterpret)) { - return builder_->MakeValue(spv::OpBitcast, builder_->GetSType(op->dtype), + return builder_->MakeValue(spv::OpBitcast, builder_->GetSType(op->type), MakeValue(op->args[0])); } else if (op->is_intrinsic(intrinsic::tvm_storage_sync)) { return this->CreateStorageSync(op); @@ -316,17 +316,17 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Call* op) { } else if (op->is_intrinsic("popcount")) { return builder_->MakeValue( spv::OpBitCount, - builder_->GetSType(op->dtype), + builder_->GetSType(op->type), MakeValue(op->args[0])); } else { if (op->call_type == Call::Intrinsic || op->call_type == Call::PureIntrinsic) { LOG(FATAL) << "Unresolved intrinsic " << op->name - << " with return type " << op->dtype; + << " with return type " << op->type; } else if (op->call_type == Call::Extern || op->call_type == Call::PureExtern) { LOG(FATAL) << "Unresolved extern " << op->name - << " with return type " << op->dtype; + << " with return type " << op->type; } else { LOG(FATAL) << "Unresolved call type " << op->call_type; } @@ -341,7 +341,7 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Ramp* op) { spirv::Value v = base; if (i != 0) { spirv::Value offset = MakeValue( - make_const(op->stride.dtype(), i) * op->stride); + make_const(op->stride.type(), i) * op->stride); v = builder_->Add(v, offset); } values.push_back(v); @@ -364,7 +364,7 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Load* op) { CHECK(it != storage_info_.end()); StorageInfo& info = it->second; if (!info.content_fixed) { - info.UpdateContentType(op->dtype); + info.UpdateContentType(op->type); } spirv::SType content_type = builder_->GetSType(info.content_type); @@ -376,15 +376,15 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Load* op) { if (info.is_volatile) { mask |= spv::MemoryAccessVolatileMask; } - if (op->dtype.lanes() == 1) { - CHECK_EQ(info.content_type, op->dtype) + if (op->type.lanes() == 1) { + CHECK_EQ(info.content_type, op->type) << "Vulkan only allow one type access to the same buffer"; spirv::Value index = MakeValue(op->index); spirv::Value ptr = builder_->StructArrayAccess( ptr_type, buffer, index); return builder_->MakeValue(spv::OpLoad, content_type, ptr, mask); } else { - if (op->dtype.element_of() == info.content_type) { + if (op->type.element_of() == info.content_type) { // because content type is element type, we can only do scalarize load. 
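// ---------------------------------------------------------------------------
// Shape of the scalarize path that follows: when the buffer's declared
// content type is the element type, a vector load becomes one indexed load
// per lane plus an OpCompositeConstruct. A plain C++ model (a sketch; float
// and the helper name are illustrative):
#include <vector>

std::vector<float> scalarized_load(const float* buffer,
                                   const std::vector<long>& lane_index) {
  std::vector<float> lanes;
  lanes.reserve(lane_index.size());
  for (long idx : lane_index)   // one OpLoad per lane, via Scalarize()
    lanes.push_back(buffer[idx]);
  return lanes;                 // stands in for OpCompositeConstruct
}
// ---------------------------------------------------------------------------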
std::vector values; auto f = [&](int i, spirv::Value index) { @@ -398,13 +398,13 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Load* op) { } else { if (const Ramp* ramp = op->index.as()) { if (is_one(ramp->stride)) { - CHECK_EQ(ramp->lanes, op->dtype.lanes()); + CHECK_EQ(ramp->lanes, op->type.lanes()); arith::ModularSet me = analyzer_->modular_set(ramp->base); CHECK((me->coeff % ramp->lanes) == 0 && (me->base % ramp->lanes) == 0) << "Only aligned vector access is allowed in SPIRV"; Expr vec_index = ir::Simplify( - ramp->base / make_const(ramp->base.dtype(), ramp->lanes)); + ramp->base / make_const(ramp->base.type(), ramp->lanes)); spirv::Value ptr = builder_->StructArrayAccess( ptr_type, buffer, MakeValue(vec_index)); return builder_->MakeValue(spv::OpLoad, content_type, ptr, mask); @@ -420,14 +420,14 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const Load* op) { void CodeGenSPIRV::Scalarize(const Expr& e, std::function f) { if (const Ramp* ramp = e.as()) { - for (int i = 0; i < ramp->dtype.lanes(); ++i) { + for (int i = 0; i < ramp->type.lanes(); ++i) { Expr offset = ramp->base + ramp->stride * i; f(i, MakeValue(offset)); } } else { - spirv::SType etype = builder_->GetSType(e.dtype().element_of()); + spirv::SType etype = builder_->GetSType(e.type().element_of()); spirv::Value value = MakeValue(e); - for (int i = 0; i < e.dtype().lanes(); ++i) { + for (int i = 0; i < e.type().lanes(); ++i) { f(i, builder_->MakeValue( spv::OpCompositeExtract, etype, value, i)); } @@ -441,7 +441,7 @@ void CodeGenSPIRV::VisitStmt_(const Store* op) { StorageInfo& info = it->second; if (!info.content_fixed) { - info.UpdateContentType(op->value.dtype()); + info.UpdateContentType(op->value.type()); } spirv::SType content_type = builder_->GetSType(info.content_type); @@ -455,15 +455,15 @@ void CodeGenSPIRV::VisitStmt_(const Store* op) { mask |= spv::MemoryAccessVolatileMask; } - if (op->value.dtype().lanes() == 1) { - CHECK_EQ(info.content_type, op->value.dtype()) + if (op->value.type().lanes() == 1) { + CHECK_EQ(info.content_type, op->value.type()) << "Vulkan only allow one type access to the same buffer"; spirv::Value index = MakeValue(op->index); spirv::Value ptr = builder_->StructArrayAccess( ptr_type, buffer, index); builder_->MakeInst(spv::OpStore, ptr, value, mask); } else { - if (op->value.dtype().element_of() == info.content_type) { + if (op->value.type().element_of() == info.content_type) { // because content type is element type, we can only do scalarize load. auto f = [&](int i, spirv::Value index) { spirv::Value elem = builder_->MakeValue( @@ -476,13 +476,13 @@ void CodeGenSPIRV::VisitStmt_(const Store* op) { } else { if (const Ramp* ramp = op->index.as()) { if (is_one(ramp->stride)) { - CHECK_EQ(ramp->lanes, op->value.dtype().lanes()); + CHECK_EQ(ramp->lanes, op->value.type().lanes()); arith::ModularSet me = analyzer_->modular_set(ramp->base); CHECK((me->coeff % ramp->lanes) == 0 && (me->base % ramp->lanes) == 0) << "Only aligned vector access is allowed in SPIRV"; Expr vec_index = ir::Simplify( - ramp->base / make_const(ramp->base.dtype(), ramp->lanes)); + ramp->base / make_const(ramp->base.type(), ramp->lanes)); spirv::Value ptr = builder_->StructArrayAccess( ptr_type, buffer, MakeValue(vec_index)); builder_->MakeInst(spv::OpStore, ptr, value, mask); @@ -530,7 +530,7 @@ void CodeGenSPIRV::VisitStmt_(const For* op) { // loop continue builder_->StartLabel(continue_label); spirv::Value one = - op->loop_var.dtype().is_int() ? + op->loop_var.type().is_int() ? 
builder_->IntImm(loop_var.stype, 1) :
       builder_->UIntImm(loop_var.stype, 1);
   spirv::Value next_value = builder_->Add(loop_var, one);
@@ -576,13 +576,13 @@ void CodeGenSPIRV::VisitStmt_(const IfThenElse* op) {
 void CodeGenSPIRV::VisitStmt_(const Allocate* op) {
   CHECK(!is_zero(op->condition));
   CHECK(!op->new_expr.defined());
-  CHECK(!op->dtype.is_handle());
+  CHECK(!op->type.is_handle());
   int32_t constant_size = op->constant_allocation_size();
   CHECK_GT(constant_size, 0)
       << "Can only handle constant size stack allocation in GPU";
   spirv::Value buf;
   StorageInfo& info = storage_info_[op->buffer_var.get()];
-  spirv::SType etype = builder_->GetSType(op->dtype);
+  spirv::SType etype = builder_->GetSType(op->type);
   if (info.scope.rank == runtime::StorageRank::kLocal) {
     buf = builder_->Allocate(
         etype, static_cast<uint32_t>(constant_size),
@@ -597,7 +597,7 @@ void CodeGenSPIRV::VisitStmt_(const Allocate* op) {
         spv::StorageClassWorkgroup);
   }
   CHECK(!info.content_fixed);
-  info.UpdateContentType(op->dtype);
+  info.UpdateContentType(op->type);
   CHECK(!var_map_.count(op->buffer_var.get()));
   var_map_[op->buffer_var.get()] = buf;
   this->VisitStmt(op->body);
@@ -632,7 +632,7 @@ void CodeGenSPIRV::VisitStmt_(const AssertStmt* op) {
 void CodeGenSPIRV::VisitStmt_(const LetStmt* op) {
   CHECK(!var_map_.count(op->var.get()));
-  CHECK(!op->var.dtype().is_handle());
+  CHECK(!op->var.type().is_handle());
   var_map_[op->var.get()] = MakeValue(op->value);
   analyzer_->Bind(op->var, op->value);
   this->VisitStmt(op->body);
 }
diff --git a/src/codegen/spirv/codegen_spirv.h b/src/codegen/spirv/codegen_spirv.h
index 3d16377271c4..eca361493e80 100644
--- a/src/codegen/spirv/codegen_spirv.h
+++ b/src/codegen/spirv/codegen_spirv.h
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -112,10 +112,10 @@ class CodeGenSPIRV:
     /*! \brief Whether it is volatile */
     bool content_fixed{false};
     /*! \brief Current content type */
-    DataType content_type{DataType::Handle()};
+    Type content_type{Handle()};
     // Update content type if it hasn't been updated.
-    void UpdateContentType(DataType type) {
+    void UpdateContentType(Type type) {
       if (content_fixed) {
         CHECK_EQ(type, content_type)
             << "Cannot use two different content type in GLSL model";
diff --git a/src/codegen/spirv/intrin_rule_spirv.cc b/src/codegen/spirv/intrin_rule_spirv.cc
index 7a347e5e8dbc..fca9aa203f80 100644
--- a/src/codegen/spirv/intrin_rule_spirv.cc
+++ b/src/codegen/spirv/intrin_rule_spirv.cc
@@ -39,13 +39,13 @@ inline void DispatchGLSLPureIntrin(const TVMArgs& targs, TVMRetValue* rv) {
   CHECK(call != nullptr);
   Array<Expr> cargs;
   // intrin id.
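// ---------------------------------------------------------------------------
// What DispatchGLSLPureIntrin (beginning above) does to a call: it rebuilds
// e.g. sqrt(x) as spirv_glsl450(id, x), where id is the GLSL.std.450 opcode
// (31 for Sqrt per that spec), so CodeGenSPIRV only has to read args[0] to
// pick the extended instruction. A sketch with hypothetical mini-IR types
// standing in for tvm::Expr / ir::Call:
#include <string>
#include <vector>

struct MiniCall { std::string name; std::vector<int> operands; };

MiniCall dispatch_glsl450(unsigned id, const MiniCall& call) {
  MiniCall out{"spirv_glsl450", {static_cast<int>(id)}};
  out.operands.insert(out.operands.end(),
                      call.operands.begin(), call.operands.end());
  return out;
}
// ---------------------------------------------------------------------------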
- cargs.push_back(ir::UIntImm::make(DataType::UInt(32), id)); + cargs.push_back(ir::UIntImm::make(UInt(32), id)); for (Expr arg : call->args) { cargs.push_back(arg); } *rv = ir::Call::make( - call->dtype, "spirv_glsl450", cargs, ir::Call::PureIntrinsic); + call->type, "spirv_glsl450", cargs, ir::Call::PureIntrinsic); } TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.floor") diff --git a/src/codegen/spirv/ir_builder.cc b/src/codegen/spirv/ir_builder.cc index 6f8d96e148c1..35d57d7cc3f8 100644 --- a/src/codegen/spirv/ir_builder.cc +++ b/src/codegen/spirv/ir_builder.cc @@ -53,10 +53,10 @@ void IRBuilder::InitHeader() { void IRBuilder::InitPreDefs() { ext_glsl450_ = ExtInstImport("GLSL.std.450"); - t_int32_ = DeclareType(DataType::Int(32)); - t_uint32_ = DeclareType(DataType::UInt(32)); - t_bool_ = DeclareType(DataType::UInt(1)); - t_fp32_ = DeclareType(DataType::Float(32)); + t_int32_ = DeclareType(Int(32)); + t_uint32_ = DeclareType(UInt(32)); + t_bool_ = DeclareType(UInt(1)); + t_fp32_ = DeclareType(Float(32)); const_i32_zero_ = IntImm(t_int32_, 0); // declare void, and void functions t_void_.id = id_counter_++; @@ -66,14 +66,14 @@ void IRBuilder::InitPreDefs() { .AddSeq(t_void_func_, t_void_).Commit(&global_); } -SType IRBuilder::GetSType(const DataType& dtype) { - if (dtype == DataType::Int(32)) { +SType IRBuilder::GetSType(const Type& dtype) { + if (dtype == Int(32)) { return t_int32_; - } else if (dtype == DataType::UInt(1)) { + } else if (dtype == UInt(1)) { return t_bool_; - } else if (dtype == DataType::Float(32)) { + } else if (dtype == Float(32)) { return t_fp32_; - } else if (dtype == DataType::UInt(32)) { + } else if (dtype == UInt(32)) { return t_uint32_; } uint32_t type_key; @@ -99,7 +99,7 @@ SType IRBuilder::GetPointerType(const SType& value_type, } SType t; t.id = id_counter_++; - t.type = DataType::Handle(); + t.type = Handle(); t.element_type_id = value_type.id; t.storage_class = storage_class; ib_.Begin(spv::OpTypePointer) @@ -118,11 +118,11 @@ SType IRBuilder::GetStructArrayType(const SType& value_type, SType arr_type; arr_type.id = id_counter_++; - arr_type.type = DataType::Handle(); + arr_type.type = Handle(); arr_type.element_type_id = value_type.id; if (num_elems != 0) { - Value length = UIntImm(GetSType(DataType::UInt(32)), num_elems); + Value length = UIntImm(GetSType(UInt(32)), num_elems); ib_.Begin(spv::OpTypeArray) .AddSeq(arr_type, value_type, length).Commit(&global_); } else { @@ -138,7 +138,7 @@ SType IRBuilder::GetStructArrayType(const SType& value_type, // declare struct of array SType struct_type; struct_type.id = id_counter_++; - struct_type.type = DataType::Handle(); + struct_type.type = Handle(); struct_type.element_type_id = value_type.id; ib_.Begin(spv::OpTypeStruct) .AddSeq(struct_type, arr_type).Commit(&global_); @@ -183,7 +183,7 @@ Value IRBuilder::FloatImm(const SType& dtype, double value) { } else { CHECK_EQ(dtype.type.bits(), 16); return Cast(dtype, - FloatImm(GetSType(DataType::Float(32)), value)); + FloatImm(GetSType(Float(32)), value)); } } @@ -206,7 +206,7 @@ Value IRBuilder::DeclarePushConstant(const std::vector& value_types) { CHECK_EQ(push_const_.id, 0); SType struct_type; struct_type.id = id_counter_++; - struct_type.type = DataType::Handle(); + struct_type.type = Handle(); ib_.Begin(spv::OpTypeStruct).Add(struct_type); for (const SType& vtype : value_types) { ib_.Add(vtype); @@ -218,7 +218,7 @@ Value IRBuilder::DeclarePushConstant(const std::vector& value_types) { ib_.Begin(spv::OpMemberDecorate) .AddSeq(struct_type, i, spv::DecorationOffset, 
offset) .Commit(&decorate_); - DataType t = value_types[i].type; + Type t = value_types[i].type; uint32_t nbits = t.bits() * t.lanes(); CHECK_EQ(nbits % 8 , 0); offset += nbits / 8; @@ -296,7 +296,7 @@ Value IRBuilder::Allocate(const SType& value_type, Value IRBuilder::GetWorkgroupID(uint32_t dim_index) { if (workgroup_id_.id == 0) { - SType vec3_type = this->GetSType(DataType::Int(32).with_lanes(3)); + SType vec3_type = this->GetSType(Int(32).with_lanes(3)); SType ptr_type = this->GetPointerType( vec3_type, spv::StorageClassInput); workgroup_id_ = NewValue(ptr_type, kVectorPtr); @@ -315,7 +315,7 @@ Value IRBuilder::GetWorkgroupID(uint32_t dim_index) { Value IRBuilder::GetLocalID(uint32_t dim_index) { if (local_id_.id == 0) { - SType vec3_type = this->GetSType(DataType::Int(32).with_lanes(3)); + SType vec3_type = this->GetSType(Int(32).with_lanes(3)); SType ptr_type = this->GetPointerType(vec3_type, spv::StorageClassInput); local_id_ = NewValue(ptr_type, kVectorPtr); ib_.Begin(spv::OpVariable) @@ -339,7 +339,7 @@ Value IRBuilder::GetConst_(const SType& dtype, const uint64_t* pvalue) { } CHECK_LE(dtype.type.bits(), 64); Value ret = NewValue(dtype, kConstant); - if (dtype.type == DataType::UInt(1)) { + if (dtype.type == UInt(1)) { // bool types. if (*pvalue) { ib_.Begin(spv::OpConstantTrue).AddSeq(ret); @@ -367,7 +367,7 @@ Value IRBuilder::GetConst_(const SType& dtype, const uint64_t* pvalue) { return ret; } -SType IRBuilder::DeclareType(const DataType& dtype) { +SType IRBuilder::DeclareType(const Type& dtype) { if (dtype.lanes() == 1) { SType t; t.id = id_counter_++; @@ -426,7 +426,7 @@ Value IRBuilder::CallGLSL450(const SType& ret_type, Value IRBuilder::Concat(const std::vector& vec) { bool is_const = vec[0].flag == kConstant; - DataType etype = vec[0].stype.type; + Type etype = vec[0].stype.type; int lanes = etype.lanes(); for (size_t i = 1; i < vec.size(); ++i) { CHECK_EQ(etype, vec[i].stype.type.element_of()) @@ -456,10 +456,10 @@ Value IRBuilder::Concat(const std::vector& vec) { Value IRBuilder::Cast(const SType& dst_type, spirv::Value value) { CHECK_NE(value.stype.id, 0U); if (value.stype.id == dst_type.id) return value; - const tvm::DataType& from = value.stype.type; - const tvm::DataType& to = dst_type.type; + const tvm::Type& from = value.stype.type; + const tvm::Type& to = dst_type.type; CHECK_EQ(from.lanes(), to.lanes()); - if (from == DataType::Bool()) { + if (from == Bool()) { if (to.is_int()) { return Select(value, IntImm(dst_type, 1), IntImm(dst_type, 0)); } else if (to.is_uint()) { @@ -471,7 +471,7 @@ Value IRBuilder::Cast(const SType& dst_type, spirv::Value value) { LOG(FATAL) << "cannot cast from " << from << " to " << to; return Value(); } - } else if (to == DataType::Bool()) { + } else if (to == Bool()) { if (from.is_int()) { return NE(value, IntImm(value.stype, 0)); } else if (to.is_uint()) { @@ -558,7 +558,7 @@ Value IRBuilder::Mod(Value a, Value b) { Value IRBuilder::_OpName(Value a, Value b) { \ CHECK_EQ(a.stype.id, b.stype.id); \ CHECK_EQ(a.stype.type.lanes(), b.stype.type.lanes()); \ - const auto& bool_type = this->GetSType(DataType::UInt(1).with_lanes(a.stype.type.lanes())); \ + const auto& bool_type = this->GetSType(UInt(1).with_lanes(a.stype.type.lanes())); \ if (a.stype.type.is_int()) { \ return MakeValue(spv::OpS##_Op, bool_type, a, b); \ } else if (a.stype.type.is_uint()) { \ @@ -578,7 +578,7 @@ DEFINE_BUILDER_CMP_OP(GE, GreaterThanEqual); Value IRBuilder::_OpName(Value a, Value b) { \ CHECK_EQ(a.stype.id, b.stype.id); \ CHECK_EQ(a.stype.type.lanes(), 
b.stype.type.lanes()); \ - const auto& bool_type = this->GetSType(DataType::UInt(1).with_lanes(a.stype.type.lanes())); \ + const auto& bool_type = this->GetSType(UInt(1).with_lanes(a.stype.type.lanes())); \ if (a.stype.type.is_int() || a.stype.type.is_uint()) { \ return MakeValue(spv::OpI##_Op, bool_type, a, b); \ } else { \ @@ -592,7 +592,7 @@ DEFINE_BUILDER_CMP_UOP(NE, NotEqual); Value IRBuilder::Select(Value cond, Value a, Value b) { CHECK_EQ(a.stype.id, b.stype.id); - CHECK_EQ(cond.stype.type.element_of(), DataType::UInt(1)); + CHECK_EQ(cond.stype.type.element_of(), UInt(1)); return MakeValue(spv::OpSelect, a.stype, cond, a, b); } diff --git a/src/codegen/spirv/ir_builder.h b/src/codegen/spirv/ir_builder.h index 3843cbb3c6a9..c04af743fbb8 100644 --- a/src/codegen/spirv/ir_builder.h +++ b/src/codegen/spirv/ir_builder.h @@ -45,7 +45,7 @@ struct SType { /*! \brief The Id to represent type */ uint32_t id{0}; /*! \brief corresponding TVM type */ - tvm::DataType type; + tvm::Type type; /*! \brief content type id if it is a pointer/struct-array class */ uint32_t element_type_id{0}; /*! \brief The storage class, if it is a pointer */ @@ -424,7 +424,7 @@ class IRBuilder { * \param dtype The data type. * \return The corresponding spirv type. */ - SType GetSType(const tvm::DataType& dtype); + SType GetSType(const tvm::Type& dtype); /*! * \brief Get the pointer type that points to value_type * \param value_type. @@ -575,7 +575,7 @@ class IRBuilder { // get constant given value encoded in uint64_t Value GetConst_(const SType& dtype, const uint64_t* pvalue); // declare type - SType DeclareType(const DataType& dtype); + SType DeclareType(const Type& dtype); /*! \brief internal instruction builder */ InstrBuilder ib_; /*! \brief Current label */ diff --git a/src/codegen/stackvm/codegen_stackvm.cc b/src/codegen/stackvm/codegen_stackvm.cc index 52cabaf0b6eb..fd2a5f764ff6 100644 --- a/src/codegen/stackvm/codegen_stackvm.cc +++ b/src/codegen/stackvm/codegen_stackvm.cc @@ -100,12 +100,12 @@ int CodeGenStackVM::GetVarID(const Variable* v) const { void CodeGenStackVM::VisitExpr_(const Load* op) { this->Push(op->buffer_var); - StackVM::OpCode code = StackVM::GetLoad(op->dtype); + StackVM::OpCode code = StackVM::GetLoad(Type2TVMType(op->type)); if (const IntImm* index = op->index.as()) { this->PushOp(code, index->value); } else { this->Push(op->index); - this->PushOp(StackVM::PUSH_I64, op->dtype.element_of().bytes()); + this->PushOp(StackVM::PUSH_I64, op->type.element_of().bytes()); this->PushOp(StackVM::MUL_I64); this->PushOp(StackVM::ADDR_ADD); this->PushOp(code, 0); @@ -114,13 +114,13 @@ void CodeGenStackVM::VisitExpr_(const Load* op) { void CodeGenStackVM::VisitStmt_(const Store* op) { this->Push(op->buffer_var); - StackVM::OpCode code = StackVM::GetStore(op->value.dtype()); + StackVM::OpCode code = StackVM::GetStore(Type2TVMType(op->value.type())); if (const IntImm* index = op->index.as()) { this->Push(op->value); this->PushOp(code, index->value); } else { this->Push(op->index); - this->PushOp(StackVM::PUSH_I64, op->value.dtype().element_of().bytes()); + this->PushOp(StackVM::PUSH_I64, op->value.type().element_of().bytes()); this->PushOp(StackVM::MUL_I64); this->PushOp(StackVM::ADDR_ADD); this->Push(op->value); @@ -147,7 +147,7 @@ void CodeGenStackVM::VisitExpr_(const Call* op) { CHECK(op->args.size() == 1 && l); this->PushOp(StackVM::LOAD_HEAP, GetVarID(l->buffer_var.get())); this->Push(l->index); - this->PushOp(StackVM::PUSH_I64, l->dtype.element_of().bytes()); + this->PushOp(StackVM::PUSH_I64, 
l->type.element_of().bytes()); this->PushOp(StackVM::MUL_I64); this->PushOp(StackVM::ADDR_ADD); } else if (op->is_intrinsic(Call::reinterpret)) { @@ -248,7 +248,7 @@ void CodeGenStackVM::PushBinary(StackVM::OpCode op_int64, const Expr& b) { this->Push(a); this->Push(b); - DataType t = a.dtype(); + Type t = a.type(); if (t.is_int()) { this->PushOp(op_int64); } else if (t.is_uint()) { @@ -258,7 +258,7 @@ void CodeGenStackVM::PushBinary(StackVM::OpCode op_int64, } } -void CodeGenStackVM::PushCast(DataType dst, DataType src) { +void CodeGenStackVM::PushCast(Type dst, Type src) { if (dst.is_int()) { if (src.is_int() || src.is_uint()) return; } else if (dst.is_uint()) { @@ -297,7 +297,7 @@ void CodeGenStackVM::VisitExpr_(const Variable *op) { void CodeGenStackVM::VisitExpr_(const Cast *op) { this->Push(op->value); - PushCast(op->dtype, op->value.dtype()); + PushCast(op->type, op->value.type()); } void CodeGenStackVM::VisitExpr_(const Add *op) { diff --git a/src/codegen/stackvm/codegen_stackvm.h b/src/codegen/stackvm/codegen_stackvm.h index dcae072c102d..1e6dd64476aa 100644 --- a/src/codegen/stackvm/codegen_stackvm.h +++ b/src/codegen/stackvm/codegen_stackvm.h @@ -108,7 +108,7 @@ class CodeGenStackVM const Expr& a, const Expr& b); // push cast; - void PushCast(DataType dst, DataType src); + void PushCast(Type dst, Type src); // overloadable functions // expression void VisitExpr_(const Variable* op) final; diff --git a/src/contrib/hybrid/codegen_hybrid.cc b/src/contrib/hybrid/codegen_hybrid.cc index 2bb86093e2f8..9e55d9be13d5 100644 --- a/src/contrib/hybrid/codegen_hybrid.cc +++ b/src/contrib/hybrid/codegen_hybrid.cc @@ -57,7 +57,7 @@ std::string CodeGenHybrid::Finish() { return stream.str(); } -void CodeGenHybrid::PrintType(DataType t, std::ostream &os) { +void CodeGenHybrid::PrintType(Type t, std::ostream &os) { if (t.is_float()) { os << "float"; CHECK(t.bits() == 16 || t.bits() == 32 || t.bits() == 64); @@ -76,11 +76,11 @@ void CodeGenHybrid::VisitExpr_(const IntImm *op, std::ostream& os) { // NOLINT( os << op->value; } void CodeGenHybrid::VisitExpr_(const UIntImm *op, std::ostream& os) { // NOLINT(*) - PrintType(op->dtype, os); + PrintType(op->type, os); os << "(" << op->value << ")"; } void CodeGenHybrid::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*) - PrintType(op->dtype, os); + PrintType(op->type, os); os << "(" << std::setprecision(20) << op->value << ")"; } void CodeGenHybrid::VisitExpr_(const StringImm *op, std::ostream& os) { // NOLINT(*) @@ -92,7 +92,7 @@ inline void PrintBinaryExpr(const T* op, const char *opstr, std::ostream& os, // NOLINT(*) CodeGenHybrid* p) { - CHECK(op->dtype.lanes() == 1) << "vec bin op not implemented"; + CHECK(op->type.lanes() == 1) << "vec bin op not implemented"; if (isalpha(opstr[0])) { os << opstr << '('; p->PrintExpr(op->a, os); @@ -114,7 +114,7 @@ inline void PrintBinaryIntrinsitc(const Call* op, const char *opstr, std::ostream& os, // NOLINT(*) CodeGenHybrid* p) { - CHECK(op->dtype.lanes() == 1) << "vec bin intrin not implemented"; + CHECK(op->type.lanes() == 1) << "vec bin intrin not implemented"; CHECK_EQ(op->args.size(), 2U); os << '('; p->PrintExpr(op->args[0], os); @@ -124,10 +124,10 @@ inline void PrintBinaryIntrinsitc(const Call* op, } void CodeGenHybrid::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*) - if (op->dtype == op->value.dtype()) { + if (op->type == op->value.type()) { PrintExpr(op->value, stream); } else { - PrintType(op->dtype, os); + PrintType(op->type, os); os << "("; PrintExpr(op->value, os); os << 
")"; @@ -148,14 +148,14 @@ void CodeGenHybrid::VisitExpr_(const Mul *op, std::ostream& os) { // NOLINT(*) } void CodeGenHybrid::VisitExpr_(const Div *op, std::ostream& os) { // NOLINT(*) - if (op->dtype.is_int()) + if (op->type.is_int()) PrintBinaryExpr(op, "//", os, this); else PrintBinaryExpr(op, "/", os, this); } void CodeGenHybrid::VisitExpr_(const FloorDiv *op, std::ostream& os) { // NOLINT(*) - if (op->dtype.is_int()) + if (op->type.is_int()) PrintBinaryExpr(op, "//", os, this); else PrintBinaryExpr(op, "/", os, this); @@ -320,7 +320,7 @@ void CodeGenHybrid::VisitStmt_(const Realize *op) { } if (op->bounds.size() == 1) stream << ", "; stream << "), '"; - PrintType(op->dtype, stream); + PrintType(op->type, stream); stream << "', '"; stream << alloc_storage_scope_[op->func] << "')\n"; } diff --git a/src/contrib/hybrid/codegen_hybrid.h b/src/contrib/hybrid/codegen_hybrid.h index 2c719b0b3ecf..866756996f8d 100644 --- a/src/contrib/hybrid/codegen_hybrid.h +++ b/src/contrib/hybrid/codegen_hybrid.h @@ -138,7 +138,7 @@ class CodeGenHybrid : * \param t The type representation. * \param os The stream to print the ctype into */ - virtual void PrintType(DataType t, std::ostream& os); // NOLINT(*) + virtual void PrintType(Type t, std::ostream& os); // NOLINT(*) private: /*! \brief The current indent of the code dump. */ diff --git a/src/lang/attrs.cc b/src/lang/attrs.cc index b83734beacb3..007a68b1e629 100644 --- a/src/lang/attrs.cc +++ b/src/lang/attrs.cc @@ -177,7 +177,7 @@ bool AttrsEqualHandler::VisitAttr_(const Not* lhs, const ObjectRef& other) { bool AttrsEqualHandler::VisitAttr_(const Cast* lhs, const ObjectRef& other) { if (const auto* rhs = other.as()) { - if (lhs->dtype != rhs->dtype) return false; + if (lhs->type != rhs->type) return false; return Equal(lhs->value, rhs->value); } else { return false; @@ -188,7 +188,7 @@ bool AttrsEqualHandler::VisitAttr_(const Call* lhs, const ObjectRef& other) { if (const auto* rhs = other.as()) { return lhs->name == rhs->name && - lhs->dtype == rhs->dtype && + lhs->type == rhs->type && lhs->call_type == rhs->call_type && Equal(lhs->args, rhs->args); } else { @@ -290,7 +290,7 @@ size_t AttrsHashHandler::VisitAttr_(const Cast* op) { static size_t key = std::hash()(Cast::_type_key); AttrsHash hasher; size_t res = key; - res = Combine(res, hasher(op->dtype)); + res = Combine(res, hasher(op->type)); res = Combine(res, Hash(op->value)); return res; } @@ -300,7 +300,7 @@ size_t AttrsHashHandler::VisitAttr_(const Call* op) { AttrsHash hasher; size_t res = key; res = Combine(res, hasher(op->name)); - res = Combine(res, hasher(op->dtype)); + res = Combine(res, hasher(op->type)); res = Combine(res, Hash(op->args)); return res; } diff --git a/src/lang/buffer.cc b/src/lang/buffer.cc index eb5d87efbbfa..77e741086a59 100644 --- a/src/lang/buffer.cc +++ b/src/lang/buffer.cc @@ -42,10 +42,10 @@ Array SimplifyArray(Array array) { } Buffer decl_buffer(Array shape, - DataType dtype, + Type dtype, std::string name) { return BufferNode::make( - Var(name, DataType::Handle()), + Var(name, Handle()), dtype, shape, Array(), @@ -279,30 +279,30 @@ inline Expr ElemOffset(const BufferNode* n, Array index) { return base; } -inline Expr BufferOffset(const BufferNode* n, Array index, DataType dtype) { +inline Expr BufferOffset(const BufferNode* n, Array index, Type dtype) { Expr offset = ElemOffset(n, index); if (n->dtype.lanes() != 1) { - offset = offset * make_const(offset.dtype(), dtype.lanes()); + offset = offset * make_const(offset.type(), dtype.lanes()); } if (dtype.lanes() != 
1) { - return ir::Ramp::make(offset, make_const(offset.dtype(), 1), dtype.lanes()); + return ir::Ramp::make(offset, make_const(offset.type(), 1), dtype.lanes()); } else { return offset; } } -Expr Buffer::vload(Array begin, DataType dtype) const { - // specially handle bool, stored asDataType::Int(8) +Expr Buffer::vload(Array begin, Type dtype) const { + // specially handle bool, stored as Int(8) const BufferNode* n = operator->(); CHECK(dtype.element_of() == n->dtype.element_of() && dtype.lanes() % n->dtype.lanes() == 0) << "Cannot load " << dtype << " from buffer of " << n->dtype; - if (dtype == DataType::Bool()) { + if (dtype == Bool()) { return ir::Cast::make( - DataType::Bool(), + Bool(), ir::Load::make( - DataType::Int(8), n->data, BufferOffset(n, begin, DataType::Int(8)), + Int(8), n->data, BufferOffset(n, begin, Int(8)), const_true())); } else { return ir::Load::make( @@ -312,17 +312,17 @@ Expr Buffer::vload(Array begin, DataType dtype) const { } Stmt Buffer::vstore(Array begin, Expr value) const { - // specially handle bool, stored asDataType::Int(8) + // specially handle bool, stored as Int(8) const BufferNode* n = operator->(); - DataType dtype = value.dtype(); + Type dtype = value.type(); CHECK(dtype.element_of() == n->dtype.element_of() && dtype.lanes() % n->dtype.lanes() == 0) << "Cannot load " << dtype << " from buffer of " << n->dtype; - if (value.dtype() == DataType::Bool()) { + if (value.type() == Bool()) { return ir::Store::make(n->data, - ir::Cast::make(DataType::Int(8), value), - BufferOffset(n, begin, DataType::Int(8)), + ir::Cast::make(Int(8), value), + BufferOffset(n, begin, Int(8)), const_true()); } else { return ir::Store::make(n->data, value, BufferOffset(n, begin, dtype), @@ -381,7 +381,7 @@ Buffer Buffer::MakeSlice(Array begins, Array extents) const { n->buffer_type); } -Expr Buffer::access_ptr(int access_mask, DataType ptr_type, int content_lanes, Expr offset) const { +Expr Buffer::access_ptr(int access_mask, Type ptr_type, int content_lanes, Expr offset) const { const BufferNode* self = operator->(); Expr e_dtype; Expr extent; @@ -396,21 +396,21 @@ Expr Buffer::access_ptr(int access_mask, DataType ptr_type, int content_lanes, E Expr elem_offset = self->elem_offset + offset; if (content_lanes > 1) { e_dtype = ir::TypeAnnotation(self->dtype.with_lanes(content_lanes)); - extent = extent / make_const(self->elem_offset.dtype(), content_lanes); - elem_offset = self->elem_offset / make_const(self->elem_offset.dtype(), + extent = extent / make_const(self->elem_offset.type(), content_lanes); + elem_offset = self->elem_offset / make_const(self->elem_offset.type(), content_lanes); } else { e_dtype = ir::TypeAnnotation(self->dtype); } Array acc_args{ e_dtype, self->data, elem_offset, - extent, make_const(DataType::Int(32), access_mask)}; + extent, make_const(Int(32), access_mask)}; return ir::Call::make( ptr_type, ir::intrinsic::tvm_access_ptr, acc_args, ir::Call::Intrinsic); } Buffer BufferNode::make(Var data, - DataType dtype, + Type dtype, Array shape, Array strides, Expr elem_offset, diff --git a/src/lang/channel.cc b/src/lang/channel.cc new file mode 100644 index 000000000000..cb3e2f566c77 --- /dev/null +++ b/src/lang/channel.cc @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file channel.cc + */ +#include <tvm/channel.h> + +namespace tvm { + +Channel ChannelNode::make(Var handle_var, Type dtype) { + auto n = make_node<ChannelNode>(); + n->handle_var = handle_var; + n->dtype = dtype; + return Channel(n); +} + +TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) +.set_dispatch<ChannelNode>([](const ObjectRef& node, IRPrinter *p) { + auto* op = static_cast<const ChannelNode*>(node.get()); + p->stream << "channel(" << op->handle_var << ", " << op->dtype << ")"; +}); + +TVM_REGISTER_NODE_TYPE(ChannelNode); +} // namespace tvm diff --git a/src/lang/expr.cc b/src/lang/expr.cc index 997c15177546..6a69fdaa20c4 100644 --- a/src/lang/expr.cc +++ b/src/lang/expr.cc @@ -29,11 +29,70 @@ namespace tvm { +// maximum and min values +Expr DataType::max() const { + using namespace ir; + CHECK_EQ(lanes(), 1); + if (is_int()) { + if (bits() == 64) { + return IntImm::make(*this, std::numeric_limits<int64_t>::max()); + } else if (bits() < 64) { + int64_t val = 1; + val = (val << (bits() - 1)) - 1; + return IntImm::make(*this, val); + } + } else if (is_uint()) { + if (bits() == 64) { + return UIntImm::make(*this, std::numeric_limits<uint64_t>::max()); + } else if (bits() < 64) { + uint64_t val = 1; + val = (val << static_cast<uint64_t>(bits())) - 1; + return UIntImm::make(*this, val); + } + } else if (is_float()) { + if (bits() == 64) { + return FloatImm::make(*this, std::numeric_limits<double>::max()); + } else if (bits() == 32) { + return FloatImm::make(*this, std::numeric_limits<float>::max()); + } else if (bits() == 16) { + return FloatImm::make(*this, 65504.0); + } + } + LOG(FATAL) << "Cannot decide max_value for type" << *this; + return Expr(); +} + +Expr DataType::min() const { + using namespace ir; + CHECK_EQ(lanes(), 1); + if (is_int()) { + if (bits() == 64) { + return IntImm::make(*this, std::numeric_limits<int64_t>::lowest()); + } else if (bits() < 64) { + int64_t val = 1; + val = -(val << (bits() - 1)); + return IntImm::make(*this, val); + } + } else if (is_uint()) { + return UIntImm::make(*this, 0); + } else if (is_float()) { + if (bits() == 64) { + return FloatImm::make(*this, std::numeric_limits<double>::lowest()); + } else if (bits() == 32) { + return FloatImm::make(*this, std::numeric_limits<float>::lowest()); + } else if (bits() == 16) { + return FloatImm::make(*this, -65504.0); + } + } + LOG(FATAL) << "Cannot decide min_value for type" << *this; + return Expr(); +} + Expr::Expr(int32_t value) - : Expr(IntImm::make(DataType::Int(32), value)) {} + : Expr(IntImm::make(Int(32), value)) {} Expr::Expr(float value) - : Expr(ir::FloatImm::make(DataType::Float(32), value)) {} + : Expr(ir::FloatImm::make(Float(32), value)) {} Expr::Expr(std::string str) : Expr(ir::StringImm::make(str)) {} @@ -43,7 +102,7 @@ Var::Var(std::string name_hint, DataType t) Var Variable::make(DataType t, std::string name_hint) { NodePtr<Variable> node = make_node<Variable>(); - node->dtype = t; + node->type = t; node->name_hint = std::move(name_hint); return Var(node); } @@ -54,11 +113,11 @@ Range::Range(Expr begin, Expr end) is_zero(begin) ?
end : (end - begin))) { } -Integer IntImm::make(DataType t, int64_t value) { +Integer IntImm::make(Type t, int64_t value) { CHECK(t.is_int() && t.is_scalar()) << "ValueError: IntImm can only take scalar."; NodePtr<IntImm> node = make_node<IntImm>(); - node->dtype = t; + node->type = t; node->value = value; return Integer(node); } @@ -93,7 +152,7 @@ void Dump(const NodeRef& n) { std::cerr << n << "\n"; } -Var var(std::string name_hint, DataType t) { +Var var(std::string name_hint, Type t) { return Var(name_hint, t); } @@ -125,10 +184,10 @@ IRPrinter::FType& IRPrinter::vtable() { TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch<IntImm>([](const ObjectRef& node, IRPrinter* p) { auto* op = static_cast<const IntImm*>(node.get()); - if (op->dtype == DataType::Int(32)) { + if (op->type == Int(32)) { p->stream << op->value; } else { - p->stream << "(" << op->dtype << ")" << op->value; + p->stream << "(" << op->type << ")" << op->value; } }); diff --git a/src/lang/expr_operator.cc b/src/lang/expr_operator.cc index 1166e7eef976..220d4378cc97 100644 --- a/src/lang/expr_operator.cc +++ b/src/lang/expr_operator.cc @@ -30,16 +30,16 @@ namespace tvm { // simple cast that only checks if type matches and cast -inline Expr SimpleCast(const DataType& t, Expr value) { - if (value.dtype() == t) return value; +inline Expr SimpleCast(const Type& t, Expr value) { + if (value.type() == t) return value; return ir::Cast::make(t, value); } // The public function with a quick checking path. void BinaryOpMatchTypes(Expr& lhs, Expr& rhs) { // NOLINT(*) - if (lhs.dtype() == rhs.dtype()) return; - DataType ltype = lhs.dtype(); - DataType rtype = rhs.dtype(); + if (lhs.type() == rhs.type()) return; + Type ltype = lhs.type(); + Type rtype = rhs.type(); if (ltype.lanes() == 1 && rtype.lanes() != 1) { lhs = ir::Broadcast::make(lhs, rtype.lanes()); } else if (rtype.lanes() == 1 && ltype.lanes() != 1) { @@ -48,96 +48,37 @@ void BinaryOpMatchTypes(Expr& lhs, Expr& rhs) { // NOLINT(*) CHECK(ltype.lanes() == rtype.lanes()) << "Cannot match type " << ltype << " vs " << rtype; } - if (lhs.dtype() == rhs.dtype()) return; + if (lhs.type() == rhs.type()) return; // Only do very simple type coversion - // int->float, DataType::Int(32)->int(64) + // int->float, int(32)->int(64) // require the types to be relatively consistent // This will the reduce amount code generated by operators // and also help user to find potential type conversion problems.
- if (!lhs.dtype().is_float() && rhs.dtype().is_float()) { + if (!lhs.type().is_float() && rhs.type().is_float()) { // int->float - lhs = cast(rhs.dtype(), lhs); - } else if (lhs.dtype().is_float() && !rhs.dtype().is_float()) { + lhs = cast(rhs.type(), lhs); + } else if (lhs.type().is_float() && !rhs.type().is_float()) { // int->float - rhs = cast(lhs.dtype(), rhs); - } else if ((lhs.dtype().is_int() && rhs.dtype().is_int()) || - (lhs.dtype().is_uint() && rhs.dtype().is_uint())) { + rhs = cast(lhs.type(), rhs); + } else if ((lhs.type().is_int() && rhs.type().is_int()) || + (lhs.type().is_uint() && rhs.type().is_uint())) { // promote int to higher bits - if (lhs.dtype().bits() < rhs.dtype().bits()) { - lhs = cast(rhs.dtype(), lhs); + if (lhs.type().bits() < rhs.type().bits()) { + lhs = cast(rhs.type(), lhs); } else { - rhs = cast(lhs.dtype(), rhs); + rhs = cast(lhs.type(), rhs); } - } else if ((lhs.dtype().is_int() && rhs.dtype().is_uint()) || - (lhs.dtype().is_uint() && rhs.dtype().is_int())) { - int bits = std::max(lhs.dtype().bits(), rhs.dtype().bits()); - lhs = SimpleCast(DataType::Int(bits, lhs.dtype().lanes()), lhs); - rhs = SimpleCast(DataType::Int(bits, rhs.dtype().lanes()), rhs); + } else if ((lhs.type().is_int() && rhs.type().is_uint()) || + (lhs.type().is_uint() && rhs.type().is_int())) { + int bits = std::max(lhs.type().bits(), rhs.type().bits()); + lhs = SimpleCast(Int(bits, lhs.type().lanes()), lhs); + rhs = SimpleCast(Int(bits, rhs.type().lanes()), rhs); } else { LOG(FATAL) << "Cannot match type " << ltype << " vs " << rtype; } } -// maximum and min limits -Expr max_value(const DataType& dtype) { - using namespace ir; - CHECK_EQ(dtype.lanes(), 1); - if (dtype.is_int()) { - if (dtype.bits() == 64) { - return IntImm::make(dtype, std::numeric_limits<int64_t>::max()); - } else if (dtype.bits() < 64) { - int64_t val = 1; - val = (val << (dtype.bits() - 1)) - 1; - return IntImm::make(dtype, val); - } - } else if (dtype.is_uint()) { - if (dtype.bits() == 64) { - return UIntImm::make(dtype, std::numeric_limits<uint64_t>::max()); - } else if (dtype.bits() < 64) { - uint64_t val = 1; - val = (val << static_cast<uint64_t>(dtype.bits())) - 1; - return UIntImm::make(dtype, val); - } - } else if (dtype.is_float()) { - if (dtype.bits() == 64) { - return FloatImm::make(dtype, std::numeric_limits<double>::max()); - } else if (dtype.bits() == 32) { - return FloatImm::make(dtype, std::numeric_limits<float>::max()); - } else if (dtype.bits() == 16) { - return FloatImm::make(dtype, 65504.0); - } - } - LOG(FATAL) << "Cannot decide max_value for type" << dtype; - return Expr(); -} - -Expr min_value(const DataType& dtype) { - using namespace ir; - CHECK_EQ(dtype.lanes(), 1); - if (dtype.is_int()) { - if (dtype.bits() == 64) { - return IntImm::make(dtype, std::numeric_limits<int64_t>::lowest()); - } else if (dtype.bits() < 64) { - int64_t val = 1; - val = -(val << (dtype.bits() - 1)); - return IntImm::make(dtype, val); - } - } else if (dtype.is_uint()) { - return UIntImm::make(dtype, 0); - } else if (dtype.is_float()) { - if (dtype.bits() == 64) { - return FloatImm::make(dtype, std::numeric_limits<double>::lowest()); - } else if (dtype.bits() == 32) { - return FloatImm::make(dtype, std::numeric_limits<float>::lowest()); - } else if (dtype.bits() == 16) { - return FloatImm::make(dtype, -65504.0); - } - } - LOG(FATAL) << "Cannot decide min_value for type" << dtype; - return Expr(); -} - template<typename ValueType> inline bool ConstPowerHelper(ValueType val, int *shift) { if (val <= 0) return false; @@ -162,11 +103,11 @@ bool is_const_power_of_two_integer(const Expr& x, int* shift) { } }
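Note that the BinaryOpMatchTypes hunk above only swaps the dtype()/type() accessor spelling; the coercion rules themselves are untouched by this revert. A minimal self-contained sketch of those rules, with a toy DType struct standing in for tvm::Type (illustrative only, not TVM's API; unlike the real code, which rejects mismatched float widths with LOG(FATAL), this sketch simply widens them):

#include <algorithm>
#include <cassert>

// Toy stand-in for tvm::Type: a scalar type kind plus bit width (lanes omitted).
enum class Kind { Int, UInt, Float };
struct DType { Kind kind; int bits; };
inline bool operator==(DType a, DType b) { return a.kind == b.kind && a.bits == b.bits; }

// Mirrors the branch order above: int is cast toward float, equal-kind
// operands widen to the larger bit width, and a signed/unsigned mix
// promotes both sides to a signed int of the maximum width.
inline DType Promote(DType l, DType r) {
  if (l == r) return l;
  if (l.kind != Kind::Float && r.kind == Kind::Float) return r;  // int -> float
  if (l.kind == Kind::Float && r.kind != Kind::Float) return l;  // int -> float
  if (l.kind == r.kind) return l.bits >= r.bits ? l : r;         // widen int/uint
  return DType{Kind::Int, std::max(l.bits, r.bits)};             // int + uint -> int
}

int main() {
  assert((Promote({Kind::Int, 32}, {Kind::Float, 32}) == DType{Kind::Float, 32}));
  assert((Promote({Kind::Int, 32}, {Kind::Int, 64}) == DType{Kind::Int, 64}));
  assert((Promote({Kind::UInt, 16}, {Kind::Int, 32}) == DType{Kind::Int, 32}));
  return 0;
}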
-Expr cast(const DataType& t, Expr value) { +Expr cast(const Type& t, Expr value) { using ir::IntImm; using ir::UIntImm; using ir::FloatImm; - if (value.dtype() == t) return value; + if (value.type() == t) return value; // const fold IntImm as they are used in index computations if (t.lanes() == 1) { if (const IntImm* op = value.as()) { @@ -178,10 +119,10 @@ Expr cast(const DataType& t, Expr value) { } return ir::Cast::make(t, value); } else { - if (value.dtype().lanes() == 1) { + if (value.type().lanes() == 1) { // manually unroll cast - DataType vtype = t.element_of(); - if (value.dtype() != vtype) { + Type vtype = t.element_of(); + if (value.type() != vtype) { if (const IntImm* op = value.as()) { value = make_const(vtype, op->value); } else if (const UIntImm* op = value.as()) { @@ -194,14 +135,14 @@ Expr cast(const DataType& t, Expr value) { } return ir::Broadcast::make(value, t.lanes()); } else { - CHECK(value.dtype().lanes() == t.lanes()); + CHECK(value.type().lanes() == t.lanes()); return ir::Cast::make(t, value); } } } -Expr reinterpret(const DataType& t, Expr value) { - if (value.dtype() == t) return value; +Expr reinterpret(const Type& t, Expr value) { + if (value.type() == t) return value; return ir::Call::make(t, ir::Call::reinterpret, { value }, ir::Call::PureIntrinsic); } @@ -218,9 +159,9 @@ Expr operator-(Expr a) { using ir::FloatImm; const IntImm* pa = a.as(); const FloatImm* fa = a.as(); - if (pa) return ir::IntImm::make(a.dtype(), -pa->value); - if (fa) return ir::FloatImm::make(a.dtype(), -fa->value); - return make_zero(a.dtype()) - a; + if (pa) return ir::IntImm::make(a.type(), -pa->value); + if (fa) return ir::FloatImm::make(a.type(), -fa->value); + return make_zero(a.type()) - a; } Expr operator-(Expr a, Expr b) { @@ -245,8 +186,8 @@ Expr div(Expr a, Expr b) { } Expr truncdiv(Expr a, Expr b) { - CHECK(a.dtype().is_int() || a.dtype().is_uint()); - CHECK(b.dtype().is_int() || b.dtype().is_uint()); + CHECK(a.type().is_int() || a.type().is_uint()); + CHECK(b.type().is_int() || b.type().is_uint()); return div(a, b); } @@ -275,8 +216,8 @@ Expr indexmod(Expr a, Expr b) { } Expr floordiv(Expr a, Expr b) { - CHECK(a.dtype().is_int() || a.dtype().is_uint()); - CHECK(b.dtype().is_int() || b.dtype().is_uint()); + CHECK(a.type().is_int() || a.type().is_uint()); + CHECK(b.type().is_int() || b.type().is_uint()); BinaryOpMatchTypes(a, b); Expr ret = arith::TryConstFold(a, b); if (ret.defined()) return ret; @@ -284,8 +225,8 @@ Expr floordiv(Expr a, Expr b) { } Expr floormod(Expr a, Expr b) { - CHECK(a.dtype().is_int() || a.dtype().is_uint()); - CHECK(b.dtype().is_int() || b.dtype().is_uint()); + CHECK(a.type().is_int() || a.type().is_uint()); + CHECK(b.type().is_int() || b.type().is_uint()); BinaryOpMatchTypes(a, b); Expr ret = arith::TryConstFold(a, b); if (ret.defined()) return ret; @@ -323,7 +264,7 @@ Expr max(Expr a, Expr b) { Expr if_then_else(Expr cond, Expr true_value, Expr false_value) { using ir::IntImm; using ir::UIntImm; - CHECK(cond.dtype() == DataType::Bool(1)) + CHECK(cond.type() == Bool(1)) << "if_then_else only accept the condition to be boolean type."; BinaryOpMatchTypes(true_value, false_value); if (const UIntImm* op = cond.as()) { @@ -340,7 +281,7 @@ Expr if_then_else(Expr cond, Expr true_value, Expr false_value) { } } return ir::Call::make( - true_value.dtype(), + true_value.type(), ir::intrinsic::tvm_if_then_else, {cond, true_value, false_value}, ir::Call::PureIntrinsic); @@ -348,7 +289,7 @@ Expr if_then_else(Expr cond, Expr true_value, Expr false_value) { Expr 
likely(Expr cond) { if (is_const(cond)) return cond; - return ir::Call::make(cond.dtype(), ir::Call::likely, { cond }, ir::Call::PureIntrinsic); + return ir::Call::make(cond.type(), ir::Call::likely, { cond }, ir::Call::PureIntrinsic); } Expr operator>(Expr a, Expr b) { @@ -394,23 +335,23 @@ Expr operator!=(Expr a, Expr b) { } Expr operator&&(Expr a, Expr b) { - CHECK(a.dtype().is_bool()); - CHECK(b.dtype().is_bool()); + CHECK(a.type().is_bool()); + CHECK(b.type().is_bool()); Expr ret = arith::TryConstFold(a, b); if (ret.defined()) return ret; return ir::And::make(a, b); } Expr operator||(Expr a, Expr b) { - CHECK(a.dtype().is_bool()); - CHECK(b.dtype().is_bool()); + CHECK(a.type().is_bool()); + CHECK(b.type().is_bool()); Expr ret = arith::TryConstFold(a, b); if (ret.defined()) return ret; return ir::Or::make(a, b); } Expr operator!(Expr a) { - CHECK(a.dtype().is_bool()); + CHECK(a.type().is_bool()); Expr ret = arith::TryConstFold(a); if (ret.defined()) return ret; return ir::Not::make(a); @@ -419,211 +360,211 @@ Expr operator!(Expr a) { Expr operator>>(Expr a, Expr b) { BinaryOpMatchTypes(a, b); TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, (pa->value >> pb->value)); if (pb) { if (pb->value == 0) return a; } }); - return ir::Call::make(a.dtype(), ir::Call::shift_right, { a, b }, ir::Call::PureIntrinsic); + return ir::Call::make(a.type(), ir::Call::shift_right, { a, b }, ir::Call::PureIntrinsic); } Expr operator<<(Expr a, Expr b) { BinaryOpMatchTypes(a, b); TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, (pa->value << pb->value)); if (pb) { if (pb->value == 0) return a; } }); - return ir::Call::make(a.dtype(), ir::Call::shift_left, { a, b }, ir::Call::PureIntrinsic); + return ir::Call::make(a.type(), ir::Call::shift_left, { a, b }, ir::Call::PureIntrinsic); } Expr operator&(Expr a, Expr b) { BinaryOpMatchTypes(a, b); TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, (pa->value & pb->value)); }); - return ir::Call::make(a.dtype(), ir::Call::bitwise_and, { a, b }, ir::Call::PureIntrinsic); + return ir::Call::make(a.type(), ir::Call::bitwise_and, { a, b }, ir::Call::PureIntrinsic); } Expr operator|(Expr a, Expr b) { BinaryOpMatchTypes(a, b); TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, (pa->value | pb->value)); }); - return ir::Call::make(a.dtype(), ir::Call::bitwise_or, { a, b }, ir::Call::PureIntrinsic); + return ir::Call::make(a.type(), ir::Call::bitwise_or, { a, b }, ir::Call::PureIntrinsic); } Expr operator^(Expr a, Expr b) { BinaryOpMatchTypes(a, b); TVM_INDEX_CONST_PROPAGATION({ - const DataType& rtype = a.dtype(); + const Type& rtype = a.type(); if (pa && pb) return IntImm::make(rtype, (pa->value ^ pb->value)); }); - return ir::Call::make(a.dtype(), ir::Call::bitwise_xor, { a, b }, ir::Call::PureIntrinsic); + return ir::Call::make(a.type(), ir::Call::bitwise_xor, { a, b }, ir::Call::PureIntrinsic); } Expr operator~(Expr a) { - CHECK(a.dtype().is_int() || a.dtype().is_uint()); - return ir::Call::make(a.dtype(), ir::Call::bitwise_not, { a }, ir::Call::PureIntrinsic); + CHECK(a.type().is_int() || a.type().is_uint()); + return ir::Call::make(a.type(), ir::Call::bitwise_not, { a }, ir::Call::PureIntrinsic); } Expr pow(Expr x, 
Expr y) { BinaryOpMatchTypes(x, y); - CHECK(x.dtype().is_float()) << "power only applies to float"; - return ir::Call::make(x.dtype(), "pow", { x, y }, ir::Call::PureIntrinsic); + CHECK(x.type().is_float()) << "power only applies to float"; + return ir::Call::make(x.type(), "pow", { x, y }, ir::Call::PureIntrinsic); } Expr abs(Expr x) { - if (x.dtype().is_int()) { + if (x.type().is_int()) { using ir::IntImm; const IntImm* px = x.as(); if (px) { - return ir::IntImm::make(x.dtype(), std::abs(px->value)); + return ir::IntImm::make(x.type(), std::abs(px->value)); } - return ir::Select::make(x >= make_zero(x.dtype()), x, -x); - } else if (x.dtype().is_float()) { + return ir::Select::make(x >= make_zero(x.type()), x, -x); + } else if (x.type().is_float()) { using ir::FloatImm; const FloatImm* fx = x.as(); if (fx) { - return ir::FloatImm::make(x.dtype(), std::fabs(fx->value)); + return ir::FloatImm::make(x.type(), std::fabs(fx->value)); } - return ir::Call::make(x.dtype(), "fabs", {x}, ir::Call::PureIntrinsic); - } else if (x.dtype().is_uint()) { + return ir::Call::make(x.type(), "fabs", {x}, ir::Call::PureIntrinsic); + } else if (x.type().is_uint()) { return x; } else { - LOG(FATAL) << "Data type " << x.dtype() + LOG(FATAL) << "Data type " << x.type() <<" not supported for absolute op. Skipping absolute op..."; return x; } } Expr isnan(Expr x) { - DataType t = DataType::Bool(x.dtype().lanes()); - if (x.dtype().is_int() || x.dtype().is_uint()) { + Type t = Bool(x.type().lanes()); + if (x.type().is_int() || x.type().is_uint()) { return make_const(t, false); - } else if (x.dtype().is_float()) { + } else if (x.type().is_float()) { using ir::FloatImm; const FloatImm* fx = x.as(); if (fx) { return make_const(t, std::isnan(fx->value)); } - if (x.dtype().bits() == 16) { + if (x.type().bits() == 16) { return ir::Call::make(t, ir::Call::isnan, - {cast(DataType::Float(32, t.lanes()), std::move(x))}, + {cast(Float(32, t.lanes()), std::move(x))}, ir::Call::PureIntrinsic); } else { return ir::Call::make(t, ir::Call::isnan, {x}, ir::Call::PureIntrinsic); } } else { - LOG(FATAL) << "Data type " << x.dtype() + LOG(FATAL) << "Data type " << x.type() <<" not supported for isnan op. 
Skipping isnan op..."; return x; } } Expr sum(Expr source, Array rdom) { - Var x("x", source.dtype()), y("y", source.dtype()); + Var x("x", source.type()), y("y", source.type()); Expr result = ir::Add::make(x, y); - Expr identity_element = make_zero(source.dtype()); + Expr identity_element = make_zero(source.type()); ir::CommReducer combiner = ir::CommReducerNode::make({x}, {y}, {result}, {identity_element}); - return ir::Reduce::make(combiner, {source}, rdom, make_const(DataType::Bool(1), true), 0); + return ir::Reduce::make(combiner, {source}, rdom, make_const(Bool(1), true), 0); } Expr all(Expr source, Array rdom) { - CHECK(source.dtype().is_bool()); - Var x("x", source.dtype()), y("y", source.dtype()); + CHECK(source.type().is_bool()); + Var x("x", source.type()), y("y", source.type()); Expr result = ir::And::make(x, y); - Expr identity_element = make_const(source.dtype(), true); + Expr identity_element = make_const(source.type(), true); ir::CommReducer combiner = ir::CommReducerNode::make({x}, {y}, {result}, {identity_element}); - return ir::Reduce::make(combiner, {source}, rdom, make_const(DataType::Bool(1), true), 0); + return ir::Reduce::make(combiner, {source}, rdom, make_const(Bool(1), true), 0); } Expr any(Expr source, Array rdom) { - CHECK(source.dtype().is_bool()); - Var x("x", source.dtype()), y("y", source.dtype()); + CHECK(source.type().is_bool()); + Var x("x", source.type()), y("y", source.type()); Expr result = ir::Or::make(x, y); - Expr identity_element = make_const(source.dtype(), false); + Expr identity_element = make_const(source.type(), false); ir::CommReducer combiner = ir::CommReducerNode::make({x}, {y}, {result}, {identity_element}); - return ir::Reduce::make(combiner, {source}, rdom, make_const(DataType::Bool(1), true), 0); + return ir::Reduce::make(combiner, {source}, rdom, make_const(Bool(1), true), 0); } Expr max(Expr source, Array rdom) { - Var x("x", source.dtype()), y("y", source.dtype()); + Var x("x", source.type()), y("y", source.type()); Expr result = ir::Max::make(x, y); - Expr identity_element = min_value(source.dtype()); + Expr identity_element = source.type().min(); ir::CommReducer combiner = ir::CommReducerNode::make({x}, {y}, {result}, {identity_element}); - return ir::Reduce::make(combiner, {source}, rdom, make_const(DataType::Bool(1), true), 0); + return ir::Reduce::make(combiner, {source}, rdom, make_const(Bool(1), true), 0); } Expr min(Expr source, Array rdom) { - Var x("x", source.dtype()), y("y", source.dtype()); + Var x("x", source.type()), y("y", source.type()); Expr result = ir::Min::make(x, y); - Expr identity_element = max_value(source.dtype()); + Expr identity_element = source.type().max(); ir::CommReducer combiner = ir::CommReducerNode::make({x}, {y}, {result}, {identity_element}); - return ir::Reduce::make(combiner, {source}, rdom, make_const(DataType::Bool(1), true), 0); + return ir::Reduce::make(combiner, {source}, rdom, make_const(Bool(1), true), 0); } Expr prod(Expr source, Array rdom) { - Var x("x", source.dtype()), y("y", source.dtype()); + Var x("x", source.type()), y("y", source.type()); Expr result = ir::Mul::make(x, y); - Expr identity_element = make_const(source.dtype(), 1); + Expr identity_element = make_const(source.type(), 1); ir::CommReducer combiner = ir::CommReducerNode::make({x}, {y}, {result}, {identity_element}); - return ir::Reduce::make(combiner, {source}, rdom, make_const(DataType::Bool(1), true), 0); + return ir::Reduce::make(combiner, {source}, rdom, make_const(Bool(1), true), 0); } Expr fmod(Expr x, Expr y) { 
BinaryOpMatchTypes(x, y); - CHECK(x.dtype().is_float()) << "fmod only applies to float"; - return ir::Call::make(x.dtype(), "fmod", { x, y }, ir::Call::PureIntrinsic); + CHECK(x.type().is_float()) << "fmod only applies to float"; + return ir::Call::make(x.type(), "fmod", { x, y }, ir::Call::PureIntrinsic); } Expr floor(Expr x) { using ir::FloatImm; const FloatImm* fx = x.as(); - if (fx) return FloatImm::make(x.dtype(), std::floor(fx->value)); - return ir::Call::make(x.dtype(), "floor", {x}, ir::Call::PureIntrinsic); + if (fx) return FloatImm::make(x.type(), std::floor(fx->value)); + return ir::Call::make(x.type(), "floor", {x}, ir::Call::PureIntrinsic); } Expr ceil(Expr x) { using ir::FloatImm; const FloatImm* fx = x.as(); - if (fx) return FloatImm::make(x.dtype(), std::ceil(fx->value)); - return ir::Call::make(x.dtype(), "ceil", {x}, ir::Call::PureIntrinsic); + if (fx) return FloatImm::make(x.type(), std::ceil(fx->value)); + return ir::Call::make(x.type(), "ceil", {x}, ir::Call::PureIntrinsic); } Expr round(Expr x) { using ir::FloatImm; const FloatImm* fx = x.as(); - if (fx) return FloatImm::make(x.dtype(), std::nearbyint(fx->value)); - return ir::Call::make(x.dtype(), "round", {x}, ir::Call::PureIntrinsic); + if (fx) return FloatImm::make(x.type(), std::nearbyint(fx->value)); + return ir::Call::make(x.type(), "round", {x}, ir::Call::PureIntrinsic); } Expr nearbyint(Expr x) { using ir::FloatImm; const FloatImm* fx = x.as(); - if (fx) return FloatImm::make(x.dtype(), std::nearbyint(fx->value)); - return ir::Call::make(x.dtype(), "nearbyint", {x}, ir::Call::PureIntrinsic); + if (fx) return FloatImm::make(x.type(), std::nearbyint(fx->value)); + return ir::Call::make(x.type(), "nearbyint", {x}, ir::Call::PureIntrinsic); } Expr trunc(Expr x) { using ir::FloatImm; const FloatImm* fx = x.as(); if (fx) { - return FloatImm::make(x.dtype(), (fx->value < 0 ? std::ceil(fx->value) : + return FloatImm::make(x.type(), (fx->value < 0 ? 
std::ceil(fx->value) : std::floor(fx->value))); } - return ir::Call::make(x.dtype(), "trunc", {x}, ir::Call::PureIntrinsic); + return ir::Call::make(x.type(), "trunc", {x}, ir::Call::PureIntrinsic); } } // namespace tvm diff --git a/src/lang/ir.cc b/src/lang/ir.cc index 427e026bc728..bb8401dae843 100644 --- a/src/lang/ir.cc +++ b/src/lang/ir.cc @@ -35,7 +35,7 @@ Expr UIntImm::make(DataType t, uint64_t value) { CHECK(t.is_uint() && t.lanes() == 1) << "ValueError: UIntImm can only take scalar"; NodePtr node = make_node(); - node->dtype = t; + node->type = t; node->value = value; return Expr(node); } @@ -44,23 +44,23 @@ Expr FloatImm::make(DataType t, double value) { CHECK_EQ(t.lanes(), 1) << "ValueError: FloatImm can only take scalar"; NodePtr node = make_node(); - node->dtype = t; + node->type = t; node->value = value; return Expr(node); } Expr StringImm::make(std::string value) { NodePtr node = make_node(); - node->dtype = DataType::Handle(); + node->type = Handle(); node->value = std::move(value); return Expr(node); } Expr Cast::make(DataType t, Expr value) { CHECK(value.defined()); - CHECK_EQ(t.lanes(), value.dtype().lanes()); + CHECK_EQ(t.lanes(), value.type().lanes()); NodePtr node = make_node(); - node->dtype = t; + node->type = t; node->value = std::move(value); return Expr(node); } @@ -68,12 +68,12 @@ Expr Cast::make(DataType t, Expr value) { Expr And::make(Expr a, Expr b) { CHECK(a.defined()) << "ValueError: a is undefined"; CHECK(b.defined()) << "ValueError: b is undefined"; - CHECK(a.dtype().is_bool()); - CHECK(b.dtype().is_bool()); - CHECK(a.dtype() == b.dtype()) << "TypeError: mismatched types"; + CHECK(a.type().is_bool()); + CHECK(b.type().is_bool()); + CHECK(a.type() == b.type()) << "TypeError: mismatched types"; NodePtr node = make_node(); - node->dtype = DataType::Bool(a.dtype().lanes()); + node->type = Bool(a.type().lanes()); node->a = std::move(a); node->b = std::move(b); return Expr(node); @@ -82,12 +82,12 @@ Expr And::make(Expr a, Expr b) { Expr Or::make(Expr a, Expr b) { CHECK(a.defined()) << "ValueError: a is undefined"; CHECK(b.defined()) << "ValueError: b is undefined"; - CHECK(a.dtype().is_bool()); - CHECK(b.dtype().is_bool()); - CHECK(a.dtype() == b.dtype()) << "TypeError: mismatched types"; + CHECK(a.type().is_bool()); + CHECK(b.type().is_bool()); + CHECK(a.type() == b.type()) << "TypeError: mismatched types"; NodePtr node = make_node(); - node->dtype = DataType::Bool(a.dtype().lanes()); + node->type = Bool(a.type().lanes()); node->a = std::move(a); node->b = std::move(b); return Expr(node); @@ -95,10 +95,10 @@ Expr Or::make(Expr a, Expr b) { Expr Not::make(Expr a) { CHECK(a.defined()) << "ValueError: a is undefined"; - CHECK(a.dtype().is_bool()); + CHECK(a.type().is_bool()); NodePtr node = make_node(); - node->dtype = DataType::Bool(a.dtype().lanes()); + node->type = Bool(a.type().lanes()); node->a = std::move(a); return Expr(node); } @@ -107,27 +107,27 @@ Expr Select::make(Expr condition, Expr true_value, Expr false_value) { CHECK(condition.defined()) << "ValueError: condition is undefined"; CHECK(true_value.defined()) << "ValueError: true_value is undefined"; CHECK(false_value.defined()) << "ValueError: true_value is undefined"; - CHECK(condition.dtype().is_bool()); - CHECK_EQ(condition.dtype().lanes(), true_value.dtype().lanes()); - CHECK(false_value.dtype() == true_value.dtype()) << "TypeError: mismatched types"; + CHECK(condition.type().is_bool()); + CHECK_EQ(condition.type().lanes(), true_value.type().lanes()); + CHECK(false_value.type() == 
true_value.type()) << "TypeError: mismatched types"; NodePtr<Select> node = make_node<Select>(); - node->dtype = true_value.dtype(); + node->type = true_value.type(); node->condition = std::move(condition); node->true_value = std::move(true_value); node->false_value = std::move(false_value); return Expr(node); } -Expr Load::make(DataType dtype, Var buffer_var, Expr index, Expr predicate) { +Expr Load::make(DataType type, Var buffer_var, Expr index, Expr predicate) { CHECK(buffer_var.defined()); CHECK(predicate.defined()); CHECK(index.defined()); - CHECK_EQ(dtype.lanes(), index.dtype().lanes()); - CHECK_EQ(dtype.lanes(), predicate.dtype().lanes()); + CHECK_EQ(type.lanes(), index.type().lanes()); + CHECK_EQ(type.lanes(), predicate.type().lanes()); NodePtr<Load> node = make_node<Load>(); - node->dtype = dtype; + node->type = type; node->buffer_var = std::move(buffer_var); node->index = std::move(index); node->predicate = std::move(predicate); @@ -138,13 +138,13 @@ Expr Load::make(DataType dtype, Var buffer_var, Expr index, Expr predicate) { Expr Ramp::make(Expr base, Expr stride, int lanes) { CHECK(base.defined()); CHECK(stride.defined()); - CHECK(base.dtype().is_scalar()); - CHECK(stride.dtype().is_scalar()); + CHECK(base.type().is_scalar()); + CHECK(stride.type().is_scalar()); CHECK_GT(lanes, 1); - CHECK_EQ(stride.dtype(), base.dtype()); + CHECK_EQ(stride.type(), base.type()); NodePtr<Ramp> node = make_node<Ramp>(); - node->dtype = base.dtype().with_lanes(lanes); + node->type = base.type().with_lanes(lanes); node->base = base; node->stride = stride; node->lanes = lanes; @@ -153,11 +153,11 @@ Expr Ramp::make(Expr base, Expr stride, int lanes) { Expr Broadcast::make(Expr value, int lanes) { CHECK(value.defined()); - CHECK(value.dtype().is_scalar()); + CHECK(value.type().is_scalar()); CHECK_GT(lanes, 1); NodePtr<Broadcast> node = make_node<Broadcast>(); - node->dtype = value.dtype().with_lanes(lanes); + node->type = value.type().with_lanes(lanes); node->value = std::move(value); node->lanes = lanes; return Expr(node); @@ -166,10 +166,10 @@ Expr Broadcast::make(Expr value, int lanes) { Expr Let::make(Var var, Expr value, Expr body) { CHECK(value.defined()); CHECK(body.defined()); - CHECK_EQ(value.dtype(), var.dtype()); + CHECK_EQ(value.type(), var.type()); NodePtr<Let> node = make_node<Let>(); - node->dtype = body.dtype(); + node->type = body.type(); node->var = std::move(var); node->value = std::move(value); node->body = std::move(body); @@ -192,7 +192,7 @@ bool Call::is_vectorizable() const { return false; } -Expr Call::make(DataType dtype, +Expr Call::make(DataType type, std::string name, Array<Expr> args, CallType call_type, @@ -204,12 +204,12 @@ Expr Call::make(DataType dtype, if (call_type == Halide) { for (size_t i = 0; i < args.size(); ++i) { - CHECK(args[i].dtype().is_int()); + CHECK(args[i].type().is_int()); } } NodePtr<Call> node = make_node<Call>(); - node->dtype = dtype; + node->type = type; node->name = std::move(name); node->args = std::move(args); node->call_type = call_type; @@ -223,17 +223,17 @@ Expr Shuffle::make(Array<Expr> vectors, CHECK_NE(vectors.size(), 0U); CHECK_NE(indices.size(), 0U); - DataType base_type = vectors[0].dtype().element_of(); + Type base_type = vectors[0].type().element_of(); int total_lanes = 0; for (Expr val : vectors) { - CHECK(val.dtype().element_of() == base_type); - total_lanes += val.dtype().lanes(); + CHECK(val.type().element_of() == base_type); + total_lanes += val.type().lanes(); } CHECK_LE(indices.size(), static_cast<size_t>(total_lanes)); NodePtr<Shuffle> node = make_node<Shuffle>(); - node->dtype = base_type.with_lanes(static_cast<int>(indices.size())); + node->type =
base_type.with_lanes(static_cast(indices.size())); node->vectors = std::move(vectors); node->indices = std::move(indices); return Expr(node); @@ -247,8 +247,8 @@ Expr Shuffle::make_concat(Array vectors) { Array indices; int index = 0; for (const Expr& e : vectors) { - for (int i = 0; i < e.dtype().lanes(); ++i) { - indices.push_back(IntImm::make(DataType::Int(32), index++)); + for (int i = 0; i < e.type().lanes(); ++i) { + indices.push_back(IntImm::make(Int(32), index++)); } } return make(vectors, indices); @@ -298,7 +298,7 @@ Expr Reduce::make(CommReducer combiner, Array source, for (size_t i = 0; i < axis.size(); ++i) { CHECK(axis[i].defined()); } - n->dtype = source[value_index].dtype(); + n->type = source[value_index].type(); n->combiner = std::move(combiner); n->source = std::move(source); n->axis = std::move(axis); @@ -315,7 +315,7 @@ Expr Any::make() { Stmt LetStmt::make(Var var, Expr value, Stmt body) { CHECK(value.defined()); CHECK(body.defined()); - CHECK_EQ(value.dtype(), var.dtype()); + CHECK_EQ(value.type(), var.type()); NodePtr node = make_node(); node->var = std::move(var); @@ -338,7 +338,7 @@ Stmt AttrStmt::make(NodeRef node, Stmt AssertStmt::make(Expr condition, Expr message, Stmt body) { CHECK(condition.defined()); - CHECK(message.dtype() == DataType::Int(32) || + CHECK(message.type() == Int(32) || message.as()) << "TypeError: AssertStmt message must be an int or string:" << message << "\n"; @@ -368,9 +368,9 @@ Stmt For::make(Var loop_var, Stmt body) { CHECK(min.defined()); CHECK(extent.defined()); - CHECK(min.dtype().is_scalar()); - CHECK(extent.dtype().is_scalar()); - CHECK(loop_var.dtype().is_scalar()); + CHECK(min.type().is_scalar()); + CHECK(extent.type().is_scalar()); + CHECK(loop_var.type().is_scalar()); CHECK(body.defined()); NodePtr node = make_node(); @@ -387,8 +387,8 @@ Stmt Store::make(Var buffer_var, Expr value, Expr index, Expr predicate) { CHECK(value.defined()); CHECK(index.defined()); CHECK(predicate.defined()); - CHECK_EQ(value.dtype().lanes(), index.dtype().lanes()); - CHECK_EQ(value.dtype().lanes(), predicate.dtype().lanes()); + CHECK_EQ(value.type().lanes(), index.type().lanes()); + CHECK_EQ(value.type().lanes(), predicate.type().lanes()); NodePtr node = make_node(); node->buffer_var = std::move(buffer_var); @@ -416,7 +416,7 @@ Stmt Provide::make(FunctionRef func, int value_index, Expr value, Array ar } Stmt Allocate::make(Var buffer_var, - DataType dtype, + DataType type, Array extents, Expr condition, Stmt body, @@ -424,15 +424,15 @@ Stmt Allocate::make(Var buffer_var, std::string free_function) { for (size_t i = 0; i < extents.size(); ++i) { CHECK(extents[i].defined()); - CHECK(extents[i].dtype().is_scalar()); + CHECK(extents[i].type().is_scalar()); } CHECK(body.defined()); CHECK(condition.defined()); - CHECK(condition.dtype().is_bool()); + CHECK(condition.type().is_bool()); NodePtr node = make_node(); node->buffer_var = std::move(buffer_var); - node->dtype = dtype; + node->type = type; node->extents = std::move(extents); node->condition = std::move(condition); node->body = std::move(body); @@ -464,42 +464,42 @@ Stmt Free::make(Var buffer_var) { Stmt Realize::make(FunctionRef func, int value_index, - DataType dtype, + DataType type, Region bounds, Expr condition, Stmt body) { for (size_t i = 0; i < bounds.size(); ++i) { CHECK(bounds[i]->min.defined()); CHECK(bounds[i]->extent.defined()); - CHECK(bounds[i]->min.dtype().is_scalar()); - CHECK(bounds[i]->extent.dtype().is_scalar()); + CHECK(bounds[i]->min.type().is_scalar()); + 
CHECK(bounds[i]->extent.type().is_scalar()); } CHECK(body.defined()); CHECK(condition.defined()); - CHECK(condition.dtype().is_bool()); + CHECK(condition.type().is_bool()); NodePtr node = make_node(); node->func = std::move(func); node->value_index = value_index; - node->dtype = dtype; + node->type = type; node->bounds = std::move(bounds); node->condition = std::move(condition); node->body = std::move(body); return Stmt(node); } -Stmt Prefetch::make(FunctionRef func, int value_index, DataType dtype, Region bounds) { +Stmt Prefetch::make(FunctionRef func, int value_index, DataType type, Region bounds) { for (size_t i = 0; i < bounds.size(); ++i) { CHECK(bounds[i]->min.defined()); CHECK(bounds[i]->extent.defined()); - CHECK(bounds[i]->min.dtype().is_scalar()); - CHECK(bounds[i]->extent.dtype().is_scalar()); + CHECK(bounds[i]->min.type().is_scalar()); + CHECK(bounds[i]->extent.type().is_scalar()); } NodePtr node = make_node(); node->func = std::move(func); node->value_index = value_index; - node->dtype = dtype; + node->type = type; node->bounds = std::move(bounds); return Stmt(node); } @@ -555,14 +555,14 @@ Stmt Evaluate::make(Expr value) { TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch([](const ObjectRef& node, IRPrinter* p) { auto* op = static_cast(node.get()); - p->stream << "(" << op->dtype << ")" << op->value; + p->stream << "(" << op->type << ")" << op->value; }); TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch([](const ObjectRef& node, IRPrinter* p) { auto* op = static_cast(node.get()); auto& stream = p->stream; - switch (op->dtype.bits()) { + switch (op->type.bits()) { case 64: stream << op->value; break; @@ -573,7 +573,7 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) stream << op->value << 'h'; break; default: - LOG(FATAL) << "Unknown float type bits=" << op->dtype.bits(); + LOG(FATAL) << "Unknown float type bits=" << op->type.bits(); } }); @@ -616,7 +616,7 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch([](const ObjectRef& node, IRPrinter* p) { auto* op = static_cast(node.get()); - p->stream << op->dtype << '('; + p->stream << op->type << '('; p->Print(op->value); p->stream << ')'; }) @@ -959,7 +959,7 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) .set_dispatch([](const ObjectRef& node, IRPrinter* p) { auto* op = static_cast(node.get()); p->PrintIndent(); - p->stream << "allocate " << op->buffer_var << "[" << op->dtype; + p->stream << "allocate " << op->buffer_var << "[" << op->type; for (size_t i = 0; i < op->extents.size(); ++i) { p->stream << " * "; p->Print(op->extents[i]); diff --git a/src/lang/tensor.cc b/src/lang/tensor.cc index 1c110936b3ef..05ba6f7a08bd 100644 --- a/src/lang/tensor.cc +++ b/src/lang/tensor.cc @@ -56,7 +56,7 @@ Tensor Operation::output(size_t i) const { } Tensor TensorNode::make(Array shape, - DataType dtype, + Type dtype, Operation op, int value_index) { auto n = make_node(); diff --git a/src/node/reflection.cc b/src/node/reflection.cc index f53583723f24..e92ca92834a2 100644 --- a/src/node/reflection.cc +++ b/src/node/reflection.cc @@ -61,7 +61,7 @@ class AttrGetter : public AttrVisitor { void Visit(const char* key, void** value) final { if (skey == key) *ret = static_cast(value[0]); } - void Visit(const char* key, DataType* value) final { + void Visit(const char* key, Type* value) final { if (skey == key) *ret = value[0]; } void Visit(const char* key, std::string* value) final { @@ -135,7 +135,7 @@ class AttrDir : public AttrVisitor { void Visit(const char* key, void** value) final { 
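// A note on the pattern behind these reflection hunks: AttrVisitor is the
// double-dispatch interface used by node reflection. Every node class
// implements VisitAttrs and calls back v->Visit("field_name", &field) once
// per field, so AttrGetter can return a single field by key while AttrDir
// simply collects every key it is shown. Hypothetical node for illustration:
//
//   struct MyNode : Node {
//     int axis;
//     Type dtype;
//     void VisitAttrs(AttrVisitor* v) {
//       v->Visit("axis", &axis);    // AttrDir records the key "axis"
//       v->Visit("dtype", &dtype);  // AttrGetter matches on skey == "dtype"
//     }
//   };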
names->push_back(key); } - void Visit(const char* key, DataType* value) final { + void Visit(const char* key, Type* value) final { names->push_back(key); } void Visit(const char* key, std::string* value) final { diff --git a/src/node/serialization.cc b/src/node/serialization.cc index 5a991aa3ad1b..cb310eb2cda9 100644 --- a/src/node/serialization.cc +++ b/src/node/serialization.cc @@ -39,11 +39,11 @@ namespace tvm { inline std::string Type2String(const DataType& t) { - return runtime::TVMType2String(t); + return runtime::TVMType2String(Type2TVMType(t)); } -inline DataType String2Type(std::string s) { - return DataType(runtime::String2TVMType(s)); +inline Type String2Type(std::string s) { + return TVMType2Type(runtime::String2TVMType(s)); } // indexer to index all the nodes diff --git a/src/op/compute_op.cc b/src/op/compute_op.cc index bd129ac33058..5f5d2d4f475b 100644 --- a/src/op/compute_op.cc +++ b/src/op/compute_op.cc @@ -70,9 +70,9 @@ Array BaseComputeOpNode::root_iter_vars() const { return ret; } -DataType ComputeOpNode::output_dtype(size_t idx) const { +Type ComputeOpNode::output_dtype(size_t idx) const { CHECK_LT(idx, num_outputs()); - return body[idx].dtype(); + return body[idx].type(); } Array BaseComputeOpNode::output_shape(size_t idx) const { @@ -100,7 +100,7 @@ Tensor compute(Array shape, std::ostringstream os; os << "ax" << i; axis.emplace_back(IterVarNode::make( - Range(0, shape[i]), Var(os.str(), shape[i].dtype()), kDataPar)); + Range(0, shape[i]), Var(os.str(), shape[i].type()), kDataPar)); args.push_back(axis.back()->var); } @@ -122,7 +122,7 @@ Array compute(Array shape, std::ostringstream os; os << "ax" << i; axis.emplace_back(IterVarNode::make( - Range(0, shape[i]), Var(os.str(), shape[i].dtype()), kDataPar)); + Range(0, shape[i]), Var(os.str(), shape[i].type()), kDataPar)); args.push_back(axis.back()->var); } @@ -190,7 +190,7 @@ Operation ComputeOpNode::ReplaceInputs( for (size_t k = 0; k < this->body.size(); ++k) { auto n = make_node(*r); n->value_index = static_cast(k); - n->dtype = r->source[k].dtype(); + n->type = r->source[k].type(); arr.push_back(Expr(n)); } } else { @@ -229,7 +229,7 @@ void ComputeOpNode::PropBoundToInputs( IntSet arg_intset = EvalSet(call->args[i], dom_map); const arith::IntervalSetNode* arg_interval = arg_intset.as(); if (arg_interval) { - Expr shape_i_min_value = make_zero(t->shape[i].dtype()); + Expr shape_i_min_value = make_zero(t->shape[i].type()); Expr shape_i_max_value = t->shape[i] - 1; Expr min_value = arg_interval->min_value; Expr max_value = arg_interval->max_value; @@ -295,7 +295,7 @@ Stmt BaseComputeOpNode::BuildRealize( attr->dim_align_offset}; realize = ir::AttrStmt::make( t, ir::attr::buffer_dim_align, - Call::make(DataType::Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), + Call::make(Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), realize); } } diff --git a/src/op/cross_thread_reduction.cc b/src/op/cross_thread_reduction.cc index 4a3aa54ccc6d..818acb912f9c 100644 --- a/src/op/cross_thread_reduction.cc +++ b/src/op/cross_thread_reduction.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -57,14 +57,14 @@ Stmt MakeCrossThreadReduction( cond = cond && v; } Array freduce_args; - freduce_args.push_back(make_const(DataType::UInt(32), static_cast(size))); + freduce_args.push_back(make_const(UInt(32), static_cast(size))); for (size_t i = 0; i < size; ++i) { freduce_args.push_back(reduces[0]->source[i]); } freduce_args.push_back(cond); std::vector res_handles(size); for (size_t idx = 0; idx < size; ++idx) { - res_handles[idx] = Var("reduce_temp" + std::to_string(idx), DataType::Handle()); + res_handles[idx] = Var("reduce_temp" + std::to_string(idx), Handle()); freduce_args.push_back(res_handles[idx]); } @@ -85,17 +85,17 @@ Stmt MakeCrossThreadReduction( } Stmt reduce_body = Evaluate::make(Call::make( - DataType::Handle(), + Handle(), ir::intrinsic::tvm_thread_allreduce, freduce_args, Call::Intrinsic)); reduce_body = AttrStmt::make( reduces[0]->combiner, attr::reduce_scope, - make_zero(DataType::Handle()), + make_zero(Handle()), reduce_body); std::vector assigns(size); for (size_t idx = 0; idx < size; ++idx) { - DataType t = reduces[idx]->dtype; + Type t = reduces[idx]->type; assigns[idx] = Provide::make( stage->op, idx, Load::make(t, res_handles[idx], 0, const_true(t.lanes())), args); @@ -106,7 +106,7 @@ Stmt MakeCrossThreadReduction( Stmt body = Block::make(reduce_body, assign_body); for (size_t idx = size; idx != 0; --idx) { body = Allocate::make( - res_handles[idx - 1], reduces[idx - 1]->dtype, {1}, const_true(), body); + res_handles[idx - 1], reduces[idx - 1]->type, {1}, const_true(), body); body = AttrStmt::make( res_handles[idx - 1], attr::storage_scope, StringImm::make("local"), body); } diff --git a/src/op/extern_op.cc b/src/op/extern_op.cc index 883ebdc4a0f7..35fe469fbe16 100644 --- a/src/op/extern_op.cc +++ b/src/op/extern_op.cc @@ -46,7 +46,7 @@ Array ExternOpNode::root_iter_vars() const { return {}; } -DataType ExternOpNode::output_dtype(size_t i) const { +Type ExternOpNode::output_dtype(size_t i) const { return output_placeholders[i]->dtype; } @@ -122,7 +122,7 @@ void ExternOpNode::PropBoundToInputs( for (size_t i = 0; i < t->shape.size(); ++i) { dom.data[i].emplace_back(IntSet::range( Range::make_by_min_extent( - make_const(t->shape[i].dtype(), 0), t->shape[i]))); + make_const(t->shape[i].type(), 0), t->shape[i]))); } } } @@ -145,7 +145,7 @@ Stmt ExternOpNode::BuildRealize( for (size_t i = 0; i < t->shape.size(); ++i) { bounds.push_back( Range::make_by_min_extent( - make_const(t->shape[i].dtype(), 0), t->shape[i])); + make_const(t->shape[i].type(), 0), t->shape[i])); } realize_body = ir::Realize::make( t->op, t->value_index, t->dtype, @@ -159,19 +159,19 @@ Stmt ExternOpNode::BuildProvide( const std::unordered_map& dom_map, bool debug_keep_trivial_loop) const { CHECK_EQ(stage->op.operator->(), this); - Stmt ret = AttrStmt::make(make_zero(DataType::Int(32)), attr::extern_scope, 0, this->body); + Stmt ret = AttrStmt::make(make_zero(Int(32)), attr::extern_scope, 0, this->body); auto f_push_bind = [&ret](Buffer buffer, Tensor tensor) { Array bind_spec; Array tuple; bind_spec.push_back(buffer); bind_spec.push_back(tensor); for (size_t k = 0; k < buffer->shape.size(); ++k) { - tuple.push_back(make_const(buffer->shape[k].dtype(), 0)); + tuple.push_back(make_const(buffer->shape[k].type(), 0)); 
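// At this step f_push_bind packs, for each buffer dimension k, the pair
// (min, extent) = (0, shape[k]) into `tuple`, so the finished attribute
// carries a flat (min0, extent0, min1, extent1, ...) list wrapped in a
// tvm_tuple intrinsic call under attr::buffer_bind_scope. Sketch of the
// resulting IR for a hypothetical 2-D buffer B of shape (m, n):
//
//   // attr [bind_spec = [buffer(B), tensor(T)]] buffer_bind_scope =
//   //   tvm_tuple(0, m, 0, n)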
tuple.push_back(buffer->shape[k]); } ret = AttrStmt::make( bind_spec, attr::buffer_bind_scope, - Call::make(DataType::Handle(), intrinsic::tvm_tuple, tuple, Call::Intrinsic), ret); + Call::make(Handle(), intrinsic::tvm_tuple, tuple, Call::Intrinsic), ret); }; for (size_t i = output_placeholders.size(); i != 0; --i) { f_push_bind(output_placeholders[i - 1], stage->op.output(i - 1)); diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 1e1a81423b69..7a99ea10b74d 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -52,7 +52,7 @@ Array HybridOpNode::root_iter_vars() const { return this->axis; } -DataType HybridOpNode::output_dtype(size_t i) const { +Type HybridOpNode::output_dtype(size_t i) const { return outputs[i]->dtype; } @@ -138,7 +138,7 @@ void HybridOpNode::PropBoundToInputs( for (size_t i = 0; i < t->shape.size(); ++i) { dom.data[i].emplace_back(IntSet::range( Range::make_by_min_extent( - make_const(t->shape[i].dtype(), 0), t->shape[i]))); + make_const(t->shape[i].type(), 0), t->shape[i]))); } } } @@ -166,7 +166,7 @@ Stmt HybridOpNode::BuildRealize( for (size_t i = 0; i < t->shape.size(); ++i) { bounds.push_back( Range::make_by_min_extent( - make_const(t->shape[i].dtype(), 0), t->shape[i])); + make_const(t->shape[i].type(), 0), t->shape[i])); } realize_body = ir::Realize::make( t->op, t->value_index, t->dtype, @@ -180,7 +180,7 @@ Stmt HybridOpNode::BuildProvide( const std::unordered_map &dom_map, bool debug_keep_trivial_loop) const { CHECK_EQ(stage->op.operator->(), this); - Stmt ret = AttrStmt::make(make_zero(DataType::Int(32)), attr::extern_scope, 0, this->body); + Stmt ret = AttrStmt::make(make_zero(Int(32)), attr::extern_scope, 0, this->body); std::unordered_map rmap; for (int i = 0; i < this->num_outputs(); ++i) { rmap[outputs[i]] = stage->op.output(i); diff --git a/src/op/op_util.cc b/src/op/op_util.cc index cd3b168d810b..691603157b1c 100644 --- a/src/op/op_util.cc +++ b/src/op/op_util.cc @@ -74,7 +74,7 @@ MakeLoopNest(const Stage& stage, if (bind_iv->thread_tag.length() == 0) { // Only generate new loop if we're not bound to a thread. 
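// The branch below is the unbound case: MakeLoopNest only materializes a
// For node when the IterVar carries no thread tag; thread-bound IterVars
// produce no loop (their extent is expressed through thread-extent
// attributes instead), and value_map records the Expr each IterVar's var
// is later substituted with in the body. Roughly:
//
//   if (bind_iv->thread_tag.length() == 0) {
//     nest[i + 1].emplace_back(For::make(var, dom->min, dom->extent,
//                                        for_type, DeviceAPI::None, no_op));
//     value_map[iv] = var;
//   }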
if (new_loop_var) { - var = Var(iv->var->name_hint + ".init", bind_iv->var.dtype()); + var = Var(iv->var->name_hint + ".init", bind_iv->var.type()); } ForType for_type = ForType::Serial; @@ -98,7 +98,7 @@ MakeLoopNest(const Stage& stage, const std::string& pkey = it_attr->pragma_keys[k].as()->value; Expr pvalue = it_attr->pragma_values[k]; if (!pvalue.defined()) { - pvalue = make_const(DataType::Int(32), 1); + pvalue = make_const(Int(32), 1); } nest[i + 1].emplace_back( AttrStmt::make(iv, ir::attr::pragma_scope_prefix + pkey, pvalue, no_op)); @@ -114,7 +114,7 @@ MakeLoopNest(const Stage& stage, for_type, DeviceAPI::None, no_op)); value_map[iv] = var; } else { - Var idx(bind_iv->var->name_hint + ".idx", bind_iv->var.dtype()); + Var idx(bind_iv->var->name_hint + ".idx", bind_iv->var.type()); nest[i + 1].emplace_back( For::make(idx, 0, dom->extent, for_type, DeviceAPI::None, no_op)); @@ -197,7 +197,7 @@ class TensorReplacer : public ir::IRMutator { auto it = vmap_.find(t); if (it != vmap_.end()) { Expr ret = ir::Call::make( - op->dtype, it->second->op->name, op->args, + op->type, it->second->op->name, op->args, op->call_type, it->second->op, it->second->value_index); found = true; return IRMutator::Mutate_(ret.as(), ret); diff --git a/src/op/placeholder_op.cc b/src/op/placeholder_op.cc index 6910f63b44d3..91b0589e3dd0 100644 --- a/src/op/placeholder_op.cc +++ b/src/op/placeholder_op.cc @@ -42,7 +42,7 @@ Array PlaceholderOpNode::root_iter_vars() const { return {}; } -DataType PlaceholderOpNode::output_dtype(size_t i) const { +Type PlaceholderOpNode::output_dtype(size_t i) const { CHECK_EQ(i, 0U); return dtype; } @@ -54,7 +54,7 @@ Array PlaceholderOpNode::output_shape(size_t i) const { Operation PlaceholderOpNode::make(std::string name, Array shape, - DataType dtype) { + Type dtype) { auto n = make_node(); n->name = name; n->shape = shape; @@ -62,7 +62,7 @@ Operation PlaceholderOpNode::make(std::string name, return Operation(n); } -Tensor placeholder(Array shape, DataType dtype, std::string name) { +Tensor placeholder(Array shape, Type dtype, std::string name) { return PlaceholderOpNode::make(name, shape, dtype).output(0); } diff --git a/src/op/scan_op.cc b/src/op/scan_op.cc index e83a23194cf8..b02073b5357e 100644 --- a/src/op/scan_op.cc +++ b/src/op/scan_op.cc @@ -53,7 +53,7 @@ Array ScanOpNode::root_iter_vars() const { return ret; } -DataType ScanOpNode::output_dtype(size_t i) const { +Type ScanOpNode::output_dtype(size_t i) const { return update[i]->dtype; } diff --git a/src/op/tensor_compute_op.cc b/src/op/tensor_compute_op.cc index e59f90f4948e..83cdd76c2b2a 100644 --- a/src/op/tensor_compute_op.cc +++ b/src/op/tensor_compute_op.cc @@ -46,7 +46,7 @@ int TensorComputeOpNode::num_outputs() const { return static_cast(this->intrin->buffers.size() - this->inputs.size()); } -DataType TensorComputeOpNode::output_dtype(size_t i) const { +Type TensorComputeOpNode::output_dtype(size_t i) const { return this->intrin->buffers[this->inputs.size() + i]->dtype; } @@ -155,7 +155,7 @@ Stmt TensorComputeOpNode::BuildProvide( } input_bind_nest.emplace_back(AttrStmt::make( bind_spec, ir::attr::buffer_bind_scope, - Call::make(DataType::Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); + Call::make(Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); } // output binding @@ -179,7 +179,7 @@ Stmt TensorComputeOpNode::BuildProvide( output_bind_nest.emplace_back(AttrStmt::make( bind_spec, ir::attr::buffer_bind_scope, - Call::make(DataType::Handle(), ir::intrinsic::tvm_tuple, tuple, 
Call::Intrinsic), nop)); + Call::make(Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); } // Check variable remap diff --git a/src/op/tensorize.cc b/src/op/tensorize.cc index b7f32de8b5ad..c4abf0b04141 100644 --- a/src/op/tensorize.cc +++ b/src/op/tensorize.cc @@ -173,7 +173,7 @@ class TensorIntrinMatcher final : public IRMutator { args.push_back(op->args[i] - e.region[i]->min); } return Call::make( - op->dtype, e.tensor->op->name, args, + op->type, e.tensor->op->name, args, op->call_type, e.tensor->op, e.tensor->value_index); } } @@ -341,12 +341,12 @@ void VerifyTensorizeBody( lhs = CanonicalSimplify(lhs, compute_intrin_iter_space); Expr rhs = Simplify(intrin_compute->body[i], compute_intrin_iter_space); rhs = CanonicalSimplify(rhs, compute_intrin_iter_space); - if (lhs.dtype() != rhs.dtype()) { + if (lhs.type() != rhs.type()) { LOG(FATAL) << "Failed to match the data type with TensorIntrin " << intrin->name << "'s declaration " - << " provided=" << lhs.dtype() - << ", intrin=" << rhs.dtype(); + << " provided=" << lhs.type() + << ", intrin=" << rhs.type(); } CHECK(Equal(lhs, rhs)) << "Failed to match the compute with TensorIntrin " @@ -390,7 +390,7 @@ Stmt MakeTensorize(const ComputeOpNode* self, } input_bind_nest.emplace_back(AttrStmt::make( bind_spec, ir::attr::buffer_bind_scope, - Call::make(DataType::Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); + Call::make(Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); } // output binding const ComputeOpNode* intrin_compute = intrin->op.as(); @@ -410,7 +410,7 @@ Stmt MakeTensorize(const ComputeOpNode* self, Array bind_spec{buffer, tensor}; output_bind_nest.emplace_back(AttrStmt::make( bind_spec, ir::attr::buffer_bind_scope, - Call::make(DataType::Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); + Call::make(Handle(), ir::intrinsic::tvm_tuple, tuple, Call::Intrinsic), nop)); } // Check variable remap std::unordered_map vmap; @@ -430,7 +430,7 @@ Stmt MakeTensorize(const ComputeOpNode* self, IterVar target = intrin_compute->reduce_axis[i - start]; auto it = out_dom.find(iv); CHECK(it != out_dom.end()); - binder.Bind(target->dom->min, make_const(iv->dom->min.dtype(), 0), + binder.Bind(target->dom->min, make_const(iv->dom->min.type(), 0), "tensir_intrin.reduction.min"); binder.Bind(target->dom->extent, it->second->extent, "tensir_intrin.reduction.extent"); diff --git a/src/pass/arg_binder.cc b/src/pass/arg_binder.cc index e4ff9cb457a5..f892b6b957f8 100644 --- a/src/pass/arg_binder.cc +++ b/src/pass/arg_binder.cc @@ -50,7 +50,7 @@ bool ArgBinder::Bind_(const Expr& arg, const Expr& value, const std::string& arg_name, bool with_lets) { - CHECK_EQ(arg.dtype(), value.dtype()); + CHECK_EQ(arg.type(), value.type()); if (const Variable* v = arg.as()) { auto it = def_map_->find(v); if (it == def_map_->end()) { @@ -118,8 +118,8 @@ void ArgBinder::BindBuffer(const Buffer& arg, if (Bind_(arg->elem_offset, value->elem_offset, arg_name + ".elem_offset", false)) { if (arg->offset_factor > 1) { Expr offset = value->elem_offset; - Expr factor = make_const(offset.dtype(), arg->offset_factor); - Expr zero = make_zero(offset.dtype()); + Expr factor = make_const(offset.type(), arg->offset_factor); + Expr zero = make_zero(offset.type()); BinderAddAssert(truncmod(offset, factor) == zero, arg_name + ".elem_offset", &asserts_); } @@ -153,7 +153,7 @@ void ArgBinder::BindBuffer(const Buffer& arg, } } -inline Expr TVMArrayGet(DataType t, Var arr, intrinsic::TVMStructFieldKind kind) { +inline Expr 
TVMArrayGet(Type t, Var arr, intrinsic::TVMStructFieldKind kind) { return TVMStructGet(t, arr, 0, kind); } @@ -162,8 +162,8 @@ void ArgBinder::BindDLTensor(const Buffer& buffer, const Expr& device_id, const Var& handle, const std::string& arg_name) { - const DataType tvm_shape_type = DataType::ShapeIndex(); - const DataType tvm_ndim_type = DataType::Int(32); + const Type tvm_shape_type = TVMShapeIndexType(); + const Type tvm_ndim_type = Int(32); const Stmt nop = Evaluate::make(0); // dimension checks Expr v_ndim = TVMArrayGet(tvm_ndim_type, handle, intrinsic::kArrNDim); @@ -175,52 +175,52 @@ void ArgBinder::BindDLTensor(const Buffer& buffer, << buffer->shape.size(); asserts_.emplace_back(AssertStmt::make(a_ndim == v_ndim, ndim_err_msg.str(), nop)); // type checks - DataType dtype = buffer->dtype; + Type dtype = buffer->dtype; std::ostringstream type_err_msg; type_err_msg << arg_name << ".dtype is expected to be " << dtype; - Expr cond = (TVMArrayGet(DataType::UInt(8), handle, intrinsic::kArrTypeCode) == - UIntImm::make(DataType::UInt(8), dtype.code()) && - TVMArrayGet(DataType::UInt(8), handle, intrinsic::kArrTypeBits) == - UIntImm::make(DataType::UInt(8), dtype.bits()) && - TVMArrayGet(DataType::UInt(16), handle, intrinsic::kArrTypeLanes) == - UIntImm::make(DataType::UInt(16), dtype.lanes())); + Expr cond = (TVMArrayGet(UInt(8), handle, intrinsic::kArrTypeCode) == + UIntImm::make(UInt(8), dtype.code()) && + TVMArrayGet(UInt(8), handle, intrinsic::kArrTypeBits) == + UIntImm::make(UInt(8), dtype.bits()) && + TVMArrayGet(UInt(16), handle, intrinsic::kArrTypeLanes) == + UIntImm::make(UInt(16), dtype.lanes())); asserts_.emplace_back(AssertStmt::make(cond, type_err_msg.str(), nop)); // data field - if (Bind_(buffer->data, TVMArrayGet(DataType::Handle(), handle, intrinsic::kArrData), + if (Bind_(buffer->data, TVMArrayGet(Handle(), handle, intrinsic::kArrData), arg_name + ".data", true)) { Var vptr(buffer->data); def_handle_dtype_.Set(vptr, ir::TypeAnnotation(buffer->dtype)); // mark alignment of external bufs init_nest_.emplace_back(AttrStmt::make( vptr, ir::attr::storage_alignment, - IntImm::make(DataType::Int(32), buffer->data_alignment), nop)); + IntImm::make(Int(32), buffer->data_alignment), nop)); } - Var v_shape(arg_name + ".shape", DataType::Handle()); + Var v_shape(arg_name + ".shape", Handle()); def_handle_dtype_.Set(v_shape, make_const(tvm_shape_type, 0)); init_nest_.emplace_back(LetStmt::make( - v_shape, TVMArrayGet(DataType::Handle(), handle, intrinsic::kArrShape), nop)); + v_shape, TVMArrayGet(Handle(), handle, intrinsic::kArrShape), nop)); for (size_t k = 0; k < buffer->shape.size(); ++k) { std::ostringstream field_name; field_name << v_shape->name_hint << '[' << k << ']'; Bind_(buffer->shape[k], - cast(buffer->shape[k].dtype(), + cast(buffer->shape[k].type(), Load::make(tvm_shape_type, v_shape, - IntImm::make(DataType::Int(32), k), const_true(1))), + IntImm::make(Int(32), k), const_true(1))), field_name.str(), true); } // strides field - Var v_strides(arg_name + ".strides", DataType::Handle()); + Var v_strides(arg_name + ".strides", Handle()); def_handle_dtype_.Set(v_strides, ir::TypeAnnotation(tvm_shape_type)); init_nest_.emplace_back(LetStmt::make( - v_strides, TVMArrayGet(DataType::Handle(), handle, intrinsic::kArrStrides), + v_strides, TVMArrayGet(Handle(), handle, intrinsic::kArrStrides), nop)); Expr is_null = Call::make( - DataType::Bool(1), intrinsic::tvm_handle_is_null, + Bool(1), intrinsic::tvm_handle_is_null, {v_strides}, Call::PureIntrinsic); if (buffer->strides.size() 
== 0) { // Assert the buffer is compact - DataType stype = buffer->DefaultIndexType(); + Type stype = buffer->DefaultIndexType(); Expr expect_stride = make_const(stype, 1); Array conds; for (size_t i = buffer->shape.size(); i != 0; --i) { @@ -228,7 +228,7 @@ void ArgBinder::BindDLTensor(const Buffer& buffer, Expr svalue = cast( stype, Load::make(tvm_shape_type, v_strides, - IntImm::make(DataType::Int(32), k), const_true(1))); + IntImm::make(Int(32), k), const_true(1))); conds.push_back(expect_stride == svalue); expect_stride = expect_stride * buffer->shape[k]; } @@ -243,15 +243,15 @@ void ArgBinder::BindDLTensor(const Buffer& buffer, asserts_.emplace_back(Block::make(check, Evaluate::make(0))); } } else if (buffer->buffer_type == kAutoBroadcast) { - DataType stype = buffer->DefaultIndexType(); + Type stype = buffer->DefaultIndexType(); Expr stride = make_const(stype, 1); for (size_t i = buffer->shape.size(); i != 0; --i) { size_t k = i - 1; std::ostringstream field_name; field_name << v_strides->name_hint << '[' << k << ']'; - Expr value = cast(buffer->shape[k].dtype(), + Expr value = cast(buffer->shape[k].type(), Load::make(tvm_shape_type, v_strides, - IntImm::make(DataType::Int(32), k), const_true(1))); + IntImm::make(Int(32), k), const_true(1))); value = tvm::if_then_else(is_null, stride, value); value = tvm::if_then_else(buffer->shape[k] == 1, 0, value); Bind_(buffer->strides[k], value, field_name.str(), true); @@ -266,9 +266,9 @@ void ArgBinder::BindDLTensor(const Buffer& buffer, std::ostringstream field_name; field_name << v_strides->name_hint << '[' << k << ']'; Bind_(buffer->strides[k], - cast(buffer->shape[k].dtype(), + cast(buffer->shape[k].type(), Load::make(tvm_shape_type, v_strides, - IntImm::make(DataType::Int(32), k), const_true(1))), + IntImm::make(Int(32), k), const_true(1))), field_name.str(), true); } } @@ -276,29 +276,29 @@ void ArgBinder::BindDLTensor(const Buffer& buffer, int data_bytes = GetVectorBytes(buffer->dtype); int64_t const_offset; if (arith::GetConst(buffer->elem_offset, &const_offset)) { - Bind_(make_const(DataType::UInt(64), const_offset * data_bytes), - TVMArrayGet(DataType::UInt(64), handle, intrinsic::kArrByteOffset), + Bind_(make_const(UInt(64), const_offset * data_bytes), + TVMArrayGet(UInt(64), handle, intrinsic::kArrByteOffset), arg_name + ".byte_offset", true); } else { if (Bind_(buffer->elem_offset, - cast(buffer->elem_offset.dtype(), - (TVMArrayGet(DataType::UInt(64), handle, intrinsic::kArrByteOffset) / - make_const(DataType::UInt(64), data_bytes))), + cast(buffer->elem_offset.type(), + (TVMArrayGet(UInt(64), handle, intrinsic::kArrByteOffset) / + make_const(UInt(64), data_bytes))), arg_name + ".elem_offset", true)) { if (buffer->offset_factor > 1) { Expr offset = buffer->elem_offset; - Expr factor = make_const(offset.dtype(), buffer->offset_factor); - Expr zero = make_zero(offset.dtype()); + Expr factor = make_const(offset.type(), buffer->offset_factor); + Expr zero = make_zero(offset.type()); BinderAddAssert(truncmod(offset, factor) == zero, arg_name + ".elem_offset", &asserts_); } } } // device info. 
Bind_(device_type, - TVMArrayGet(DataType::Int(32), handle, intrinsic::kArrDeviceType), + TVMArrayGet(Int(32), handle, intrinsic::kArrDeviceType), arg_name + ".device_type", true); Bind_(device_id, - TVMArrayGet(DataType::Int(32), handle, intrinsic::kArrDeviceId), + TVMArrayGet(Int(32), handle, intrinsic::kArrDeviceId), arg_name + ".device_id", true); } diff --git a/src/pass/bound_checker.cc b/src/pass/bound_checker.cc index 648302e9740a..55f98474994a 100644 --- a/src/pass/bound_checker.cc +++ b/src/pass/bound_checker.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -58,7 +58,7 @@ class BoundChecker : public IRMutator { Stmt Mutate_(const Allocate *op, const Stmt &s) final { // If the shape was updated we should update the hashtable. if (UpdateIsNeeded(op->buffer_var)) { - Update(op->buffer_var, op->extents, op->dtype); + Update(op->buffer_var, op->extents, op->type); } return IRMutator::Mutate_(op, s); } @@ -108,26 +108,26 @@ } void Update(const VarExpr &buffer_var, const Array<Expr> &new_shape, - const DataType &type) { + const Type &type) { // Sanity check first. if (!new_shape.size()) { return; } for (size_t i = 0; i < new_shape.size(); ++i) { - if (!new_shape[0].defined() || !new_shape[i].dtype().is_scalar() || + if (!new_shape[0].defined() || !new_shape[i].type().is_scalar() || is_negative_const(new_shape[i])) { return; } } // Scalarize the shape. - Expr shape = Mul::make(make_const(DataType::UInt(64), type.lanes()), - Cast::make(DataType::UInt(64), new_shape[0])); + Expr shape = Mul::make(make_const(UInt(64), type.lanes()), - Cast::make(UInt(64), new_shape[0])); for (size_t i = 1; i < new_shape.size(); ++i) { // Cast to unsigned first to avoid integer overflow. - shape = Mul::make(shape, Mul::make(make_const(DataType::UInt(64), type.lanes()), - Cast::make(DataType::UInt(64), new_shape[i]))); + shape = Mul::make(shape, Mul::make(make_const(UInt(64), type.lanes()), + Cast::make(UInt(64), new_shape[i]))); } mem_to_shape_[buffer_var.get()] = shape; } @@ -139,9 +139,9 @@ class BoundChecker : public IRMutator { if (const Ramp *ramp_index = index.as<Ramp>()) { return ramp_index->base.defined() && - ramp_index->base.dtype().is_scalar() && + ramp_index->base.type().is_scalar() && ramp_index->stride.defined() && - ramp_index->stride.dtype().is_scalar() && (ramp_index->lanes > 0); + ramp_index->stride.type().is_scalar() && (ramp_index->lanes > 0); } return true; } @@ -168,7 +168,7 @@ class BoundChecker : public IRMutator { // Non-inclusive range. index = Add::make( ramp_index->base, - Mul::make(ramp_index->stride, make_const(ramp_index->stride.dtype(), + Mul::make(ramp_index->stride, make_const(ramp_index->stride.type(), ramp_index->lanes - 1))); } @@ -177,11 +177,11 @@ upper_bound = ir::Simplify(upper_bound); // Cast to the same type - signed, to be able to check lower bound. - index = Cast::make(DataType::Int(64), index); - upper_bound = Cast::make(DataType::Int(64), upper_bound); + index = Cast::make(Int(64), index); + upper_bound = Cast::make(Int(64), upper_bound); // Looks like a lower bound should always be zero after normalization.
- Expr lower_bound = make_zero(DataType::Int(64)); + Expr lower_bound = make_zero(Int(64)); Expr current_condition = And::make(GE::make(index, lower_bound), LT::make(index, upper_bound)); diff --git a/src/pass/combine_context_call.cc b/src/pass/combine_context_call.cc index f1cb8fe10a4b..d7fb77961b4b 100644 --- a/src/pass/combine_context_call.cc +++ b/src/pass/combine_context_call.cc @@ -48,14 +48,14 @@ class ContextCallCombiner final : public IRMutator { if (it != ctx_map_.end()) { return it->second; } else { - CHECK(ctx.dtype().is_handle()); + CHECK(ctx.type().is_handle()); std::string name; if (const Call* call = ctx.as<Call>()) { name = call->name + "_cache"; } else { name = "ctx_cache_"; } - Var ctx_var(name, ctx.dtype()); + Var ctx_var(name, ctx.type()); ctx_map_[ctx] = ctx_var; return std::move(ctx_var); } diff --git a/src/pass/coproc_sync.cc b/src/pass/coproc_sync.cc index 4aa8879f679b..3dacb6d5bff7 100644 --- a/src/pass/coproc_sync.cc +++ b/src/pass/coproc_sync.cc @@ -198,7 +198,7 @@ class CoProcSyncPlanner : public StorageAccessVisitor { std::vector<Stmt> GetSync(std::string sync_name) { return {Evaluate::make(Call::make( - DataType::Int(32), + Int(32), sync_name, {}, Call::Intrinsic))}; } @@ -345,7 +345,7 @@ class CoProcBarrierDetector : public StorageAccessVisitor { Expr min = r->min; Expr extent = r->extent; return Evaluate::make(Call::make( - DataType::Int(32), func, + Int(32), func, {wvec[0].buffer, wvec[0].dtype.bits(), r->min, r->extent}, Call::Intrinsic)); } // Write barrier name @@ -588,14 +588,14 @@ class CoProcInstDepDetector : public IRVisitor { Stmt MakePush(int from, int to) { return Evaluate::make(Call::make( - DataType::Int(32), sync_push_name_, - {make_const(DataType::Int(32), from), make_const(DataType::Int(32), to)}, + Int(32), sync_push_name_, + {make_const(Int(32), from), make_const(Int(32), to)}, Call::Intrinsic)); } Stmt MakePop(int from, int to) { return Evaluate::make(Call::make( - DataType::Int(32), sync_pop_name_, - {make_const(DataType::Int(32), from), make_const(DataType::Int(32), to)}, + Int(32), sync_pop_name_, + {make_const(Int(32), from), make_const(Int(32), to)}, Call::Intrinsic)); } // sync states.
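Every src/pass hunk in this diff applies the same mechanical rename: the scoped DataType spellings (DataType::Int(32), DataType::Handle(), the .dtype() accessor, DataType parameters) are swapped back to the older free-function spellings (Int(32), Handle(), the .type() accessor, Type parameters). A minimal side-by-side sketch of the two spellings, for orientation only (assumes the tvm namespace; the variable names are illustrative and not part of the diff):

    // Spelling removed by this diff (DataType-based):
    Expr c1 = make_const(DataType::Int(32), 1);
    Var  v1("buf", DataType::Handle());
    bool m1 = c1.dtype() == DataType::Int(32);

    // Spelling restored by this diff (Type-based):
    Expr c2 = make_const(Int(32), 1);
    Var  v2("buf", Handle());
    bool m2 = c2.type() == Int(32);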
diff --git a/src/pass/detect_device.cc b/src/pass/detect_device.cc index cd7c979171a6..92e368b62d20 100644 --- a/src/pass/detect_device.cc +++ b/src/pass/detect_device.cc @@ -28,7 +28,7 @@ namespace tvm { namespace ir { Stmt DecorateDeviceScope(Stmt stmt) { - Stmt body = AttrStmt::make(make_zero(DataType::Int(32)), + Stmt body = AttrStmt::make(make_zero(Int(32)), ir::attr::device_scope, 0, stmt); diff --git a/src/pass/inject_copy_intrin.cc b/src/pass/inject_copy_intrin.cc index 7b7c5df48236..3b148361fbfc 100644 --- a/src/pass/inject_copy_intrin.cc +++ b/src/pass/inject_copy_intrin.cc @@ -88,7 +88,7 @@ class CopyIntrinInjector : public IRMutator { load = cast->value.as(); } if (load == nullptr) return false; - if (load->dtype.lanes() != 1) return false; + if (load->type.lanes() != 1) return false; Array loop_vars; for (const For* op : loops) { loop_vars.push_back(op->loop_var); @@ -101,7 +101,7 @@ class CopyIntrinInjector : public IRMutator { Array dst_shape; const size_t loop_var_size = loop_vars.size(); if (loop_var_size == 0) { - dst_shape.push_back(make_const(DataType::Int(32), 1)); + dst_shape.push_back(make_const(Int(32), 1)); } else { for (const For* op : loops) { dst_shape.push_back(op->extent); @@ -121,7 +121,7 @@ class CopyIntrinInjector : public IRMutator { for (size_t i = 0; i < src_shape.size(); ++i) { Expr min_value = clip_bound[2 * i]; Expr max_value = clip_bound[2 * i + 1]; - DataType t = loop_vars[i].dtype(); + Type t = loop_vars[i].type(); Expr svalue = src_shape[i]; if (min_value.defined()) { Expr pbefore = Simplify(Max::make(min_value, make_zero(t))); @@ -148,12 +148,12 @@ class CopyIntrinInjector : public IRMutator { Array src_strides(load_strides.begin(), load_strides.begin() + loop_var_size); Array dst_strides(store_strides.begin(), store_strides.begin() + loop_var_size); if (loop_var_size == 0) { - src_strides.push_back(make_const(DataType::Int(32), 1)); - dst_strides.push_back(make_const(DataType::Int(32), 1)); + src_strides.push_back(make_const(Int(32), 1)); + dst_strides.push_back(make_const(Int(32), 1)); } Buffer dst = BufferNode::make( store->buffer_var, - store->value.dtype(), + store->value.type(), dst_shape, dst_strides, store_strides[loop_var_size], @@ -162,7 +162,7 @@ class CopyIntrinInjector : public IRMutator { 0, 0, kDefault); Buffer src = BufferNode::make( load->buffer_var, - load->dtype, + load->type, src_shape, src_strides, src_elem_offset, diff --git a/src/pass/inject_double_buffer.cc b/src/pass/inject_double_buffer.cc index 78d3305d3e17..065bbd4e4db3 100644 --- a/src/pass/inject_double_buffer.cc +++ b/src/pass/inject_double_buffer.cc @@ -100,10 +100,10 @@ class DoubleBufferInjector : public IRMutator { auto it = dbuffer_info_.find(op->buffer_var.get()); if (it != dbuffer_info_.end()) { it->second.stride = arith::ComputeReduce( - op->extents, Expr()) * op->dtype.lanes(); + op->extents, Expr()) * op->type.lanes(); Stmt stmt = IRMutator::Mutate_(op, s); op = stmt.as(); - Array new_extents{make_const(op->extents[0].dtype(), 2)}; + Array new_extents{make_const(op->extents[0].type(), 2)}; for (Expr e : op->extents) { new_extents.push_back(e); } @@ -114,7 +114,7 @@ class DoubleBufferInjector : public IRMutator { StringImm::make(it->second.scope), Evaluate::make(0))); alloc_nest.emplace_back(Allocate::make( - op->buffer_var, op->dtype, new_extents, op->condition, + op->buffer_var, op->type, new_extents, op->condition, Evaluate::make(0))); return op->body; } else { @@ -135,15 +135,15 @@ class DoubleBufferInjector : public IRMutator { 
CHECK(is_zero(old_loop->min)); Expr zero = old_loop->min; Expr new_ext = - old_loop->extent - make_const(old_loop->loop_var.dtype(), 1); - Expr factor = make_const(new_ext.dtype(), split_loop_); + old_loop->extent - make_const(old_loop->loop_var.type(), 1); + Expr factor = make_const(new_ext.type(), split_loop_); Expr outer_ext = new_ext / factor; Expr tail_base = outer_ext * factor; - Var outer_var(old_loop->loop_var->name_hint + ".outer", old_loop->loop_var.dtype()); + Var outer_var(old_loop->loop_var->name_hint + ".outer", old_loop->loop_var.type()); std::unordered_map vmap; std::vector loop_seq; for (int32_t i = 0; i < split_loop_; ++i) { - vmap[old_loop->loop_var.get()] = outer_var * factor + make_const(factor.dtype(), i); + vmap[old_loop->loop_var.get()] = outer_var * factor + make_const(factor.type(), i); loop_seq.emplace_back(Substitute(old_loop->body, vmap)); } Stmt loop = For::make( @@ -153,7 +153,7 @@ class DoubleBufferInjector : public IRMutator { std::vector tail_seq; Stmt tail_body = StripDoubleBufferWrite().Mutate(old_loop->body); for (int32_t i = 0; i < split_loop_; ++i) { - Expr idx = tail_base + make_const(tail_base.dtype(), i); + Expr idx = tail_base + make_const(tail_base.type(), i); vmap[old_loop->loop_var.get()] = idx; tail_seq.emplace_back( IfThenElse::make(idx < old_loop->extent, @@ -196,7 +196,7 @@ class DoubleBufferInjector : public IRMutator { const StorageEntry& e = it->second; CHECK(e.stride.defined()); CHECK(e.switch_read_var.defined()); - return Load::make(op->dtype, + return Load::make(op->type, op->buffer_var, e.switch_read_var * e.stride + op->index, op->predicate); @@ -222,12 +222,12 @@ class DoubleBufferInjector : public IRMutator { } StorageEntry& e = it->second; e.loop = loop_nest_.back(); - Expr zero = make_const(e.loop->loop_var.dtype(), 0); - Expr one = make_const(e.loop->loop_var.dtype(), 1); - Expr two = make_const(e.loop->loop_var.dtype(), 2); + Expr zero = make_const(e.loop->loop_var.type(), 0); + Expr one = make_const(e.loop->loop_var.type(), 1); + Expr two = make_const(e.loop->loop_var.type(), 2); Expr loop_shift = e.loop->loop_var + one; e.switch_write_var = Var(e.loop->loop_var->name_hint + ".db", - e.loop->loop_var.dtype()); + e.loop->loop_var.type()); e.switch_read_var = indexmod(e.loop->loop_var, two); in_double_buffer_scope_ = true; Stmt body = Mutate(op->body); diff --git a/src/pass/inject_virtual_thread.cc b/src/pass/inject_virtual_thread.cc index c80c7fcdaa8c..eafe5a928cd7 100644 --- a/src/pass/inject_virtual_thread.cc +++ b/src/pass/inject_virtual_thread.cc @@ -222,7 +222,7 @@ class VTInjector : public IRMutator { } auto it = alloc_remap_.find(op->buffer_var.get()); if (it != alloc_remap_.end()) { - return Load::make(op->dtype, op->buffer_var, + return Load::make(op->type, op->buffer_var, RewriteIndex(op->index, it->second), op->predicate); } else { @@ -233,7 +233,7 @@ class VTInjector : public IRMutator { Expr Mutate_(const Call* op, const Expr& e) final { if (op->is_intrinsic(intrinsic::tvm_access_ptr)) { CHECK_EQ(op->args.size(), 5U); - DataType dtype = op->args[0].dtype(); + Type dtype = op->args[0].type(); const Variable* buffer = op->args[1].as(); auto it = alloc_remap_.find(buffer); if (it == alloc_remap_.end()) return IRMutator::Mutate_(op, e); @@ -241,10 +241,10 @@ class VTInjector : public IRMutator { Expr offset = Mutate(op->args[2]); Expr extent = Mutate(op->args[3]); Expr stride = - it->second / make_const(offset.dtype(), dtype.lanes()); + it->second / make_const(offset.type(), dtype.lanes()); offset = stride * var_ + 
offset; return Call::make( - op->dtype, op->name, + op->type, op->name, {op->args[0], op->args[1], offset, extent, op->args[4]}, op->call_type); } else if (op->is_intrinsic(intrinsic::tvm_context_id)) { @@ -395,9 +395,9 @@ class VTInjector : public IRMutator { if (touched_var_.count(op->buffer_var.get()) || !allow_share_) { // place v on highest dimension. Expr stride = arith::ComputeReduce( - op->extents, Expr()) * op->dtype.lanes(); + op->extents, Expr()) * op->type.lanes(); Array other; - other.push_back(make_const(op->extents[0].dtype(), num_threads_)); + other.push_back(make_const(op->extents[0].type(), num_threads_)); for (Expr e : extents) { other.push_back(e); } @@ -417,7 +417,7 @@ class VTInjector : public IRMutator { return s; } else { return Allocate::make( - op->buffer_var, op->dtype, + op->buffer_var, op->type, extents, condition, body, op->new_expr, op->free_function); } @@ -439,19 +439,19 @@ class VTInjector : public IRMutator { // only unroll if number of vthreads are small if (max_loop_depth_ == 0 && num_threads_ < 16) { // do unrolling if it is inside innermost content. - Stmt blk = Substitute(stmt, {{var_, make_zero(var_.dtype())}}); + Stmt blk = Substitute(stmt, {{var_, make_zero(var_.type())}}); for (int i = 1; i < num_threads_; ++i) { blk = Block::make( - blk, Substitute(stmt, {{var_, make_const(var_.dtype(), i)}})); + blk, Substitute(stmt, {{var_, make_const(var_.type(), i)}})); } return blk; } else { // insert a for loop - Var idx(var_->name_hint + ".s", var_->dtype); + Var idx(var_->name_hint + ".s", var_->type); Map values{{var_, idx}}; stmt = Substitute(stmt, values); - return For::make(idx, make_zero(idx.dtype()), - make_const(idx.dtype(), num_threads_), + return For::make(idx, make_zero(idx.type()), + make_const(idx.type(), num_threads_), ForType::Serial, DeviceAPI::None, stmt); } } diff --git a/src/pass/ir_deep_compare.cc b/src/pass/ir_deep_compare.cc index e399e7f2c54f..cb859d07f07b 100644 --- a/src/pass/ir_deep_compare.cc +++ b/src/pass/ir_deep_compare.cc @@ -63,7 +63,7 @@ class IRDeepCompare : if (order_ != 0) return; if (n.same_as(other)) return; if (CompareValue(n->type_index(), other->type_index()) != 0) return; - if (CompareType(n.dtype(), other.dtype()) != 0) return; + if (CompareType(n.type(), other.type()) != 0) return; ExprComparator::VisitExpr(n, other); } @@ -119,7 +119,7 @@ class IRDeepCompare : } else { if (CompareExpr(op->buffer_var, rhs->buffer_var) != 0) return; } - if (CompareType(op->dtype, rhs->dtype) != 0) return; + if (CompareType(op->type, rhs->type) != 0) return; if (CompareArray(op->extents, rhs->extents) != 0) return; if (CompareExpr(op->condition, rhs->condition) != 0) return; if (CompareStmt(op->body, rhs->body) != 0) return; @@ -166,7 +166,7 @@ class IRDeepCompare : const Realize* rhs = other.as(); if (CompareNodeRef(op->func, rhs->func) != 0) return; if (CompareValue(op->value_index, rhs->value_index) != 0) return; - if (CompareType(op->dtype, rhs->dtype) != 0) return; + if (CompareType(op->type, rhs->type) != 0) return; if (CompareRegion(op->bounds, rhs->bounds) != 0) return; if (CompareStmt(op->body, rhs->body) != 0) return; } @@ -175,7 +175,7 @@ class IRDeepCompare : const Prefetch* rhs = other.as(); if (CompareNodeRef(op->func, rhs->func) != 0) return; if (CompareValue(op->value_index, rhs->value_index) != 0) return; - if (CompareType(op->dtype, rhs->dtype) != 0) return; + if (CompareType(op->type, rhs->type) != 0) return; if (CompareRegion(op->bounds, rhs->bounds) != 0) return; } @@ -369,7 +369,7 @@ class IRDeepCompare : 
return order_; } - int CompareType(const DataType& lhs, const DataType& rhs) { + int CompareType(const Type& lhs, const Type& rhs) { if (order_ != 0) return order_; if (lhs == rhs) return order_; if (CompareValue(lhs.code(), rhs.code()) != 0) return order_; diff --git a/src/pass/ir_mutator.cc b/src/pass/ir_mutator.cc index b300989dd2fd..f79a1ab8fe3b 100644 --- a/src/pass/ir_mutator.cc +++ b/src/pass/ir_mutator.cc @@ -45,7 +45,7 @@ class IRTransformer final : public IRMutator { } private: - template + template T MutateInternal(T node) { if (only_enable_.size() && !only_enable_.count(node->type_index())) { @@ -89,11 +89,11 @@ IRMutator::FMutateStmt& IRMutator::vtable_stmt() { // NOLINT(*) static FMutateStmt inst; return inst; } -inline Array MutateArray(Array arr, IRMutator* m) { - return UpdateArray(arr, [&m](const Expr& e) { return m->Mutate(e); }); +inline Array MutateArray(Array arr, IRMutator *m) { + return UpdateArray(arr, [&m] (const Expr& e) { return m->Mutate(e); }); } -inline Array MutateIterVarArr(Array rdom, IRMutator* m) { +inline Array MutateIterVarArr(Array rdom, IRMutator *m) { std::vector new_dom(rdom.size()); bool changed = false; for (size_t i = 0; i < rdom.size(); i++) { @@ -133,7 +133,7 @@ Stmt IRMutator::Mutate_(const AttrStmt* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const LetStmt* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const LetStmt *op, const Stmt& s) { Expr value = this->Mutate(op->value); Stmt body = this->Mutate(op->body); if (value.same_as(op->value) && @@ -144,7 +144,7 @@ Stmt IRMutator::Mutate_(const LetStmt* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const For* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const For *op, const Stmt& s) { Expr min = this->Mutate(op->min); Expr extent = this->Mutate(op->extent); Stmt body = this->Mutate(op->body); @@ -179,13 +179,13 @@ Stmt IRMutator::Mutate_(const Allocate* op, const Stmt& s) { return s; } else { return Allocate::make( - op->buffer_var, op->dtype, + op->buffer_var, op->type, new_extents, condition, body, new_expr, op->free_function); } } -Stmt IRMutator::Mutate_(const IfThenElse* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const IfThenElse *op, const Stmt& s) { Expr condition = this->Mutate(op->condition); Stmt then_case = this->Mutate(op->then_case); Stmt else_case; @@ -201,7 +201,7 @@ Stmt IRMutator::Mutate_(const IfThenElse* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const Store* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const Store *op, const Stmt& s) { Expr value = this->Mutate(op->value); Expr index = this->Mutate(op->index); Expr pred = this->Mutate(op->predicate); @@ -233,7 +233,7 @@ Stmt IRMutator::Mutate_(const Realize* op, const Stmt& s) { Expr old_extent = op->bounds[i]->extent; Expr new_min = m->Mutate(old_min); Expr new_extent = m->Mutate(old_extent); - if (!new_min.same_as(old_min)) bounds_changed = true; + if (!new_min.same_as(old_min)) bounds_changed = true; if (!new_extent.same_as(old_extent)) bounds_changed = true; new_bounds.push_back( Range::make_by_min_extent(new_min, new_extent)); @@ -247,7 +247,7 @@ Stmt IRMutator::Mutate_(const Realize* op, const Stmt& s) { return s; } else { return Realize::make(op->func, op->value_index, - op->dtype, new_bounds, + op->type, new_bounds, condition, body); } } @@ -263,7 +263,7 @@ Stmt IRMutator::Mutate_(const Prefetch* op, const Stmt& s) { Expr old_extent = op->bounds[i]->extent; Expr new_min = m->Mutate(old_min); Expr new_extent = m->Mutate(old_extent); - if (!new_min.same_as(old_min)) bounds_changed = true; + if 
(!new_min.same_as(old_min)) bounds_changed = true; if (!new_extent.same_as(old_extent)) bounds_changed = true; new_bounds.push_back( Range::make_by_min_extent(new_min, new_extent)); @@ -273,7 +273,7 @@ Stmt IRMutator::Mutate_(const Prefetch* op, const Stmt& s) { return s; } else { return Prefetch::make(op->func, op->value_index, - op->dtype, new_bounds); + op->type, new_bounds); } } @@ -288,7 +288,7 @@ Stmt IRMutator::Mutate_(const Block* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const AssertStmt* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const AssertStmt *op, const Stmt& s) { Expr condition = this->Mutate(op->condition); Expr message = this->Mutate(op->message); Stmt body = this->Mutate(op->body); @@ -302,7 +302,7 @@ Stmt IRMutator::Mutate_(const AssertStmt* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const ProducerConsumer* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const ProducerConsumer *op, const Stmt& s) { Stmt body = this->Mutate(op->body); if (body.same_as(op->body)) { return s; @@ -311,7 +311,7 @@ Stmt IRMutator::Mutate_(const ProducerConsumer* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const Evaluate* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const Evaluate *op, const Stmt& s) { Expr v = this->Mutate(op->value); if (v.same_as(op->value)) { return s; @@ -320,7 +320,7 @@ Stmt IRMutator::Mutate_(const Evaluate* op, const Stmt& s) { } } -Stmt IRMutator::Mutate_(const Free* op, const Stmt& s) { +Stmt IRMutator::Mutate_(const Free *op, const Stmt& s) { return s; } @@ -348,21 +348,21 @@ TVM_STATIC_IR_FUNCTOR(IRMutator, vtable_stmt) return m->Mutate_(static_cast(node.get()), e); \ }) -Expr IRMutator::Mutate_(const Variable* op, const Expr& e) { +Expr IRMutator::Mutate_(const Variable *op, const Expr& e) { return e; } -Expr IRMutator::Mutate_(const Load* op, const Expr& e) { +Expr IRMutator::Mutate_(const Load *op, const Expr& e) { Expr index = this->Mutate(op->index); Expr pred = this->Mutate(op->predicate); if (index.same_as(op->index) && pred.same_as(op->predicate)) { return e; } else { - return Load::make(op->dtype, op->buffer_var, index, pred); + return Load::make(op->type, op->buffer_var, index, pred); } } -Expr IRMutator::Mutate_(const Let* op, const Expr& e) { +Expr IRMutator::Mutate_(const Let *op, const Expr& e) { Expr value = this->Mutate(op->value); Expr body = this->Mutate(op->body); if (value.same_as(op->value) && @@ -378,7 +378,7 @@ Expr IRMutator::Mutate_(const Call* op, const Expr& e) { if (op->args.same_as(new_args)) { return e; } else { - return Call::make(op->dtype, op->name, new_args, op->call_type, + return Call::make(op->type, op->name, new_args, op->call_type, op->func, op->value_index); } } @@ -413,8 +413,8 @@ DEFINE_BIOP_EXPR_MUTATE_(GE) DEFINE_BIOP_EXPR_MUTATE_(And) DEFINE_BIOP_EXPR_MUTATE_(Or) -Expr IRMutator::Mutate_(const Reduce* op, const Expr& e) { - Array new_axis = MutateIterVarArr(op->axis, this); +Expr IRMutator::Mutate_(const Reduce *op, const Expr& e) { + Array new_axis = MutateIterVarArr(op->axis, this); Array new_source = MutateArray(op->source, this); Expr new_cond = this->Mutate(op->condition); if (op->axis.same_as(new_axis) && @@ -427,16 +427,16 @@ Expr IRMutator::Mutate_(const Reduce* op, const Expr& e) { } } -Expr IRMutator::Mutate_(const Cast* op, const Expr& e) { +Expr IRMutator::Mutate_(const Cast *op, const Expr& e) { Expr value = this->Mutate(op->value); if (value.same_as(op->value)) { return e; } else { - return Cast::make(op->dtype, value); + return Cast::make(op->type, value); } } -Expr 
IRMutator::Mutate_(const Not* op, const Expr& e) { +Expr IRMutator::Mutate_(const Not *op, const Expr& e) { Expr a = this->Mutate(op->a); if (a.same_as(op->a)) { return e; @@ -445,7 +445,7 @@ Expr IRMutator::Mutate_(const Not* op, const Expr& e) { } } -Expr IRMutator::Mutate_(const Select* op, const Expr& e) { +Expr IRMutator::Mutate_(const Select *op, const Expr& e) { Expr cond = this->Mutate(op->condition); Expr t = this->Mutate(op->true_value); Expr f = this->Mutate(op->false_value); @@ -458,7 +458,7 @@ Expr IRMutator::Mutate_(const Select* op, const Expr& e) { } } -Expr IRMutator::Mutate_(const Ramp* op, const Expr& e) { +Expr IRMutator::Mutate_(const Ramp *op, const Expr& e) { Expr base = this->Mutate(op->base); Expr stride = this->Mutate(op->stride); if (base.same_as(op->base) && @@ -469,7 +469,7 @@ Expr IRMutator::Mutate_(const Ramp* op, const Expr& e) { } } -Expr IRMutator::Mutate_(const Broadcast* op, const Expr& e) { +Expr IRMutator::Mutate_(const Broadcast *op, const Expr& e) { Expr value = this->Mutate(op->value); if (value.same_as(op->value)) { return e; @@ -478,7 +478,7 @@ Expr IRMutator::Mutate_(const Broadcast* op, const Expr& e) { } } -Expr IRMutator::Mutate_(const Shuffle* op, const Expr& e) { +Expr IRMutator::Mutate_(const Shuffle *op, const Expr& e) { auto new_vec = MutateArray(op->vectors, this); if (new_vec.same_as(op->vectors)) { return e; diff --git a/src/pass/ir_util.h b/src/pass/ir_util.h index 0f8bb990c2d3..690feca135ef 100644 --- a/src/pass/ir_util.h +++ b/src/pass/ir_util.h @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -89,12 +89,12 @@ inline Array UpdateArray(Array arr, F fupdate) { * \return the get expression. */ inline Expr TVMStructGet( - DataType dtype, Var handle, int index, + Type dtype, Var handle, int index, intrinsic::TVMStructFieldKind kind) { Array args ={ handle, - make_const(DataType::Int(32), index), - make_const(DataType::Int(32), static_cast(kind))}; + make_const(Int(32), index), + make_const(Int(32), static_cast(kind))}; return Call::make(dtype, intrinsic::tvm_struct_get, args, Call::PureIntrinsic); } @@ -104,10 +104,10 @@ inline Expr TVMStructGet( * \param dtype The data type. * \param offset the offset index. */ -inline Expr AddressOffset(Var handle, DataType dtype, int offset) { +inline Expr AddressOffset(Var handle, Type dtype, int offset) { return Call::make( - DataType::Handle(), intrinsic::tvm_address_of, - {Load::make(dtype, handle, make_const(DataType::Int(32), offset * dtype.lanes()), + Handle(), intrinsic::tvm_address_of, + {Load::make(dtype, handle, make_const(Int(32), offset * dtype.lanes()), const_true(dtype.lanes()))}, Call::PureIntrinsic); } @@ -118,13 +118,13 @@ inline Expr AddressOffset(Var handle, DataType dtype, int offset) { * \param dtype The data type. * \param offset the offset index. 
*/ -inline Expr AddressOffset(Var handle, DataType dtype, Expr offset) { +inline Expr AddressOffset(Var handle, Type dtype, Expr offset) { if (dtype.lanes() != 1) { - offset = offset * make_const(offset.dtype(), dtype.lanes()); - offset = Ramp::make(offset, make_const(offset.dtype(), 1), dtype.lanes()); + offset = offset * make_const(offset.type(), dtype.lanes()); + offset = Ramp::make(offset, make_const(offset.type(), 1), dtype.lanes()); } return Call::make( - DataType::Handle(), intrinsic::tvm_address_of, + Handle(), intrinsic::tvm_address_of, {Load::make(dtype, handle, offset, const_true(dtype.lanes()))}, Call::PureIntrinsic); @@ -143,11 +143,11 @@ inline Stmt TVMStructSet( intrinsic::TVMStructFieldKind kind, Expr value) { Array args ={ handle, - make_const(DataType::Int(32), index), - make_const(DataType::Int(32), static_cast(kind)), + make_const(Int(32), index), + make_const(Int(32), static_cast(kind)), value}; return Evaluate::make( - Call::make(DataType::Int(32), intrinsic::tvm_struct_set, args, Call::Intrinsic)); + Call::make(Int(32), intrinsic::tvm_struct_set, args, Call::Intrinsic)); } /*! @@ -155,13 +155,13 @@ inline Stmt TVMStructSet( * \param t The original type. * \return The corresponding API type. */ -inline DataType APIType(DataType t) { +inline Type APIType(Type t) { if (t.is_handle()) return t; CHECK_EQ(t.lanes(), 1) << "Cannot pass vector type through packed API."; - if (t.is_uint() || t.is_int()) return DataType::Int(64); + if (t.is_uint() || t.is_int()) return Int(64); CHECK(t.is_float()); - return DataType::Float(64); + return Float(64); } /*! @@ -170,7 +170,7 @@ inline DataType APIType(DataType t) { * \param const_size The constant size of the array. * \return the alignment */ -inline int GetTempAllocaAlignment(DataType type, int32_t const_size) { +inline int GetTempAllocaAlignment(Type type, int32_t const_size) { int align = runtime::kTempAllocaAlignment; if (const_size > 0) { int64_t const_s = static_cast(const_size) * type.bits() * type.lanes() / 8; diff --git a/src/pass/ir_visitor.cc b/src/pass/ir_visitor.cc index d6f163ccedc6..204c0f75fe4a 100644 --- a/src/pass/ir_visitor.cc +++ b/src/pass/ir_visitor.cc @@ -43,6 +43,7 @@ class IRApplyVisit : public IRVisitor { std::unordered_set visited_; }; + void PostOrderVisit(const NodeRef& node, std::function fvisit) { IRApplyVisit(fvisit).Visit(node); } @@ -67,7 +68,7 @@ inline void VisitRDom(const Array& rdom, IRVisitor* v) { void IRVisitor::Visit_(const Variable* op) {} -void IRVisitor::Visit_(const LetStmt* op) { +void IRVisitor::Visit_(const LetStmt *op) { this->Visit(op->value); this->Visit(op->body); } @@ -77,14 +78,14 @@ void IRVisitor::Visit_(const AttrStmt* op) { this->Visit(op->body); } -void IRVisitor::Visit_(const For* op) { +void IRVisitor::Visit_(const For *op) { IRVisitor* v = this; v->Visit(op->min); v->Visit(op->extent); v->Visit(op->body); } -void IRVisitor::Visit_(const Allocate* op) { +void IRVisitor::Visit_(const Allocate *op) { IRVisitor* v = this; for (size_t i = 0; i < op->extents.size(); i++) { v->Visit(op->extents[i]); @@ -96,18 +97,18 @@ void IRVisitor::Visit_(const Allocate* op) { } } -void IRVisitor::Visit_(const Load* op) { +void IRVisitor::Visit_(const Load *op) { this->Visit(op->index); this->Visit(op->predicate); } -void IRVisitor::Visit_(const Store* op) { +void IRVisitor::Visit_(const Store *op) { this->Visit(op->value); this->Visit(op->index); this->Visit(op->predicate); } -void IRVisitor::Visit_(const IfThenElse* op) { +void IRVisitor::Visit_(const IfThenElse *op) { 
this->Visit(op->condition); this->Visit(op->then_case); if (op->else_case.defined()) { @@ -115,14 +116,14 @@ void IRVisitor::Visit_(const IfThenElse* op) { } } -void IRVisitor::Visit_(const Let* op) { +void IRVisitor::Visit_(const Let *op) { this->Visit(op->value); this->Visit(op->body); } void IRVisitor::Visit_(const Free* op) {} -void IRVisitor::Visit_(const Call* op) { +void IRVisitor::Visit_(const Call *op) { VisitArray(op->args, this); } @@ -170,38 +171,38 @@ void IRVisitor::Visit_(const Select* op) { this->Visit(op->false_value); } -void IRVisitor::Visit_(const Ramp* op) { +void IRVisitor::Visit_(const Ramp *op) { this->Visit(op->base); this->Visit(op->stride); } -void IRVisitor::Visit_(const Shuffle* op) { - for (const auto& elem : op->indices) +void IRVisitor::Visit_(const Shuffle *op) { + for (const auto &elem : op->indices) this->Visit(elem); - for (const auto& elem : op->vectors) + for (const auto &elem : op->vectors) this->Visit(elem); } -void IRVisitor::Visit_(const Broadcast* op) { +void IRVisitor::Visit_(const Broadcast *op) { this->Visit(op->value); } -void IRVisitor::Visit_(const AssertStmt* op) { +void IRVisitor::Visit_(const AssertStmt *op) { this->Visit(op->condition); this->Visit(op->message); this->Visit(op->body); } -void IRVisitor::Visit_(const ProducerConsumer* op) { +void IRVisitor::Visit_(const ProducerConsumer *op) { this->Visit(op->body); } -void IRVisitor::Visit_(const Provide* op) { +void IRVisitor::Visit_(const Provide *op) { VisitArray(op->args, this); this->Visit(op->value); } -void IRVisitor::Visit_(const Realize* op) { +void IRVisitor::Visit_(const Realize *op) { for (size_t i = 0; i < op->bounds.size(); i++) { this->Visit(op->bounds[i]->min); this->Visit(op->bounds[i]->extent); @@ -211,19 +212,19 @@ void IRVisitor::Visit_(const Realize* op) { this->Visit(op->condition); } -void IRVisitor::Visit_(const Prefetch* op) { +void IRVisitor::Visit_(const Prefetch *op) { for (size_t i = 0; i < op->bounds.size(); i++) { this->Visit(op->bounds[i]->min); this->Visit(op->bounds[i]->extent); } } -void IRVisitor::Visit_(const Block* op) { +void IRVisitor::Visit_(const Block *op) { this->Visit(op->first); this->Visit(op->rest); } -void IRVisitor::Visit_(const Evaluate* op) { +void IRVisitor::Visit_(const Evaluate *op) { this->Visit(op->value); } diff --git a/src/pass/lift_attr_scope.cc b/src/pass/lift_attr_scope.cc index cfc6e5a7fc68..adcaaebd6d6e 100644 --- a/src/pass/lift_attr_scope.cc +++ b/src/pass/lift_attr_scope.cc @@ -57,7 +57,7 @@ class AttrScopeLifter : public IRMutator { attr_node_ = NodeRef(); attr_value_ = Expr(); return Allocate::make( - op->buffer_var, op->dtype, + op->buffer_var, op->type, op->extents, op->condition, body, op->new_expr, op->free_function); } else { @@ -198,7 +198,7 @@ class AttrScopeLifter : public IRMutator { static bool ValueSame(const Expr& a, const Expr& b) { if (a.same_as(b)) return true; if (a->type_index() != b->type_index()) return false; - if (a.dtype() != b.dtype()) return false; + if (a.type() != b.type()) return false; if (const IntImm* op = a.as()) { return op->value == b.as()->value; } diff --git a/src/pass/loop_partition.cc b/src/pass/loop_partition.cc index 1ac386767ae3..ef5cc9c4fa9f 100644 --- a/src/pass/loop_partition.cc +++ b/src/pass/loop_partition.cc @@ -181,7 +181,7 @@ class PartitionFinder : public IRVisitor { const IterVarNode* thread_axis = op->node.as(); CHECK(thread_axis); const Variable* var = thread_axis->var.get(); - IntSet dom = IntSet::range(Range(make_zero(op->value.dtype()), op->value)); + IntSet dom = 
IntSet::range(Range(make_zero(op->value.type()), op->value)); hint_map_.insert({var, dom}); relax_map_.insert({var, dom}); IRVisitor::Visit_(op); @@ -351,12 +351,12 @@ class LoopPartitioner : public IRMutator { if (scope.rank == 1) { // threadIdx should be put into relax map, in case of divergence. relax_map_.insert({var.get(), - IntSet::interval(make_zero(var.dtype()), op->value - 1)}); + IntSet::interval(make_zero(var.type()), op->value - 1)}); res = IRMutator::Mutate_(op, stmt); relax_map_.erase(var.get()); } else { hint_map_.insert({var.get(), - IntSet::interval(make_zero(var.dtype()), op->value - 1)}); + IntSet::interval(make_zero(var.type()), op->value - 1)}); res = IRMutator::Mutate_(op, stmt); hint_map_.erase(var.get()); } @@ -595,9 +595,9 @@ Stmt LoopPartitioner::TryPartition(const Node* node, inline Stmt LoopPartitioner::MakeFor(const Node *node, Expr extent, Stmt body) { const For *for_node = static_cast(node); CHECK(for_node); - if (analyzer_.CanProve(extent == make_const(DataType::Int(32), 1))) { + if (analyzer_.CanProve(extent == make_const(Int(32), 1))) { // If the loop extent is 1, do not create the loop anymore - return Substitute(body, {{Var{for_node->loop_var}, make_const(DataType::Int(32), 0)}}); + return Substitute(body, {{Var{for_node->loop_var}, make_const(Int(32), 0)}}); } else { return For::make(for_node->loop_var, 0, extent, for_node->for_type, for_node->device_api, body); diff --git a/src/pass/lower_custom_datatypes.cc b/src/pass/lower_custom_datatypes.cc index e24cddd97f25..3e71868ce3bc 100644 --- a/src/pass/lower_custom_datatypes.cc +++ b/src/pass/lower_custom_datatypes.cc @@ -42,8 +42,8 @@ class CustomDatatypesLowerer : public IRMutator { explicit CustomDatatypesLowerer(const std::string& target) : target_(target) {} inline Expr Mutate_(const Cast* op, const Expr& e) final { - auto type_code = op->dtype.code(); - auto src_type_code = op->value.dtype().code(); + auto type_code = op->type.code(); + auto src_type_code = op->value.type().code(); // If either datatype is a registered custom datatype, we must lower. 
bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(type_code) || datatype::Registry::Global()->GetTypeRegistered(src_type_code); @@ -60,7 +60,7 @@ class CustomDatatypesLowerer : public IRMutator { } inline Expr Mutate_(const FloatImm* imm, const Expr& e) final { - auto type_code = imm->dtype.code(); + auto type_code = imm->type.code(); if (datatype::Registry::Global()->GetTypeRegistered(type_code)) { auto lower = datatype::GetFloatImmLowerFunc(target_, type_code); CHECK(lower) << "FloatImm lowering function for target " << target_ << " type " @@ -71,12 +71,12 @@ class CustomDatatypesLowerer : public IRMutator { } inline Stmt Mutate_(const Allocate* allocate, const Stmt& s) final { - bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(allocate->dtype.code()); + bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(allocate->type.code()); Stmt stmt = IRMutator::Mutate_(allocate, s); allocate = stmt.as(); if (toBeLowered) { - auto new_allocate_type = DataType::UInt(allocate->dtype.bits(), allocate->dtype.lanes()); + auto new_allocate_type = UInt(allocate->type.bits(), allocate->type.lanes()); return Allocate::make(allocate->buffer_var, new_allocate_type, allocate->extents, allocate->condition, allocate->body, allocate->new_expr, allocate->free_function); @@ -85,11 +85,11 @@ class CustomDatatypesLowerer : public IRMutator { } inline Expr Mutate_(const Load* load, const Expr& e) final { - bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(load->dtype.code()); + bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(load->type.code()); Expr expr = IRMutator::Mutate_(load, e); load = expr.as(); if (toBeLowered) { - auto new_load_type = DataType::UInt(load->dtype.bits()); + auto new_load_type = UInt(load->type.bits()); return Load::make(new_load_type, load->buffer_var, load->index, load->predicate); } return expr; @@ -97,7 +97,7 @@ class CustomDatatypesLowerer : public IRMutator { #define DEFINE_MUTATE__(OP) \ inline Expr Mutate_(const OP* op, const Expr& e) final { \ - auto type_code = op->dtype.code(); \ + auto type_code = op->type.code(); \ bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(type_code); \ Expr expr = IRMutator::Mutate_(op, e); \ op = expr.as(); \ diff --git a/src/pass/lower_intrin.cc b/src/pass/lower_intrin.cc index f0b0b3c36d42..c2a2fe6f5942 100644 --- a/src/pass/lower_intrin.cc +++ b/src/pass/lower_intrin.cc @@ -76,7 +76,7 @@ class IntrinInjecter : public arith::IRMutatorWithAnalyzer { op = ret.as(); if (op == nullptr) return ret; int shift; - const DataType& dtype = op->dtype; + const DataType& dtype = op->type; CHECK(dtype.is_int() || dtype.is_uint()); if (support_bitwise_op_ && @@ -97,7 +97,7 @@ class IntrinInjecter : public arith::IRMutatorWithAnalyzer { // condition on b >= 0. // truncmod(a, b) < 0 will implies ceildiv, // So we need to correct these cases. - if ((dtype == DataType::Int(32) || dtype == DataType::Int(64)) && support_bitwise_op_) { + if ((dtype == Int(32) || dtype == Int(64)) && support_bitwise_op_) { // equivalent to rdiv + (rmod >= 0 ? 0: -1); return rdiv + (rmod >> make_const(dtype, dtype.bits() - 1)); } else { @@ -123,7 +123,7 @@ class IntrinInjecter : public arith::IRMutatorWithAnalyzer { if (op == nullptr) return ret; // Lower floordiv to native truncdiv. 
int shift; - const DataType& dtype = op->dtype; + const DataType& dtype = op->type; CHECK(dtype.is_int() || dtype.is_uint()); if (support_bitwise_op_ && @@ -144,7 +144,7 @@ class IntrinInjecter : public arith::IRMutatorWithAnalyzer { // mod(a, b) < 0 will imply we are doing ceildiv, // So we need to correct these cases. Expr rmod = truncmod(op->a, op->b); - if ((dtype == DataType::Int(32) || dtype == DataType::Int(64)) && support_bitwise_op_) { + if ((dtype == Int(32) || dtype == Int(64)) && support_bitwise_op_) { // (rmod >> shift) & b // -> (rmod >= 0 ? 0: -1) & b // -> rmod >= 0 ? 0 : b @@ -207,23 +207,23 @@ class IntrinInjecter : public arith::IRMutatorWithAnalyzer { if (const Cast* cast = bcast->value.as()) { auto should_swap = [&]() { // Maintain behaviour (int8 -> int16, fp16 -> fp32). - if (cast->dtype.bits() == cast->value.dtype().bits() * 2) { + if (cast->type.bits() == cast->value.type().bits() * 2) { return true; } // Check both operands are integer-like. - if (!cast->dtype.is_uint() && !cast->dtype.is_int()) { + if (!cast->type.is_uint() && !cast->type.is_int()) { return false; } - if (!cast->value.dtype().is_uint() && !cast->value.dtype().is_int()) { + if (!cast->value.type().is_uint() && !cast->value.type().is_int()) { return false; } // If both are integer-like, swap if we have a widening cast. - return cast->dtype.bits() > cast->value.dtype().bits(); + return cast->type.bits() > cast->value.type().bits(); }; if (should_swap()) { Expr new_bcast = Broadcast::make(cast->value, bcast->lanes); - return Cast::make(bcast->dtype, new_bcast); + return Cast::make(bcast->type, new_bcast); } } } @@ -236,9 +236,9 @@ class IntrinInjecter : public arith::IRMutatorWithAnalyzer { Expr lhs = SwapBroadcastCast(a); Expr rhs = SwapBroadcastCast(b); - if (fma_ != nullptr && op->dtype.is_float()) { + if (fma_ != nullptr && op->type.is_float()) { Expr r = (*fma_)(Call::make( - op->dtype, "fma", {lhs, rhs, c}, Call::PureIntrinsic)); + op->type, "fma", {lhs, rhs, c}, Call::PureIntrinsic)); if (r.defined()) return this->Mutate(r); } else { if (!lhs.same_as(a) || !rhs.same_as(b)) { diff --git a/src/pass/lower_thread_allreduce.cc b/src/pass/lower_thread_allreduce.cc index 2a121180d695..e8ea52e886cc 100644 --- a/src/pass/lower_thread_allreduce.cc +++ b/src/pass/lower_thread_allreduce.cc @@ -83,7 +83,7 @@ class ThreadAllreduceBuilder final : public IRMutator { stmt = AttrStmt::make( repl->buffer_var, attr::volatile_scope, 1, op->body); stmt = Allocate::make( - repl->buffer_var, repl->dtype, + repl->buffer_var, repl->type, repl->extents, repl->condition, stmt); stmt = AttrStmt::make( repl->buffer_var, attr::storage_scope, @@ -125,14 +125,14 @@ class ThreadAllreduceBuilder final : public IRMutator { CHECK_EQ(size, size_of_args->value); Array inits = combiner->identity_element; std::vector values(size); - std::vector types(size); + std::vector types(size); Expr cond = call->args[size+1]; for (size_t idx = 0; idx < size; ++idx) { values[idx] = call->args[1+idx]; if (!is_one(cond)) { values[idx] = Select::make(cond, values[idx], inits[idx]); } - types[idx] = values[idx].dtype(); + types[idx] = values[idx].type(); } std::vector buffers(size); for (size_t idx = 0; idx < size; ++idx) { @@ -197,7 +197,7 @@ class ThreadAllreduceBuilder final : public IRMutator { // previous iteration on the same buffer. 
seq.emplace_back(SyncThread("shared")); for (size_t idx = 0; idx < size; ++idx) { - shared_bufs[idx] = Var("red_buf"+std::to_string(idx), DataType::Handle()); + shared_bufs[idx] = Var("red_buf"+std::to_string(idx), Handle()); Expr pred = const_true(types[idx].lanes()); seq.emplace_back(Store::make( shared_bufs[idx], values[idx], @@ -212,7 +212,7 @@ class ThreadAllreduceBuilder final : public IRMutator { Expr pred = const_true(types[idx].lanes()); load_remap_[buffers[idx]] = Load::make( types[idx], shared_bufs[idx], - BufIndex(make_zero(reduce_index.dtype()), group_index, reduce_extent), pred); + BufIndex(make_zero(reduce_index.type()), group_index, reduce_extent), pred); alloc_remap_[buffers[idx]] = Allocate::make( shared_bufs[idx], types[idx], {Expr(group_extent), Expr(reduce_extent)}, @@ -222,7 +222,7 @@ class ThreadAllreduceBuilder final : public IRMutator { } // make allreduce. Stmt MakeBufAllreduce(const CommReducerNode *combiner, - const std::vector& types, + const std::vector& types, const Array& shared_bufs, Expr reduce_index, Expr group_index, @@ -293,7 +293,7 @@ class ThreadAllreduceBuilder final : public IRMutator { int& total_extent = *out_total_extent; total_extent = 1; if (tvec.size() == 0) { - return make_zero(DataType::Int(32)); + return make_zero(Int(32)); } Expr ret; @@ -311,7 +311,7 @@ class ThreadAllreduceBuilder final : public IRMutator { // sync thread op. static Stmt SyncThread(const std::string& sync) { return Evaluate::make( - Call::make(DataType::Int(32), intrinsic::tvm_storage_sync, + Call::make(Int(32), intrinsic::tvm_storage_sync, {StringImm::make(sync)}, Call::Intrinsic)); } diff --git a/src/pass/lower_tvm_builtin.cc b/src/pass/lower_tvm_builtin.cc index c8c8fa9c62d0..e73956cb3d62 100644 --- a/src/pass/lower_tvm_builtin.cc +++ b/src/pass/lower_tvm_builtin.cc @@ -33,12 +33,12 @@ namespace ir { inline Expr ConstInt32(size_t index) { CHECK_LE(index, std::numeric_limits::max()); - return make_const(DataType::Int(32), static_cast(index)); + return make_const(Int(32), static_cast(index)); } inline Expr StackAlloca(std::string type, size_t num) { Array args = {StringImm::make(type), ConstInt32(num)}; - return Call::make(DataType::Handle(), intrinsic::tvm_stack_alloca, args, Call::Intrinsic); + return Call::make(Handle(), intrinsic::tvm_stack_alloca, args, Call::Intrinsic); } // Calculate the statistics of packed function. @@ -46,10 +46,10 @@ inline Expr StackAlloca(std::string type, size_t num) { class BuiltinLower : public IRMutator { public: Stmt Build(Stmt stmt) { - stack_shape_ = Var("stack_shape", DataType::Handle()); - stack_array_ = Var("stack_array", DataType::Handle()); - stack_value_ = Var("stack_value", DataType::Handle()); - stack_tcode_ = Var("stack_tcode", DataType::Handle()); + stack_shape_ = Var("stack_shape", Handle()); + stack_array_ = Var("stack_array", Handle()); + stack_value_ = Var("stack_value", Handle()); + stack_tcode_ = Var("stack_tcode", Handle()); stmt = this->Mutate(stmt); if (max_shape_stack_ != 0) { stmt = LetStmt::make( @@ -86,7 +86,7 @@ class BuiltinLower : public IRMutator { if (op->new_expr.defined()) return stmt; // Get constant allocation bound. 
diff --git a/src/pass/lower_tvm_builtin.cc b/src/pass/lower_tvm_builtin.cc
index c8c8fa9c62d0..e73956cb3d62 100644
--- a/src/pass/lower_tvm_builtin.cc
+++ b/src/pass/lower_tvm_builtin.cc
@@ -33,12 +33,12 @@ namespace ir {

 inline Expr ConstInt32(size_t index) {
   CHECK_LE(index, std::numeric_limits<int>::max());
-  return make_const(DataType::Int(32), static_cast<int>(index));
+  return make_const(Int(32), static_cast<int>(index));
 }

 inline Expr StackAlloca(std::string type, size_t num) {
   Array<Expr> args = {StringImm::make(type), ConstInt32(num)};
-  return Call::make(DataType::Handle(), intrinsic::tvm_stack_alloca, args, Call::Intrinsic);
+  return Call::make(Handle(), intrinsic::tvm_stack_alloca, args, Call::Intrinsic);
 }

 // Calculate the statistics of packed function.
@@ -46,10 +46,10 @@ inline Expr StackAlloca(std::string type, size_t num) {
 class BuiltinLower : public IRMutator {
  public:
   Stmt Build(Stmt stmt) {
-    stack_shape_ = Var("stack_shape", DataType::Handle());
-    stack_array_ = Var("stack_array", DataType::Handle());
-    stack_value_ = Var("stack_value", DataType::Handle());
-    stack_tcode_ = Var("stack_tcode", DataType::Handle());
+    stack_shape_ = Var("stack_shape", Handle());
+    stack_array_ = Var("stack_array", Handle());
+    stack_value_ = Var("stack_value", Handle());
+    stack_tcode_ = Var("stack_tcode", Handle());
     stmt = this->Mutate(stmt);
     if (max_shape_stack_ != 0) {
       stmt = LetStmt::make(
@@ -86,7 +86,7 @@ class BuiltinLower : public IRMutator {
     if (op->new_expr.defined()) return stmt;
     // Get constant allocation bound.
     int64_t dev_type;
-    int64_t nbytes = GetVectorBytes(op->dtype);
+    int64_t nbytes = GetVectorBytes(op->type);
     if (device_type_.defined()) {
       if (arith::GetConst(device_type_, &dev_type)) {
         if (dev_type == kDLCPU) {
@@ -97,18 +97,18 @@ class BuiltinLower : public IRMutator {
         }
       }
     }
-    Expr total_bytes = make_const(op->extents[0].dtype(), nbytes);
+    Expr total_bytes = make_const(op->extents[0].type(), nbytes);
     for (size_t i = 0; i < op->extents.size(); ++i) {
       total_bytes = total_bytes * op->extents[i];
     }
     CHECK(device_type_.defined()) << "Unknown device type in current IR";
     CHECK(device_id_.defined()) << "Unknown device id in current IR";
-    Stmt throw_last_error = Evaluate::make(Call::make(DataType::Int(32),
+    Stmt throw_last_error = Evaluate::make(Call::make(Int(32),
                                            intrinsic::tvm_throw_last_error, {},
                                            Call::Intrinsic));
     Stmt body = Block::make(
-        IfThenElse::make(Call::make(DataType::Bool(1),
+        IfThenElse::make(Call::make(Bool(1),
                                     intrinsic::tvm_handle_is_null,
                                     {op->buffer_var}, Call::PureIntrinsic),
                          throw_last_error),
@@ -116,27 +116,27 @@ class BuiltinLower : public IRMutator {

     Stmt alloca = LetStmt::make(
         op->buffer_var,
-        Call::make(op->buffer_var.dtype(),
+        Call::make(op->buffer_var.type(),
                    "TVMBackendAllocWorkspace",
-                   {cast(DataType::Int(32), device_type_),
-                    cast(DataType::Int(32), device_id_),
-                    cast(DataType::UInt(64), total_bytes),
-                    IntImm::make(DataType::Int(32), op->dtype.code()),
-                    IntImm::make(DataType::Int(32), op->dtype.bits())},
+                   {cast(Int(32), device_type_),
+                    cast(Int(32), device_id_),
+                    cast(UInt(64), total_bytes),
+                    IntImm::make(Int(32), op->type.code()),
+                    IntImm::make(Int(32), op->type.bits())},
                    Call::Extern),
         body);

-    Expr free_op = Call::make(DataType::Int(32),
+    Expr free_op = Call::make(Int(32),
                               "TVMBackendFreeWorkspace",
-                              {cast(DataType::Int(32), device_type_),
-                               cast(DataType::Int(32), device_id_),
+                              {cast(Int(32), device_type_),
+                               cast(Int(32), device_id_),
                                op->buffer_var},
                               Call::Extern);
-    Stmt free_stmt = IfThenElse::make(free_op != make_zero(DataType::Int(32)), throw_last_error);
+    Stmt free_stmt = IfThenElse::make(free_op != make_zero(Int(32)), throw_last_error);
     body = Block::make(alloca, free_stmt);
     body = AttrStmt::make(
         op->buffer_var, attr::storage_alignment,
-        make_const(DataType::Int(32), runtime::kTempAllocaAlignment),
+        make_const(Int(32), runtime::kTempAllocaAlignment),
         body);
     return body;
   }
@@ -164,7 +164,7 @@ class BuiltinLower : public IRMutator {
     } else if (op->is_intrinsic(intrinsic::tvm_stack_make_array)) {
       return MakeArray(op, e);
     } else if (op->is_intrinsic(intrinsic::tvm_context_id)) {
-      return make_zero(op->dtype);
+      return make_zero(op->type);
     } else {
       return IRMutator::Mutate_(op, e);
     }
@@ -177,10 +177,10 @@ class BuiltinLower : public IRMutator {
     op = expr.as<Call>();
     for (size_t i = 0; i < op->args.size(); ++i) {
       prep_seq_.emplace_back(
-          Store::make(stack_shape_, cast(DataType::Int(64), op->args[i]),
+          Store::make(stack_shape_, cast(Int(64), op->args[i]),
                       ConstInt32(stack_begin +i), const_true(1)));
     }
-    return AddressOffset(stack_shape_, DataType::Int(64), stack_begin);
+    return AddressOffset(stack_shape_, Int(64), stack_begin);
   }
   // make array
   Expr MakeArray(const Call* op, const Expr& e) {
@@ -194,40 +194,40 @@ class BuiltinLower : public IRMutator {
         TVMStructSet(stack_array_, idx, intrinsic::kArrShape, op->args[1]));
     Expr strides = op->args[2];
     if (!strides.defined() || is_zero(strides)) {
-      strides = make_zero(DataType::Handle());
+      strides = make_zero(Handle());
     }
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx,
                      intrinsic::kArrStrides, strides));
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrNDim, op->args[3]));
-    DataType dtype = op->args[4].dtype();
+    Type dtype = op->args[4].type();
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrTypeCode,
-                     make_const(DataType::UInt(8), static_cast<int>(dtype.code()))));
+                     make_const(UInt(8), static_cast<int>(dtype.code()))));
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrTypeBits,
-                     make_const(DataType::UInt(8), dtype.bits())));
+                     make_const(UInt(8), dtype.bits())));
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrTypeLanes,
-                     make_const(DataType::UInt(16), dtype.lanes())));
+                     make_const(UInt(16), dtype.lanes())));
     // set byte offset
     int data_bytes = GetVectorBytes(dtype);
     Expr byte_offset = op->args[5];
     if (!is_zero(byte_offset)) {
-      byte_offset = byte_offset * make_const(byte_offset.dtype(), data_bytes);
+      byte_offset = byte_offset * make_const(byte_offset.type(), data_bytes);
     }
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrByteOffset,
-                     cast(DataType::UInt(64), byte_offset)));
+                     cast(UInt(64), byte_offset)));
     CHECK(device_type_.defined()) << "Unknown device type in current IR";
     CHECK(device_id_.defined()) << "Unknown device id in current IR";
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrDeviceId,
-                     cast(DataType::Int(32), device_id_)));
+                     cast(Int(32), device_id_)));
     prep_seq_.emplace_back(
         TVMStructSet(stack_array_, idx, intrinsic::kArrDeviceType,
-                     cast(DataType::Int(32), device_type_)));
-    return TVMStructGet(DataType::Handle(), stack_array_, idx, intrinsic::kArrAddr);
+                     cast(Int(32), device_type_)));
+    return TVMStructGet(Handle(), stack_array_, idx, intrinsic::kArrAddr);
   }
   // call packed.
   Expr MakeCallPacked(const Call* op, const Expr& e) {
@@ -241,8 +241,8 @@ class BuiltinLower : public IRMutator {
     for (size_t i = 1; i < op->args.size(); ++i) {
       Expr stack_index = ConstInt32(arg_stack_begin + i - 1);
       Expr arg = op->args[i];
-      DataType t = arg.dtype();
-      DataType api_type = APIType(t);
+      Type t = arg.type();
+      Type api_type = APIType(t);
       if (t != api_type) {
         arg = Cast::make(api_type, arg);
       }
@@ -274,7 +274,7 @@ class BuiltinLower : public IRMutator {
       ConstInt32(arg_stack_begin + op->args.size() - 1)
     };
     return Call::make(
-        DataType::Int(32), intrinsic::tvm_call_packed_lowered,
+        Int(32), intrinsic::tvm_call_packed_lowered,
         packed_args, Call::Intrinsic);
   }

@@ -290,8 +290,8 @@ class BuiltinLower : public IRMutator {
     for (size_t i = 1; i < op->args.size(); ++i) {
       Expr stack_index = ConstInt32(arg_stack_begin + i - 1);
       Expr arg = op->args[i];
-      DataType t = arg.dtype();
-      DataType api_type = APIType(t);
+      Type t = arg.type();
+      Type api_type = APIType(t);
       if (t != api_type) {
         arg = Cast::make(api_type, arg);
       }
@@ -324,7 +324,7 @@ class BuiltinLower : public IRMutator {
       op->args[args_size - 1]
     };
     return Call::make(
-        op->dtype, intrinsic::tvm_call_trace_packed_lowered,
+        op->type, intrinsic::tvm_call_trace_packed_lowered,
         packed_args, Call::Intrinsic);
   }
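The Allocate lowering above computes the request size as the vector element size multiplied by every extent, brackets the body with `TVMBackendAllocWorkspace` and `TVMBackendFreeWorkspace` extern calls, and raises the last TVM error when the returned handle is null. A standalone sketch of that shape, with `malloc`/`free` standing in for the workspace API (the helper name `TotalBytes` is illustrative):

```cpp
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <vector>

// Mirrors the pass's size computation: total bytes = bytes per
// (possibly vectorized) element multiplied by every allocation extent.
int64_t TotalBytes(int64_t elem_bytes, const std::vector<int64_t>& extents) {
  int64_t total = elem_bytes;
  for (int64_t e : extents) total *= e;
  return total;
}

int main() {
  // e.g. a float32x4 buffer of shape (16, 128): 16 bytes per vector element.
  int64_t bytes = TotalBytes(4 * 4, {16, 128});
  std::cout << bytes << " bytes requested from the workspace\n";  // 32768
  void* ws = std::malloc(bytes);  // stand-in for TVMBackendAllocWorkspace
  if (ws == nullptr) return 1;    // stand-in for the tvm_handle_is_null check
  std::free(ws);                  // stand-in for TVMBackendFreeWorkspace
  return 0;
}
```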
diff --git a/src/pass/lower_warp_memory.cc b/src/pass/lower_warp_memory.cc
index 0ed2b6232fc1..393605e85b8a 100644
--- a/src/pass/lower_warp_memory.cc
+++ b/src/pass/lower_warp_memory.cc
@@ -94,11 +94,11 @@ class WarpStoreCoeffFinder : private IRVisitor {
   /// Visitor implementation
   void Visit_(const Store *op) final {
     if (op->buffer_var.get() == buffer_) {
-      if (op->value.dtype().lanes() == 1) {
+      if (op->value.type().lanes() == 1) {
         UpdatePattern(op->index);
       } else {
         Expr base;
-        CHECK(GetRamp1Base(op->index, op->value.dtype().lanes(), &base))
+        CHECK(GetRamp1Base(op->index, op->value.type().lanes(), &base))
             << "LowerWarpMemory failed due to store index=" << op->index
             << ", can only handle continuous store";
         UpdatePattern(base);
@@ -196,7 +196,7 @@ class WarpAccessRewriter : protected IRMutator {
     int alloc_size = op->constant_allocation_size();
     CHECK_GT(alloc_size, 0)
         << "warp memory only support constant alloc size";
-    alloc_size *= op->dtype.lanes();
+    alloc_size *= op->type.lanes();
     warp_index_ = WarpIndexFinder(warp_size_).Find(op->body)->var;
     warp_coeff_ = WarpStoreCoeffFinder(
         buffer_, warp_index_, analyzer_).Find(op->body);
@@ -205,8 +205,8 @@ class WarpAccessRewriter : protected IRMutator {
     warp_group_ = alloc_size / (warp_size_ * warp_coeff_);
     return Allocate::make(
         op->buffer_var,
-        op->dtype,
-        {make_const(DataType::Int(32), alloc_size / warp_size_)},
+        op->type,
+        {make_const(Int(32), alloc_size / warp_size_)},
         op->condition,
         this->Mutate(op->body));
   }
@@ -237,8 +237,8 @@ class WarpAccessRewriter : protected IRMutator {
         << "LowerWarpMemory failed to rewrite load to shuffle for index "
         << op->index << " local_index=" << local_index;
     Expr load_value = Load::make(
-        op->dtype, op->buffer_var, local_index, op->predicate);
-    return Call::make(load_value.dtype(),
+        op->type, op->buffer_var, local_index, op->predicate);
+    return Call::make(load_value.type(),
                       intrinsic::tvm_warp_shuffle,
                       {load_value, group},
                       Call::Intrinsic);
@@ -252,15 +252,15 @@ class WarpAccessRewriter : protected IRMutator {
   // source index is the corresponding source index
   // in this access pattern.
   std::pair<Expr, Expr> SplitIndexByGroup(const Expr& index) {
-    if (index.dtype().lanes() != 1) {
+    if (index.type().lanes() != 1) {
       Expr base, local_index, group;
-      CHECK(GetRamp1Base(index, index.dtype().lanes(), &base));
+      CHECK(GetRamp1Base(index, index.type().lanes(), &base));
       std::tie(local_index, group) = SplitIndexByGroup(base);
       local_index =
-          Ramp::make(local_index, make_const(local_index.dtype(), 1), index.dtype().lanes());
+          Ramp::make(local_index, make_const(local_index.type(), 1), index.type().lanes());
       return std::make_pair(local_index, group);
     }
-    Expr m = make_const(index.dtype(), warp_coeff_);
+    Expr m = make_const(index.type(), warp_coeff_);

     // simple case, warp index is on the highest.
     if (warp_group_ == 1) {
@@ -269,9 +269,9 @@ class WarpAccessRewriter : protected IRMutator {
       return std::make_pair(x, z);
     } else {
       Expr x = analyzer_->canonical_simplify(indexmod(index, m));
-      Expr y = index / make_const(index.dtype(), warp_coeff_ * warp_size_);
+      Expr y = index / make_const(index.type(), warp_coeff_ * warp_size_);
       y = y * m + x;
-      Expr z = indexdiv(indexmod(index, make_const(index.dtype(), warp_coeff_ * warp_size_)),
+      Expr z = indexdiv(indexmod(index, make_const(index.type(), warp_coeff_ * warp_size_)),
                         m);
       return std::make_pair(analyzer_->canonical_simplify(y),
                             analyzer_->canonical_simplify(z));
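The `SplitIndexByGroup` rewrite above decomposes a warp-memory index as index = y * (warp_coeff * warp_size) + z * warp_coeff + x: the lane keeps `y * warp_coeff + x` as its local index, and `z` selects the source lane for the shuffle. A small self-contained check of that round-trip (the values of `m` and `warp_size` are illustrative):

```cpp
#include <cassert>

// Index split used by WarpAccessRewriter in the warp_group_ > 1 case,
// with m = warp_coeff. `local` addresses the lane-private buffer and
// `group` is the shuffle source lane.
struct Split { int local, group; };

Split SplitIndexByGroup(int index, int m, int warp_size) {
  int x = index % m;                    // position within one coefficient
  int y = index / (m * warp_size);      // which "row" of the warp layout
  int z = (index % (m * warp_size)) / m;  // which lane holds the element
  return {y * m + x, z};
}

int main() {
  const int m = 2, warp_size = 32;
  for (int index = 0; index < 4 * m * warp_size; ++index) {
    Split s = SplitIndexByGroup(index, m, warp_size);
    int y = s.local / m, x = s.local % m;
    // The decomposition loses no information: the original index
    // is recovered from (local, group).
    assert(index == y * (m * warp_size) + s.group * m + x);
  }
  return 0;
}
```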
diff --git a/src/pass/make_api.cc b/src/pass/make_api.cc
index 74b8f891299a..4d9c92bb428e 100644
--- a/src/pass/make_api.cc
+++ b/src/pass/make_api.cc
@@ -51,9 +51,9 @@ LoweredFunc MakeAPI(Stmt body,
   int num_packed_args = num_args - num_unpacked_args;
   // Data field definitions
   // The packed fields
-  Var v_packed_args("args", DataType::Handle());
-  Var v_packed_arg_type_ids("arg_type_ids", DataType::Handle());
-  Var v_num_packed_args("num_args", DataType::Int(32));
+  Var v_packed_args("args", Handle());
+  Var v_packed_arg_type_ids("arg_type_ids", Handle());
+  Var v_num_packed_args("num_args", Int(32));
   // The arguments of the function.
   Array<Var> args;
   // The device context
@@ -66,12 +66,12 @@ LoweredFunc MakeAPI(Stmt body,
   // ---------------------------
   // local function definitions
   // load i-th argument as type t
-  auto f_arg_value = [&](DataType t, int i) {
+  auto f_arg_value = [&](Type t, int i) {
     Array<Expr> call_args{v_packed_args,
-                          IntImm::make(DataType::Int(32), i),
-                          IntImm::make(DataType::Int(32), intrinsic::kTVMValueContent)};
+                          IntImm::make(Int(32), i),
+                          IntImm::make(Int(32), intrinsic::kTVMValueContent)};
     // load 64 bit version
-    DataType api_type = APIType(t);
+    Type api_type = APIType(t);
     Expr res = Call::make(
         api_type, intrinsic::tvm_struct_get, call_args,
         Call::PureIntrinsic);
@@ -86,7 +86,7 @@ LoweredFunc MakeAPI(Stmt body,
     std::ostringstream os;
     os << "arg" << i;
     const Variable* v = api_args[i].as<Variable>();
-    return Var(os.str(), v ? v->dtype: DataType::Handle());
+    return Var(os.str(), v ? v->type: Handle());
   };
   // ---------------------------
   // start of logics
@@ -110,15 +110,14 @@ LoweredFunc MakeAPI(Stmt body,
     if (i < num_packed_args) {
       // Value loads
       seq_init.emplace_back(LetStmt::make(
-          v_arg, f_arg_value(v_arg.dtype(), i), nop));
+          v_arg, f_arg_value(v_arg.type(), i), nop));
       // type code checks
-      Var tcode(v_arg->name_hint + ".code", DataType::Int(32));
+      Var tcode(v_arg->name_hint + ".code", Int(32));
       seq_init.emplace_back(LetStmt::make(
           tcode, Load::make(
-              DataType::Int(32), v_packed_arg_type_ids,
-              IntImm::make(DataType::Int(32), i), const_true(1)),
+              Int(32), v_packed_arg_type_ids, IntImm::make(Int(32), i), const_true(1)),
           nop));
-      DataType t = v_arg.dtype();
+      Type t = v_arg.type();
       if (t.is_handle()) {
        std::ostringstream msg;
        msg << name << ": Expect arg[" << i << "] to be pointer";
@@ -175,7 +174,7 @@ LoweredFunc MakeAPI(Stmt body,
   n->is_packed_func = num_unpacked_args == 0;
   n->is_restricted = is_restricted;
   body = AttrStmt::make(
-      make_zero(DataType::Int(32)), attr::compute_scope,
+      make_zero(Int(32)), attr::compute_scope,
       StringImm::make(name + "_compute_"), body);
   // Set device context
   if (vmap.count(device_id.get())) {
@@ -187,7 +186,7 @@ LoweredFunc MakeAPI(Stmt body,
         node, attr::device_context_type, device_type, nop));
     Stmt set_device = IfThenElse::make(
         device_type != kDLCPU, Evaluate::make(Call::make(
-            DataType::Int(32), intrinsic::tvm_call_packed,
+            Int(32), intrinsic::tvm_call_packed,
             {StringImm::make(runtime::symbol::tvm_set_device),
              device_type, device_id}, Call::Intrinsic)));
     body = Block::make(set_device, body);
@@ -216,7 +215,7 @@ class DeviceTypeBinder: public IRMutator {
     if (op->attr_key == attr::device_context_type) {
       if (const Variable* var = op->value.as<Variable>()) {
         var_ = var;
-        Expr value = make_const(op->value.dtype(), device_type_);
+        Expr value = make_const(op->value.type(), device_type_);
         Stmt body = IRMutator::Mutate_(op, s);
         var_ = nullptr;
         std::ostringstream os;
@@ -246,14 +245,14 @@ class DeviceTypeBinder: public IRMutator {
     Expr res = IRMutator::Mutate_(op, e);
     op = res.as<NE>();
     if (ir::Equal(op->a, op->b)) {
-      return make_const(op->dtype, false);
+      return make_const(op->type, false);
     }
     return res;
   }

   Expr Mutate_(const Variable* op, const Expr& e) final {
     if (op == var_) {
-      return make_const(op->dtype, device_type_);
+      return make_const(op->type, device_type_);
     } else {
       return e;
     }
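MakeAPI above unpacks the packed calling convention: a value array plus a parallel array of type codes, with a type-code guard emitted before each value load. A deliberately simplified mirror of that layout, hedged as a sketch (the union, enum, and helper names here are illustrative stand-ins; the real definitions live in tvm/runtime/c_runtime_api.h):

```cpp
#include <cassert>
#include <cstdint>

// Simplified mirror of the packed ABI that MakeAPI unpacks: values and
// type codes travel in parallel arrays, and each load is preceded by a
// check of the declared code.
union Value { int64_t v_int64; double v_float64; void* v_handle; };
enum TypeCode { kInt = 0, kFloat = 2, kHandle = 3 };  // illustrative values

int64_t LoadIntArg(const Value* args, const int* type_codes, int i) {
  assert(type_codes[i] == kInt);  // mirrors the emitted type-code guard
  return args[i].v_int64;         // mirrors f_arg_value's struct_get load
}

int main() {
  Value args[2];
  int codes[2] = {kInt, kFloat};
  args[0].v_int64 = 42;
  args[1].v_float64 = 1.5;
  assert(LoadIntArg(args, codes, 0) == 42);
  return 0;
}
```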
diff --git a/src/pass/narrow_channel_access.cc b/src/pass/narrow_channel_access.cc
new file mode 100644
index 000000000000..13c4e5141e8d
--- /dev/null
+++ b/src/pass/narrow_channel_access.cc
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file narrow_channel_access.cc
+ * \brief Narrow channel access to a smaller range
+ *  when possible by bringing it to the internal loop.
+ */
+#include <tvm/ir.h>
+#include <tvm/expr.h>
+#include <tvm/ir_mutator.h>
+#include <tvm/ir_visitor.h>
+#include <tvm/ir_pass.h>
+#include <tvm/arithmetic.h>
+#include <tvm/channel.h>
+#include "ir_util.h"
+
+namespace tvm {
+namespace ir {
+using namespace arith;
+
+// Bound deducer for channel access.
+class ChannelAccessBound : public IRVisitor {
+ public:
+  ChannelAccessBound(const Variable* buf_var, bool read_access)
+      : buf_var_(buf_var), read_access_(read_access) {}
+
+  void Visit_(const Store* op) final {
+    if (!read_access_ && buf_var_ == op->buffer_var.get()) {
+      ret_.emplace_back(EvalSet(op->index, dom_map_));
+    }
+    IRVisitor::Visit_(op);
+  }
+  void Visit_(const For* op) final {
+    CHECK(is_zero(op->min));
+    // We know that the extent of the loop won't depend on relaxed scope.
+    // TODO(tqchen) have a verification pass.
+    dom_map_[op->loop_var.get()] = IntSet::interval(op->min, op->extent - 1);
+    IRVisitor::Visit_(op);
+  }
+  void Visit_(const Load* op) final {
+    if (read_access_ && buf_var_ == op->buffer_var.get()) {
+      ret_.emplace_back(EvalSet(op->index, dom_map_));
+    }
+    IRVisitor::Visit_(op);
+  }
+  void Visit_(const Let* op) final {
+    LOG(FATAL) << "cannot pass through let";
+  }
+  void Visit_(const LetStmt* op) final {
+    LOG(FATAL) << "cannot pass through let";
+  }
+  IntSet Eval(const Stmt& stmt) {
+    Visit(stmt);
+    return Union(ret_);
+  }
+
+ private:
+  // The buffer variable.
+  const Variable* buf_var_;
+  // read or write
+  bool read_access_{true};
+  // Box
+  std::vector<IntSet> ret_;
+  // Domain map.
+  std::unordered_map<const Variable*, IntSet> dom_map_;
+};
+
+class ChannelAccessIndexRewriter : public IRMutator {
+ public:
+  ChannelAccessIndexRewriter(const Variable* buf_var,
+                             Expr min,
+                             bool read_access)
+      : buf_var_(buf_var), min_(min), read_access_(read_access) {}
+  Expr Mutate_(const Load* op, const Expr& e) final {
+    Expr expr = IRMutator::Mutate_(op, e);
+    op = expr.as<Load>();
+    if (read_access_ && buf_var_ == op->buffer_var.get()) {
+      return Load::make(
+          op->type, op->buffer_var, ir::Simplify(op->index - min_),
+          op->predicate);
+    } else {
+      return expr;
+    }
+  }
+  Stmt Mutate_(const Store* op, const Stmt& s) final {
+    Stmt stmt = IRMutator::Mutate_(op, s);
+    op = stmt.as<Store>();
+    if (!read_access_ && buf_var_ == op->buffer_var.get()) {
+      return Store::make(
+          op->buffer_var, op->value, ir::Simplify(op->index - min_),
+          op->predicate);
+    } else {
+      return stmt;
+    }
+  }
+
+ private:
+  // The buffer variable.
+  const Variable* buf_var_;
+  // The min bound.
+  Expr min_;
+  // read or write
+  bool read_access_{true};
+};
+
+
+// Rewrite channel access pattern.
+class ChannelAccessRewriter : public IRMutator {
+ public:
+  Stmt Mutate_(const AttrStmt* op, const Stmt& s) final {
+    Stmt ret;
+    const AttrStmt* adv = op->body.as<AttrStmt>();
+    if ((op->attr_key == ir::attr::channel_read_scope &&
+         adv && adv->attr_key == ir::attr::channel_read_advance) ||
+        (op->attr_key == ir::attr::channel_write_scope &&
+         adv && adv->attr_key == ir::attr::channel_write_advance)) {
+      RewriteEntry e;
+      e.window = op;
+      e.advance = adv;
+      e.read_access = op->attr_key == ir::attr::channel_read_scope;
+      tasks_.push_back(e);
+      ret = IRMutator::Mutate_(op, s);
+      if (tasks_.back().rewrite_success) {
+        ret = ret.as<AttrStmt>()->body.as<AttrStmt>()->body;
+      }
+      tasks_.pop_back();
+      return ret;
+    } else {
+      return IRMutator::Mutate_(op, s);
+    }
+  }
+
+  Stmt Mutate_(const For* op, const Stmt& s) final {
+    std::vector<RewriteEntry> tasks;
+    std::swap(tasks_, tasks);
+    Stmt body = op->body;
+    std::vector<Stmt> nest;
+    for (RewriteEntry& e : tasks) {
+      body = RewriteAccess(op, body, &e, &nest);
+    }
+
+    if (!body.same_as(op->body)) {
+      body = Mutate(body);
+      body = For::make(
+          op->loop_var, op->min, op->extent,
+          op->for_type, op->device_api, body);
+      body = MergeNest(nest, body);
+    } else {
+      CHECK_EQ(nest.size(), 0U);
+      body = IRMutator::Mutate_(op, s);
+    }
+    std::swap(tasks_, tasks);
+    return body;
+  }
+
+ private:
+  struct RewriteEntry {
+    bool read_access;
+    const AttrStmt* window;
+    const AttrStmt* advance;
+    bool rewrite_success{false};
+  };
+
+  Stmt RewriteAccess(const For* for_op,
+                     Stmt body,
+                     RewriteEntry* e,
+                     std::vector<Stmt>* outer_nest) {
+    const AttrStmt* adv_op = e->advance;
+    const Expr& window = e->window->value;
+    bool read_access = e->read_access;
+    Var var(for_op->loop_var);
+    Channel ch = Downcast<Channel>(adv_op->node);
+    ChannelAccessBound acc(ch->handle_var.get(), read_access);
+    IntSet iset = acc.Eval(for_op->body);
+    Range r = iset.cover_range(Range::make_by_min_extent(0, window));
+    r = Range::make_by_min_extent(
+        ir::Simplify(r->min), ir::Simplify(r->extent));
+    if (ExprUseVar(r->extent, var)) return body;
+    Array<Expr> linear_eq = DetectLinearEquation(r->min, {var});
+    if (linear_eq.size() == 0) return body;
+    Expr coeff = linear_eq[0];
+    Expr base = linear_eq[1];
+    if (!is_zero(base)) return body;
+    Expr left = ir::Simplify(adv_op->value - coeff * for_op->extent);
+    if (!analyzer_.CanProve(left >= 0)) return body;
+    // rewrite access index.
+    ChannelAccessIndexRewriter rw(
+        ch->handle_var.get(), var * coeff, read_access);
+    body = rw.Mutate(body);
+
+    if (read_access) {
+      body = AttrStmt::make(
+          ch, ir::attr::channel_read_scope, r->extent,
+          AttrStmt::make(ch, ir::attr::channel_read_advance, coeff,
+                         body));
+    } else {
+      body = AttrStmt::make(
+          ch, ir::attr::channel_write_scope, r->extent,
+          AttrStmt::make(ch, ir::attr::channel_write_advance, coeff,
+                         body));
+    }
+
+    if (!is_zero(left)) {
+      Stmt no_op = Evaluate::make(0);
+      if (read_access) {
+        outer_nest->emplace_back(
+            AttrStmt::make(ch, ir::attr::channel_read_advance, left, no_op));
+      } else {
+        outer_nest->emplace_back(
+            AttrStmt::make(ch, ir::attr::channel_write_advance, left, no_op));
+      }
+    }
+
+    e->rewrite_success = true;
+    return body;
+  }
+
+  arith::Analyzer analyzer_;
+  std::vector<RewriteEntry> tasks_;
+};
+
+Stmt NarrowChannelAccess(Stmt stmt) {
+  return ChannelAccessRewriter().Mutate(stmt);
+}
+
+}  // namespace ir
+}  // namespace tvm
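The rewrite in `RewriteAccess` rests on a simple invariant: if each iteration's access window starts at `coeff * i`, then advancing the channel by `coeff` inside the loop, plus the leftover `left = advance - coeff * extent` hoisted outside, reproduces the original total advance. A sketch of that check with concrete numbers (all values illustrative):

```cpp
#include <cassert>

// Arithmetic behind ChannelAccessRewriter: split one large channel
// advance into a per-iteration slope plus a hoisted remainder.
int main() {
  const int loop_extent = 10;
  const int total_advance = 25;  // channel_read_advance before rewriting
  const int coeff = 2;           // slope of the access window's min index
  int left = total_advance - coeff * loop_extent;
  assert(left >= 0);             // the pass only rewrites when provable
  // After rewriting: advance by `coeff` inside the loop, `left` outside.
  assert(coeff * loop_extent + left == total_advance);
  return 0;
}
```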
diff --git a/src/pass/rewrite_unsafe_select.cc b/src/pass/rewrite_unsafe_select.cc
index 43e3005aef64..25ed03963524 100644
--- a/src/pass/rewrite_unsafe_select.cc
+++ b/src/pass/rewrite_unsafe_select.cc
@@ -115,12 +115,12 @@ class UnsafeSelectRewriter : public IRMutator {
     Expr expr = IRMutator::Mutate_(op, e);
     op = expr.as<Select>();