diff --git a/apps/bundle_deploy/bundle.c b/apps/bundle_deploy/bundle.c index 29712de9c409..098ac994223e 100644 --- a/apps/bundle_deploy/bundle.c +++ b/apps/bundle_deploy/bundle.c @@ -123,3 +123,9 @@ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLContext ctx, void* tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx) { return g_memory_manager->Free(g_memory_manager, ptr, ctx); } + +tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorFunctionCallNotImplemented; } + +tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { + return kTvmErrorFunctionCallNotImplemented; +} diff --git a/apps/bundle_deploy/bundle_static.c b/apps/bundle_deploy/bundle_static.c index 7ac95fdf2e75..c4b637c3fc7b 100644 --- a/apps/bundle_deploy/bundle_static.c +++ b/apps/bundle_deploy/bundle_static.c @@ -124,3 +124,9 @@ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLContext ctx, void* tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx) { return g_memory_manager->Free(g_memory_manager, ptr, ctx); } + +tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorFunctionCallNotImplemented; } + +tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { + return kTvmErrorFunctionCallNotImplemented; +} diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index aac49c198c72..467e69a60827 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -539,6 +539,13 @@ TVM_DLL int TVMObjectRetain(TVMObjectHandle obj); */ TVM_DLL int TVMObjectFree(TVMObjectHandle obj); +/*! + * \brief Free a TVMByteArray returned from TVMFuncCall, and associated memory. + * \param arr The TVMByteArray instance. + * \return 0 on success, -1 on failure. + */ +TVM_DLL int TVMByteArrayFree(TVMByteArray* arr); + /*! * \brief Allocate a data space on device. * \param ctx The device context to perform operation. diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h index 41d727de97bb..75e49e63e094 100644 --- a/include/tvm/runtime/crt/error_codes.h +++ b/include/tvm/runtime/crt/error_codes.h @@ -44,6 +44,7 @@ typedef enum { kTvmErrorCategoryGenerated = 6, kTvmErrorCategoryGraphRuntime = 7, kTvmErrorCategoryFunctionCall = 8, + kTvmErrorCategoryTimeEvaluator = 9, } tvm_crt_error_category_t; typedef enum { @@ -77,6 +78,7 @@ typedef enum { kTvmErrorPlatformMemoryManagerInitialized = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 1), kTvmErrorPlatformShutdown = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 2), kTvmErrorPlatformNoMemory = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 3), + kTvmErrorPlatformTimerBadState = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 4), // Common error codes returned from generated functions. kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0), @@ -91,6 +93,9 @@ typedef enum { kTvmErrorFunctionCallWrongArgType = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 1), kTvmErrorFunctionCallNotImplemented = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 2), + // Time Evaluator - times functions for use with debug runtime. + kTvmErrorTimeEvaluatorBadHandle = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryTimeEvaluator, 0), + // System errors are always negative integers; this mask indicates presence of a system error. // Cast tvm_crt_error_t to a signed integer to interpret the negative error code. kTvmErrorSystemErrorMask = (1 << (sizeof(int) * 4 - 1)), diff --git a/include/tvm/runtime/crt/platform.h b/include/tvm/runtime/crt/platform.h index 12dcdc5927fd..8e0383912f50 100644 --- a/include/tvm/runtime/crt/platform.h +++ b/include/tvm/runtime/crt/platform.h @@ -78,6 +78,25 @@ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLContext ctx, void* * \return kTvmErrorNoError if successful; a descriptive error code otherwise. */ tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx); + +/*! \brief Start a device timer. + * + * The device timer used must not be running. + * + * \return kTvmErrorNoError if successful; a descriptive error code otherwise. + */ +tvm_crt_error_t TVMPlatformTimerStart(); + +/*! \brief Stop the running device timer and get the elapsed time (in microseconds). + * + * The device timer used must be running. + * + * \param elapsed_time_seconds Pointer to write elapsed time into. + * + * \return kTvmErrorNoError if successful; a descriptive error code otherwise. + */ +tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds); + #ifdef __cplusplus } // extern "C" #endif diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 0f2f09a83652..717b6e480671 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -23,6 +23,7 @@ from ..error import register_error from .._ffi import get_global_func from ..contrib import graph_runtime +from ..contrib.debugger import debug_runtime from ..rpc import RPCSession from .transport import IoTimeoutError from .transport import TransportLogger diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index 299f2826f7d7..6ecc60a93dec 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -420,6 +420,15 @@ int TVMFuncFree(TVMFunctionHandle func) { API_END(); } +int TVMByteArrayFree(TVMByteArray* arr) { + if (arr == &TVMAPIRuntimeStore::Get()->ret_bytes) { + return 0; // Thread-local storage does not need explicit deleting. + } + + delete arr; + return 0; +} + int TVMFuncCall(TVMFunctionHandle func, TVMValue* args, int* arg_type_codes, int num_args, TVMValue* ret_val, int* ret_type_code) { API_BEGIN(); diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c index ac2b99a2c783..079197bc7507 100644 --- a/src/runtime/crt/common/crt_runtime_api.c +++ b/src/runtime/crt/common/crt_runtime_api.c @@ -110,6 +110,9 @@ static const TVMModule* registered_modules[TVM_CRT_MAX_REGISTERED_MODULES]; /*! \brief Passed as `module_index` to EncodeFunctionHandle. */ static const tvm_module_index_t kGlobalFuncModuleIndex = TVM_CRT_MAX_REGISTERED_MODULES; +/*! \brief Special module handle for retur values from RPCTimeEvaluator. */ +static const tvm_module_index_t kTimeEvaluatorModuleIndex = 0x7fff; + static int DecodeModuleHandle(TVMModuleHandle handle, tvm_module_index_t* out_module_index) { tvm_module_index_t module_index; @@ -185,13 +188,15 @@ static int DecodeFunctionHandle(TVMFunctionHandle handle, tvm_module_index_t* mo (tvm_module_index_t)(((uintptr_t)handle) >> (sizeof(tvm_function_index_t) * 8)); unvalidated_module_index &= ~0x8000; - if (unvalidated_module_index > kGlobalFuncModuleIndex) { - TVMAPIErrorf("invalid module handle: index=%08x", unvalidated_module_index); - return -1; - } else if (unvalidated_module_index < kGlobalFuncModuleIndex && - registered_modules[unvalidated_module_index] == NULL) { - TVMAPIErrorf("unregistered module: index=%08x", unvalidated_module_index); - return -1; + if (unvalidated_module_index != kTimeEvaluatorModuleIndex) { + if (unvalidated_module_index > kGlobalFuncModuleIndex) { + TVMAPIErrorf("invalid module handle: index=%08x", unvalidated_module_index); + return -1; + } else if (unvalidated_module_index < kGlobalFuncModuleIndex && + registered_modules[unvalidated_module_index] == NULL) { + TVMAPIErrorf("unregistered module: index=%08x", unvalidated_module_index); + return -1; + } } *function_index = ((uint32_t)((uintptr_t)handle)) & ~0x8000; @@ -199,6 +204,20 @@ static int DecodeFunctionHandle(TVMFunctionHandle handle, tvm_module_index_t* mo return 0; } +int TVMByteArrayFree(TVMByteArray* arr) { + DLContext ctx = {kDLCPU, 0}; + int to_return = TVMPlatformMemoryFree((void*)arr->data, ctx); + if (to_return != 0) { + return to_return; + } + + return TVMPlatformMemoryFree((void*)arr, ctx); +} + +tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue* args, + int* type_codes, int num_args, TVMValue* ret_val, + int* ret_type_code); + int TVMFuncCall(TVMFunctionHandle func_handle, TVMValue* arg_values, int* type_codes, int num_args, TVMValue* ret_val, int* ret_type_code) { tvm_module_index_t module_index; @@ -211,7 +230,10 @@ int TVMFuncCall(TVMFunctionHandle func_handle, TVMValue* arg_values, int* type_c return -1; } - if (module_index == kGlobalFuncModuleIndex) { + if (module_index == kTimeEvaluatorModuleIndex) { + return RunTimeEvaluator(function_index, arg_values, type_codes, num_args, ret_val, + ret_type_code); + } else if (module_index == kGlobalFuncModuleIndex) { resource_handle = NULL; registry = &global_func_registry.registry; } else { @@ -315,6 +337,8 @@ int TVMFuncFree(TVMFunctionHandle func) { return 0; } +int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, + int* ret_type_code); tvm_crt_error_t TVMInitializeRuntime() { int idx = 0; tvm_crt_error_t error = kTvmErrorNoError; @@ -351,6 +375,10 @@ tvm_crt_error_t TVMInitializeRuntime() { error = TVMFuncRegisterGlobal("tvm.rpc.server.ModuleGetFunction", &ModuleGetFunction, 0); } + if (error == kTvmErrorNoError) { + error = TVMFuncRegisterGlobal("runtime.RPCTimeEvaluator", &RPCTimeEvaluator, 0); + } + if (error != kTvmErrorNoError) { TVMPlatformMemoryFree(registry_backing_memory, ctx); TVMPlatformMemoryFree(func_registry_memory, ctx); @@ -358,3 +386,109 @@ tvm_crt_error_t TVMInitializeRuntime() { return error; } + +typedef struct { + uint16_t function_index; + TVMFunctionHandle func_to_time; + TVMContext ctx; + int number; + int repeat; + int min_repeat_ms; +} time_evaluator_state_t; + +static time_evaluator_state_t g_time_evaluator_state; + +int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, + int* ret_type_code) { + ret_val[0].v_handle = NULL; + ret_type_code[0] = kTVMNullptr; + if (num_args < 8) { + TVMAPIErrorf("not enough args"); + return kTvmErrorFunctionCallNumArguments; + } + if (type_codes[0] != kTVMModuleHandle || type_codes[1] != kTVMStr || + type_codes[2] != kTVMArgInt || type_codes[3] != kTVMArgInt || type_codes[4] != kTVMArgInt || + type_codes[5] != kTVMArgInt || type_codes[6] != kTVMArgInt || type_codes[7] != kTVMStr) { + TVMAPIErrorf("one or more invalid arg types"); + return kTvmErrorFunctionCallWrongArgType; + } + + TVMModuleHandle mod = (TVMModuleHandle)args[0].v_handle; + const char* name = args[1].v_str; + g_time_evaluator_state.ctx.device_type = args[2].v_int64; + g_time_evaluator_state.ctx.device_id = args[3].v_int64; + g_time_evaluator_state.number = args[4].v_int64; + g_time_evaluator_state.repeat = args[5].v_int64; + g_time_evaluator_state.min_repeat_ms = args[6].v_int64; + + int ret_code = + TVMModGetFunction(mod, name, /* query_imports */ 0, &g_time_evaluator_state.func_to_time); + if (ret_code != 0) { + return ret_code; + } + + g_time_evaluator_state.function_index++; + ret_val[0].v_handle = + EncodeFunctionHandle(kTimeEvaluatorModuleIndex, g_time_evaluator_state.function_index); + ret_type_code[0] = kTVMPackedFuncHandle; + return kTvmErrorNoError; +} + +tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue* args, + int* type_codes, int num_args, TVMValue* ret_val, + int* ret_type_code) { + if (function_index != g_time_evaluator_state.function_index) { + return kTvmErrorTimeEvaluatorBadHandle; + } + + // TODO(areusch): should *really* rethink needing to return doubles + DLContext result_byte_ctx = {kDLCPU, 0}; + TVMByteArray* result_byte_arr; + tvm_crt_error_t err = + TVMPlatformMemoryAllocate(sizeof(TVMByteArray), result_byte_ctx, (void*)&result_byte_arr); + if (err != kTvmErrorNoError) { + return err; + } + size_t data_size = sizeof(double) * g_time_evaluator_state.repeat; + err = TVMPlatformMemoryAllocate(data_size, result_byte_ctx, (void*)&result_byte_arr->data); + if (err != kTvmErrorNoError) { + return err; + } + result_byte_arr->size = data_size; + double min_repeat_seconds = ((double)g_time_evaluator_state.min_repeat_ms) / 1000; + double* iter = (double*)result_byte_arr->data; + for (int i = 0; i < g_time_evaluator_state.repeat; i++) { + double repeat_res_seconds = 0.0; + int exec_count = 0; + // do-while structure ensures we run even when `min_repeat_ms` isn't set (i.e., is 0). + do { + tvm_crt_error_t ret_code = TVMPlatformTimerStart(); + if (ret_code != kTvmErrorNoError) { + return ret_code; + } + + for (int j = 0; j < g_time_evaluator_state.number; j++) { + ret_code = TVMFuncCall(g_time_evaluator_state.func_to_time, args, type_codes, num_args, + ret_val, ret_type_code); + if (ret_code != 0) { + return ret_code; + } + } + exec_count += g_time_evaluator_state.number; + + double curr_res_seconds; + ret_code = TVMPlatformTimerStop(&curr_res_seconds); + if (ret_code != kTvmErrorNoError) { + return ret_code; + } + repeat_res_seconds += curr_res_seconds; + } while (repeat_res_seconds < min_repeat_seconds); + double mean_exec_seconds = repeat_res_seconds / exec_count; + *iter = mean_exec_seconds; + iter++; + } + + *ret_type_code = kTVMBytes; + ret_val->v_handle = result_byte_arr; + return kTvmErrorNoError; +} diff --git a/src/runtime/crt/host/crt_config.h b/src/runtime/crt/host/crt_config.h index 689189629542..109abaf04083 100644 --- a/src/runtime/crt/host/crt_config.h +++ b/src/runtime/crt/host/crt_config.h @@ -43,7 +43,7 @@ #define TVM_CRT_MAX_REGISTERED_MODULES 2 /*! Size of the global function registry, in bytes. */ -#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 200 +#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 256 /*! Maximum packet size, in bytes, including the length header. */ #define TVM_CRT_MAX_PACKET_SIZE_BYTES 64000 diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index ba43e8444f9d..7db17f50ccbf 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -68,29 +68,30 @@ tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx) { return memory_manager->Free(memory_manager, ptr, ctx); } -high_resolution_clock::time_point g_utvm_start_time; +steady_clock::time_point g_utvm_start_time; int g_utvm_timer_running = 0; -int TVMPlatformTimerStart() { +tvm_crt_error_t TVMPlatformTimerStart() { if (g_utvm_timer_running) { std::cerr << "timer already running" << std::endl; - return -1; + return kTvmErrorPlatformTimerBadState; } - g_utvm_start_time = high_resolution_clock::now(); + g_utvm_start_time = std::chrono::steady_clock::now(); g_utvm_timer_running = 1; - return 0; + return kTvmErrorNoError; } -int TVMPlatformTimerStop(double* res_us) { +tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { if (!g_utvm_timer_running) { std::cerr << "timer not running" << std::endl; - return -1; + return kTvmErrorPlatformTimerBadState; } - auto utvm_stop_time = high_resolution_clock::now(); - duration time_span(utvm_stop_time - g_utvm_start_time); - *res_us = time_span.count(); + auto utvm_stop_time = std::chrono::steady_clock::now(); + std::chrono::microseconds time_span = + std::chrono::duration_cast(utvm_stop_time - g_utvm_start_time); + *elapsed_time_seconds = static_cast(time_span.count()) / 1e6; g_utvm_timer_running = 0; - return 0; + return kTvmErrorNoError; } } diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph/debug/graph_runtime_debug.cc index d02a6d9a0d64..3353c117318b 100644 --- a/src/runtime/graph/debug/graph_runtime_debug.cc +++ b/src/runtime/graph/debug/graph_runtime_debug.cc @@ -58,56 +58,106 @@ class GraphRuntimeDebug : public GraphRuntime { std::string RunIndividual(int number, int repeat, int min_repeat_ms) { // warmup run GraphRuntime::Run(); - std::ostringstream os; + std::string tkey = module_->type_key(); std::vector time_per_op(op_execs_.size(), 0); - for (int i = 0; i < repeat; ++i) { - std::chrono::time_point tbegin, - tend; - double duration_ms = 0.0; - do { - std::fill(time_per_op.begin(), time_per_op.end(), 0); - if (duration_ms > 0.0) { - number = static_cast(std::max((min_repeat_ms / (duration_ms / number) + 1), - number * 1.618)); // 1.618 is chosen by random - } - tbegin = std::chrono::high_resolution_clock::now(); - for (int k = 0; k < number; k++) { - for (size_t index = 0; index < op_execs_.size(); ++index) { - if (op_execs_[index]) { - const TVMContext& ctx = data_entry_[entry_id(index, 0)]->ctx; - auto op_tbegin = std::chrono::high_resolution_clock::now(); - op_execs_[index](); - TVMSynchronize(ctx.device_type, ctx.device_id, nullptr); - auto op_tend = std::chrono::high_resolution_clock::now(); - double op_duration = - std::chrono::duration_cast >(op_tend - op_tbegin) - .count(); - time_per_op[index] += op_duration * 1e6; // us + if (tkey == "rpc") { + // RPC modules rely on remote timing which implements the logic from the else branch. + for (size_t index = 0; index < op_execs_.size(); ++index) { + time_per_op[index] += RunOpRPC(index, number, repeat, min_repeat_ms); + } + } else { + for (int i = 0; i < repeat; ++i) { + std::chrono::time_point + tbegin, tend; + double duration_ms = 0.0; + do { + std::fill(time_per_op.begin(), time_per_op.end(), 0); + if (duration_ms > 0.0) { + number = static_cast(std::max((min_repeat_ms / (duration_ms / number) + 1), + number * 1.618)); // 1.618 is chosen by random + } + tbegin = std::chrono::high_resolution_clock::now(); + for (int k = 0; k < number; k++) { + for (size_t index = 0; index < op_execs_.size(); ++index) { + if (op_execs_[index]) { + time_per_op[index] += RunOpHost(index); + } } } - } - tend = std::chrono::high_resolution_clock::now(); - duration_ms = - std::chrono::duration_cast >(tend - tbegin).count() * - 1000; - } while (duration_ms < min_repeat_ms); - - LOG(INFO) << "Iteration: " << i; - int op = 0; - for (size_t index = 0; index < time_per_op.size(); index++) { - if (op_execs_[index]) { - time_per_op[index] /= number; - LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": " << time_per_op[index] - << " us/iter"; + tend = std::chrono::high_resolution_clock::now(); + duration_ms = + std::chrono::duration_cast >(tend - tbegin).count() * + 1000; + } while (duration_ms < min_repeat_ms); + + LOG(INFO) << "Iteration: " << i; + int op = 0; + for (size_t index = 0; index < time_per_op.size(); index++) { + if (op_execs_[index]) { + time_per_op[index] /= number; + LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": " << time_per_op[index] + << " us/iter"; + } } } } + + std::ostringstream os; for (size_t index = 0; index < time_per_op.size(); index++) { os << time_per_op[index] << ","; } return os.str(); } + double RunOpRPC(int index, int number, int repeat, int min_repeat_ms) { + const TVMContext& ctx = data_entry_[entry_id(index, 0)]->ctx; + TVMOpParam param = nodes_[index].param; + std::string name = param.func_name; + uint32_t num_inputs = param.num_inputs; + uint32_t num_outputs = param.num_outputs; + + PackedFunc time_eval = runtime::Registry::Get("runtime.RPCTimeEvaluator") + -> + operator()(module_, name, static_cast(ctx.device_type), + ctx.device_id, number, repeat, min_repeat_ms, ""); + + int num_flat_args = num_inputs + num_outputs; + std::unique_ptr values(new TVMValue[num_flat_args]); + std::unique_ptr type_codes(new int[num_flat_args]); + TVMArgsSetter setter(values.get(), type_codes.get()); + int offs = 0; + const auto& inode = nodes_[index]; + for (const auto& e : inode.inputs) { + uint32_t eid = this->entry_id(e); + DLTensor* arg = const_cast(data_entry_[eid].operator->()); + setter(offs, arg); + offs++; + } + for (uint32_t i = 0; i < num_outputs; ++i) { + uint32_t eid = this->entry_id(index, i); + DLTensor* arg = const_cast(data_entry_[eid].operator->()); + setter(offs, arg); + offs++; + } + TVMRetValue rv; + time_eval.CallPacked(TVMArgs(values.get(), type_codes.get(), num_flat_args), &rv); + std::string results = rv.operator std::string(); + const double* results_arr = reinterpret_cast(results.data()); + LOG(INFO) << "Got op timing: " << results_arr[0]; + return results_arr[0]; + } + + double RunOpHost(int index) { + auto op_tbegin = std::chrono::high_resolution_clock::now(); + op_execs_[index](); + const TVMContext& ctx = data_entry_[entry_id(index, 0)]->ctx; + TVMSynchronize(ctx.device_type, ctx.device_id, nullptr); + auto op_tend = std::chrono::high_resolution_clock::now(); + double op_duration = + std::chrono::duration_cast >(op_tend - op_tbegin).count(); + return op_duration; + } + /*! * \brief Run each operation and get the output. * \param index The index of op which needs to be returned. diff --git a/src/runtime/minrpc/minrpc_server.h b/src/runtime/minrpc/minrpc_server.h index 62f7236b8e2a..d28e0c396e36 100644 --- a/src/runtime/minrpc/minrpc_server.h +++ b/src/runtime/minrpc/minrpc_server.h @@ -156,6 +156,7 @@ class MinRPCServer { } else if (rv_tcode == kTVMBytes) { ret_tcode[1] = kTVMBytes; this->ReturnPackedSeq(ret_value, ret_tcode, 2); + TVMByteArrayFree(reinterpret_cast(ret_value[1].v_handle)); // NOLINT(*) } else if (rv_tcode == kTVMPackedFuncHandle || rv_tcode == kTVMModuleHandle) { ret_tcode[1] = kTVMOpaqueHandle; this->ReturnPackedSeq(ret_value, ret_tcode, 2); diff --git a/tests/micro/qemu/test_zephyr.py b/tests/micro/qemu/test_zephyr.py index 2213203d55c1..ccd8e3dd70c1 100644 --- a/tests/micro/qemu/test_zephyr.py +++ b/tests/micro/qemu/test_zephyr.py @@ -143,5 +143,32 @@ def test_basic_add(sess): test_basic_add(sess) +def test_platform_timer(platform): + """Test compiling the on-device runtime.""" + + model, zephyr_board = PLATFORMS[platform] + + # NOTE: run test in a nested function so cPython will delete arrays before closing the session. + def test_basic_add(sess): + A_data = tvm.nd.array(np.array([2, 3], dtype="int8"), ctx=sess.context) + assert (A_data.asnumpy() == np.array([2, 3])).all() + B_data = tvm.nd.array(np.array([4], dtype="int8"), ctx=sess.context) + assert (B_data.asnumpy() == np.array([4])).all() + C_data = tvm.nd.array(np.array([0, 0], dtype="int8"), ctx=sess.context) + assert (C_data.asnumpy() == np.array([0, 0])).all() + + system_lib = sess.get_system_lib() + time_eval_f = system_lib.time_evaluator( + "add", sess.context, number=20, repeat=3, min_repeat_ms=40 + ) + result = time_eval_f(A_data, B_data, C_data) + assert (C_data.asnumpy() == np.array([6, 7])).all() + assert result.mean > 0 + assert len(result.results) == 3 + + with _make_add_sess(model, zephyr_board) as sess: + test_basic_add(sess) + + if __name__ == "__main__": sys.exit(pytest.main([os.path.dirname(__file__)] + sys.argv[1:])) diff --git a/tests/micro/qemu/zephyr-runtime/src/main.c b/tests/micro/qemu/zephyr-runtime/src/main.c index 86b2b273647f..9d10504dcbed 100644 --- a/tests/micro/qemu/zephyr-runtime/src/main.c +++ b/tests/micro/qemu/zephyr-runtime/src/main.c @@ -99,10 +99,10 @@ int g_utvm_timer_running = 0; static struct device* led_pin; #endif // CONFIG_LED -int TVMPlatformTimerStart() { +tvm_crt_error_t TVMPlatformTimerStart() { if (g_utvm_timer_running) { TVMLogf("timer already running"); - return -1; + return kTvmErrorPlatformTimerBadState; } #ifdef CONFIG_LED @@ -111,13 +111,13 @@ int TVMPlatformTimerStart() { k_timer_start(&g_utvm_timer, TIME_TIL_EXPIRY, TIME_TIL_EXPIRY); g_utvm_start_time = k_cycle_get_32(); g_utvm_timer_running = 1; - return 0; + return kTvmErrorNoError; } -int TVMPlatformTimerStop(double* res_us) { +tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { if (!g_utvm_timer_running) { TVMLogf("timer not running"); - return -1; + return kTvmErrorPlatformTimerBadState; } uint32_t stop_time = k_cycle_get_32(); @@ -134,7 +134,7 @@ int TVMPlatformTimerStop(double* res_us) { } uint32_t ns_spent = (uint32_t)k_cyc_to_ns_floor64(cycles_spent); - double hw_clock_res_us = ns_spent / 1000.0; + double hw_clock_elapsed_seconds = ns_spent / 1e9; // need to grab time remaining *before* stopping. when stopped, this function // always returns 0. @@ -152,13 +152,13 @@ int TVMPlatformTimerStop(double* res_us) { // if we approach the limits of the HW clock datatype (uint32_t), use the // coarse-grained timer result instead if (approx_num_cycles > (0.5 * (~((uint32_t)0)))) { - *res_us = timer_res_ms * 1000.0; + *elapsed_time_seconds = timer_res_ms / 1e3; } else { - *res_us = hw_clock_res_us; + *elapsed_time_seconds = hw_clock_elapsed_seconds; } g_utvm_timer_running = 0; - return 0; + return kTvmErrorNoError; } #define RING_BUF_SIZE 512 diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py index 07a4cfcd8b7d..b1bc181348e7 100644 --- a/tests/python/unittest/test_crt.py +++ b/tests/python/unittest/test_crt.py @@ -25,8 +25,10 @@ import textwrap import numpy as np +import pytest import tvm +import tvm.testing import tvm.relay import tvm.testing @@ -173,8 +175,27 @@ def test_std_math_functions(): np.testing.assert_allclose(B_data.asnumpy(), np.array([7.389056, 20.085537])) +@tvm.testing.requires_micro +def test_platform_timer(): + """Verify the platform timer can be used to time remote functions.""" + import tvm.micro + + workspace = tvm.micro.Workspace() + A = tvm.te.placeholder((2,), dtype="float32", name="A") + B = tvm.te.compute(A.shape, lambda i: tvm.te.exp(A[i]), name="B") + s = tvm.te.create_schedule(B.op) + + with _make_sess_from_op(workspace, "myexpf", s, [A, B]) as sess: + A_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), ctx=sess.context) + B_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), ctx=sess.context) + lib = sess.get_system_lib() + time_eval_f = lib.time_evaluator( + "myexpf", sess.context, number=2000, repeat=3, min_repeat_ms=40 + ) + result = time_eval_f(A_data, B_data) + assert result.mean > 0 + assert len(result.results) == 3 + + if __name__ == "__main__": - test_compile_runtime() - test_reset() - test_graph_runtime() - test_std_math_functions() + sys.exit(pytest.main([__file__] + sys.argv[1:]))