From 75c1c0adcffb4d6c9ff55718a0d798cc1d3f936c Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Tue, 30 Mar 2021 13:16:50 -0700 Subject: [PATCH 01/12] fix rpc for microtvm --- src/runtime/crt/common/crt_runtime_api.c | 24 ++++++++++-- src/runtime/crt/host/crt_config.h | 5 ++- src/runtime/crt/host/main.cc | 2 +- src/runtime/rpc/rpc_endpoint.cc | 47 ++++++++++++++++++++---- src/runtime/rpc/rpc_endpoint.h | 3 ++ 5 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c index c8044b49a8d0..c53c8cad8119 100644 --- a/src/runtime/crt/common/crt_runtime_api.c +++ b/src/runtime/crt/common/crt_runtime_api.c @@ -298,8 +298,14 @@ static tvm_crt_error_t FindFunctionOrSetAPIError(tvm_module_index_t module_index } int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out) { - return FindFunctionOrSetAPIError(kGlobalFuncModuleIndex, &global_func_registry.registry, name, - out); + tvm_crt_error_t to_return = + FindFunctionOrSetAPIError(kGlobalFuncModuleIndex, &global_func_registry.registry, name, out); + // For compatibility with C++ + if (to_return == kTvmErrorFunctionNameNotFound) { + *out = NULL; + to_return = kTvmErrorNoError; + } + return to_return; } int TVMModGetFunction(TVMModuleHandle mod, const char* func_name, int query_imports, @@ -343,7 +349,6 @@ int ModuleGetFunction(TVMValue* args, int* type_codes, int num_args, TVMValue* r if (to_return == kTvmErrorFunctionNameNotFound) { to_return = kTvmErrorNoError; } - return to_return; } @@ -372,6 +377,15 @@ int TVMFuncFree(TVMFunctionHandle func) { int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, int* ret_type_code); + +// Sends maximum transfer size for RPC. +int RPCGetTransferMaxSize(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_value, + int* ret_type_codes) { + ret_value[0].v_int64 = TVM_CRT_RPC_MAX_TRANSFER_SIZE_BYTES; + ret_type_codes[0] = kTVMArgInt; + return 0; +} + tvm_crt_error_t TVMInitializeRuntime() { int idx = 0; tvm_crt_error_t error = kTvmErrorNoError; @@ -412,6 +426,10 @@ tvm_crt_error_t TVMInitializeRuntime() { error = TVMFuncRegisterGlobal("runtime.RPCTimeEvaluator", &RPCTimeEvaluator, 0); } + if (error == kTvmErrorNoError) { + error = TVMFuncRegisterGlobal("tvm.rpc.server.GetTransferMaxSize", &RPCGetTransferMaxSize, 0); + } + if (error != kTvmErrorNoError) { TVMPlatformMemoryFree(registry_backing_memory, dev); TVMPlatformMemoryFree(func_registry_memory, dev); diff --git a/src/runtime/crt/host/crt_config.h b/src/runtime/crt/host/crt_config.h index 109abaf04083..e6987d96bb84 100644 --- a/src/runtime/crt/host/crt_config.h +++ b/src/runtime/crt/host/crt_config.h @@ -46,11 +46,14 @@ #define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 256 /*! Maximum packet size, in bytes, including the length header. */ -#define TVM_CRT_MAX_PACKET_SIZE_BYTES 64000 +#define TVM_CRT_MAX_PACKET_SIZE_BYTES 8 * 1024 /*! \brief Maximum length of a PackedFunc function name. */ #define TVM_CRT_MAX_FUNCTION_NAME_LENGTH_BYTES 30 +/*! Size of the global function for max RPC transfer, in bytes. */ +#define TVM_CRT_RPC_MAX_TRANSFER_SIZE_BYTES 2048 + // #define TVM_CRT_FRAMER_ENABLE_LOGS #endif // TVM_RUNTIME_CRT_HOST_CRT_CONFIG_H_ diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index e64455417928..c56d3fb3768a 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -110,7 +110,7 @@ tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { } } -uint8_t memory[512 * 1024]; +uint8_t memory[2048 * 1024]; static char** g_argv = NULL; diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index b5768146b3f7..48e403384f33 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -330,7 +330,7 @@ class RPCEndpoint::EventHandler : public dmlc::Stream { } /*! - * \brief Recive incoming packed seq from the stream. + * \brief Receive incoming packed seq from the stream. * \return The received argments. * \note The TVMArgs is available until we switchstate. */ @@ -369,7 +369,6 @@ class RPCEndpoint::EventHandler : public dmlc::Stream { */ void HandleReturn(RPCCode code, RPCSession::FEncodeReturn setreturn) { TVMArgs args = RecvPackedSeq(); - if (code == RPCCode::kException) { // switch to the state before sending exception. this->SwitchToState(kRecvPacketNumBytes); @@ -801,14 +800,14 @@ void RPCEndpoint::CopyToRemote(void* from_bytes, DLTensor* to, uint64_t nbytes) std::lock_guard lock(mutex_); RPCCode code = RPCCode::kCopyToRemote; - uint64_t num_data_bytes = static_cast(GetDataSize(*to)); - ICHECK_EQ(nbytes, num_data_bytes); + uint64_t tensor_max_size_bytes = static_cast(GetDataSize(*to)); + ICHECK_LE(to->byte_offset + nbytes, tensor_max_size_bytes) << "Overflow in tensor size."; - uint64_t to_data = reinterpret_cast(to->data); + uint64_t to_data = reinterpret_cast(static_cast(to->data) + to->byte_offset); uint64_t shape_bytes = to->ndim * sizeof(int64_t); uint64_t packet_nbytes = sizeof(code) + sizeof(to_data) + sizeof(to->device) + sizeof(to->ndim) + sizeof(to->dtype) + sizeof(to->byte_offset) + shape_bytes + - sizeof(nbytes) + num_data_bytes; + sizeof(nbytes) + nbytes; handler_->Write(packet_nbytes); handler_->Write(code); @@ -968,7 +967,10 @@ class RPCClientSession : public RPCSession, public DeviceAPI { /*! * \brief param endpoint The client endpoint of the session. */ - explicit RPCClientSession(std::shared_ptr endpoint) : endpoint_(endpoint) {} + explicit RPCClientSession(std::shared_ptr endpoint) : endpoint_(endpoint) { + // update max transfer size if not set already. + SetRPCMaxTransferSize(); + } // function overrides PackedFuncHandle GetFunction(const std::string& name) final { @@ -981,7 +983,20 @@ class RPCClientSession : public RPCSession, public DeviceAPI { } void CopyToRemote(void* local_from_bytes, DLTensor* remote_to, uint64_t nbytes) final { - endpoint_->CopyToRemote(local_from_bytes, remote_to, nbytes); + uint64_t block_size = (uint64_t)rpc_chunk_max_size_bytes_; + uint64_t block_count = 0; + uint64_t num_blocks = nbytes / block_size; + + for (block_count = 0; block_count < num_blocks; block_count++) { + remote_to->byte_offset = block_count * block_size; + endpoint_->CopyToRemote(local_from_bytes, remote_to, block_size); + } + + uint64_t remainder_bytes = nbytes % block_size; + if (remainder_bytes != 0) { + remote_to->byte_offset = block_count * block_size; + endpoint_->CopyToRemote(local_from_bytes, remote_to, remainder_bytes); + } } void CopyFromRemote(DLTensor* remote_from, void* local_to_bytes, uint64_t nbytes) final { @@ -1042,7 +1057,23 @@ class RPCClientSession : public RPCSession, public DeviceAPI { bool IsLocalSession() const final { return false; } private: + void RPCMaxTransferRemoteReturnValue(TVMArgs args) { + // Use args[1] as return value, args[0] is tcode + rpc_chunk_max_size_bytes_ = (int64_t)args[1]; + } + + void SetRPCMaxTransferSize() { + PackedFuncHandle rpc_func = GetFunction("tvm.rpc.server.GetTransferMaxSize"); + if (rpc_func == nullptr) { + rpc_chunk_max_size_bytes_ = kRPCMaxTransferSizeDefault; + return; + } + CallFunc(rpc_func, nullptr, nullptr, 0, + [this](TVMArgs args) { RPCMaxTransferRemoteReturnValue(args); }); + } + std::shared_ptr endpoint_; + int64_t rpc_chunk_max_size_bytes_; }; std::shared_ptr CreateClientSession(std::shared_ptr endpoint) { diff --git a/src/runtime/rpc/rpc_endpoint.h b/src/runtime/rpc/rpc_endpoint.h index cd3c9b2bec72..1fcdcf6400ac 100644 --- a/src/runtime/rpc/rpc_endpoint.h +++ b/src/runtime/rpc/rpc_endpoint.h @@ -48,6 +48,9 @@ const int kRPCSuccess = kRPCMagic + 0; // cannot found matched key in server const int kRPCMismatch = kRPCMagic + 2; +// When tvm.rpc.server.GetTransferMaxSize global function is not registered. +const int kRPCMaxTransferSizeDefault = 128000; + /*! \brief Enumeration code for the RPC tracker */ enum class TrackerCode : int { kFail = -1, From b51d0f83a7e071d698019e4803390b0eaa290fc0 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Wed, 14 Apr 2021 17:15:34 -0700 Subject: [PATCH 02/12] apply feedbacks --- src/runtime/crt/common/crt_runtime_api.c | 14 +++--- src/runtime/crt/host/crt_config.h | 3 -- src/runtime/crt/host/main.cc | 2 +- src/runtime/rpc/rpc_endpoint.cc | 55 +++++++++++++----------- src/runtime/rpc/rpc_endpoint.h | 13 +++++- 5 files changed, 50 insertions(+), 37 deletions(-) diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c index c53c8cad8119..93d694e5e81c 100644 --- a/src/runtime/crt/common/crt_runtime_api.c +++ b/src/runtime/crt/common/crt_runtime_api.c @@ -300,7 +300,7 @@ static tvm_crt_error_t FindFunctionOrSetAPIError(tvm_module_index_t module_index int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out) { tvm_crt_error_t to_return = FindFunctionOrSetAPIError(kGlobalFuncModuleIndex, &global_func_registry.registry, name, out); - // For compatibility with C++ + // For compatibility with the C++ runtime equivalent, in src/runtime/registry.cc. if (to_return == kTvmErrorFunctionNameNotFound) { *out = NULL; to_return = kTvmErrorNoError; @@ -378,10 +378,12 @@ int TVMFuncFree(TVMFunctionHandle func) { int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val, int* ret_type_code); -// Sends maximum transfer size for RPC. -int RPCGetTransferMaxSize(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_value, - int* ret_type_codes) { - ret_value[0].v_int64 = TVM_CRT_RPC_MAX_TRANSFER_SIZE_BYTES; +// Sends CRT max packet size. +int RPCGetCRTMaxPacketSize(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_value, + int* ret_type_codes) { + // 11 bytes is for microtvm overhead: + // packet start(2), length(4), session header(3), crc(2) + ret_value[0].v_int64 = TVM_CRT_MAX_PACKET_SIZE_BYTES - 11; ret_type_codes[0] = kTVMArgInt; return 0; } @@ -427,7 +429,7 @@ tvm_crt_error_t TVMInitializeRuntime() { } if (error == kTvmErrorNoError) { - error = TVMFuncRegisterGlobal("tvm.rpc.server.GetTransferMaxSize", &RPCGetTransferMaxSize, 0); + error = TVMFuncRegisterGlobal("tvm.rpc.server.GetCRTMaxPacketSize", &RPCGetCRTMaxPacketSize, 0); } if (error != kTvmErrorNoError) { diff --git a/src/runtime/crt/host/crt_config.h b/src/runtime/crt/host/crt_config.h index e6987d96bb84..1644d3251057 100644 --- a/src/runtime/crt/host/crt_config.h +++ b/src/runtime/crt/host/crt_config.h @@ -51,9 +51,6 @@ /*! \brief Maximum length of a PackedFunc function name. */ #define TVM_CRT_MAX_FUNCTION_NAME_LENGTH_BYTES 30 -/*! Size of the global function for max RPC transfer, in bytes. */ -#define TVM_CRT_RPC_MAX_TRANSFER_SIZE_BYTES 2048 - // #define TVM_CRT_FRAMER_ENABLE_LOGS #endif // TVM_RUNTIME_CRT_HOST_CRT_CONFIG_H_ diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index c56d3fb3768a..07bc6d15afc8 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -110,7 +110,7 @@ tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { } } -uint8_t memory[2048 * 1024]; +uint8_t memory[1024 * 1024]; static char** g_argv = NULL; diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index 48e403384f33..40db8e33c2e8 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -800,14 +800,13 @@ void RPCEndpoint::CopyToRemote(void* from_bytes, DLTensor* to, uint64_t nbytes) std::lock_guard lock(mutex_); RPCCode code = RPCCode::kCopyToRemote; - uint64_t tensor_max_size_bytes = static_cast(GetDataSize(*to)); - ICHECK_LE(to->byte_offset + nbytes, tensor_max_size_bytes) << "Overflow in tensor size."; + uint64_t tensor_total_size_bytes = static_cast(GetDataSize(*to)); + ICHECK_LE(to->byte_offset + nbytes, tensor_total_size_bytes) + << "Overflow in tensor size: (" << to->byte_offset << ", " << nbytes << ", " + << tensor_total_size_bytes << ")"; - uint64_t to_data = reinterpret_cast(static_cast(to->data) + to->byte_offset); - uint64_t shape_bytes = to->ndim * sizeof(int64_t); - uint64_t packet_nbytes = sizeof(code) + sizeof(to_data) + sizeof(to->device) + sizeof(to->ndim) + - sizeof(to->dtype) + sizeof(to->byte_offset) + shape_bytes + - sizeof(nbytes) + nbytes; + uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(to, code, nbytes); + uint64_t packet_nbytes = overhead + nbytes; handler_->Write(packet_nbytes); handler_->Write(code); @@ -824,11 +823,8 @@ void RPCEndpoint::CopyFromRemote(DLTensor* from, void* to_bytes, uint64_t nbytes uint64_t num_data_bytes = static_cast(GetDataSize(*from)); CHECK_EQ(nbytes, num_data_bytes); - uint64_t from_data = reinterpret_cast(from->data); - uint64_t shape_bytes = from->ndim * sizeof(int64_t); - uint64_t packet_nbytes = sizeof(code) + sizeof(from_data) + sizeof(from->device) + - sizeof(from->ndim) + sizeof(from->dtype) + sizeof(from->byte_offset) + - shape_bytes + sizeof(nbytes); + uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(from, code, nbytes); + uint64_t packet_nbytes = overhead; handler_->Write(packet_nbytes); handler_->Write(code); @@ -967,10 +963,7 @@ class RPCClientSession : public RPCSession, public DeviceAPI { /*! * \brief param endpoint The client endpoint of the session. */ - explicit RPCClientSession(std::shared_ptr endpoint) : endpoint_(endpoint) { - // update max transfer size if not set already. - SetRPCMaxTransferSize(); - } + explicit RPCClientSession(std::shared_ptr endpoint) : endpoint_(endpoint) {} // function overrides PackedFuncHandle GetFunction(const std::string& name) final { @@ -983,7 +976,9 @@ class RPCClientSession : public RPCSession, public DeviceAPI { } void CopyToRemote(void* local_from_bytes, DLTensor* remote_to, uint64_t nbytes) final { - uint64_t block_size = (uint64_t)rpc_chunk_max_size_bytes_; + RPCCode code = RPCCode::kCopyToRemote; + uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_to, code, nbytes); + const uint64_t block_size = GetRPCMaxTransferSize() - overhead; uint64_t block_count = 0; uint64_t num_blocks = nbytes / block_size; @@ -1059,26 +1054,36 @@ class RPCClientSession : public RPCSession, public DeviceAPI { private: void RPCMaxTransferRemoteReturnValue(TVMArgs args) { // Use args[1] as return value, args[0] is tcode - rpc_chunk_max_size_bytes_ = (int64_t)args[1]; + rpc_chunk_max_size_bytes_ = (uint64_t)args[1]; } - void SetRPCMaxTransferSize() { - PackedFuncHandle rpc_func = GetFunction("tvm.rpc.server.GetTransferMaxSize"); + uint64_t GetRPCMaxTransferSize() { + PackedFuncHandle rpc_func = GetFunction("tvm.rpc.server.GetCRTMaxPacketSize"); if (rpc_func == nullptr) { - rpc_chunk_max_size_bytes_ = kRPCMaxTransferSizeDefault; - return; + rpc_chunk_max_size_bytes_ = kRPCMaxTransferSizeBytesDefault; + } else { + CallFunc(rpc_func, nullptr, nullptr, 0, + [this](TVMArgs args) { RPCMaxTransferRemoteReturnValue(args); }); } - CallFunc(rpc_func, nullptr, nullptr, 0, - [this](TVMArgs args) { RPCMaxTransferRemoteReturnValue(args); }); + return rpc_chunk_max_size_bytes_; } std::shared_ptr endpoint_; - int64_t rpc_chunk_max_size_bytes_; + uint64_t rpc_chunk_max_size_bytes_; }; std::shared_ptr CreateClientSession(std::shared_ptr endpoint) { return std::make_shared(endpoint); } +uint64_t RemoteCopyCalculatePacketOverheadSize(DLTensor* tensor, RPCCode code, uint64_t nbytes) { + uint64_t shape_bytes = tensor->ndim * sizeof(int64_t); + uint64_t to_data = reinterpret_cast(static_cast(tensor->data)); + uint64_t overhead = sizeof(code) + sizeof(to_data) + sizeof(tensor->device) + + sizeof(tensor->ndim) + sizeof(tensor->dtype) + sizeof(tensor->byte_offset) + + shape_bytes + sizeof(nbytes); + return overhead; +} + } // namespace runtime } // namespace tvm diff --git a/src/runtime/rpc/rpc_endpoint.h b/src/runtime/rpc/rpc_endpoint.h index 1fcdcf6400ac..32a225f4dbd5 100644 --- a/src/runtime/rpc/rpc_endpoint.h +++ b/src/runtime/rpc/rpc_endpoint.h @@ -48,8 +48,8 @@ const int kRPCSuccess = kRPCMagic + 0; // cannot found matched key in server const int kRPCMismatch = kRPCMagic + 2; -// When tvm.rpc.server.GetTransferMaxSize global function is not registered. -const int kRPCMaxTransferSizeDefault = 128000; +// When tvm.rpc.server.GetCRTMaxPacketSize global function is not registered. +const uint64_t kRPCMaxTransferSizeBytesDefault = 128 * 1024; /*! \brief Enumeration code for the RPC tracker */ enum class TrackerCode : int { @@ -207,6 +207,15 @@ template inline TVMRetValue RPCEndpoint::SysCallRemote(RPCCode code, Args&&... args) { return syscall_remote_(static_cast(code), std::forward(args)...); } + +/*! + * \brief Calculates overhead size of a CopyToRemote packet. + * \param to DLTensor to copy. + * \param code RPCCode for this transfer. + * \param nbytes Number of bytes to transfer. + */ +uint64_t RemoteCopyCalculatePacketOverheadSize(DLTensor* tensor, RPCCode code, uint64_t nbytes); + } // namespace runtime } // namespace tvm #endif // TVM_RUNTIME_RPC_RPC_ENDPOINT_H_ From ba6d95732f5dff937ee8992c16a261915287bfd5 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Wed, 14 Apr 2021 22:21:06 -0700 Subject: [PATCH 03/12] bundle deploy fix --- apps/bundle_deploy/crt_config/crt_config.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/bundle_deploy/crt_config/crt_config.h b/apps/bundle_deploy/crt_config/crt_config.h index 97b6c2103f4b..11086c0e9a15 100644 --- a/apps/bundle_deploy/crt_config/crt_config.h +++ b/apps/bundle_deploy/crt_config/crt_config.h @@ -45,4 +45,7 @@ /*! Size of the global function registry, in bytes. */ #define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 200 +/*! Maximum packet size, in bytes, including the length header. */ +#define TVM_CRT_MAX_PACKET_SIZE_BYTES 512 + #endif // TVM_RUNTIME_CRT_CONFIG_H_ From d5f1afcab3a193c62dcdb7557aa8ae2b6292d3fe Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Thu, 15 Apr 2021 11:29:23 -0700 Subject: [PATCH 04/12] fix func registry size --- src/runtime/crt/host/crt_config.h | 2 +- src/runtime/crt/host/main.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/runtime/crt/host/crt_config.h b/src/runtime/crt/host/crt_config.h index 1644d3251057..b81a74eb4ae6 100644 --- a/src/runtime/crt/host/crt_config.h +++ b/src/runtime/crt/host/crt_config.h @@ -43,7 +43,7 @@ #define TVM_CRT_MAX_REGISTERED_MODULES 2 /*! Size of the global function registry, in bytes. */ -#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 256 +#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 512 /*! Maximum packet size, in bytes, including the length header. */ #define TVM_CRT_MAX_PACKET_SIZE_BYTES 8 * 1024 diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 07bc6d15afc8..9a108bc2e754 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -135,10 +135,10 @@ int main(int argc, char** argv) { CHECK_EQ(TVMGraphExecutorModule_Register(), kTvmErrorNoError, "failed to register GraphExecutor TVMModule"); #endif - - if (TVMFuncRegisterGlobal("tvm.testing.reset_server", (TVMFunctionHandle)&testonly_reset_server, - 0)) { - fprintf(stderr, "utvm runtime: internal error registering global packedfunc; exiting\n"); + + int error = TVMFuncRegisterGlobal("tvm.testing.reset_server", (TVMFunctionHandle)&testonly_reset_server, 0); + if (error) { + fprintf(stderr, "utvm runtime: internal error (error#: %d) registering global packedfunc; exiting\n", error); return 2; } From 96419ca7a71d052a7b36096a776ee90cad23ae2e Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Thu, 15 Apr 2021 16:32:53 -0700 Subject: [PATCH 05/12] mv constant --- src/runtime/minrpc/rpc_reference.h | 3 +++ src/runtime/rpc/rpc_endpoint.cc | 4 ++-- src/runtime/rpc/rpc_endpoint.h | 3 --- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/runtime/minrpc/rpc_reference.h b/src/runtime/minrpc/rpc_reference.h index e42508a73959..6f47b71279f0 100644 --- a/src/runtime/minrpc/rpc_reference.h +++ b/src/runtime/minrpc/rpc_reference.h @@ -30,6 +30,9 @@ namespace runtime { /*! \brief The current RPC procotol version. */ constexpr const char* kRPCProtocolVer = "0.8.0"; +// When tvm.rpc.server.GetCRTMaxPacketSize global function is not registered. +const uint64_t kRPCMaxTransferSizeBytesDefault = 128 * 1024; + /*! \brief The RPC code */ enum class RPCCode : int { kNone, diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index 40db8e33c2e8..a0180f482d9b 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -980,14 +980,14 @@ class RPCClientSession : public RPCSession, public DeviceAPI { uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_to, code, nbytes); const uint64_t block_size = GetRPCMaxTransferSize() - overhead; uint64_t block_count = 0; - uint64_t num_blocks = nbytes / block_size; + const uint64_t num_blocks = nbytes / block_size; for (block_count = 0; block_count < num_blocks; block_count++) { remote_to->byte_offset = block_count * block_size; endpoint_->CopyToRemote(local_from_bytes, remote_to, block_size); } - uint64_t remainder_bytes = nbytes % block_size; + const uint64_t remainder_bytes = nbytes % block_size; if (remainder_bytes != 0) { remote_to->byte_offset = block_count * block_size; endpoint_->CopyToRemote(local_from_bytes, remote_to, remainder_bytes); diff --git a/src/runtime/rpc/rpc_endpoint.h b/src/runtime/rpc/rpc_endpoint.h index 32a225f4dbd5..ac2867100a2f 100644 --- a/src/runtime/rpc/rpc_endpoint.h +++ b/src/runtime/rpc/rpc_endpoint.h @@ -48,9 +48,6 @@ const int kRPCSuccess = kRPCMagic + 0; // cannot found matched key in server const int kRPCMismatch = kRPCMagic + 2; -// When tvm.rpc.server.GetCRTMaxPacketSize global function is not registered. -const uint64_t kRPCMaxTransferSizeBytesDefault = 128 * 1024; - /*! \brief Enumeration code for the RPC tracker */ enum class TrackerCode : int { kFail = -1, From 8ec0fea3feb32aff4002b9bc775fd95ac673b1b2 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Thu, 15 Apr 2021 16:42:19 -0700 Subject: [PATCH 06/12] fix copyfromremote --- src/runtime/rpc/rpc_endpoint.cc | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index a0180f482d9b..aa6893c2ddb4 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -802,7 +802,7 @@ void RPCEndpoint::CopyToRemote(void* from_bytes, DLTensor* to, uint64_t nbytes) uint64_t tensor_total_size_bytes = static_cast(GetDataSize(*to)); ICHECK_LE(to->byte_offset + nbytes, tensor_total_size_bytes) - << "Overflow in tensor size: (" << to->byte_offset << ", " << nbytes << ", " + << "CopyToRemote: overflow in tensor size: (" << to->byte_offset << ", " << nbytes << ", " << tensor_total_size_bytes << ")"; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(to, code, nbytes); @@ -820,8 +820,10 @@ void RPCEndpoint::CopyFromRemote(DLTensor* from, void* to_bytes, uint64_t nbytes std::lock_guard lock(mutex_); RPCCode code = RPCCode::kCopyFromRemote; - uint64_t num_data_bytes = static_cast(GetDataSize(*from)); - CHECK_EQ(nbytes, num_data_bytes); + uint64_t tensor_total_size_bytes = static_cast(GetDataSize(*from)); + ICHECK_LE(from->byte_offset + nbytes, tensor_total_size_bytes) + << "CopyFromRemote: overflow in tensor size: (" << from->byte_offset << ", " << nbytes << ", " + << tensor_total_size_bytes << ")"; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(from, code, nbytes); uint64_t packet_nbytes = overhead; @@ -995,7 +997,22 @@ class RPCClientSession : public RPCSession, public DeviceAPI { } void CopyFromRemote(DLTensor* remote_from, void* local_to_bytes, uint64_t nbytes) final { - endpoint_->CopyFromRemote(remote_from, local_to_bytes, nbytes); + RPCCode code = RPCCode::kCopyFromRemote; + uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_from, code, nbytes); + const uint64_t block_size = GetRPCMaxTransferSize() - overhead; + uint64_t block_count = 0; + const uint64_t num_blocks = nbytes / block_size; + + for (block_count = 0; block_count < num_blocks; block_count++) { + remote_from->byte_offset = block_count * block_size; + endpoint_->CopyFromRemote(remote_from, local_to_bytes, block_size); + } + + const uint64_t remainder_bytes = nbytes % block_size; + if (remainder_bytes != 0) { + remote_from->byte_offset = block_count * block_size; + endpoint_->CopyFromRemote(remote_from, local_to_bytes, remainder_bytes); + } } void FreeHandle(void* handle, int type_code) final { From 10045f3327629005dfe1f97c51bce0fdb5b23ae5 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Thu, 15 Apr 2021 13:31:05 -0700 Subject: [PATCH 07/12] address comments and fix error --- src/runtime/crt/host/main.cc | 11 ++++++---- src/runtime/rpc/rpc_endpoint.cc | 39 +++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 9a108bc2e754..53a0572560e2 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -110,7 +110,7 @@ tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { } } -uint8_t memory[1024 * 1024]; +uint8_t memory[512 * 1024]; static char** g_argv = NULL; @@ -135,10 +135,13 @@ int main(int argc, char** argv) { CHECK_EQ(TVMGraphExecutorModule_Register(), kTvmErrorNoError, "failed to register GraphExecutor TVMModule"); #endif - - int error = TVMFuncRegisterGlobal("tvm.testing.reset_server", (TVMFunctionHandle)&testonly_reset_server, 0); + + int error = TVMFuncRegisterGlobal("tvm.testing.reset_server", + (TVMFunctionHandle)&testonly_reset_server, 0); if (error) { - fprintf(stderr, "utvm runtime: internal error (error#: %d) registering global packedfunc; exiting\n", error); + fprintf(stderr, + "utvm runtime: internal error (error#: %x) registering global packedfunc; exiting\n", + error); return 2; } diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index aa6893c2ddb4..51091eb3aaae 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -802,8 +802,8 @@ void RPCEndpoint::CopyToRemote(void* from_bytes, DLTensor* to, uint64_t nbytes) uint64_t tensor_total_size_bytes = static_cast(GetDataSize(*to)); ICHECK_LE(to->byte_offset + nbytes, tensor_total_size_bytes) - << "CopyToRemote: overflow in tensor size: (" << to->byte_offset << ", " << nbytes << ", " - << tensor_total_size_bytes << ")"; + << "CopyToRemote: overflow in tensor size: (byte_offset=" << to->byte_offset + << ", nbytes=" << nbytes << ", tensor_total_size=" << tensor_total_size_bytes << ")"; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(to, code, nbytes); uint64_t packet_nbytes = overhead + nbytes; @@ -822,8 +822,8 @@ void RPCEndpoint::CopyFromRemote(DLTensor* from, void* to_bytes, uint64_t nbytes uint64_t tensor_total_size_bytes = static_cast(GetDataSize(*from)); ICHECK_LE(from->byte_offset + nbytes, tensor_total_size_bytes) - << "CopyFromRemote: overflow in tensor size: (" << from->byte_offset << ", " << nbytes << ", " - << tensor_total_size_bytes << ")"; + << "CopyFromRemote: overflow in tensor size: (byte_offset=" << from->byte_offset + << ", nbytes=" << nbytes << ", tensor_total_size=" << tensor_total_size_bytes << ")"; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(from, code, nbytes); uint64_t packet_nbytes = overhead; @@ -983,16 +983,21 @@ class RPCClientSession : public RPCSession, public DeviceAPI { const uint64_t block_size = GetRPCMaxTransferSize() - overhead; uint64_t block_count = 0; const uint64_t num_blocks = nbytes / block_size; + void* from_bytes; for (block_count = 0; block_count < num_blocks; block_count++) { remote_to->byte_offset = block_count * block_size; - endpoint_->CopyToRemote(local_from_bytes, remote_to, block_size); + from_bytes = reinterpret_cast( + (reinterpret_cast(local_from_bytes) + block_count * block_size)); + endpoint_->CopyToRemote(from_bytes, remote_to, block_size); } const uint64_t remainder_bytes = nbytes % block_size; if (remainder_bytes != 0) { remote_to->byte_offset = block_count * block_size; - endpoint_->CopyToRemote(local_from_bytes, remote_to, remainder_bytes); + from_bytes = reinterpret_cast( + (reinterpret_cast(local_from_bytes) + block_count * block_size)); + endpoint_->CopyToRemote(from_bytes, remote_to, remainder_bytes); } } @@ -1002,16 +1007,21 @@ class RPCClientSession : public RPCSession, public DeviceAPI { const uint64_t block_size = GetRPCMaxTransferSize() - overhead; uint64_t block_count = 0; const uint64_t num_blocks = nbytes / block_size; + void* to_bytes; for (block_count = 0; block_count < num_blocks; block_count++) { remote_from->byte_offset = block_count * block_size; - endpoint_->CopyFromRemote(remote_from, local_to_bytes, block_size); + to_bytes = reinterpret_cast( + (reinterpret_cast(local_to_bytes) + block_count * block_size)); + endpoint_->CopyFromRemote(remote_from, to_bytes, block_size); } const uint64_t remainder_bytes = nbytes % block_size; if (remainder_bytes != 0) { remote_from->byte_offset = block_count * block_size; - endpoint_->CopyFromRemote(remote_from, local_to_bytes, remainder_bytes); + to_bytes = reinterpret_cast( + (reinterpret_cast(local_to_bytes) + block_count * block_size)); + endpoint_->CopyFromRemote(remote_from, to_bytes, remainder_bytes); } } @@ -1071,22 +1081,27 @@ class RPCClientSession : public RPCSession, public DeviceAPI { private: void RPCMaxTransferRemoteReturnValue(TVMArgs args) { // Use args[1] as return value, args[0] is tcode - rpc_chunk_max_size_bytes_ = (uint64_t)args[1]; + // Look at RPCWrappedFunc in src/runtime/rpc/rpc_module.cc + rpc_chunk_max_size_bytes_ = (int64_t)args[1]; } uint64_t GetRPCMaxTransferSize() { + if (rpc_chunk_max_size_bytes_ > 0) { + return (uint64_t)rpc_chunk_max_size_bytes_; + } + PackedFuncHandle rpc_func = GetFunction("tvm.rpc.server.GetCRTMaxPacketSize"); if (rpc_func == nullptr) { - rpc_chunk_max_size_bytes_ = kRPCMaxTransferSizeBytesDefault; + rpc_chunk_max_size_bytes_ = (int64_t)kRPCMaxTransferSizeBytesDefault; } else { CallFunc(rpc_func, nullptr, nullptr, 0, [this](TVMArgs args) { RPCMaxTransferRemoteReturnValue(args); }); } - return rpc_chunk_max_size_bytes_; + return (uint64_t)rpc_chunk_max_size_bytes_; } std::shared_ptr endpoint_; - uint64_t rpc_chunk_max_size_bytes_; + int64_t rpc_chunk_max_size_bytes_ = -1; }; std::shared_ptr CreateClientSession(std::shared_ptr endpoint) { From 15a9e6d7e3588c9ac211b191770d18f73964efb9 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Mon, 19 Apr 2021 11:55:50 -0700 Subject: [PATCH 08/12] change rpc default max size --- src/runtime/minrpc/rpc_reference.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/minrpc/rpc_reference.h b/src/runtime/minrpc/rpc_reference.h index 6f47b71279f0..e3405856d96b 100644 --- a/src/runtime/minrpc/rpc_reference.h +++ b/src/runtime/minrpc/rpc_reference.h @@ -31,7 +31,7 @@ namespace runtime { constexpr const char* kRPCProtocolVer = "0.8.0"; // When tvm.rpc.server.GetCRTMaxPacketSize global function is not registered. -const uint64_t kRPCMaxTransferSizeBytesDefault = 128 * 1024; +const uint64_t kRPCMaxTransferSizeBytesDefault = UINT64_MAX; /*! \brief The RPC code */ enum class RPCCode : int { From 0148cc91f59f3edfd84ed4acafa12cf8001ee899 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Mon, 19 Apr 2021 15:54:13 -0700 Subject: [PATCH 09/12] Trigger Build From c06d644d86113bdefa247d2013097a2840afaebb Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Tue, 20 Apr 2021 09:09:59 -0700 Subject: [PATCH 10/12] add checks --- src/runtime/rpc/rpc_endpoint.cc | 24 ++++++++++++++---------- src/runtime/rpc/rpc_endpoint.h | 1 + 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index 51091eb3aaae..c7850e5957c0 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -980,7 +980,9 @@ class RPCClientSession : public RPCSession, public DeviceAPI { void CopyToRemote(void* local_from_bytes, DLTensor* remote_to, uint64_t nbytes) final { RPCCode code = RPCCode::kCopyToRemote; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_to, code, nbytes); - const uint64_t block_size = GetRPCMaxTransferSize() - overhead; + uint64_t rpc_max_size = GetRPCMaxTransferSize(); + ICHECK_GT(rpc_max_size - overhead, 0) << "CopyToRemote: Invalid block size!"; + const uint64_t block_size = rpc_max_size - overhead; uint64_t block_count = 0; const uint64_t num_blocks = nbytes / block_size; void* from_bytes; @@ -1004,7 +1006,9 @@ class RPCClientSession : public RPCSession, public DeviceAPI { void CopyFromRemote(DLTensor* remote_from, void* local_to_bytes, uint64_t nbytes) final { RPCCode code = RPCCode::kCopyFromRemote; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_from, code, nbytes); - const uint64_t block_size = GetRPCMaxTransferSize() - overhead; + uint64_t rpc_max_size = GetRPCMaxTransferSize(); + ICHECK_GT(rpc_max_size - overhead, 0) << "CopyFromRemote: Invalid block size!"; + const uint64_t block_size = rpc_max_size - overhead; uint64_t block_count = 0; const uint64_t num_blocks = nbytes / block_size; void* to_bytes; @@ -1079,12 +1083,6 @@ class RPCClientSession : public RPCSession, public DeviceAPI { bool IsLocalSession() const final { return false; } private: - void RPCMaxTransferRemoteReturnValue(TVMArgs args) { - // Use args[1] as return value, args[0] is tcode - // Look at RPCWrappedFunc in src/runtime/rpc/rpc_module.cc - rpc_chunk_max_size_bytes_ = (int64_t)args[1]; - } - uint64_t GetRPCMaxTransferSize() { if (rpc_chunk_max_size_bytes_ > 0) { return (uint64_t)rpc_chunk_max_size_bytes_; @@ -1094,8 +1092,14 @@ class RPCClientSession : public RPCSession, public DeviceAPI { if (rpc_func == nullptr) { rpc_chunk_max_size_bytes_ = (int64_t)kRPCMaxTransferSizeBytesDefault; } else { - CallFunc(rpc_func, nullptr, nullptr, 0, - [this](TVMArgs args) { RPCMaxTransferRemoteReturnValue(args); }); + CallFunc(rpc_func, nullptr, nullptr, 0, [this](TVMArgs args) { + // Use args[1] as return value, args[0] is tcode + // Look at RPCWrappedFunc in src/runtime/rpc/rpc_module.cc + rpc_chunk_max_size_bytes_ = (int64_t)args[1]; + ICHECK_GT(rpc_chunk_max_size_bytes_, 0) + << "RPC max transfer size is <= 0! (remote value = " << rpc_chunk_max_size_bytes_ + << ")"; + }); } return (uint64_t)rpc_chunk_max_size_bytes_; } diff --git a/src/runtime/rpc/rpc_endpoint.h b/src/runtime/rpc/rpc_endpoint.h index ac2867100a2f..7c11a1aeac01 100644 --- a/src/runtime/rpc/rpc_endpoint.h +++ b/src/runtime/rpc/rpc_endpoint.h @@ -210,6 +210,7 @@ inline TVMRetValue RPCEndpoint::SysCallRemote(RPCCode code, Args&&... args) { * \param to DLTensor to copy. * \param code RPCCode for this transfer. * \param nbytes Number of bytes to transfer. + * \return The remote-copy packet overhead size. */ uint64_t RemoteCopyCalculatePacketOverheadSize(DLTensor* tensor, RPCCode code, uint64_t nbytes); From 3dcf8e2849a5fcb80e5066dd0538ff50f8e21132 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Tue, 20 Apr 2021 13:39:32 -0700 Subject: [PATCH 11/12] Trigger Build From 9319df86f2f9fa7ca5e39f9311e3ed79e8631457 Mon Sep 17 00:00:00 2001 From: Mehrdad Hessar Date: Wed, 21 Apr 2021 09:33:04 -0700 Subject: [PATCH 12/12] fix ICHECK --- src/runtime/rpc/rpc_endpoint.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index c7850e5957c0..955c0efeddf1 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -981,7 +981,7 @@ class RPCClientSession : public RPCSession, public DeviceAPI { RPCCode code = RPCCode::kCopyToRemote; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_to, code, nbytes); uint64_t rpc_max_size = GetRPCMaxTransferSize(); - ICHECK_GT(rpc_max_size - overhead, 0) << "CopyToRemote: Invalid block size!"; + ICHECK_GT(rpc_max_size, overhead) << "CopyToRemote: Invalid block size!"; const uint64_t block_size = rpc_max_size - overhead; uint64_t block_count = 0; const uint64_t num_blocks = nbytes / block_size; @@ -1007,7 +1007,7 @@ class RPCClientSession : public RPCSession, public DeviceAPI { RPCCode code = RPCCode::kCopyFromRemote; uint64_t overhead = RemoteCopyCalculatePacketOverheadSize(remote_from, code, nbytes); uint64_t rpc_max_size = GetRPCMaxTransferSize(); - ICHECK_GT(rpc_max_size - overhead, 0) << "CopyFromRemote: Invalid block size!"; + ICHECK_GT(rpc_max_size, overhead) << "CopyFromRemote: Invalid block size!"; const uint64_t block_size = rpc_max_size - overhead; uint64_t block_count = 0; const uint64_t num_blocks = nbytes / block_size;