Skip to content

Commit

Permalink
Remove string serialize hack from RunIndividual functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Icemist committed Jun 27, 2022
1 parent b19bb4d commit 9355cfa
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 61 deletions.
23 changes: 13 additions & 10 deletions python/tvm/contrib/debugger/debug_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
import os
import shutil
import struct
import tempfile

import tvm._ffi
Expand Down Expand Up @@ -366,11 +367,15 @@ def run_individual(
number, repeat, min_repeat_ms, cooldown_interval_ms, repeats_to_cooldown
)
results = []
for node_data in ret.strip(";").split(";"):
results.append([])
for repeat_data in node_data.strip(",").split(","):
if repeat_data:
results[-1].append(float(repeat_data))
offset = 0
format_size = "@q"
(nodes_count,) = struct.unpack_from(format_size, ret, offset)
offset += struct.calcsize(format_size)
format_data = "@" + repeat * "d"
for _ in range(0, nodes_count):
r = struct.unpack_from(format_data, ret, offset)
offset += struct.calcsize(format_data)
results.append([*r])
return results

def run_individual_node(
Expand Down Expand Up @@ -425,11 +430,9 @@ def run_individual_node(
ret = self._run_individual_node(
index, number, repeat, min_repeat_ms, cooldown_interval_ms, repeats_to_cooldown
)
results = []
for repeat_data in ret.replace(" ", "").strip(",").split(","):
if repeat_data:
results.append(float(repeat_data))
return BenchmarkResult(results)
fmt = "@" + ("d" * repeat)
results = struct.unpack(fmt, ret)
return BenchmarkResult(list(results))

def profile(self, collectors=None, **input_dict):
"""Run forward execution of the graph and collect overall and per-op
Expand Down
3 changes: 2 additions & 1 deletion src/runtime/crt/common/crt_runtime_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,8 @@ tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue*
do {
if (curr_res_seconds > 0.0) {
double a = (min_repeat_seconds / (curr_res_seconds / g_time_evaluator_state.number) + 1);
double b = g_time_evaluator_state.number * 1.618; // 1.618 is chosen by random
const double golden_ratio = 1.618;
double b = g_time_evaluator_state.number * golden_ratio;
g_time_evaluator_state.number = (int64_t)(a > b ? a : b);
}
err = TVMPlatformBeforeMeasurement();
Expand Down
97 changes: 50 additions & 47 deletions src/runtime/graph_executor/debug/graph_executor_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ class GraphExecutorDebug : public GraphExecutor {
* defined by `repeats_to_cooldown`.
* \param repeats_to_cooldown The number of repeats before the
* cooldown is activated.
* \return Comma separated string containing the elapsed time per op for
* the last iteration only, because returning a long string over rpc can be expensive.
* \return A string holding an encoded byte array. The first 8 bytes are an int64_t giving the
* number of layers. They are followed by the measured times encoded as doubles: `repeat` values
* for each layer, i.e. repeat multiplied by the number of layers values in total.
*/
std::string RunIndividual(int number, int repeat, int min_repeat_ms, int cooldown_interval_ms,
int repeats_to_cooldown) {
Expand All @@ -78,8 +79,12 @@ class GraphExecutorDebug : public GraphExecutor {
} else {
int op = 0;
for (size_t index = 0; index < op_execs_.size(); ++index) {
time_sec_per_op[index] = RunIndividualNode(index, number, repeat, min_repeat_ms,
std::string result_str = RunIndividualNode(index, number, repeat, min_repeat_ms,
cooldown_interval_ms, repeats_to_cooldown);
const double* blob_ptr = reinterpret_cast<const double*>(result_str.data());
for (int i = 0; i < repeat; ++i, ++blob_ptr) {
time_sec_per_op[index].push_back(*blob_ptr);
}
if (op_execs_[index]) {
LOG(INFO) << "Op #" << op << " " << GetNodeName(index) << ":";
for (size_t cur_repeat = 0; cur_repeat < time_sec_per_op[index].size(); cur_repeat++) {
Expand All @@ -92,43 +97,42 @@ class GraphExecutorDebug : public GraphExecutor {
}

std::ostringstream os;
for (size_t index = 0; index < time_sec_per_op.size(); index++) {
for (const auto& repeat_data : time_sec_per_op[index]) {
int64_t size = time_sec_per_op.size();
os.write(reinterpret_cast<char*>(&size), sizeof(int64_t));
for (size_t index = 0; index < time_sec_per_op.size(); ++index) {
for (auto& repeat_data : time_sec_per_op[index]) {
// To have good behavior when calculating total time, etc.
os << (std::isnan(repeat_data) ? std::to_string(0) : std::to_string(repeat_data)) << ",";
double data = std::isnan(repeat_data) ? 0 : repeat_data;
os.write(reinterpret_cast<char*>(&data), sizeof(double));
}
os << ";";
}
return os.str();
}

std::vector<double> RunIndividualNode(int node_index, int number, int repeat, int min_repeat_ms,
int cooldown_interval_ms, int repeats_to_cooldown) {
std::string RunIndividualNode(int node_index, int number, int repeat, int min_repeat_ms,
int cooldown_interval_ms, int repeats_to_cooldown) {
std::string tkey = module_->type_key();

// results_in_seconds[a][b] is the bth index run of the ath index repeat
std::vector<double> results_in_seconds(repeat, 0);

if (tkey == "rpc") {
LOG(FATAL) << "RPC measurements should not use RunIndividualNode!";
}

if (!op_execs_[node_index]) {
// don't return anything...
return results_in_seconds;
std::ostringstream os;
double zero = 0;
for (int i = 0; i < repeat; ++i) {
os.write(reinterpret_cast<char*>(&zero), sizeof(double));
}
return os.str();
}

// assume host runs things which is first device
Device& d = devices_[0];
PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
TypedPackedFunc<void()>([this, node_index]() { this->RunOpHost(node_index); }), d, number,
repeat, min_repeat_ms, cooldown_interval_ms, repeats_to_cooldown);
std::string result_str = time_evaluator();
const double* blob_ptr = reinterpret_cast<const double*>(result_str.data());
for (int i = 0; i < repeat; ++i, ++blob_ptr) {
results_in_seconds[i] = *blob_ptr;
}
return results_in_seconds;
return time_evaluator();
}

std::vector<double> RunOpRPC(int index, int number, int repeat, int min_repeat_ms,
Expand Down Expand Up @@ -393,36 +397,35 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
*rv = this->RunIndividual(number, repeat, min_repeat_ms, cooldown_interval_ms,
repeats_to_cooldown);
std::string blob = this->RunIndividual(number, repeat, min_repeat_ms, cooldown_interval_ms,
repeats_to_cooldown);
TVMByteArray arr;
arr.size = blob.length();
arr.data = blob.data();
*rv = arr;
});
} else if (name == "run_individual_node") {
return TypedPackedFunc<std::string(int, int, int, int, int, int_least32_t)>(
[sptr_to_self, this](int node_index, int number, int repeat, int min_repeat_ms,
int cooldown_interval_ms, int repeats_to_cooldown) {
ICHECK_GE(node_index, 0);
ICHECK_LT(node_index, nodes_.size());
ICHECK_GT(number, 0);
ICHECK_GT(repeat, 0);
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
std::vector<double> results = this->RunIndividualNode(
node_index, number, repeat, min_repeat_ms, cooldown_interval_ms, repeats_to_cooldown);

// Have problems returning FloatImm so serialize to string results as hack.
std::stringstream s;

// use maximum precision available and use fixed representation
s << std::fixed;
s.precision(std::numeric_limits<double>::max_digits10);

for (double cur : results) {
s << cur << ", ";
}

return s.str();
});
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
int node_index = args[0];
int number = args[1];
int repeat = args[2];
int min_repeat_ms = args[3];
int cooldown_interval_ms = args[4];
int repeats_to_cooldown = args[5];
ICHECK_GE(node_index, 0);
ICHECK_LT(node_index, nodes_.size());
ICHECK_GT(number, 0);
ICHECK_GT(repeat, 0);
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
std::string blob = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms,
cooldown_interval_ms, repeats_to_cooldown);
TVMByteArray arr;
arr.size = blob.length();
arr.data = blob.data();
*rv = arr;
});
} else if (name == "profile") {
return TypedPackedFunc<profiling::Report(Array<profiling::MetricCollector>)>(
[sptr_to_self, this](Array<profiling::MetricCollector> collectors) {
Expand Down
5 changes: 3 additions & 2 deletions src/runtime/profiling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -863,8 +863,9 @@ PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat,

do {
if (duration_ms > 0.0) {
number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
number * 1.618)); // 1.618 is chosen by random
const double golden_ratio = 1.618;
number = static_cast<int>(
std::max((min_repeat_ms / (duration_ms / number) + 1), number * golden_ratio));
}

// start timing
Expand Down
3 changes: 2 additions & 1 deletion web/src/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1070,8 +1070,9 @@ export class Instance implements Disposable {
let durationMs = 0.0;
do {
if (durationMs > 0.0) {
let golden_ratio = 1.618;
setupNumber = Math.floor(
Math.max(minRepeatMs / (durationMs / setupNumber) + 1, setupNumber * 1.618)
Math.max(minRepeatMs / (durationMs / setupNumber) + 1, setupNumber * golden_ratio)
);
}
const tstart: number = perf.now();
Expand Down

0 comments on commit 9355cfa

Please sign in to comment.