
[Runtime] add set_output_zero_copy #8497

Merged (72 commits) Aug 27, 2021
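In short, this PR adds a set_output_zero_copy packed function to the graph executor so a caller can bind an external, pre-allocated buffer as a model output, mirroring the existing set_input_zero_copy. A minimal usage sketch, condensed from the unit test at the bottom of this diff (run_mod and Y are assumed to be set up as in that test):

// run_mod: a graph executor module; Y: an NDArray whose shape, dtype, and
// device match the model output (see tests/cpp/runtime_test.cc below).
auto set_output_f = run_mod.GetFunction("set_output_zero_copy", false);
auto run_f = run_mod.GetFunction("run", false);
set_output_f(0, const_cast<DLTensor*>(Y.operator->()));  // by index; a name also works
run_f();  // results are written directly into Y, with no copy through get_output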
Commits
629ec50
optimize resize vertor
sunjiweiswift Jun 25, 2021
5005373
tmp
sunjiweiswift Jun 25, 2021
a1b749f
DoMultiLevelTiling
sunjiweiswift Jun 26, 2021
f1fc313
modify size_t to int
sunjiweiswift Jun 26, 2021
65a7a00
modify
sunjiweiswift Jun 26, 2021
2368df9
modify level fill
sunjiweiswift Jun 26, 2021
e8ba850
Update utils.cc
sunjiweiswift Jun 26, 2021
a832739
format lower count
sunjiweiswift Jun 26, 2021
258f382
Merge branch 'main' of https://github.com/sunjiweiswift/tvm
sunjiweiswift Jun 26, 2021
2de6c99
delete blank lines
sunjiweiswift Jun 26, 2021
cb99388
delete blank lines
sunjiweiswift Jun 26, 2021
ece0e1d
Merge branch 'main' of https://github.com/sunjiweiswift/tvm
sunjiweiswift Jun 26, 2021
9da6fa3
re-commit message
sunjiweiswift Jun 27, 2021
718e58b
Merge pull request #1 from apache/main
sunjiweiswift Jul 15, 2021
7377e43
Update graph_executor.h
sunjiweiswift Jul 15, 2021
8853436
Merge pull request #2 from apache/main
sunjiweiswift Jul 19, 2021
4a007ab
add setoutputzero
sunjiweiswift Jul 19, 2021
8ca606f
add set output zero
sunjiweiswift Jul 19, 2021
6afb609
Update graph_executor.cc
sunjiweiswift Jul 19, 2021
d71dece
Update graph_executor.h
sunjiweiswift Jul 19, 2021
145219c
delete const_cast
sunjiweiswift Jul 20, 2021
e45c77b
add common function chechDltensor
sunjiweiswift Jul 20, 2021
b7a27c5
Update graph_executor.h
sunjiweiswift Jul 20, 2021
bf6ed08
Update graph_executor.cc
sunjiweiswift Jul 20, 2021
80fc91f
add output_ sort
sunjiweiswift Jul 20, 2021
ab5f957
Update graph_executor.cc
sunjiweiswift Jul 20, 2021
07e80ad
add a.nodeid == b.nodeid
sunjiweiswift Jul 20, 2021
e67b839
add unit test for set output zero
sunjiweiswift Jul 21, 2021
052fa56
add include <algorithm>
sunjiweiswift Jul 22, 2021
847634e
modify Setoutput zero copy
sunjiweiswift Jul 22, 2021
b2d9471
modify by clang-format
sunjiweiswift Jul 22, 2021
5d0461a
add unit test for set output zero
sunjiweiswift Jul 22, 2021
4ebf2bd
rrealy ut go back
sunjiweiswift Jul 22, 2021
c221b51
rrealy ut go back
sunjiweiswift Jul 22, 2021
92294d3
modify input->output
sunjiweiswift Jul 22, 2021
dd54915
delete sort output input
sunjiweiswift Jul 23, 2021
66ef5fe
modify build_module_test.cc
sunjiweiswift Jul 23, 2021
7918c7b
re-pr
sunjiweiswift Jul 24, 2021
c7e00cb
empty commit
sunjiweiswift Jul 24, 2021
2558aee
empty commit
sunjiweiswift Jul 25, 2021
bf85d3e
empty commit
sunjiweiswift Jul 25, 2021
df24fc3
modify input to ouput
sunjiweiswift Jul 28, 2021
c1bf14c
modify zero ouput copy disorder issus
sunjiweiswift Jul 29, 2021
c666527
Merge remote-tracking branch 'upstream/main'
sunjiweiswift Aug 2, 2021
85b4fc3
Merge remote-tracking branch 'upstream/main'
sunjiweiswift Aug 3, 2021
81143b9
modify nid->eid to record output, add var to record the dltensor both…
sunjiweiswift Aug 3, 2021
6f7b068
character too long >= 100
sunjiweiswift Aug 3, 2021
0d25674
modify zero copy UT add set input zero copy
sunjiweiswift Aug 3, 2021
6fc5047
modify zero copy UT add set input zero copy
sunjiweiswift Aug 3, 2021
969c80f
modify zero copy UT add set input zero copy
sunjiweiswift Aug 3, 2021
889106d
Merge branch 'main' of https://github.com/sunjiweiswift/tvm
sunjiweiswift Aug 3, 2021
5f858cc
empty commit
sunjiweiswift Aug 3, 2021
1762cb5
trigger CI
sunjiweiswift Aug 4, 2021
0575cb8
Merge pull request #4 from apache/main
sunjiweiswift Aug 4, 2021
2640e76
trigger CI
sunjiweiswift Aug 4, 2021
a10562f
trigger CI
sunjiweiswift Aug 4, 2021
07128aa
empty commit
sunjiweiswift Aug 20, 2021
c0e89f5
empty commit
sunjiweiswift Aug 20, 2021
3e46c0e
trigger CI
Aug 21, 2021
6b3a126
trigger CI
sunjiweiswift Aug 21, 2021
37b69b1
trigger CI
sunjiweiswift Aug 21, 2021
d66c4e1
Merge pull request #5 from apache/main
sunjiweiswift Aug 21, 2021
e622619
trigger CI
sunjiweiswift Aug 21, 2021
8f9287f
trigger CI
sunjiweiswift Aug 21, 2021
1644d91
resolve conflicts
sunjiweiswift Aug 24, 2021
1c4f9e3
Merge pull request #6 from apache/main
sunjiweiswift Aug 25, 2021
13a1355
modify C style
sunjiweiswift Aug 25, 2021
cb09eab
add runtime test
sunjiweiswift Aug 25, 2021
3205590
add runtime test
sunjiweiswift Aug 25, 2021
aab0ef7
add runtime test
sunjiweiswift Aug 25, 2021
8c0dfb6
realy build generatr the json
sunjiweiswift Aug 26, 2021
2603263
realy build generatr the json
sunjiweiswift Aug 26, 2021
106 changes: 92 additions & 14 deletions src/runtime/graph_executor/graph_executor.cc
@@ -91,6 +91,11 @@ void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module mod
std::string& name = nodes_[nid].name;
input_map_[name] = i;
}
for (size_t i = 0; i < outputs_.size(); i++) {
const uint32_t nid = outputs_[i].node_id;
std::string& name = nodes_[nid].name;
output_map_[name] = i;
}
}
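A note on naming: output_map_ is keyed by the producing node's name (nodes_[nid].name), so outputs can be looked up by name with the same node-name scheme already used for inputs.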
/*!
* \brief Get the input index given the name of input.
@@ -104,6 +109,18 @@ int GraphExecutor::GetInputIndex(const std::string& name) {
}
return -1;
}
/*!
* \brief Get the output index given the name of output.
* \param name The name of the output.
* \return The index of output.
*/
int GraphExecutor::GetOutputIndex(const std::string& name) {
auto it = output_map_.find(name);
if (it != output_map_.end()) {
return it->second;
}
return -1;
}
/*!
* \brief set index-th input to the graph.
* \param index The input index.
@@ -114,6 +131,23 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) {
uint32_t eid = this->entry_id(input_nodes_[index], 0);
data_entry_[eid].CopyFrom(data_in);
}
/*!
* \brief Check the legality of external DLTensor*.
* \param external The external DLTensor*.
* \param eid The data_entry_ index.
*/
void GraphExecutor::CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const {
const DLTensor* internal = data_entry_[eid].operator->();

ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*external));
ICHECK_EQ(reinterpret_cast<size_t>(external->data) % kAllocAlignment, 0);
ICHECK_EQ(internal->ndim, static_cast<size_t>(external->ndim));
ICHECK_EQ(internal->device.device_type, external->device.device_type);
ICHECK_EQ(internal->device.device_id, external->device.device_id);
for (auto i = 0; i < external->ndim; ++i) {
ICHECK_EQ(internal->shape[i], external->shape[i]);
}
}
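These checks require the external buffer to match the internal data entry exactly: same alignment, a kAllocAlignment-aligned data pointer, and identical ndim, device, and shape. A sketch of a buffer that passes for a {2, 3} float32 CPU entry, following the allocation pattern used in the unit test below:

// NDArray::Empty allocates through the device API, which returns an aligned
// data pointer, so the two alignment checks above hold by construction.
auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
DLTensor* ext = const_cast<DLTensor*>(Y.operator->());
// ext matches a {2, 3} float32 CPU entry in ndim, shape, and device.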
/*!
* \brief set index-th input to the graph without copying the data.
* \param index The input index.
@@ -122,23 +156,37 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) {
void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) {
ICHECK_LT(static_cast<size_t>(index), input_nodes_.size());
uint32_t eid = this->entry_id(input_nodes_[index], 0);
const DLTensor* old_t = data_entry_[eid].operator->();

// check the consistency of input
ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref));
ICHECK_EQ(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment, 0);
ICHECK_EQ(old_t->ndim, static_cast<size_t>(data_ref->ndim));
ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type);
ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id);
for (auto i = 0; i < data_ref->ndim; ++i) {
ICHECK_EQ(old_t->shape[i], data_ref->shape[i]);
}

CheckExternalDLTensor(data_ref, eid);
// Update the data pointer for each argument of each op
for (DLTensor* t : input_dltensors_[eid]) {
t->data = data_ref->data;
}
}
/*!
* \brief set index-th output to the graph without copying the data.
* \param index The output index.
* \param data_ref The output data that is referred.
*/
void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
ICHECK_LT(static_cast<size_t>(index), outputs_.size());
ICHECK_LT(static_cast<size_t>(index), output_dltensors_.size());
const NodeEntry& output_node = outputs_[index];
uint32_t output_node_eid = this->entry_id(output_node);

// check the consistency of output
CheckExternalDLTensor(data_ref, output_node_eid);

// Update the data pointer for output op
for (DLTensor* t : output_dltensors_[output_node_eid]) {
t->data = data_ref->data;
}

// Update the input of the op connected to the output
for (DLTensor* t : both_output_opinput_dltensors_[output_node_eid]) {
t->data = data_ref->data;
}
}
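The second loop matters when a tensor is both a model output and an input to another op; updating only output_dltensors_ would leave that consumer reading the stale internal buffer. A hypothetical sketch of such a graph, built with the same relay C++ API as the unit test below (a, b, c, and add_op as defined there; this two-output function is illustrative, not part of this PR):

// x is model output 0 and also feeds the op producing output 1, so x's eid
// appears in both output_dltensors_ and both_output_opinput_dltensors_.
auto x = relay::Call(add_op, {a, b}, tvm::Attrs(), {});
auto y = relay::Call(add_op, {x, c}, tvm::Attrs(), {});
auto out = relay::Tuple({x, y});
auto func = relay::Function(relay::FreeVars(out), out, relay::Type(), {});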
/*!
* \brief Get the number of outputs
*
@@ -358,11 +406,17 @@ void GraphExecutor::SetupStorage() {
void GraphExecutor::SetupOpExecs() {
op_execs_.resize(this->GetNumOfNodes());
input_dltensors_.resize(num_node_entries());
output_dltensors_.resize(num_node_entries());
both_output_opinput_dltensors_.resize(num_node_entries());
std::unordered_set<uint32_t> input_node_eids;
for (size_t i = 0; i < input_nodes_.size(); i++) {
uint32_t nid = input_nodes_[i];
input_node_eids.insert(entry_id(nid, 0));
}
std::unordered_set<uint32_t> output_node_eids;
for (size_t i = 0; i < outputs_.size(); i++) {
output_node_eids.insert(entry_id(outputs_[i]));
}

// setup the array and requirements.
for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) {
@@ -383,10 +437,25 @@
std::tie(op_execs_[nid], op_args) = CreateTVMOp(inode.param, args);

for (size_t i = 0; i < inode.inputs.size(); i++) {
uint32_t eid = this->entry_id(inode.inputs[i]);
uint32_t input_eid = this->entry_id(inode.inputs[i]);
// check if op input is model input
if (input_node_eids.count(eid) > 0) {
input_dltensors_[eid].push_back(static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
if (input_node_eids.count(input_eid) > 0) {
input_dltensors_[input_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
// check if any model output is the input of the op
if (output_node_eids.count(input_eid) > 0) {
both_output_opinput_dltensors_[input_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
}

for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + inode.param.num_outputs; ++i) {
uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size());
// check if op output is model output
if (output_node_eids.count(output_eid) > 0) {
output_dltensors_[output_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
}
}
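For reference, an eid indexes the flattened list of node entries, which is why the per-entry vectors above are sized with num_node_entries(). The mapping, paraphrased from the existing helper in graph_executor.h (unchanged by this PR):

// entry_id flattens (node id, output index) into a single entry id.
uint32_t entry_id(uint32_t nid, uint32_t index) const {
  return node_row_ptr_[nid] + index;
}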
@@ -462,6 +531,15 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name,
this->SetInputZeroCopy(args[0], args[1]);
}
});
} else if (name == "set_output_zero_copy") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
if (String::CanConvertFrom(args[0])) {
int out_idx = this->GetOutputIndex(args[0].operator String());
if (out_idx >= 0) this->SetOutputZeroCopy(out_idx, args[1]);
} else {
this->SetOutputZeroCopy(args[0], args[1]);
}
});
} else if (name == "get_output") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
if (args.num_args == 2) {
28 changes: 28 additions & 0 deletions src/runtime/graph_executor/graph_executor.h
@@ -107,6 +107,13 @@ class TVM_DLL GraphExecutor : public ModuleNode {
*/
int GetInputIndex(const std::string& name);

/*!
* \brief Get the output index given the name of output.
* \param name The name of the output.
* \return The index of output.
*/
int GetOutputIndex(const std::string& name);

/*!
* \brief set index-th input to the graph.
* \param index The input index.
@@ -119,6 +126,12 @@ class TVM_DLL GraphExecutor : public ModuleNode {
* \param data_ref The input data that is referred.
*/
void SetInputZeroCopy(int index, DLTensor* data_ref);
/*!
* \brief set index-th output to the graph without copying the data.
* \param index The output index.
* \param data_ref The output data that is referred.
*/
void SetOutputZeroCopy(int index, DLTensor* data_ref);
/*!
* \brief Get the number of outputs
*
@@ -193,6 +206,9 @@ class TVM_DLL GraphExecutor : public ModuleNode {
uint32_t node_id;
uint32_t index;
uint32_t version;
inline bool operator==(const NodeEntry& other) const {
return node_id == other.node_id && index == other.index && version == other.version;
}
// JSON Loader
void Load(dmlc::JSONReader* reader) {
reader->BeginArray();
@@ -377,6 +393,12 @@ class TVM_DLL GraphExecutor : public ModuleNode {
void SetupStorage();
/*! \brief Setup the executors. */
void SetupOpExecs();
/*!
* \brief Check the legality of external DLTensor*.
* \param external The external DLTensor*.
* \param eid The data_entry_ index.
*/
void CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const;
/*!
* \brief Create an execution function given input.
* \param attrs The node attributes.
@@ -397,8 +419,14 @@ class TVM_DLL GraphExecutor : public ModuleNode {
std::vector<uint32_t> input_nodes_;
/*! \brief Map of input names to input indices. */
std::unordered_map<std::string, uint32_t> input_map_;
/*! \brief Map of output names to output indices. */
std::unordered_map<std::string, uint32_t> output_map_;
/*! \brief Used for quick node input DLTensor* lookup given an input eid. */
std::vector<std::vector<DLTensor*>> input_dltensors_;
/*! \brief Used for quick node output DLTensor* lookup given an output eid. */
std::vector<std::vector<DLTensor*>> output_dltensors_;
/*! \brief Used for quick node(both model output and op input) DLTensor* lookup given an eid. */
std::vector<std::vector<DLTensor*>> both_output_opinput_dltensors_;
/*! \brief Used for quick entry indexing. */
std::vector<uint32_t> node_row_ptr_;
/*! \brief Output entries. */
154 changes: 154 additions & 0 deletions tests/cpp/runtime_test.cc
@@ -0,0 +1,154 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <gtest/gtest.h>
#include <tvm/driver/driver_api.h>
#include <tvm/ir/module.h>
#include <tvm/relay/analysis.h>
#include <tvm/relay/expr.h>
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/op_strategy.h>
#include <tvm/relay/transform.h>
#include <tvm/relay/type.h>
#include <tvm/runtime/executor_info.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <tvm/te/operation.h>
#include <tvm/topi/broadcast.h>
#include <tvm/topi/generic/injective.h>

using namespace tvm;
using namespace tvm::relay;

TVM_REGISTER_GLOBAL("runtime_test.strategy")
.set_body_typed([](const Attrs& attrs, const Array<te::Tensor>& inputs, const Type& out_type,
const Target& target) {
FTVMCompute fcompute = [](const Attrs& attrs, const Array<te::Tensor>& inputs,
const Type& out_type) -> Array<te::Tensor> {
ICHECK_EQ(inputs.size(), 2U);
return {topi::add(inputs[0], inputs[1])};
};
FTVMSchedule fschedule = [](const Attrs& attrs, const Array<te::Tensor>& outs,
const Target& target) {
With<Target> target_scope(target);
return topi::generic::schedule_injective(target, outs);
};

auto n = make_object<OpStrategyNode>();
auto strategy = tvm::relay::OpStrategy(std::move(n));
strategy.AddImplementation(fcompute, fschedule, "runtime_test.strategy", 10);
return strategy;
});

TEST(Runtime, ZeroCopy) {
auto tensor_type = relay::TensorType({2, 3}, DataType::Float(32));
auto a = relay::Var("a", tensor_type);
auto b = relay::Var("b", tensor_type);
auto add_op = relay::Op::Get("add");
auto x = relay::Call(add_op, {a, b}, tvm::Attrs(), {});
auto c = relay::Var("c", tensor_type);
auto y = relay::Call(add_op, {x, c}, tvm::Attrs(), {});
auto func = relay::Function(relay::FreeVars(y), y, relay::Type(), {});
auto A = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
auto B = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
auto C = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
auto Y = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});

auto pA = static_cast<float*>(A->data);
auto pB = static_cast<float*>(B->data);
auto pC = static_cast<float*>(C->data);
auto pY = static_cast<float*>(Y->data);

for (int i = 0; i < 6; ++i) {
pA[i] = i;
pB[i] = i + 1;
pC[i] = i + 2;
}
// get schedule
auto reg = tvm::runtime::Registry::Get("ir.RegisterOpAttr");
if (!reg) {
LOG(FATAL) << "no _Register";
}
auto fs = tvm::runtime::Registry::Get("runtime_test.strategy");
if (!fs) {
LOG(FATAL) << "No test_strategy registered.";
}
auto fgeneric = GenericFunc::Get("runtime_test.strategy_generic").set_default(*fs);
(*reg)("add", "FTVMStrategy", fgeneric, 10);
Array<Integer> dep;
dep.push_back(0);
(*reg)("add", "TShapeDataDependent", dep, 10);
// build
auto pfb = tvm::runtime::Registry::Get("relay.build_module._BuildModule");
tvm::runtime::Module build_mod = (*pfb)();
auto build_f = build_mod.GetFunction("build", false);
auto json_f = build_mod.GetFunction("get_graph_json", false);
auto mod_f = build_mod.GetFunction("get_module", false);
Map<tvm::Integer, tvm::Target> targets;
Target llvm_tgt = Target("llvm");
targets.Set(0, llvm_tgt);
auto relay_mod = tvm::IRModule::FromExpr(func);
ICHECK(relay_mod.defined()) << "Module must be defined";
build_f(relay_mod, targets, llvm_tgt, runtime::kTvmExecutorGraph, "");
// create graph executor
std::string json = json_f();
tvm::runtime::Module mod = mod_f();
auto dev = A->device;
auto pfr = tvm::runtime::Registry::Get("tvm.graph_executor.create");
ICHECK(mod.defined()) << "Module must be defined";
tvm::runtime::Module run_mod =
(*pfr)(json, mod, static_cast<int>(dev.device_type), dev.device_id);
// get function
auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false);
auto set_output_f = run_mod.GetFunction("set_output_zero_copy", false);
auto run_f = run_mod.GetFunction("run", false);
// set input zero copy
set_input_f("a", const_cast<DLTensor*>(A.operator->()));
set_input_f("b", const_cast<DLTensor*>(B.operator->()));
set_input_f("c", const_cast<DLTensor*>(C.operator->()));
// set output zero copy
set_output_f(0, const_cast<DLTensor*>(Y.operator->()));
run_f();
// check correctness
for (int i = 0; i < 6; ++i) {
ICHECK_LT(fabs(pY[i] - (i + (i + 1) + (i + 2))), 1e-4);
}
// mutate the input a bit and run it again
for (int i = 0; i < 6; ++i) {
pB[i] = i + 3;
}
run_f();
// check correctness
for (int i = 0; i < 6; ++i) {
ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 2))), 1e-4);
}
// attach a different input and run it again
auto C2 = tvm::runtime::NDArray::Empty({2, 3}, {kDLFloat, 32, 1}, {kDLCPU, 0});
auto pC2 = static_cast<float*>(C2->data);
for (int i = 0; i < 6; ++i) {
pC2[i] = i + 4;
}
set_input_f("c", const_cast<DLTensor*>(C2.operator->()));
run_f();
// check correctness
for (int i = 0; i < 6; ++i) {
ICHECK_LT(fabs(pY[i] - (i + (i + 3) + (i + 4))), 1e-4);
}
}
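As a sanity check on the expected values: with pA[i] = i, pB[i] = i + 1, and pC[i] = i + 2, the graph computes pY[i] = 3i + 3, i.e. {3, 6, 9, 12, 15, 18} on the first run; the second and third runs change one operand each, giving 3i + 5 and 3i + 7 respectively.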