Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[onert] Propagate shared memory operand indexes to cpu backend #14230

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
// TODO: Use findSharedMemoryOperandIndexes method here
auto tb = std::make_shared<TensorBuilder>(tr, ir::OperandIndexMap<ir::OperandIndex>{});
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/backend/cpu/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ namespace backend
namespace cpu
{

ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
// Registers, plans and allocates this context's tensors via the shared
// basic-backend implementation. The shared-memory operand map is taken from
// the tensor builder; note the basic::genTensors overload currently ignores
// that parameter (its name is commented out in the signature).
ITensorRegistry *BackendContext::genTensors()
{
  const auto &shared_memory_operands = tensor_builder->getSharedMemoryOperandIndexes();
  return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry,
                           data().op_order, shared_memory_operands);
}

FunctionMap BackendContext::genKernels()
{
Expand All @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels()
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(*this);
basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
Expand Down
47 changes: 28 additions & 19 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,18 @@ namespace basic
{

// TODO Remove the template param BackendContext once unification of cpu backend context is done
template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
template <typename T_TensorBuilder>
void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::vector<onert::ir::OperationIndex> &op_order)
{
const ir::Graph &graph = *ctx.graph();
const auto &order = ctx.data().op_order;
auto tensor_builder = ctx.tensor_builder;

ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;

// Prepare scanning
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;

// TODO Check if we need to handle unused tensors
Expand Down Expand Up @@ -95,7 +94,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
for (const auto &op_ind : order)
for (const auto &op_ind : op_order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
Expand All @@ -104,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// Define outputs
for (const auto &ind : op_outputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -121,7 +120,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// non-constant because of less memory usage by memory planning in here
for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -138,7 +137,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct

for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand Down Expand Up @@ -177,21 +176,24 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
[](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}

template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
{
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

// TODO Get compiler options from compiler, and use it rather than getting it from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
basic::planTensors(ctx);
basic::planTensors(tensor_builder, graph, external_operands, op_order);
}
else
{
Expand All @@ -205,12 +207,19 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

tensor_builder->allocate();

return ctx.tensor_registry.get();
return tensor_registry.get();
}

// Backward-compatible convenience overload: unpacks the backend context and
// forwards to the explicit-argument genTensors, with an empty shared-memory
// operand map (i.e. no operands share a buffer).
template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
{
  const ir::OperandIndexMap<ir::OperandIndex> no_shared_memory_operands{};
  return genTensors(ctx.tensor_builder, *ctx.graph(), ctx.external_operands(), ctx.tensor_registry,
                    ctx.data().op_order, no_shared_memory_operands);
}

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
Expand All @@ -234,7 +243,7 @@ inline void initConsts(const ir::Operands &operands,

// Backward-compatible convenience overload: pulls the operand set, external
// operands and tensor registry out of the backend context and delegates,
// passing an empty shared-memory operand map.
inline void initConsts(BackendContext &ctx)
{
  const ir::OperandIndexMap<ir::OperandIndex> no_shared_memory_operands{};
  initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
             no_shared_memory_operands);
}
}

} // namespace basic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
Expand All @@ -57,6 +59,7 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});

/**
* @brief Register tensor information to allocate on CPU backend
Expand All @@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getSharedMemoryOperandIndexes() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
18 changes: 11 additions & 7 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_shared_memory_operand_indexes{shared_memory_operand_indexes}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_shared_memory_operand_indexes{shared_memory_operand_indexes}
{
// DO NOTHING
}
Expand Down
21 changes: 16 additions & 5 deletions runtime/onert/core/src/backend/basic/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,23 @@ namespace backend
namespace basic
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(),
shared_memory_operand_indexes)},
_shared_memory_operand_indexes{shared_memory_operand_indexes}
{
/* empty */
}

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::string planner_id)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
shared_memory_operand_indexes)}
{
/* empty */
}
Expand Down Expand Up @@ -83,6 +89,11 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const

void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }

// Accessor for the operand-index map captured at construction time.
// Presumably maps an operand to the operand whose memory it shares
// (per the "shared memory operand indexes" naming) — the map is only
// stored and forwarded here, never interpreted; confirm semantics with
// the consumers in BackendContextHelpers.
const ir::OperandIndexMap<ir::OperandIndex> &TensorBuilder::getSharedMemoryOperandIndexes() const
{
  return _shared_memory_operand_indexes;
}

} // namespace basic
} // namespace backend
} // namespace onert
4 changes: 2 additions & 2 deletions runtime/onert/core/src/backend/builtin/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ namespace builtin

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
_static_tensor_mgr{
new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new basic::StaticTensorManager(
_tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), ir::OperandIndexMap<ir::OperandIndex>{})}
{
/* empty */
}
Expand Down