diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
index 4f2ea2f2a43..df47427a3df 100644
--- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -67,6 +67,7 @@ REGISTER_KERNEL(ResizeBilinear)
 REGISTER_KERNEL(ResizeNearestNeighbor)
 REGISTER_KERNEL(ReverseV2)
 REGISTER_KERNEL(RmsNorm)
+REGISTER_KERNEL(RoPE)
 REGISTER_KERNEL(Rsqrt)
 REGISTER_KERNEL(Select)
 REGISTER_KERNEL(SelectV2)
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
index 100a09b7544..cc6a83e08c7 100644
--- a/compiler/luci-interpreter/src/core/KernelParams.h
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -32,6 +33,7 @@ namespace luci_interpreter
 using Activation = luci::FusedActFunc;
 using Padding = luci::Padding;
 using MirrorPadMode = luci::MirrorPadMode;
+using RoPEMode = luci::RoPEMode;
 
 struct AddParams
 {
@@ -186,6 +188,11 @@ struct RmsNormParams
   float epsilon;
 };
 
+struct RoPEParams
+{
+  RoPEMode mode;
+};
+
 struct ShapeParams
 {
   loco::DataType out_type;
diff --git a/compiler/luci-interpreter/src/kernels/RoPE.cpp b/compiler/luci-interpreter/src/kernels/RoPE.cpp
new file mode 100644
index 00000000000..f86ade6817d
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/RoPE.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/RoPE.h"
+
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+RoPE::RoPE(const Tensor *input, const Tensor *sin_table, const Tensor *cos_table, Tensor *output,
+           const RoPEParams &params)
+  : KernelWithParams<RoPEParams>({input, sin_table, cos_table}, {output}, params)
+{
+}
+
+void RoPE::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(sin_table()->shape().dim(3) == input()->shape().dim(3));
+  LUCI_INTERPRETER_CHECK(cos_table()->shape().dim(3) == input()->shape().dim(3));
+
+  LUCI_INTERPRETER_CHECK(params().mode == RoPEMode::GPT_NEOX);
+
+  output()->resize(input()->shape());
+}
+
+void RoPE::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("luci-intp RoPE unsupported data type.");
+  }
+}
+
+void RoPE::evalFloat() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto sin_table_shape = getTensorShape(sin_table());
+  const auto cos_table_shape = getTensorShape(cos_table());
+  auto output_shape = getTensorShape(output());
+
+  const float *input_data = getTensorData<float>(input());
+  const float *sin_table_data = getTensorData<float>(sin_table());
+  const float *cos_table_data = getTensorData<float>(cos_table());
+  float *output_data = getTensorData<float>(output());
+
+  if (params().mode == RoPEMode::GPT_NEOX)
+  {
+    const int32_t i0_n = input_shape.Dims(0);
+    const int32_t i1_n = input_shape.Dims(1); // multihead
+    const int32_t i2_n = input_shape.Dims(2);
+    const int32_t i3_n = input_shape.Dims(3); // head
+
+    for (int32_t i0 = 0; i0 < i0_n; ++i0)
+    {
+      for (int32_t i1 = 0; i1 < i1_n; ++i1)
+      {
+        for (int32_t i2 = 0; i2 < i2_n; ++i2)
+        {
+          for (int32_t i3 = 0; i3 < i3_n / 2; ++i3)
+          {
+            const int32_t offset = tflite::Offset(input_shape, i0, i1, i2, i3);
+            const float x0 = input_data[offset];
+            const float x1 = input_data[offset + i3_n / 2];
+
+            output_data[offset] = x0 * cos_table_data[i3] - x1 * sin_table_data[i3];
+            output_data[offset + i3_n / 2] =
+              x0 * sin_table_data[i3 + i3_n / 2] + x1 * cos_table_data[i3 + i3_n / 2];
+          }
+        }
+      }
+    }
+  }
+  else
+    throw std::runtime_error("luci-intp RoPE unsupported mode.");
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
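Reviewer note: the following is a minimal standalone sketch (not part of the patch) of the GPT_NEOX-mode rotation that `RoPE::evalFloat` applies along the last axis, written against a plain `std::vector<float>` row instead of interpreter tensors. The helper name `rope_neox_row` is illustrative only; the pairing of element `i` with element `i + d/2` and the sin/cos table indexing mirror the kernel above, and the `main` reproduces the expected values of the `floatTest` case added later in this patch.

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// Illustrative only: rotate one row of even length d the way RoPE::evalFloat
// does for RoPEMode::GPT_NEOX. Element i is paired with element i + d/2:
//   out[i]       = x[i] * cos[i]       - x[i + d/2] * sin[i]
//   out[i + d/2] = x[i] * sin[i + d/2] + x[i + d/2] * cos[i + d/2]
std::vector<float> rope_neox_row(const std::vector<float> &x, const std::vector<float> &sin_table,
                                 const std::vector<float> &cos_table)
{
  const size_t d = x.size();
  assert(d % 2 == 0 && sin_table.size() == d && cos_table.size() == d);

  std::vector<float> out(d);
  for (size_t i = 0; i < d / 2; ++i)
  {
    const float x0 = x[i];
    const float x1 = x[i + d / 2];
    out[i] = x0 * cos_table[i] - x1 * sin_table[i];
    out[i + d / 2] = x0 * sin_table[i + d / 2] + x1 * cos_table[i + d / 2];
  }
  return out;
}

int main()
{
  // Same values as the floatTest case; expected output is {-1.0, -2.5, 1.0, 3.5}.
  const auto out = rope_neox_row({0.0f, 1.0f, 2.0f, 3.0f}, {0.5f, 1.0f, 1.0f, 0.5f},
                                 {1.0f, 0.5f, 0.5f, 1.0f});
  for (float v : out)
    std::printf("%.1f ", v);
  std::printf("\n");
  return 0;
}
```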
diff --git a/compiler/luci-interpreter/src/kernels/RoPE.h b/compiler/luci-interpreter/src/kernels/RoPE.h
new file mode 100644
index 00000000000..5576ece5e11
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/RoPE.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ROPE_H
+#define LUCI_INTERPRETER_KERNELS_ROPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class RoPE : public KernelWithParams<RoPEParams>
+{
+public:
+  RoPE(const Tensor *input, const Tensor *sin_table, const Tensor *cos_table, Tensor *output,
+       const RoPEParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *sin_table() const { return _inputs[1]; }
+  const Tensor *cos_table() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ROPE_H
diff --git a/compiler/luci-interpreter/src/kernels/RoPE.test.cpp b/compiler/luci-interpreter/src/kernels/RoPE.test.cpp
new file mode 100644
index 00000000000..e3ab10abde7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/RoPE.test.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/RoPE.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class RoPETest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(RoPETest, floatTest)
+{
+  Shape input_shape{1, 1, 1, 4};
+  std::vector<float> input_data{0, 1.0, 2.0, 3.0};
+
+  Shape sin_shape{1, 1, 1, 4};
+  std::vector<float> sin_data{0.5, 1.0, 1.0, 0.5};
+
+  Shape cos_shape{1, 1, 1, 4};
+  std::vector<float> cos_data{1.0, 0.5, 0.5, 1.0};
+
+  Shape ref_output_shape{1, 1, 1, 4};
+  std::vector<float> ref_output_data{-1.0, -2.5, 1.0, 3.5};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor sin_table = makeInputTensor<DataType::FLOAT32>(sin_shape, sin_data, _memory_manager.get());
+  Tensor cos_table = makeInputTensor<DataType::FLOAT32>(cos_shape, cos_data, _memory_manager.get());
+
+  RoPEParams params{};
+  params.mode = RoPEMode::GPT_NEOX;
+
+  RoPE kernel(&input_tensor, &sin_table, &cos_table, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 1, 4}));
+}
+
+TEST_F(RoPETest, Unsupported_dims_NEG)
+{
+  Shape input_shape{1, 1, 3};
+  std::vector<float> input_data{0, 1.0, 2.0};
+
+  Shape sin_shape{1, 1, 3};
+  std::vector<float> sin_data{0.5, 1.0, 1.0};
+
+  Shape cos_shape{1, 1, 3};
+  std::vector<float> cos_data{1.0, 0.5, 0.5};
+
+  Shape ref_output_shape{1, 1, 3};
+  std::vector<float> ref_output_data{-1.0, -2.5, 1.0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor sin_table = makeInputTensor<DataType::FLOAT32>(sin_shape, sin_data, _memory_manager.get());
+  Tensor cos_table = makeInputTensor<DataType::FLOAT32>(cos_shape, cos_data, _memory_manager.get());
+
+  RoPEParams params{};
+  params.mode = RoPEMode::GPT_NEOX;
+
+  RoPE kernel(&input_tensor, &sin_table, &cos_table, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(RoPETest, Unsupported_mode_NEG)
+{
+  Shape input_shape{1, 1, 1, 4};
+  std::vector<float> input_data{0, 1.0, 2.0, 3.0};
+
+  Shape sin_shape{1, 1, 1, 4};
+  std::vector<float> sin_data{0.5, 1.0, 1.0, 0.5};
+
+  Shape cos_shape{1, 1, 1, 4};
+  std::vector<float> cos_data{1.0, 0.5, 0.5, 1.0};
+
+  Shape ref_output_shape{1, 1, 1, 4};
+  std::vector<float> ref_output_data{-1.0, -2.5, 1.0, 3.5};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor sin_table = makeInputTensor<DataType::FLOAT32>(sin_shape, sin_data, _memory_manager.get());
+  Tensor cos_table = makeInputTensor<DataType::FLOAT32>(cos_shape, cos_data, _memory_manager.get());
+
+  RoPEParams params{};
+  params.mode = RoPEMode::GPT_J;
+
+  RoPE kernel(&input_tensor, &sin_table, &cos_table, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(RoPETest, Invalid_input_sin_table_NEG)
+{
+  Shape input_shape{1, 1, 1, 4};
+  std::vector<float> input_data{0, 1.0, 2.0, 3.0};
+
+  Shape sin_shape{1, 1, 1, 3};
+  std::vector<float> sin_data{0.5, 1.0, 1.0};
+
+  Shape cos_shape{1, 1, 1, 4};
+  std::vector<float> cos_data{1.0, 0.5, 0.5, 1.0};
+
+  Shape ref_output_shape{1, 1, 1, 4};
+  std::vector<float> ref_output_data{-1.0, -2.5, 1.0, 3.5};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor sin_table = makeInputTensor<DataType::FLOAT32>(sin_shape, sin_data, _memory_manager.get());
+  Tensor cos_table = makeInputTensor<DataType::FLOAT32>(cos_shape, cos_data, _memory_manager.get());
+
+  RoPEParams params{};
+  params.mode = RoPEMode::GPT_NEOX;
+
+  RoPE kernel(&input_tensor, &sin_table, &cos_table, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(RoPETest, Invalid_input_cos_table_NEG)
+{
+  Shape input_shape{1, 1, 1, 4};
+  std::vector<float> input_data{0, 1.0, 2.0, 3.0};
+
+  Shape sin_shape{1, 1, 1, 4};
+  std::vector<float> sin_data{0.5, 1.0, 1.0, 0.5};
+
+  Shape cos_shape{1, 1, 1, 3};
+  std::vector<float> cos_data{1.0, 0.5, 0.5};
+
+  Shape ref_output_shape{1, 1, 1, 4};
+  std::vector<float> ref_output_data{-1.0, -2.5, 1.0, 3.5};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor sin_table = makeInputTensor<DataType::FLOAT32>(sin_shape, sin_data, _memory_manager.get());
+  Tensor cos_table = makeInputTensor<DataType::FLOAT32>(cos_shape, cos_data, _memory_manager.get());
+
+  RoPEParams params{};
+  params.mode = RoPEMode::GPT_NEOX;
+
+  RoPE kernel(&input_tensor, &sin_table, &cos_table, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
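Reviewer note, not part of the patch: the reference output in `floatTest` can be checked by hand against the GPT_NEOX pairing used in `RoPE::evalFloat`. With head size 4 (half size 2): out[0] = 0*1.0 - 2*0.5 = -1.0, out[1] = 1*0.5 - 3*1.0 = -2.5, out[2] = 0*1.0 + 2*0.5 = 1.0, and out[3] = 1*0.5 + 3*1.0 = 3.5, matching ref_output_data{-1.0, -2.5, 1.0, 3.5}.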
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index 3e05da81e45..0a3c30fff1f 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -69,6 +69,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1118,6 +1119,26 @@ TEST_F(KernelBuilderTest, RmsNorm)
   EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
 }
 
+TEST_F(KernelBuilderTest, RoPE)
+{
+  auto *input = createInputNode();
+  auto *sin_table = createInputNode();
+  auto *cos_table = createInputNode();
+
+  auto *op = createNode<luci::CircleRoPE>();
+  op->input(input);
+  op->sin_table(sin_table);
+  op->cos_table(cos_table);
+
+  auto kernel = buildKernel<kernels::RoPE>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input);
+  checkTensor(kernel->sin_table(), sin_table);
+  checkTensor(kernel->cos_table(), cos_table);
+  checkTensor(kernel->output(), op);
+}
+
 TEST_F(KernelBuilderTest, Rsqrt)
 {
   auto *input = createInputNode();
diff --git a/compiler/luci-interpreter/src/loader/nodes/RoPE.cpp b/compiler/luci-interpreter/src/loader/nodes/RoPE.cpp
new file mode 100644
index 00000000000..722015d6d2d
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/RoPE.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/RoPE.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRoPE(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = loco::must_cast<const luci::CircleRoPE *>(circle_node);
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *sin_table = helper.getInputTensor(node->sin_table());
+  const Tensor *cos_table = helper.getInputTensor(node->cos_table());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  RoPEParams params{};
+  params.mode = node->mode();
+
+  return std::make_unique<kernels::RoPE>(input, sin_table, cos_table, output, params);
+}
+
+} // namespace luci_interpreter