From 279ecd67a6ec565d86adad8efb127ceec65dc2f3 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 20 Jan 2022 14:57:41 -0800 Subject: [PATCH 01/13] wip --- apps/fft/fft_aot_test.cpp | 4 +- apps/hannk/util/buffer_util.h | 2 +- python_bindings/src/PyBuffer.cpp | 2 +- src/Argument.h | 6 +- src/Buffer.h | 207 +++++------ src/Closure.h | 6 +- src/Generator.cpp | 2 +- src/Generator.h | 74 +++- src/Module.h | 2 +- src/Parameter.h | 3 +- src/RDom.h | 10 +- src/Realization.h | 4 +- src/runtime/HalideBuffer.h | 352 +++++++++++-------- test/correctness/halide_buffer.cpp | 82 ++++- test/generator/metadata_tester_generator.cpp | 38 +- 15 files changed, 496 insertions(+), 298 deletions(-) diff --git a/apps/fft/fft_aot_test.cpp b/apps/fft/fft_aot_test.cpp index 3be992a9b8c9..042bdb1158d1 100644 --- a/apps/fft/fft_aot_test.cpp +++ b/apps/fft/fft_aot_test.cpp @@ -61,7 +61,7 @@ int main(int argc, char **argv) { auto in = real_buffer(); for (int j = 0; j < kSize; j++) { for (int i = 0; i < kSize; i++) { - in(i, j) = signal_1d[i] + signal_1d[j]; + in(i, j, 0) = signal_1d[i] + signal_1d[j]; } } @@ -155,7 +155,7 @@ int main(int argc, char **argv) { for (size_t j = 0; j < kSize; j++) { for (size_t i = 0; i < kSize; i++) { - float sample = out(i, j); + float sample = out(i, j, 0); float expected = cos(2 * kPi * (i / 16.0f + .125f)); if (fabs(sample - expected) > .001) { std::cerr << "fft_inverse_c2r mismatch at (" << i << ", " << j << ") " << sample << " vs. " << expected << "\n"; diff --git a/apps/hannk/util/buffer_util.h b/apps/hannk/util/buffer_util.h index 85ff6bdc73da..22d71367b8b3 100644 --- a/apps/hannk/util/buffer_util.h +++ b/apps/hannk/util/buffer_util.h @@ -14,7 +14,7 @@ namespace hannk { // Using a Buffer with space for max_rank dimensions is a meaningful // win for some corner cases (when adding dimensions to > 4). 
template -using HalideBuffer = Halide::Runtime::Buffer; +using HalideBuffer = Halide::Runtime::Buffer::DynamicDims, max_rank>; // dynamic_type_dispatch is a utility for functors that want to be able // to dynamically dispatch a halide_type_t to type-specialized code. diff --git a/python_bindings/src/PyBuffer.cpp b/python_bindings/src/PyBuffer.cpp index 14f7c418359b..4391ecb9f367 100644 --- a/python_bindings/src/PyBuffer.cpp +++ b/python_bindings/src/PyBuffer.cpp @@ -393,7 +393,7 @@ void define_buffer(py::module &m) { py::arg("dirty") = true) .def("copy", &Buffer<>::copy) - .def("copy_from", &Buffer<>::copy_from) + .def("copy_from", &Buffer<>::copy_from::DynamicDims>) .def("add_dimension", (void (Buffer<>::*)()) & Buffer<>::add_dimension) diff --git a/src/Argument.h b/src/Argument.h index 99f6a2bee458..5f33057b6f41 100644 --- a/src/Argument.h +++ b/src/Argument.h @@ -12,7 +12,7 @@ namespace Halide { -template +template class Buffer; struct ArgumentEstimates { @@ -78,8 +78,8 @@ struct Argument { // Not explicit, so that you can put Buffer in an argument list, // to indicate that it shouldn't be baked into the object file, // but instead received as an argument at runtime - template - Argument(Buffer im) + template + Argument(Buffer im) : name(im.name()), kind(InputBuffer), dimensions(im.dimensions()), diff --git a/src/Buffer.h b/src/Buffer.h index ea37014bec46..a96bc1486e05 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -8,7 +8,7 @@ namespace Halide { -template +template class Buffer; struct JITUserContext; @@ -108,20 +108,23 @@ std::string buffer_type_name() { /** A Halide::Buffer is a named shared reference to a * Halide::Runtime::Buffer. * - * A Buffer can refer to a Buffer if T1 is const whenever T2 - * is const, and either T1 = T2 or T1 is void. A Buffer can + * A Buffer can refer to a Buffer if T1 is const whenever T2 + * is const, and either T1 = T2 or T1 is void. 
A Buffer can * refer to any Buffer of any non-const type, and the default * template parameter is T = void. + * + * A Buffer can refer to a Buffer if D1 == D2, + * or if D1 is -1 (meaning "dimensionality is checked at runtime, not compiletime"). */ -template +template class Buffer { Internal::IntrusivePtr contents; - template + template friend class Buffer; - template - static void assert_can_convert_from(const Buffer &other) { + template + static void assert_can_convert_from(const Buffer &other) { if (!other.defined()) { // Avoid UB of deferencing offset of a null contents ptr static_assert((!std::is_const::value || std::is_const::value), @@ -131,6 +134,8 @@ class Buffer { std::is_void::value || std::is_void::value, "type mismatch constructing Buffer"); + static_assert(Dims == DynamicDims || D2 == DynamicDims || Dims == D2, + "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality"); } else { // Don't delegate to // Runtime::Buffer::assert_can_convert_from. It might @@ -139,7 +144,8 @@ class Buffer { // debugging symbols are found, it throws an exception // when exceptions are enabled, and we can print the // actual types in question. - user_assert(Runtime::Buffer::can_convert_from(*(other.get()))) + using BufType = Runtime::Buffer; // alias because commas in user_assert() macro confuses compiler + user_assert(BufType::can_convert_from(*(other.get()))) << "Type mismatch constructing Buffer. 
Can't construct Buffer<" << Internal::buffer_type_name() << "> from Buffer<" << type_to_c_type(other.type(), false) << ">\n"; @@ -147,6 +153,9 @@ class Buffer { } public: + static constexpr int DynamicDims = -1; + static_assert(Dims == DynamicDims || Dims >= 0); + typedef T ElemType; // This class isn't final (and is subclassed from the Python binding @@ -166,22 +175,22 @@ class Buffer { Buffer &operator=(Buffer &&) noexcept = default; /** Make a Buffer from a Buffer of a different type */ - template - Buffer(const Buffer &other) + template + Buffer(const Buffer &other) : contents(other.contents) { assert_can_convert_from(other); } /** Move construct from a Buffer of a different type */ - template - Buffer(Buffer &&other) noexcept { + template + Buffer(Buffer &&other) noexcept { assert_can_convert_from(other); contents = std::move(other.contents); } /** Construct a Buffer that captures and owns an rvalue Runtime::Buffer */ - template - Buffer(Runtime::Buffer &&buf, const std::string &name = "") + template + Buffer(Runtime::Buffer &&buf, const std::string &name = "") : contents(new Internal::BufferContents) { contents->buf = std::move(buf); if (name.empty()) { @@ -200,50 +209,50 @@ class Buffer { typename = typename std::enable_if::value>::type> explicit Buffer(Type t, int first, Args... rest) - : Buffer(Runtime::Buffer(t, Internal::get_shape_from_start_of_parameter_pack(first, rest...)), + : Buffer(Runtime::Buffer(t, Internal::get_shape_from_start_of_parameter_pack(first, rest...)), Internal::get_name_from_end_of_parameter_pack(rest...)) { } explicit Buffer(const halide_buffer_t &buf, const std::string &name = "") - : Buffer(Runtime::Buffer(buf), name) { + : Buffer(Runtime::Buffer(buf), name) { } template::value>::type> explicit Buffer(int first, Args... 
rest) - : Buffer(Runtime::Buffer(Internal::get_shape_from_start_of_parameter_pack(first, rest...)), + : Buffer(Runtime::Buffer(Internal::get_shape_from_start_of_parameter_pack(first, rest...)), Internal::get_name_from_end_of_parameter_pack(rest...)) { } explicit Buffer(Type t, const std::vector &sizes, const std::string &name = "") - : Buffer(Runtime::Buffer(t, sizes), name) { + : Buffer(Runtime::Buffer(t, sizes), name) { } explicit Buffer(Type t, const std::vector &sizes, const std::vector &storage_order, const std::string &name = "") - : Buffer(Runtime::Buffer(t, sizes, storage_order), name) { + : Buffer(Runtime::Buffer(t, sizes, storage_order), name) { } explicit Buffer(const std::vector &sizes, const std::string &name = "") - : Buffer(Runtime::Buffer(sizes), name) { + : Buffer(Runtime::Buffer(sizes), name) { } explicit Buffer(const std::vector &sizes, const std::vector &storage_order, const std::string &name = "") - : Buffer(Runtime::Buffer(sizes, storage_order), name) { + : Buffer(Runtime::Buffer(sizes, storage_order), name) { } template explicit Buffer(Array (&vals)[N], const std::string &name = "") - : Buffer(Runtime::Buffer(vals), name) { + : Buffer(Runtime::Buffer(vals), name) { } template *data, int first, Args &&...rest) - : Buffer(Runtime::Buffer(t, data, Internal::get_shape_from_start_of_parameter_pack(first, rest...)), + : Buffer(Runtime::Buffer(t, data, Internal::get_shape_from_start_of_parameter_pack(first, rest...)), Internal::get_name_from_end_of_parameter_pack(rest...)) { } @@ -261,28 +270,28 @@ class Buffer { Internal::add_const_if_T_is_const *data, const std::vector &sizes, const std::string &name = "") - : Buffer(Runtime::Buffer(t, data, sizes, name)) { + : Buffer(Runtime::Buffer(t, data, sizes, name)) { } template::value>::type> explicit Buffer(T *data, int first, Args &&...rest) - : Buffer(Runtime::Buffer(data, Internal::get_shape_from_start_of_parameter_pack(first, rest...)), + : Buffer(Runtime::Buffer(data, 
Internal::get_shape_from_start_of_parameter_pack(first, rest...)), Internal::get_name_from_end_of_parameter_pack(rest...)) { } explicit Buffer(T *data, const std::vector &sizes, const std::string &name = "") - : Buffer(Runtime::Buffer(data, sizes), name) { + : Buffer(Runtime::Buffer(data, sizes), name) { } explicit Buffer(Type t, Internal::add_const_if_T_is_const *data, const std::vector &sizes, const std::string &name = "") - : Buffer(Runtime::Buffer(t, data, sizes), name) { + : Buffer(Runtime::Buffer(t, data, sizes), name) { } explicit Buffer(Type t, @@ -290,66 +299,60 @@ class Buffer { int d, const halide_dimension_t *shape, const std::string &name = "") - : Buffer(Runtime::Buffer(t, data, d, shape), name) { + : Buffer(Runtime::Buffer(t, data, d, shape), name) { } explicit Buffer(T *data, int d, const halide_dimension_t *shape, const std::string &name = "") - : Buffer(Runtime::Buffer(data, d, shape), name) { + : Buffer(Runtime::Buffer(data, d, shape), name) { } - static Buffer make_scalar(const std::string &name = "") { - return Buffer(Runtime::Buffer::make_scalar(), name); + static Buffer make_scalar(const std::string &name = "") { + return Buffer(Runtime::Buffer::make_scalar(), name); } static Buffer<> make_scalar(Type t, const std::string &name = "") { return Buffer<>(Runtime::Buffer<>::make_scalar(t), name); } - static Buffer make_scalar(T *data, const std::string &name = "") { - return Buffer(Runtime::Buffer::make_scalar(data), name); + static Buffer make_scalar(T *data, const std::string &name = "") { + return Buffer(Runtime::Buffer::make_scalar(data), name); } - static Buffer make_interleaved(int width, int height, int channels, const std::string &name = "") { - return Buffer(Runtime::Buffer::make_interleaved(width, height, channels), - name); + static Buffer make_interleaved(int width, int height, int channels, const std::string &name = "") { + return Buffer(Runtime::Buffer::make_interleaved(width, height, channels), name); } static Buffer<> 
make_interleaved(Type t, int width, int height, int channels, const std::string &name = "") { - return Buffer<>(Runtime::Buffer<>::make_interleaved(t, width, height, channels), - name); + return Buffer<>(Runtime::Buffer<>::make_interleaved(t, width, height, channels), name); } - static Buffer make_interleaved(T *data, int width, int height, int channels, const std::string &name = "") { - return Buffer(Runtime::Buffer::make_interleaved(data, width, height, channels), - name); + static Buffer make_interleaved(T *data, int width, int height, int channels, const std::string &name = "") { + return Buffer(Runtime::Buffer::make_interleaved(data, width, height, channels), name); } static Buffer> make_interleaved(Type t, T *data, int width, int height, int channels, const std::string &name = "") { using T2 = Internal::add_const_if_T_is_const; - return Buffer(Runtime::Buffer::make_interleaved(t, data, width, height, channels), - name); + return Buffer(Runtime::Buffer::make_interleaved(t, data, width, height, channels), name); } - template - static Buffer make_with_shape_of(Buffer src, - void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr, - const std::string &name = "") { - return Buffer(Runtime::Buffer::make_with_shape_of(*src.get(), allocate_fn, deallocate_fn), - name); + template + static Buffer make_with_shape_of(Buffer src, + void *(*allocate_fn)(size_t) = nullptr, + void (*deallocate_fn)(void *) = nullptr, + const std::string &name = "") { + return Buffer(Runtime::Buffer::make_with_shape_of(*src.get(), allocate_fn, deallocate_fn), name); } - template - static Buffer make_with_shape_of(const Runtime::Buffer &src, - void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr, - const std::string &name = "") { - return Buffer(Runtime::Buffer::make_with_shape_of(src, allocate_fn, deallocate_fn), - name); + template + static Buffer make_with_shape_of(const Runtime::Buffer &src, + void *(*allocate_fn)(size_t) = nullptr, + 
void (*deallocate_fn)(void *) = nullptr, + const std::string &name = "") { + return Buffer(Runtime::Buffer::make_with_shape_of(src, allocate_fn, deallocate_fn), name); } // @} @@ -365,8 +368,8 @@ class Buffer { // @} /** Check if two Buffer objects point to the same underlying Buffer */ - template - bool same_as(const Buffer &other) const { + template + bool same_as(const Buffer &other) const { return (const void *)(contents.get()) == (const void *)(other.contents.get()); } @@ -379,28 +382,28 @@ class Buffer { /** Get a pointer to the underlying Runtime::Buffer */ // @{ - Runtime::Buffer *get() { + Runtime::Buffer *get() { // It's already type-checked, so no need to use as. - return (Runtime::Buffer *)(&contents->buf); + return (Runtime::Buffer *)(&contents->buf); } - const Runtime::Buffer *get() const { - return (const Runtime::Buffer *)(&contents->buf); + const Runtime::Buffer *get() const { + return (const Runtime::Buffer *)(&contents->buf); } // @} // We forward numerous methods from the underlying Buffer -#define HALIDE_BUFFER_FORWARD_CONST(method) \ - template \ - auto method(Args &&...args) const->decltype(std::declval>().method(std::forward(args)...)) { \ - user_assert(defined()) << "Undefined buffer calling const method " #method "\n"; \ - return get()->method(std::forward(args)...); \ +#define HALIDE_BUFFER_FORWARD_CONST(method) \ + template \ + auto method(Args &&...args) const->decltype(std::declval>().method(std::forward(args)...)) { \ + user_assert(defined()) << "Undefined buffer calling const method " #method "\n"; \ + return get()->method(std::forward(args)...); \ } -#define HALIDE_BUFFER_FORWARD(method) \ - template \ - auto method(Args &&...args)->decltype(std::declval>().method(std::forward(args)...)) { \ - user_assert(defined()) << "Undefined buffer calling method " #method "\n"; \ - return get()->method(std::forward(args)...); \ +#define HALIDE_BUFFER_FORWARD(method) \ + template \ + auto method(Args 
&&...args)->decltype(std::declval>().method(std::forward(args)...)) { \ + user_assert(defined()) << "Undefined buffer calling method " #method "\n"; \ + return get()->method(std::forward(args)...); \ } // This is a weird-looking but effective workaround for a deficiency in "perfect forwarding": @@ -413,10 +416,10 @@ class Buffer { // and forward it as is, we can just use ... to allow an arbitrary number of commas, // then use __VA_ARGS__ to forward the mess as-is, and while it looks horrible, it // works. -#define HALIDE_BUFFER_FORWARD_INITIALIZER_LIST(method, ...) \ - inline auto method(const __VA_ARGS__ &a)->decltype(std::declval>().method(a)) { \ - user_assert(defined()) << "Undefined buffer calling method " #method "\n"; \ - return get()->method(a); \ +#define HALIDE_BUFFER_FORWARD_INITIALIZER_LIST(method, ...) \ + inline auto method(const __VA_ARGS__ &a)->decltype(std::declval>().method(a)) { \ + user_assert(defined()) << "Undefined buffer calling method " #method "\n"; \ + return get()->method(a); \ } /** Does the same thing as the equivalent Halide::Runtime::Buffer method */ @@ -475,44 +478,50 @@ class Buffer { #undef HALIDE_BUFFER_FORWARD_CONST template - Buffer &for_each_value(Fn &&f, Args... other_buffers) { + Buffer &for_each_value(Fn &&f, Args... other_buffers) { get()->for_each_value(std::forward(f), (*std::forward(other_buffers).get())...); return *this; } template - const Buffer &for_each_value(Fn &&f, Args... other_buffers) const { + const Buffer &for_each_value(Fn &&f, Args... 
other_buffers) const { get()->for_each_value(std::forward(f), (*std::forward(other_buffers).get())...); return *this; } template - Buffer &for_each_element(Fn &&f) { + Buffer &for_each_element(Fn &&f) { get()->for_each_element(std::forward(f)); return *this; } template - const Buffer &for_each_element(Fn &&f) const { + const Buffer &for_each_element(Fn &&f) const { get()->for_each_element(std::forward(f)); return *this; } template - Buffer &fill(FnOrValue &&f) { + Buffer &fill(FnOrValue &&f) { get()->fill(std::forward(f)); return *this; } - static constexpr bool has_static_halide_type = Runtime::Buffer::has_static_halide_type; + static constexpr bool has_static_halide_type = Runtime::Buffer::has_static_halide_type; static halide_type_t static_halide_type() { - return Runtime::Buffer::static_halide_type(); + return Runtime::Buffer::static_halide_type(); } - template - static bool can_convert_from(const Buffer &other) { - return Halide::Runtime::Buffer::can_convert_from(*other.get()); + static constexpr bool has_static_dimensions = Runtime::Buffer::has_static_dimensions; + + static int static_dimensions() { + return Runtime::Buffer::static_dimensions(); + } + + template + static bool can_convert_from(const Buffer &other) { + return Halide::Runtime::Buffer::can_convert_from(*other.get()); } // Note that since Runtime::Buffer stores halide_type_t rather than Halide::Type, @@ -524,42 +533,42 @@ class Buffer { } template - Buffer as() const { - return Buffer(*this); + Buffer as() const { + return Buffer(*this); } - Buffer copy() const { - return Buffer(std::move(contents->buf.as().copy())); + Buffer copy() const { + return Buffer(std::move(contents->buf.as().copy())); } - template - void copy_from(const Buffer &other) { + template + void copy_from(const Buffer &other) { contents->buf.copy_from(*other.get()); } template - auto operator()(int first, Args &&...args) -> decltype(std::declval>()(first, std::forward(args)...)) { + auto operator()(int first, Args &&...args) -> 
decltype(std::declval>()(first, std::forward(args)...)) { return (*get())(first, std::forward(args)...); } template - auto operator()(int first, Args &&...args) const -> decltype(std::declval>()(first, std::forward(args)...)) { + auto operator()(int first, Args &&...args) const -> decltype(std::declval>()(first, std::forward(args)...)) { return (*get())(first, std::forward(args)...); } - auto operator()(const int *pos) -> decltype(std::declval>()(pos)) { + auto operator()(const int *pos) -> decltype(std::declval>()(pos)) { return (*get())(pos); } - auto operator()(const int *pos) const -> decltype(std::declval>()(pos)) { + auto operator()(const int *pos) const -> decltype(std::declval>()(pos)) { return (*get())(pos); } - auto operator()() -> decltype(std::declval>()()) { + auto operator()() -> decltype(std::declval>()()) { return (*get())(); } - auto operator()() const -> decltype(std::declval>()()) { + auto operator()() const -> decltype(std::declval>()()) { return (*get())(); } // @} diff --git a/src/Closure.h b/src/Closure.h index 85b23a1cb31c..8be9831465d5 100644 --- a/src/Closure.h +++ b/src/Closure.h @@ -8,15 +8,13 @@ #include #include +#include "Buffer.h" #include "IR.h" #include "IRVisitor.h" #include "Scope.h" namespace Halide { -template -class Buffer; - namespace Internal { /** A helper class to manage closures. 
Walks over a statement and @@ -66,7 +64,7 @@ class Closure : public IRVisitor { protected: void found_buffer_ref(const std::string &name, Type type, - bool read, bool written, const Halide::Buffer &image); + bool read, bool written, const Halide::Buffer<> &image); public: Closure() = default; diff --git a/src/Generator.cpp b/src/Generator.cpp index 4d3ac8cf91f0..a4181887bcaf 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -2259,7 +2259,7 @@ void generator_test() { Input input_func_typed{"input_func_typed", Int(16), 1}; Input input_func_untyped{"input_func_untyped", 1}; Input input_func_array{"input_func_array", 1}; - Input> input_buffer_typed{"input_buffer_typed", 3}; + Input> input_buffer_typed{"input_buffer_typed"}; Input> input_buffer_untyped{"input_buffer_untyped"}; Output output{"output", Float(32), 1}; diff --git a/src/Generator.h b/src/Generator.h index 47653439c107..e10171b1cbf2 100644 --- a/src/Generator.h +++ b/src/Generator.h @@ -283,10 +283,6 @@ #endif namespace Halide { - -template -class Buffer; - namespace Internal { void generator_test(); @@ -1679,16 +1675,20 @@ class GeneratorInput_Buffer : public GeneratorInputImpl { explicit GeneratorInput_Buffer(const std::string &name) : Super(name, IOKind::Buffer, TBase::has_static_halide_type ? std::vector{TBase::static_halide_type()} : std::vector{}, - -1) { + TBase::has_static_dimensions ? TBase::static_dimensions() : -1) { } GeneratorInput_Buffer(const std::string &name, const Type &t, int d = -1) : Super(name, IOKind::Buffer, {t}, d) { static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Input> if T is void or omitted."); + static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Input> if D is -1 or omitted."); } GeneratorInput_Buffer(const std::string &name, int d) - : Super(name, IOKind::Buffer, TBase::has_static_halide_type ? 
std::vector{TBase::static_halide_type()} : std::vector{}, d) { + : Super(name, IOKind::Buffer, + TBase::has_static_halide_type ? std::vector{TBase::static_halide_type()} : std::vector{}, + d) { + static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Input> if D is -1 or omitted."); } template @@ -2480,12 +2480,52 @@ class GeneratorOutput_Buffer : public GeneratorOutputImpl { return t; } - GeneratorOutput_Buffer(const std::string &name, const std::vector &t = {}, int d = -1) + explicit GeneratorOutput_Buffer(const std::string &name) + : Super(name, IOKind::Buffer, my_types({}), -1) { + } + + GeneratorOutput_Buffer(const std::string &name, const std::vector &t, int d) : Super(name, IOKind::Buffer, my_types(t), d) { + internal_assert(!t.empty()); + internal_assert(d != -1); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); + } + + GeneratorOutput_Buffer(const std::string &name, const std::vector &t) + : Super(name, IOKind::Buffer, my_types(t), -1) { + internal_assert(!t.empty()); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + } + + GeneratorOutput_Buffer(const std::string &name, int d) + : Super(name, IOKind::Buffer, my_types({}), d) { + internal_assert(d != -1); + static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); + } + + GeneratorOutput_Buffer(size_t array_size, const std::string &name) + : Super(array_size, name, IOKind::Buffer, my_types({}), -1) { } - GeneratorOutput_Buffer(size_t array_size, const std::string &name, const std::vector &t = {}, int d = -1) + GeneratorOutput_Buffer(size_t array_size, const std::string &name, const std::vector &t, int d) : Super(array_size, 
name, IOKind::Buffer, my_types(t), d) { + internal_assert(!t.empty()); + internal_assert(d != -1); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); + } + + GeneratorOutput_Buffer(size_t array_size, const std::string &name, const std::vector &t) + : Super(array_size, name, IOKind::Buffer, my_types(t), -1) { + internal_assert(!t.empty()); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + } + + GeneratorOutput_Buffer(size_t array_size, const std::string &name, int d) + : Super(array_size, name, IOKind::Buffer, my_types({}), d) { + internal_assert(d != -1); + static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); } HALIDE_NO_USER_CODE_INLINE std::string get_c_type() const override { @@ -2605,10 +2645,18 @@ class GeneratorOutput_Func : public GeneratorOutputImpl { : Super(name, IOKind::Function, std::vector{}, -1) { } - GeneratorOutput_Func(const std::string &name, const std::vector &t, int d = -1) + GeneratorOutput_Func(const std::string &name, const std::vector &t, int d) : Super(name, IOKind::Function, t, d) { } + GeneratorOutput_Func(const std::string &name, const std::vector &t) + : Super(name, IOKind::Function, t, -1) { + } + + GeneratorOutput_Func(const std::string &name, int d) + : Super(name, IOKind::Function, {}, d) { + } + GeneratorOutput_Func(size_t array_size, const std::string &name, const std::vector &t, int d) : Super(array_size, name, IOKind::Function, t, d) { } @@ -2711,7 +2759,7 @@ class GeneratorOutput : public Internal::GeneratorOutputImplBase { } explicit GeneratorOutput(const std::string &name, int d) - : Super(name, {}, d) { + : Super(name, d) { } explicit GeneratorOutput(const std::string &name, 
const Type &t) @@ -2731,7 +2779,7 @@ class GeneratorOutput : public Internal::GeneratorOutputImplBase { } explicit GeneratorOutput(size_t array_size, const std::string &name, int d) - : Super(array_size, name, {}, d) { + : Super(array_size, name, d) { } explicit GeneratorOutput(size_t array_size, const std::string &name, const Type &t) @@ -3007,8 +3055,8 @@ class NamesInterface { } template using GeneratorParam = Halide::GeneratorParam; - template - using Buffer = Halide::Buffer; + template + using Buffer = Halide::Buffer; template using Param = Halide::Param; static inline Type Bool(int lanes = 1) { diff --git a/src/Module.h b/src/Module.h index aed88a344057..3f0bc5163062 100644 --- a/src/Module.h +++ b/src/Module.h @@ -19,7 +19,7 @@ namespace Halide { -template +template class Buffer; struct Target; diff --git a/src/Parameter.h b/src/Parameter.h index a8780249282d..8498952366ac 100644 --- a/src/Parameter.h +++ b/src/Parameter.h @@ -6,6 +6,7 @@ */ #include +#include "Buffer.h" #include "IntrusivePtr.h" #include "Type.h" #include "Util.h" // for HALIDE_NO_USER_CODE_INLINE @@ -14,8 +15,6 @@ namespace Halide { struct ArgumentEstimates; -template -class Buffer; struct Expr; struct Type; enum class MemoryType; diff --git a/src/RDom.h b/src/RDom.h index 4e0d8d2b3d89..c58eebe8f257 100644 --- a/src/RDom.h +++ b/src/RDom.h @@ -17,7 +17,7 @@ namespace Halide { -template +template class Buffer; class OutputImageParam; @@ -227,11 +227,11 @@ class RDom { * a given Buffer or ImageParam. Has the same dimensionality as * the argument. 
*/ // @{ - RDom(const Buffer &); + RDom(const Buffer &); RDom(const OutputImageParam &); - template - HALIDE_NO_USER_CODE_INLINE RDom(const Buffer &im) - : RDom(Buffer(im)) { + template + HALIDE_NO_USER_CODE_INLINE RDom(const Buffer &im) + : RDom(Buffer(im)) { } // @} diff --git a/src/Realization.h b/src/Realization.h index bc7f227b254c..d2ed848ee54d 100644 --- a/src/Realization.h +++ b/src/Realization.h @@ -4,6 +4,7 @@ #include #include +#include "Buffer.h" #include "Util.h" // for all_are_convertible /** \file @@ -13,9 +14,6 @@ namespace Halide { -template -class Buffer; - /** A Realization is a vector of references to existing Buffer objects. * A pipeline with multiple outputs realize to a Realization. */ class Realization { diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h index e16c97dd57c1..fd70420ee7c0 100644 --- a/src/runtime/HalideBuffer.h +++ b/src/runtime/HalideBuffer.h @@ -39,7 +39,7 @@ namespace Halide { namespace Runtime { // Forward-declare our Buffer class -template +template class Buffer; // A helper to check if a parameter pack is entirely implicitly @@ -116,24 +116,29 @@ struct DeviceRefCount { * The template parameter T is the element type. For buffers where the * element type is unknown, or may vary, use void or const void. * - * D is the maximum number of dimensions that can be represented using - * space inside the class itself. Set it to the maximum dimensionality + * The template parameter Dims is the number of dimensions. For buffers where + * the dimensionality type is unknown at, or may vary, use -1 (or Buffer::DynamicDims). + * + * InClassDimStorage is the maximum number of dimensions that can be represented + * using space inside the class itself. Set it to the maximum dimensionality * you expect this buffer to be. If the actual dimensionality exceeds - * this, heap storage is allocated to track the shape of the buffer. D - * defaults to 4, which should cover nearly all usage. 
+ * this, heap storage is allocated to track the shape of the buffer. + * InClassDimStorage defaults to 4, which should cover nearly all usage. * * The class optionally allocates and owns memory for the image using * a shared pointer allocated with the provided allocator. If they are * null, malloc and free are used. Any device-side allocation is * considered as owned if and only if the host-side allocation is * owned. */ -template +template class Buffer { /** The underlying halide_buffer_t */ halide_buffer_t buf = {}; /** Some in-class storage for shape of the dimensions. */ - halide_dimension_t shape[D]; + halide_dimension_t shape[InClassDimStorage]; /** The allocation owned by this Buffer. NULL if the Buffer does not * own the memory. */ @@ -171,7 +176,7 @@ class Buffer { /** Get the Halide type of T. Callers should not use the result if * has_static_halide_type is false. */ - static halide_type_t static_halide_type() { + static constexpr halide_type_t static_halide_type() { return halide_type_of::type>(); } @@ -180,6 +185,18 @@ class Buffer { return alloc != nullptr; } + static constexpr int DynamicDims = -1; + + static constexpr bool has_static_dimensions = (Dims != DynamicDims); + + /** Callers should not use the result if + * has_static_dimensions is false. */ + static constexpr int static_dimensions() { + return Dims; + } + + static_assert(!has_static_dimensions || static_dimensions() >= 0); + private: /** Increment the reference count of any owned allocation */ void incref() const { @@ -202,15 +219,15 @@ class Buffer { // Note that this is called "cropped" but can also encompass a slice/embed // operation as well. 
struct DevRefCountCropped : DeviceRefCount { - Buffer cropped_from; - DevRefCountCropped(const Buffer &cropped_from) + Buffer cropped_from; + DevRefCountCropped(const Buffer &cropped_from) : cropped_from(cropped_from) { ownership = BufferDeviceOwnership::Cropped; } }; /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */ - void crop_from(const Buffer &cropped_from) { + void crop_from(const Buffer &cropped_from) { assert(dev_ref_count == nullptr); dev_ref_count = new DevRefCountCropped(cropped_from); } @@ -274,11 +291,27 @@ class Buffer { } } + template + void make_static_shape_storage() { + static_assert(Dims == DynamicDims || Dims == DimsSpecified, + "Number of arguments to Buffer() does not match static dimensionality"); + buf.dimensions = DimsSpecified; + if constexpr (Dims == DynamicDims) { + buf.dim = (DimsSpecified <= InClassDimStorage) ? shape : new halide_dimension_t[DimsSpecified]; + } else { + static_assert(InClassDimStorage >= Dims); + buf.dim = shape; + } + } + void make_shape_storage(const int dimensions) { + if (Dims != DynamicDims && Dims != dimensions) { + assert(false && "Number of arguments to Buffer() does not match static dimensionality"); + } // This should usually be inlined, so if dimensions is statically known, // we can skip the call to new buf.dimensions = dimensions; - buf.dim = (dimensions <= D) ? shape : new halide_dimension_t[dimensions]; + buf.dim = (dimensions <= InClassDimStorage) ? 
shape : new halide_dimension_t[dimensions]; } void copy_shape_from(const halide_buffer_t &other) { @@ -287,8 +320,8 @@ class Buffer { std::copy(other.dim, other.dim + other.dimensions, buf.dim); } - template - void move_shape_from(Buffer &&other) { + template + void move_shape_from(Buffer &&other) { if (other.shape == other.buf.dim) { copy_shape_from(other.buf); } else { @@ -389,10 +422,10 @@ class Buffer { } } - void complete_device_crop(Buffer &result_host_cropped) const { + void complete_device_crop(Buffer &result_host_cropped) const { assert(buf.device_interface != nullptr); if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) { - const Buffer *cropped_from = this; + const Buffer *cropped_from = this; // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here? // is it possible to get to this point without incref having run at least once since // the device field was set? (I.e. in the internal logic of crop. incref might have been @@ -406,6 +439,8 @@ class Buffer { /** slice a single dimension without handling device allocation. */ void slice_host(int d, int pos) { + static_assert(Dims == DynamicDims); + assert(dimensions() > 0); assert(d >= 0 && d < dimensions()); assert(pos >= dim(d).min() && pos <= dim(d).max()); buf.dimensions--; @@ -419,10 +454,10 @@ class Buffer { buf.dim[buf.dimensions] = {0, 0, 0}; } - void complete_device_slice(Buffer &result_host_sliced, int d, int pos) const { + void complete_device_slice(Buffer &result_host_sliced, int d, int pos) const { assert(buf.device_interface != nullptr); if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) { - const Buffer *sliced_from = this; + const Buffer *sliced_from = this; // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here? // is it possible to get to this point without incref having run at least once since // the device field was set? (I.e. 
in the internal logic of slice. incref might have been @@ -521,7 +556,9 @@ class Buffer { } /** Get the dimensionality of the buffer. */ + // TODO: make constexpr, optimize for const case int dimensions() const { + assert(Dims == DynamicDims || Dims == buf.dimensions); return buf.dimensions; } @@ -558,7 +595,7 @@ class Buffer { Buffer() : shape() { buf.type = static_halide_type(); - make_shape_storage(0); + make_static_shape_storage<0>(); } /** Make a Buffer from a halide_buffer_t */ @@ -569,46 +606,55 @@ class Buffer { } /** Give Buffers access to the members of Buffers of different dimensionalities and types. */ - template + template friend class Buffer; private: - template + template static void static_assert_can_convert_from() { static_assert((!std::is_const::value || std::is_const::value), "Can't convert from a Buffer to a Buffer"); static_assert(std::is_same::type, typename std::remove_const::type>::value || - T_is_void || Buffer::T_is_void, + T_is_void || Buffer::T_is_void, "type mismatch constructing Buffer"); + static_assert(Dims == DynamicDims || D2 == DynamicDims || Dims == D2, + "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality"); } public: - /** Determine if a Buffer can be constructed from some other Buffer type. + /** Determine if a Buffer can be constructed from some other Buffer type. * If this can be determined at compile time, fail with a static assert; otherwise * return a boolean based on runtime typing. 
*/ - template - static bool can_convert_from(const Buffer &other) { - static_assert_can_convert_from(); - if (Buffer::T_is_void && !T_is_void) { - return other.type() == static_halide_type(); + template + static bool can_convert_from(const Buffer &other) { + static_assert_can_convert_from(); + if (Buffer::T_is_void && !T_is_void) { + if (other.type() != static_halide_type()) { + return false; + } + } + if (Dims != DynamicDims) { + if (other.dimensions() != Dims) { + return false; + } } return true; } - /** Fail an assertion at runtime or compile-time if an Buffer + /** Fail an assertion at runtime or compile-time if a Buffer * cannot be constructed from some other Buffer type. */ - template - static void assert_can_convert_from(const Buffer &other) { + template + static void assert_can_convert_from(const Buffer &other) { // Explicitly call static_assert_can_convert_from() here so // that we always get compile-time checking, even if compiling with // assertions disabled. - static_assert_can_convert_from(); + static_assert_can_convert_from(); assert(can_convert_from(other)); } /** Copy constructor. Does not copy underlying data. */ - Buffer(const Buffer &other) + Buffer(const Buffer &other) : buf(other.buf), alloc(other.alloc) { other.incref(); @@ -617,13 +663,13 @@ class Buffer { } /** Construct a Buffer from a Buffer of different dimensionality - * and type. Asserts that the type matches (at runtime, if one of - * the types is void). Note that this constructor is + * and type. Asserts that the type and dimensionality match (at runtime, + * if one of the types is void). Note that this constructor is * implicit. This, for example, lets you pass things like * Buffer or Buffer to functions expected * Buffer.
*/ - template - Buffer(const Buffer &other) + template + Buffer(const Buffer &other) : buf(other.buf), alloc(other.alloc) { assert_can_convert_from(other); @@ -633,36 +679,36 @@ class Buffer { } /** Move constructor */ - Buffer(Buffer &&other) noexcept + Buffer(Buffer &&other) noexcept : buf(other.buf), alloc(other.alloc), dev_ref_count(other.dev_ref_count) { other.dev_ref_count = nullptr; other.alloc = nullptr; - move_shape_from(std::forward>(other)); + move_shape_from(std::forward>(other)); other.buf = halide_buffer_t(); } /** Move-construct a Buffer from a Buffer of different * dimensionality and type. Asserts that the types match (at * runtime if one of the types is void). */ - template - Buffer(Buffer &&other) + template + Buffer(Buffer &&other) : buf(other.buf), alloc(other.alloc), dev_ref_count(other.dev_ref_count) { assert_can_convert_from(other); other.dev_ref_count = nullptr; other.alloc = nullptr; - move_shape_from(std::forward>(other)); + move_shape_from(std::forward>(other)); other.buf = halide_buffer_t(); } /** Assign from another Buffer of possibly-different * dimensionality and type. Asserts that the types match (at * runtime if one of the types is void). */ - template - Buffer &operator=(const Buffer &other) { + template + Buffer &operator=(const Buffer &other) { if ((const void *)this == (const void *)&other) { return *this; } @@ -678,7 +724,7 @@ class Buffer { } /** Standard assignment operator */ - Buffer &operator=(const Buffer &other) { + Buffer &operator=(const Buffer &other) { // The cast to void* here is just to satisfy clang-tidy if ((const void *)this == (const void *)&other) { return *this; @@ -696,8 +742,8 @@ class Buffer { /** Move from another Buffer of possibly-different * dimensionality and type. Asserts that the types match (at * runtime if one of the types is void). 
*/ - template - Buffer &operator=(Buffer &&other) { + template + Buffer &operator=(Buffer &&other) { assert_can_convert_from(other); decref(); alloc = other.alloc; @@ -706,13 +752,13 @@ class Buffer { other.dev_ref_count = nullptr; free_shape_storage(); buf = other.buf; - move_shape_from(std::forward>(other)); + move_shape_from(std::forward>(other)); other.buf = halide_buffer_t(); return *this; } /** Standard move-assignment operator */ - Buffer &operator=(Buffer &&other) noexcept { + Buffer &operator=(Buffer &&other) noexcept { decref(); alloc = other.alloc; other.alloc = nullptr; @@ -720,7 +766,7 @@ class Buffer { other.dev_ref_count = nullptr; free_shape_storage(); buf = other.buf; - move_shape_from(std::forward>(other)); + move_shape_from(std::forward>(other)); other.buf = halide_buffer_t(); return *this; } @@ -792,7 +838,7 @@ class Buffer { int extents[] = {first, (int)rest...}; buf.type = t; constexpr int buf_dimensions = 1 + (int)(sizeof...(rest)); - make_shape_storage(buf_dimensions); + make_static_shape_storage(); initialize_shape(extents); if (!Internal::any_zero(extents)) { check_overflow(); @@ -812,7 +858,7 @@ class Buffer { int extents[] = {first}; buf.type = static_halide_type(); constexpr int buf_dimensions = 1; - make_shape_storage(buf_dimensions); + make_static_shape_storage(); initialize_shape(extents); if (first != 0) { check_overflow(); @@ -828,7 +874,7 @@ class Buffer { int extents[] = {first, second, (int)rest...}; buf.type = static_halide_type(); constexpr int buf_dimensions = 2 + (int)(sizeof...(rest)); - make_shape_storage(buf_dimensions); + make_static_shape_storage(); initialize_shape(extents); if (!Internal::any_zero(extents)) { check_overflow(); @@ -843,6 +889,7 @@ class Buffer { assert(static_halide_type() == t); } buf.type = t; + // make_shape_storage() will do a runtime check that dimensionality matches. 
make_shape_storage((int)sizes.size()); initialize_shape(sizes); if (!Internal::any_zero(sizes)) { @@ -885,6 +932,7 @@ class Buffer { * take ownership of the data, and does not set the host_dirty flag. */ template explicit Buffer(Array (&vals)[N]) { + // TODO: this could probably be made constexpr const int buf_dimensions = dimensionality_of_array(vals); buf.type = scalar_type_of_array(vals); buf.host = (uint8_t *)vals; @@ -904,9 +952,9 @@ class Buffer { } int extents[] = {first, (int)rest...}; buf.type = t; - constexpr int buf_dimensions = 1 + (int)(sizeof...(rest)); buf.host = (uint8_t *)const_cast(data); - make_shape_storage(buf_dimensions); + constexpr int buf_dimensions = 1 + (int)(sizeof...(rest)); + make_static_shape_storage(); initialize_shape(extents); } @@ -918,9 +966,9 @@ class Buffer { explicit Buffer(T *data, int first, Args &&...rest) { int extents[] = {first, (int)rest...}; buf.type = static_halide_type(); - constexpr int buf_dimensions = 1 + (int)(sizeof...(rest)); buf.host = (uint8_t *)const_cast::type *>(data); - make_shape_storage(buf_dimensions); + constexpr int buf_dimensions = 1 + (int)(sizeof...(rest)); + make_static_shape_storage(); initialize_shape(extents); } @@ -1021,9 +1069,9 @@ class Buffer { * Buffer, or converting a Buffer& to Buffer&. * Does a runtime assert if the source buffer type is void. */ template - HALIDE_ALWAYS_INLINE Buffer &as() & { - Buffer::assert_can_convert_from(*this); - return *((Buffer *)this); + HALIDE_ALWAYS_INLINE Buffer &as() & { + Buffer::assert_can_convert_from(*this); + return *((Buffer *)this); } /** Return a const typed reference to this Buffer. Useful for @@ -1031,37 +1079,37 @@ class Buffer { * reference to another Buffer type. Does a runtime assert if the * source buffer type is void. 
*/ template - HALIDE_ALWAYS_INLINE const Buffer &as() const & { - Buffer::assert_can_convert_from(*this); - return *((const Buffer *)this); + HALIDE_ALWAYS_INLINE const Buffer &as() const & { + Buffer::assert_can_convert_from(*this); + return *((const Buffer *)this); } /** Returns this rval Buffer with a different type attached. Does * a dynamic type check if the source type is void. */ template - HALIDE_ALWAYS_INLINE Buffer as() && { - Buffer::assert_can_convert_from(*this); - return *((Buffer *)this); + HALIDE_ALWAYS_INLINE Buffer as() && { + Buffer::assert_can_convert_from(*this); + return *((Buffer *)this); } /** as_const() is syntactic sugar for .as(), to avoid the need * to recapitulate the type argument. */ // @{ HALIDE_ALWAYS_INLINE - Buffer::type, D> &as_const() & { + Buffer::type, Dims, InClassDimStorage> &as_const() & { // Note that we can skip the assert_can_convert_from(), since T -> const T // conversion is always legal. - return *((Buffer::type, D> *)this); + return *((Buffer::type, Dims, InClassDimStorage> *)this); } HALIDE_ALWAYS_INLINE - const Buffer::type, D> &as_const() const & { - return *((const Buffer::type, D> *)this); + const Buffer::type, Dims, InClassDimStorage> &as_const() const & { + return *((const Buffer::type, Dims, InClassDimStorage> *)this); } HALIDE_ALWAYS_INLINE - Buffer::type, D> as_const() && { - return *((Buffer::type, D> *)this); + Buffer::type, Dims, InClassDimStorage> as_const() && { + return *((Buffer::type, Dims, InClassDimStorage> *)this); } // @} @@ -1109,9 +1157,9 @@ class Buffer { * can easily cast it back to Buffer if desired, which is * always safe and free.) 
*/ - Buffer copy(void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr) const { - Buffer dst = Buffer::make_with_shape_of(*this, allocate_fn, deallocate_fn); + Buffer copy(void *(*allocate_fn)(size_t) = nullptr, + void (*deallocate_fn)(void *) = nullptr) const { + Buffer dst = Buffer::make_with_shape_of(*this, allocate_fn, deallocate_fn); dst.copy_from(*this); return dst; } @@ -1120,10 +1168,11 @@ class Buffer { * (vs. keeping the same memory layout as the original). Requires that 'this' * has exactly 3 dimensions. */ - Buffer copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr) const { + Buffer copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr, + void (*deallocate_fn)(void *) = nullptr) const { + static_assert(Dims == DynamicDims || Dims == 3); assert(dimensions() == 3); - Buffer dst = Buffer::make_interleaved(nullptr, width(), height(), channels()); + Buffer dst = Buffer::make_interleaved(nullptr, width(), height(), channels()); dst.set_min(min(0), min(1), min(2)); dst.allocate(allocate_fn, deallocate_fn); dst.copy_from(*this); @@ -1133,8 +1182,8 @@ class Buffer { /** Like copy(), but the copy is created in planar memory layout * (vs. keeping the same memory layout as the original). 
*/ - Buffer copy_to_planar(void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr) const { + Buffer copy_to_planar(void *(*allocate_fn)(size_t) = nullptr, + void (*deallocate_fn)(void *) = nullptr) const { std::vector mins, extents; const int dims = dimensions(); mins.reserve(dims); @@ -1143,7 +1192,7 @@ class Buffer { mins.push_back(dim(d).min()); extents.push_back(dim(d).extent()); } - Buffer dst = Buffer(nullptr, extents); + Buffer dst = Buffer(nullptr, extents); dst.set_min(mins); dst.allocate(allocate_fn, deallocate_fn); dst.copy_from(*this); @@ -1159,7 +1208,7 @@ class Buffer { * my_func(input.alias(), output); * }\endcode */ - inline Buffer alias() const { + inline Buffer alias() const { return *this; } @@ -1172,18 +1221,20 @@ class Buffer { * to the correct location first like so: \code * framebuffer.copy_from(sprite.translated({x, y})); \endcode */ - template - void copy_from(Buffer src) { + template + void copy_from(Buffer src) { static_assert(!std::is_const::value, "Cannot call copy_from() on a Buffer"); assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination."); assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source."); - Buffer dst(*this); + Buffer dst(*this); + static_assert(Dims == DynamicDims || D2 == DynamicDims || Dims == D2); assert(src.dimensions() == dst.dimensions()); // Trim the copy to the region in common - for (int i = 0; i < dimensions(); i++) { + const int d = dimensions(); + for (int i = 0; i < d; i++) { int min_coord = std::max(dst.dim(i).min(), src.dim(i).min()); int max_coord = std::min(dst.dim(i).max(), src.dim(i).max()); if (max_coord < min_coord) { @@ -1200,23 +1251,23 @@ class Buffer { // into a static dispatch to the right-sized copy.) if (T_is_void ? 
(type().bytes() == 1) : (sizeof(not_void_T) == 1)) { using MemType = uint8_t; - auto &typed_dst = (Buffer &)dst; - auto &typed_src = (Buffer &)src; + auto &typed_dst = (Buffer &)dst; + auto &typed_src = (Buffer &)src; typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src); } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) { using MemType = uint16_t; - auto &typed_dst = (Buffer &)dst; - auto &typed_src = (Buffer &)src; + auto &typed_dst = (Buffer &)dst; + auto &typed_src = (Buffer &)src; typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src); } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) { using MemType = uint32_t; - auto &typed_dst = (Buffer &)dst; - auto &typed_src = (Buffer &)src; + auto &typed_dst = (Buffer &)dst; + auto &typed_src = (Buffer &)src; typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src); } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) { using MemType = uint64_t; - auto &typed_dst = (Buffer &)dst; - auto &typed_src = (Buffer &)src; + auto &typed_dst = (Buffer &)dst; + auto &typed_src = (Buffer &)src; typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src); } else { assert(false && "type().bytes() must be 1, 2, 4, or 8"); @@ -1228,10 +1279,10 @@ class Buffer { * the given dimension. Asserts that the crop region is within * the existing bounds: you cannot "crop outwards", even if you know there * is valid Buffer storage (e.g. because you already cropped inwards). */ - Buffer cropped(int d, int min, int extent) const { + Buffer cropped(int d, int min, int extent) const { // Make a fresh copy of the underlying buffer (but not a fresh // copy of the allocation, if there is one). 
- Buffer im = *this; + Buffer im = *this; // This guarantees the prexisting device ref is dropped if the // device_crop call fails and maintains the buffer in a consistent @@ -1264,10 +1315,10 @@ class Buffer { * the first N dimensions. Asserts that the crop region is within * the existing bounds. The cropped image may drop any device handle * if the device_interface cannot accomplish the crop in-place. */ - Buffer cropped(const std::vector> &rect) const { + Buffer cropped(const std::vector> &rect) const { // Make a fresh copy of the underlying buffer (but not a fresh // copy of the allocation, if there is one). - Buffer im = *this; + Buffer im = *this; // This guarantees the prexisting device ref is dropped if the // device_crop call fails and maintains the buffer in a consistent @@ -1301,8 +1352,8 @@ class Buffer { * translated coordinates in the given dimension. Positive values * move the image data to the right or down relative to the * coordinate system. Drops any device handle. */ - Buffer translated(int d, int dx) const { - Buffer im = *this; + Buffer translated(int d, int dx) const { + Buffer im = *this; im.translate(d, dx); return im; } @@ -1317,8 +1368,8 @@ class Buffer { /** Make an image which refers to the same data translated along * the first N dimensions. */ - Buffer translated(const std::vector &delta) const { - Buffer im = *this; + Buffer translated(const std::vector &delta) const { + Buffer im = *this; im.translate(delta); return im; } @@ -1373,8 +1424,8 @@ class Buffer { * using a swapped indexing order for the dimensions given. So * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more * strongly that A.address_of(i, j) == B.address_of(j, i). 
*/ - Buffer transposed(int d1, int d2) const { - Buffer im = *this; + Buffer transposed(int d1, int d2) const { + Buffer im = *this; im.transpose(d1, d2); return im; } @@ -1414,16 +1465,19 @@ class Buffer { /** Make a buffer which refers to the same data in the same * layout using a different ordering of the dimensions. */ - Buffer transposed(const std::vector &order) const { - Buffer im = *this; + Buffer transposed(const std::vector &order) const { + Buffer im = *this; im.transpose(order); return im; } /** Make a lower-dimensional buffer that refers to one slice of * this buffer. */ - Buffer sliced(int d, int pos) const { - Buffer im = *this; + Buffer sliced(int d, int pos) const { + static_assert(Dims == DynamicDims || Dims > 0, "Cannot slice a 0-dimensional buffer"); + assert(dimensions() > 0); + + Buffer im = *this; // This guarantees the prexisting device ref is dropped if the // device_slice call fails and maintains the buffer in a consistent @@ -1439,15 +1493,22 @@ class Buffer { /** Make a lower-dimensional buffer that refers to one slice of this * buffer at the dimension's minimum. */ - inline Buffer sliced(int d) const { + inline Buffer sliced(int d) const { + static_assert(Dims == DynamicDims || Dims > 0, "Cannot slice a 0-dimensional buffer"); + assert(dimensions() > 0); + return sliced(d, dim(d).min()); } /** Rewrite the buffer to refer to a single lower-dimensional * slice of itself along the given dimension at the given * coordinate. Does not move any data around or free the original - * memory, so other views of the same data are unaffected. */ + * memory, so other views of the same data are unaffected. Can + * only be called on a Buffer with dynamic dimensionality. */ void slice(int d, int pos) { + static_assert(Dims == DynamicDims, "Cannot call slice() on a Buffer with static dimensionality."); + assert(dimensions() > 0); + // An optimization for non-device buffers. 
For the device case, // a temp buffer is required, so reuse the not-in-place version. // TODO(zalman|abadams): Are nop slices common enough to special @@ -1474,8 +1535,8 @@ class Buffer { &im(x, y, c) == &im2(x, 17, y, c); \endcode */ - Buffer embedded(int d, int pos = 0) const { - Buffer im(*this); + Buffer embedded(int d, int pos = 0) const { + Buffer im(*this); im.embed(d, pos); return im; } @@ -1483,6 +1544,7 @@ class Buffer { /** Embed a buffer in-place, increasing the * dimensionality. */ void embed(int d, int pos = 0) { + static_assert(Dims == DynamicDims, "Cannot call embed() on a Buffer with static dimensionality."); assert(d >= 0 && d <= dimensions()); add_dimension(); translate(dimensions() - 1, pos); @@ -1496,6 +1558,7 @@ class Buffer { * its stride. The new dimension is the last dimension. This is a * special case of embed. */ void add_dimension() { + static_assert(Dims == DynamicDims, "Cannot call add_dimension() on a Buffer with static dimensionality."); const int dims = buf.dimensions; buf.dimensions++; if (buf.dim != shape) { @@ -1506,7 +1569,7 @@ class Buffer { } delete[] buf.dim; buf.dim = new_shape; - } else if (dims == D) { + } else if (dims == InClassDimStorage) { // Transition from the in-class storage to the heap make_shape_storage(buf.dimensions); for (int i = 0; i < dims; i++) { @@ -1679,8 +1742,9 @@ class Buffer { * using (x, y, c). Passing it to a generator requires that the * generator has been compiled with support for interleaved (also * known as packed or chunky) memory layouts. 
*/ - static Buffer make_interleaved(halide_type_t t, int width, int height, int channels) { - Buffer im(t, channels, width, height); + static Buffer make_interleaved(halide_type_t t, int width, int height, int channels) { + static_assert(Dims == DynamicDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions."); + Buffer im(t, channels, width, height); // Note that this is equivalent to calling transpose({2, 0, 1}), // but slightly more efficient. im.transpose(0, 1); @@ -1694,52 +1758,56 @@ class Buffer { * using (x, y, c). Passing it to a generator requires that the * generator has been compiled with support for interleaved (also * known as packed or chunky) memory layouts. */ - static Buffer make_interleaved(int width, int height, int channels) { + static Buffer make_interleaved(int width, int height, int channels) { return make_interleaved(static_halide_type(), width, height, channels); } /** Wrap an existing interleaved image. */ - static Buffer, D> + static Buffer, Dims, InClassDimStorage> make_interleaved(halide_type_t t, T *data, int width, int height, int channels) { - Buffer, D> im(t, data, channels, width, height); + static_assert(Dims == DynamicDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions."); + Buffer, Dims, InClassDimStorage> im(t, data, channels, width, height); im.transpose(0, 1); im.transpose(1, 2); return im; } /** Wrap an existing interleaved image. 
*/ - static Buffer make_interleaved(T *data, int width, int height, int channels) { + static Buffer make_interleaved(T *data, int width, int height, int channels) { return make_interleaved(static_halide_type(), data, width, height, channels); } /** Make a zero-dimensional Buffer */ - static Buffer, D> make_scalar(halide_type_t t) { - Buffer, 1> buf(t, 1); + static Buffer, Dims, InClassDimStorage> make_scalar(halide_type_t t) { + static_assert(Dims == DynamicDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); + Buffer, DynamicDims, InClassDimStorage> buf(t, 1); buf.slice(0, 0); return buf; } /** Make a zero-dimensional Buffer */ - static Buffer make_scalar() { - Buffer buf(1); + static Buffer make_scalar() { + static_assert(Dims == DynamicDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); + Buffer buf(1); buf.slice(0, 0); return buf; } /** Make a zero-dimensional Buffer that points to non-owned, existing data */ - static Buffer make_scalar(T *data) { - Buffer buf(data, 1); + static Buffer make_scalar(T *data) { + static_assert(Dims == DynamicDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); + Buffer buf(data, 1); buf.slice(0, 0); return buf; } /** Make a buffer with the same shape and memory nesting order as * another buffer. It may have a different type. */ - template - static Buffer make_with_shape_of(Buffer src, - void *(*allocate_fn)(size_t) = nullptr, - void (*deallocate_fn)(void *) = nullptr) { - + template + static Buffer make_with_shape_of(Buffer src, + void *(*allocate_fn)(size_t) = nullptr, + void (*deallocate_fn)(void *) = nullptr) { + static_assert(Dims == D2 || Dims == DynamicDims); const halide_type_t dst_type = T_is_void ? 
src.type() : halide_type_of::type>(); return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim, allocate_fn, deallocate_fn); @@ -1846,6 +1914,8 @@ class Buffer { HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); + constexpr int expected_dims = 1 + (int)(sizeof...(rest)); + static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); assert(!device_dirty()); return *((const not_void_T *)(address_of(first, rest...))); } @@ -1855,6 +1925,8 @@ class Buffer { operator()() const { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); + constexpr int expected_dims = 0; + static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); assert(!device_dirty()); return *((const not_void_T *)(data())); } @@ -1875,6 +1947,8 @@ class Buffer { operator()(int first, Args... 
rest) { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); + constexpr int expected_dims = 1 + (int)(sizeof...(rest)); + static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); set_host_dirty(); return *((not_void_T *)(address_of(first, rest...))); } @@ -1884,6 +1958,8 @@ class Buffer { operator()() { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); + constexpr int expected_dims = 0; + static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); set_host_dirty(); return *((not_void_T *)(data())); } @@ -1905,7 +1981,7 @@ class Buffer { return all_equal; } - Buffer &fill(not_void_T val) { + Buffer &fill(not_void_T val) { set_host_dirty(); for_each_value([=](T &v) { v = val; }); return *this; @@ -2059,14 +2135,14 @@ class Buffer { * will result in a compilation error. 
*/ // @{ template - HALIDE_ALWAYS_INLINE const Buffer &for_each_value(Fn &&f, Args &&...other_buffers) const { + HALIDE_ALWAYS_INLINE const Buffer &for_each_value(Fn &&f, Args &&...other_buffers) const { for_each_value_impl(f, std::forward(other_buffers)...); return *this; } template HALIDE_ALWAYS_INLINE - Buffer & + Buffer & for_each_value(Fn &&f, Args &&...other_buffers) { for_each_value_impl(f, std::forward(other_buffers)...); return *this; @@ -2258,14 +2334,14 @@ class Buffer { */ // @{ template - HALIDE_ALWAYS_INLINE const Buffer &for_each_element(Fn &&f) const { + HALIDE_ALWAYS_INLINE const Buffer &for_each_element(Fn &&f) const { for_each_element_impl(f); return *this; } template HALIDE_ALWAYS_INLINE - Buffer & + Buffer & for_each_element(Fn &&f) { for_each_element_impl(f); return *this; @@ -2276,7 +2352,7 @@ class Buffer { template struct FillHelper { Fn f; - Buffer *buf; + Buffer *buf; template()(std::declval()...))> @@ -2284,7 +2360,7 @@ class Buffer { (*buf)(args...) = f(args...); } - FillHelper(Fn &&f, Buffer *buf) + FillHelper(Fn &&f, Buffer *buf) : f(std::forward(f)), buf(buf) { } }; @@ -2296,7 +2372,7 @@ class Buffer { * stored to the coordinate corresponding to the arguments. */ template::type>::value>::type> - Buffer &fill(Fn &&f) { + Buffer &fill(Fn &&f) { // We'll go via for_each_element. We need a variadic wrapper lambda. FillHelper wrapper(std::forward(f), this); return for_each_element(wrapper); diff --git a/test/correctness/halide_buffer.cpp b/test/correctness/halide_buffer.cpp index 91c3cce07279..e0bb1bcac0e1 100644 --- a/test/correctness/halide_buffer.cpp +++ b/test/correctness/halide_buffer.cpp @@ -128,12 +128,84 @@ int main(int argc, char **argv) { { // Check make a Buffer from a Buffer of a different type + Buffer a(100, 80); + Buffer b(a); // statically safe + Buffer c(b); // statically safe + Buffer d(c); // does runtime check of actual type. 
+ Buffer e(a); // statically safe + Buffer f(e); // runtime checks + + static_assert(a.has_static_halide_type); + static_assert(b.has_static_halide_type); + static_assert(!c.has_static_halide_type); + static_assert(d.has_static_halide_type); + static_assert(!e.has_static_halide_type); + static_assert(f.has_static_halide_type); + + static_assert(a.static_halide_type() == halide_type_of()); + static_assert(b.static_halide_type() == halide_type_of()); + static_assert(d.static_halide_type() == halide_type_of()); + static_assert(f.static_halide_type() == halide_type_of()); + } + + { + // Check Buffers with static dimensionality Buffer a(100, 80); - Buffer b(a); // statically safe - Buffer c(b); // statically safe - Buffer d(c); // does runtime check of actual type. - Buffer e(a); // statically safe - Buffer f(e); // runtime checks + Buffer b(a); // statically safe + Buffer c(a); // checks at runtime (and succeeds) + Buffer::DynamicDims> d(a); // same as previous, just explicit syntax + Buffer e(d); // checks at runtime (and succeeds because d.dims = 2) + // Buffer f(a); // won't compile: static_assert failure + // Buffer g(c); // fails at runtime: c.dims = 2 + + static_assert(a.has_static_dimensions); + static_assert(b.has_static_dimensions); + static_assert(!c.has_static_dimensions); + static_assert(!d.has_static_dimensions); + static_assert(e.has_static_dimensions); + + static_assert(a.static_dimensions() == 2); + static_assert(b.static_dimensions() == 2); + static_assert(e.static_dimensions() == 2); + + Buffer s1 = a.sliced(0); + assert(s1.dimensions() == 1); + assert(s1.dim(0).extent() == 80); + + Buffer s2 = a.sliced(1); + assert(s2.dimensions() == 1); + assert(s2.dim(0).extent() == 100); + + Buffer s3 = s2.sliced(0); + static_assert(a.has_static_dimensions && s3.static_dimensions() == 0); + assert(s3.dimensions() == 0); + + // auto s3a = s3.sliced(0); // won't compile: can't call sliced() on a zero-dim buffer + // Buffer s3b = a.sliced(0); // won't compile: return 
type has incompatible dimensionality + // a.slice(0); // won't compile: can't call slice() on static-dimensioned buffer + + Buffer s4 = a.sliced(0); // assign to dynamic-dimensioned result + static_assert(!s4.has_static_dimensions); + assert(s4.dimensions() == 1); + + s4.slice(0); // ok to call on dynamic-dimensioned + assert(s4.dimensions() == 0); + + Buffer e0 = Buffer::make_scalar(); + + auto e1 = e0.embedded(0); + static_assert(e1.has_static_dimensions && e1.static_dimensions() == 1); + assert(e1.dimensions() == 1); + + // Buffer e2 = a.embedded(0); // won't compile: return type has incompatible dimensionality + // e1.embed(0); // won't compile: can't call embed() on static-dimensioned buffer + + Buffer e3 = e0.embedded(0); // assign to dynamic-dimensioned result + static_assert(!e3.has_static_dimensions); + assert(e3.dimensions() == 1); + + e3.embed(0); // ok to call on dynamic-dimensioned + assert(e3.dimensions() == 2); } { diff --git a/test/generator/metadata_tester_generator.cpp b/test/generator/metadata_tester_generator.cpp index b25563415af0..b3d38c16a730 100644 --- a/test/generator/metadata_tester_generator.cpp +++ b/test/generator/metadata_tester_generator.cpp @@ -10,9 +10,9 @@ enum class SomeEnum { Foo, class MetadataTester : public Halide::Generator { public: Input input{"input"}; // must be overridden to {UInt(8), 3} - Input> typed_input_buffer{"typed_input_buffer", 3}; - Input> dim_only_input_buffer{"dim_only_input_buffer", 3}; // must be overridden to type=UInt(8) - Input> untyped_input_buffer{"untyped_input_buffer"}; // must be overridden to {UInt(8), 3} + Input> typed_input_buffer{"typed_input_buffer"}; + Input> dim_only_input_buffer{"dim_only_input_buffer"}; // must be overridden to type=UInt(8) + Input> untyped_input_buffer{"untyped_input_buffer"}; // must be overridden to {UInt(8), 3} Input no_default_value{"no_default_value"}; Input b{"b", true}; Input i8{"i8", 8, -8, 127}; @@ -26,11 +26,9 @@ class MetadataTester : public Halide::Generator { 
Input f32{"f32", 32.1234f, -3200.1234f, 3200.1234f}; Input f64{"f64", 64.25f, -6400.25f, 6400.25f}; Input h{"h", nullptr}; - Input input_not_nod{"input_not_nod"}; // must be overridden to type=uint8 dim=3 Input input_nod{"input_nod", UInt(8)}; // must be overridden to type=uint8 dim=3 Input input_not{"input_not", 3}; // must be overridden to type=uint8 - Input array_input{"array_input", UInt(8), 3}; // must be overridden to size=2 Input array2_input{"array2_input", UInt(8), 3}; Input array_i8{"array_i8"}; // must be overridden to size=2 @@ -41,38 +39,38 @@ class MetadataTester : public Halide::Generator { Input array2_i32{"array2_i32", 32, -32, 127}; Input array_h { "array_h", nullptr }; // must be overridden to size=2 - Input[2]> buffer_array_input1 { "buffer_array_input1", 3 }; - Input[2]> buffer_array_input2 { "buffer_array_input2" }; // buffer_array_input2.dim must be set - Input[2]> buffer_array_input3 { "buffer_array_input3", 3 }; // buffer_array_input2.type must be set - Input[2]> buffer_array_input4 { "buffer_array_input4" }; // dim and type must be set + Input[2]> buffer_array_input1 { "buffer_array_input1" }; + Input[2]> buffer_array_input2 { "buffer_array_input2" }; // buffer_array_input2.dim must be set + Input[2]> buffer_array_input3 { "buffer_array_input3" }; // buffer_array_input2.type must be set + Input[2]> buffer_array_input4 { "buffer_array_input4" }; // dim and type must be set // .size must be specified for all of these - Input[]> buffer_array_input5 { "buffer_array_input5", 3 }; - Input[]> buffer_array_input6 { "buffer_array_input6" }; // buffer_array_input2.dim must be set - Input[]> buffer_array_input7 { "buffer_array_input7", 3 }; // buffer_array_input2.type must be set - Input[]> buffer_array_input8 { "buffer_array_input8" }; // dim and type must be set + Input[]> buffer_array_input5 { "buffer_array_input5" }; + Input[]> buffer_array_input6 { "buffer_array_input6" }; // buffer_array_input2.dim must be set + Input[]> buffer_array_input7 { 
"buffer_array_input7" }; // buffer_array_input2.type must be set + Input[]> buffer_array_input8 { "buffer_array_input8" }; // dim and type must be set - Input> buffer_f16_typed{"buffer_f16_typed", 1}; - Input> buffer_f16_untyped{"buffer_f16_untyped", 1}; + Input> buffer_f16_typed{"buffer_f16_typed"}; + Input> buffer_f16_untyped{"buffer_f16_untyped"}; Input untyped_scalar_input{"untyped_scalar_input"}; // untyped_scalar_input.type must be set to uint8 Output output{"output"}; // must be overridden to {{Float(32), Float(32)}, 3} - Output> typed_output_buffer{"typed_output_buffer", 3}; + Output> typed_output_buffer{"typed_output_buffer"}; Output> type_only_output_buffer{"type_only_output_buffer"}; // untyped outputs can have type and/or dimensions inferred - Output> dim_only_output_buffer{"dim_only_output_buffer", 3}; // untyped outputs can have type and/or dimensions inferred + Output> dim_only_output_buffer{"dim_only_output_buffer"}; // untyped outputs can have type and/or dimensions inferred Output> untyped_output_buffer{"untyped_output_buffer"}; // untyped outputs can have type and/or dimensions inferred - Output> tupled_output_buffer{"tupled_output_buffer", {Float(32), Int(32)}, 3}; + Output> tupled_output_buffer{"tupled_output_buffer", {Float(32), Int(32)}}; Output output_scalar{"output_scalar"}; Output array_outputs{"array_outputs", Float(32), 3}; // must be overridden to size=2 Output array_outputs2{"array_outputs2", {Float(32), Float(32)}, 3}; Output array_outputs3{"array_outputs3"}; - Output[2]> array_outputs4 { "array_outputs4", 3 }; + Output[2]> array_outputs4 { "array_outputs4" }; Output[2]> array_outputs5 { "array_outputs5" }; // dimensions will be inferred by usage Output[2]> array_outputs6 { "array_outputs6" }; // dimensions and type will be inferred by usage // .size must be specified for all of these - Output[]> array_outputs7 { "array_outputs7", 3 }; + Output[]> array_outputs7 { "array_outputs7" }; Output[]> array_outputs8 { "array_outputs8" }; 
Output[]> array_outputs9 { "array_outputs9" }; From c016a10c35319a40f38942abd8c464a9ef4faab8 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 20 Jan 2022 15:01:36 -0800 Subject: [PATCH 02/13] Update metadata_tester_generator.cpp --- test/generator/metadata_tester_generator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/generator/metadata_tester_generator.cpp b/test/generator/metadata_tester_generator.cpp index b3d38c16a730..f046d00e7c67 100644 --- a/test/generator/metadata_tester_generator.cpp +++ b/test/generator/metadata_tester_generator.cpp @@ -26,9 +26,9 @@ class MetadataTester : public Halide::Generator { Input f32{"f32", 32.1234f, -3200.1234f, 3200.1234f}; Input f64{"f64", 64.25f, -6400.25f, 6400.25f}; Input h{"h", nullptr}; - Input input_not_nod{"input_not_nod"}; // must be overridden to type=uint8 dim=3 - Input input_nod{"input_nod", UInt(8)}; // must be overridden to type=uint8 dim=3 - Input input_not{"input_not", 3}; // must be overridden to type=uint8 + Input input_not_nod{"input_not_nod"}; // must be overridden to type=uint8 dim=3 + Input input_nod{"input_nod", UInt(8)}; // must be overridden to type=uint8 dim=3 + Input input_not{"input_not", 3}; // must be overridden to type=uint8 Input array_input{"array_input", UInt(8), 3}; // must be overridden to size=2 Input array2_input{"array2_input", UInt(8), 3}; Input array_i8{"array_i8"}; // must be overridden to size=2 From 1e7a5ef3dbe329bbc82652b494fb8b910dc90548 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 20 Jan 2022 15:31:37 -0800 Subject: [PATCH 03/13] Update Generator.h --- src/Generator.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Generator.h b/src/Generator.h index e10171b1cbf2..25c4fd063063 100644 --- a/src/Generator.h +++ b/src/Generator.h @@ -1678,12 +1678,17 @@ class GeneratorInput_Buffer : public GeneratorInputImpl { TBase::has_static_dimensions ? 
TBase::static_dimensions() : -1) { } - GeneratorInput_Buffer(const std::string &name, const Type &t, int d = -1) + GeneratorInput_Buffer(const std::string &name, const Type &t, int d) : Super(name, IOKind::Buffer, {t}, d) { static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Input> if T is void or omitted."); static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Input> if D is -1 or omitted."); } + GeneratorInput_Buffer(const std::string &name, const Type &t) + : Super(name, IOKind::Buffer, {t}, -1) { + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Input> if T is void or omitted."); + } + GeneratorInput_Buffer(const std::string &name, int d) : Super(name, IOKind::Buffer, TBase::has_static_halide_type ? std::vector{TBase::static_halide_type()} : std::vector{}, From 67e75af73705757609953c6537ebfff9276e9db4 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 20 Jan 2022 15:44:01 -0800 Subject: [PATCH 04/13] Update Generator.h --- src/Generator.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/Generator.h b/src/Generator.h index 25c4fd063063..8ccb78876757 100644 --- a/src/Generator.h +++ b/src/Generator.h @@ -2477,58 +2477,58 @@ class GeneratorOutput_Buffer : public GeneratorOutputImpl { protected: using TBase = typename Super::TBase; - static std::vector my_types(const std::vector &t) { - if (TBase::has_static_halide_type) { - user_assert(t.empty()) << "Cannot pass a Type argument for an Output with a non-void static type\n"; - return std::vector{TBase::static_halide_type()}; - } - return t; - } - explicit GeneratorOutput_Buffer(const std::string &name) - : Super(name, IOKind::Buffer, my_types({}), -1) { + : Super(name, IOKind::Buffer, + TBase::has_static_halide_type ? std::vector{TBase::static_halide_type()} : std::vector{}, + TBase::has_static_dimensions ? 
TBase::static_dimensions() : -1) { } GeneratorOutput_Buffer(const std::string &name, const std::vector &t, int d) - : Super(name, IOKind::Buffer, my_types(t), d) { + : Super(name, IOKind::Buffer, t, d) { internal_assert(!t.empty()); internal_assert(d != -1); - static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); } GeneratorOutput_Buffer(const std::string &name, const std::vector &t) - : Super(name, IOKind::Buffer, my_types(t), -1) { + : Super(name, IOKind::Buffer, t, -1) { internal_assert(!t.empty()); - static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); } GeneratorOutput_Buffer(const std::string &name, int d) - : Super(name, IOKind::Buffer, my_types({}), d) { + : Super(name, IOKind::Buffer, + TBase::has_static_halide_type ? std::vector{TBase::static_halide_type()} : std::vector{}, + d) { internal_assert(d != -1); static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); } GeneratorOutput_Buffer(size_t array_size, const std::string &name) - : Super(array_size, name, IOKind::Buffer, my_types({}), -1) { + : Super(array_size, name, IOKind::Buffer, + TBase::has_static_halide_type ? std::vector{TBase::static_halide_type()} : std::vector{}, + TBase::has_static_dimensions ? 
TBase::static_dimensions() : -1) { } GeneratorOutput_Buffer(size_t array_size, const std::string &name, const std::vector &t, int d) - : Super(array_size, name, IOKind::Buffer, my_types(t), d) { + : Super(array_size, name, IOKind::Buffer, t, d) { internal_assert(!t.empty()); internal_assert(d != -1); - static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); } GeneratorOutput_Buffer(size_t array_size, const std::string &name, const std::vector &t) - : Super(array_size, name, IOKind::Buffer, my_types(t), -1) { + : Super(array_size, name, IOKind::Buffer, t, -1) { internal_assert(!t.empty()); - static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); + static_assert(!TBase::has_static_halide_type, "You can only specify a Type argument for Output> if T is void or omitted."); } GeneratorOutput_Buffer(size_t array_size, const std::string &name, int d) - : Super(array_size, name, IOKind::Buffer, my_types({}), d) { + : Super(array_size, name, IOKind::Buffer, + TBase::has_static_halide_type ? 
std::vector{TBase::static_halide_type()} : std::vector{}, + d) { internal_assert(d != -1); static_assert(!TBase::has_static_dimensions, "You can only specify a dimension argument for Output> if D is -1 or omitted."); } From 954e740ae337eafa78894d2f7fa01b45007d0807 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Fri, 21 Jan 2022 14:48:33 -0800 Subject: [PATCH 05/13] DynamicDims -> BufferDimsUnconstrained --- apps/hannk/util/buffer_util.h | 2 +- python_bindings/src/PyBuffer.cpp | 2 +- src/Buffer.h | 10 ++-- src/runtime/HalideBuffer.h | 81 ++++++++++++++++-------------- test/correctness/halide_buffer.cpp | 12 ++--- 5 files changed, 55 insertions(+), 52 deletions(-) diff --git a/apps/hannk/util/buffer_util.h b/apps/hannk/util/buffer_util.h index 22d71367b8b3..da9f780efcf7 100644 --- a/apps/hannk/util/buffer_util.h +++ b/apps/hannk/util/buffer_util.h @@ -14,7 +14,7 @@ namespace hannk { // Using a Buffer with space for max_rank dimensions is a meaningful // win for some corner cases (when adding dimensions to > 4). template -using HalideBuffer = Halide::Runtime::Buffer::DynamicDims, max_rank>; +using HalideBuffer = Halide::Runtime::Buffer; // dynamic_type_dispatch is a utility for functors that want to be able // to dynamically dispatch a halide_type_t to type-specialized code. 
diff --git a/python_bindings/src/PyBuffer.cpp b/python_bindings/src/PyBuffer.cpp index 4391ecb9f367..178baf65c047 100644 --- a/python_bindings/src/PyBuffer.cpp +++ b/python_bindings/src/PyBuffer.cpp @@ -393,7 +393,7 @@ void define_buffer(py::module &m) { py::arg("dirty") = true) .def("copy", &Buffer<>::copy) - .def("copy_from", &Buffer<>::copy_from::DynamicDims>) + .def("copy_from", &Buffer<>::copy_from::BufferDimsUnconstrained>) .def("add_dimension", (void (Buffer<>::*)()) & Buffer<>::add_dimension) diff --git a/src/Buffer.h b/src/Buffer.h index a96bc1486e05..7bcb00c2038f 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -8,7 +8,7 @@ namespace Halide { -template +template class Buffer; struct JITUserContext; @@ -114,7 +114,7 @@ std::string buffer_type_name() { * template parameter is T = void. * * A Buffer can refer to a Buffer if D1 == D2, - * or if D1 is -1 (meaning "dimensionality is checked at runtime, not compiletime"). + * or if D1 is BufferDimsUnconstrained (meaning "dimensionality is checked at runtime, not compiletime"). 
*/ template class Buffer { @@ -134,7 +134,7 @@ class Buffer { std::is_void::value || std::is_void::value, "type mismatch constructing Buffer"); - static_assert(Dims == DynamicDims || D2 == DynamicDims || Dims == D2, + static_assert(Dims == BufferDimsUnconstrained || D2 == BufferDimsUnconstrained || Dims == D2, "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality"); } else { // Don't delegate to @@ -153,8 +153,8 @@ class Buffer { } public: - static constexpr int DynamicDims = -1; - static_assert(Dims == DynamicDims || Dims >= 0); + static constexpr int BufferDimsUnconstrained = Halide::Runtime::BufferDimsUnconstrained; + static_assert(Dims == BufferDimsUnconstrained || Dims >= 0); typedef T ElemType; diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h index fd70420ee7c0..52c0dd7d987f 100644 --- a/src/runtime/HalideBuffer.h +++ b/src/runtime/HalideBuffer.h @@ -106,6 +106,8 @@ struct DeviceRefCount { BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated}; }; +constexpr int BufferDimsUnconstrained = -1; + /** A templated Buffer class that wraps halide_buffer_t and adds * functionality. When using Halide from C++, this is the preferred * way to create input and output buffers. The overhead of using this @@ -117,7 +119,7 @@ struct DeviceRefCount { * element type is unknown, or may vary, use void or const void. * * The template parameter Dims is the number of dimensions. For buffers where - * the dimensionality type is unknown at, or may vary, use -1 (or Buffer::DynamicDims). + * the dimensionality is unknown at compile time, or may vary, use BufferDimsUnconstrained. * * InClassDimStorage is the maximum number of dimensions that can be represented * using space inside the class itself. Set it to the maximum dimensionality @@ -131,8 +133,8 @@ struct DeviceRefCount { * considered as owned if and only if the host-side allocation is * owned.
*/ template + int Dims = BufferDimsUnconstrained, + int InClassDimStorage = (Dims == BufferDimsUnconstrained ? 4 : std::max(Dims, 1))> class Buffer { /** The underlying halide_buffer_t */ halide_buffer_t buf = {}; @@ -185,9 +187,7 @@ class Buffer { return alloc != nullptr; } - static constexpr int DynamicDims = -1; - - static constexpr bool has_static_dimensions = (Dims != DynamicDims); + static constexpr bool has_static_dimensions = (Dims != BufferDimsUnconstrained); /** Callers should not use the result if * has_static_dimensions is false. */ @@ -293,10 +293,10 @@ class Buffer { template void make_static_shape_storage() { - static_assert(Dims == DynamicDims || Dims == DimsSpecified, + static_assert(Dims == BufferDimsUnconstrained || Dims == DimsSpecified, "Number of arguments to Buffer() does not match static dimensionality"); buf.dimensions = DimsSpecified; - if constexpr (Dims == DynamicDims) { + if constexpr (Dims == BufferDimsUnconstrained) { buf.dim = (DimsSpecified <= InClassDimStorage) ? shape : new halide_dimension_t[DimsSpecified]; } else { static_assert(InClassDimStorage >= Dims); @@ -305,7 +305,7 @@ class Buffer { } void make_shape_storage(const int dimensions) { - if (Dims != DynamicDims && Dims != dimensions) { + if (Dims != BufferDimsUnconstrained && Dims != dimensions) { assert(false && "Number of arguments to Buffer() does not match static dimensionality"); } // This should usually be inlined, so if dimensions is statically known, @@ -439,7 +439,7 @@ class Buffer { /** slice a single dimension without handling device allocation. 
*/ void slice_host(int d, int pos) { - static_assert(Dims == DynamicDims); + static_assert(Dims == BufferDimsUnconstrained); assert(dimensions() > 0); assert(d >= 0 && d < dimensions()); assert(pos >= dim(d).min() && pos <= dim(d).max()); @@ -454,7 +454,7 @@ class Buffer { buf.dim[buf.dimensions] = {0, 0, 0}; } - void complete_device_slice(Buffer &result_host_sliced, int d, int pos) const { + void complete_device_slice(Buffer &result_host_sliced, int d, int pos) const { assert(buf.device_interface != nullptr); if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) { const Buffer *sliced_from = this; @@ -558,7 +558,7 @@ class Buffer { /** Get the dimensionality of the buffer. */ // TODO: make constexpr, optimize for const case int dimensions() const { - assert(Dims == DynamicDims || Dims == buf.dimensions); + assert(Dims == BufferDimsUnconstrained || Dims == buf.dimensions); return buf.dimensions; } @@ -618,7 +618,7 @@ class Buffer { typename std::remove_const::type>::value || T_is_void || Buffer::T_is_void, "type mismatch constructing Buffer"); - static_assert(Dims == DynamicDims || D2 == DynamicDims || Dims == D2, + static_assert(Dims == BufferDimsUnconstrained || D2 == BufferDimsUnconstrained || Dims == D2, "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality"); } @@ -634,7 +634,7 @@ class Buffer { return false; } } - if (Dims != DynamicDims) { + if (Dims != BufferDimsUnconstrained) { if (other.dimensions() != Dims) { return false; } @@ -1170,7 +1170,7 @@ class Buffer { */ Buffer copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr, void (*deallocate_fn)(void *) = nullptr) const { - static_assert(Dims == DynamicDims || Dims == 3); + static_assert(Dims == BufferDimsUnconstrained || Dims == 3); assert(dimensions() == 3); Buffer dst = Buffer::make_interleaved(nullptr, width(), height(), channels()); dst.set_min(min(0), min(1), min(2)); @@ -1229,7 +1229,7 @@ class 
Buffer { Buffer dst(*this); - static_assert(Dims == DynamicDims || D2 == DynamicDims || Dims == D2); + static_assert(Dims == BufferDimsUnconstrained || D2 == BufferDimsUnconstrained || Dims == D2); assert(src.dimensions() == dst.dimensions()); // Trim the copy to the region in common @@ -1473,11 +1473,12 @@ class Buffer { /** Make a lower-dimensional buffer that refers to one slice of * this buffer. */ - Buffer sliced(int d, int pos) const { - static_assert(Dims == DynamicDims || Dims > 0, "Cannot slice a 0-dimensional buffer"); + Buffer + sliced(int d, int pos) const { + static_assert(Dims == BufferDimsUnconstrained || Dims > 0, "Cannot slice a 0-dimensional buffer"); assert(dimensions() > 0); - Buffer im = *this; + Buffer im = *this; // This guarantees the prexisting device ref is dropped if the // device_slice call fails and maintains the buffer in a consistent @@ -1493,8 +1494,9 @@ class Buffer { /** Make a lower-dimensional buffer that refers to one slice of this * buffer at the dimension's minimum. */ - inline Buffer sliced(int d) const { - static_assert(Dims == DynamicDims || Dims > 0, "Cannot slice a 0-dimensional buffer"); + Buffer + sliced(int d) const { + static_assert(Dims == BufferDimsUnconstrained || Dims > 0, "Cannot slice a 0-dimensional buffer"); assert(dimensions() > 0); return sliced(d, dim(d).min()); @@ -1506,7 +1508,7 @@ class Buffer { * memory, so other views of the same data are unaffected. Can * only be called on a Buffer with dynamic dimensionality. */ void slice(int d, int pos) { - static_assert(Dims == DynamicDims, "Cannot call slice() on a Buffer with static dimensionality."); + static_assert(Dims == BufferDimsUnconstrained, "Cannot call slice() on a Buffer with static dimensionality."); assert(dimensions() > 0); // An optimization for non-device buffers. 
For the device case, @@ -1535,8 +1537,9 @@ class Buffer { &im(x, y, c) == &im2(x, 17, y, c); \endcode */ - Buffer embedded(int d, int pos = 0) const { - Buffer im(*this); + Buffer + embedded(int d, int pos = 0) const { + Buffer im(*this); im.embed(d, pos); return im; } @@ -1544,7 +1547,7 @@ class Buffer { /** Embed a buffer in-place, increasing the * dimensionality. */ void embed(int d, int pos = 0) { - static_assert(Dims == DynamicDims, "Cannot call embed() on a Buffer with static dimensionality."); + static_assert(Dims == BufferDimsUnconstrained, "Cannot call embed() on a Buffer with static dimensionality."); assert(d >= 0 && d <= dimensions()); add_dimension(); translate(dimensions() - 1, pos); @@ -1558,7 +1561,7 @@ class Buffer { * its stride. The new dimension is the last dimension. This is a * special case of embed. */ void add_dimension() { - static_assert(Dims == DynamicDims, "Cannot call add_dimension() on a Buffer with static dimensionality."); + static_assert(Dims == BufferDimsUnconstrained, "Cannot call add_dimension() on a Buffer with static dimensionality."); const int dims = buf.dimensions; buf.dimensions++; if (buf.dim != shape) { @@ -1743,7 +1746,7 @@ class Buffer { * generator has been compiled with support for interleaved (also * known as packed or chunky) memory layouts. */ static Buffer make_interleaved(halide_type_t t, int width, int height, int channels) { - static_assert(Dims == DynamicDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions."); + static_assert(Dims == BufferDimsUnconstrained || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions."); Buffer im(t, channels, width, height); // Note that this is equivalent to calling transpose({2, 0, 1}), // but slightly more efficient. @@ -1765,7 +1768,7 @@ class Buffer { /** Wrap an existing interleaved image. 
*/ static Buffer, Dims, InClassDimStorage> make_interleaved(halide_type_t t, T *data, int width, int height, int channels) { - static_assert(Dims == DynamicDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions."); + static_assert(Dims == BufferDimsUnconstrained || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions."); Buffer, Dims, InClassDimStorage> im(t, data, channels, width, height); im.transpose(0, 1); im.transpose(1, 2); @@ -1779,24 +1782,24 @@ class Buffer { /** Make a zero-dimensional Buffer */ static Buffer, Dims, InClassDimStorage> make_scalar(halide_type_t t) { - static_assert(Dims == DynamicDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); - Buffer, DynamicDims, InClassDimStorage> buf(t, 1); + static_assert(Dims == BufferDimsUnconstrained || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); + Buffer, BufferDimsUnconstrained, InClassDimStorage> buf(t, 1); buf.slice(0, 0); return buf; } /** Make a zero-dimensional Buffer */ static Buffer make_scalar() { - static_assert(Dims == DynamicDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); - Buffer buf(1); + static_assert(Dims == BufferDimsUnconstrained || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); + Buffer buf(1); buf.slice(0, 0); return buf; } /** Make a zero-dimensional Buffer that points to non-owned, existing data */ static Buffer make_scalar(T *data) { - static_assert(Dims == DynamicDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); - Buffer buf(data, 1); + static_assert(Dims == BufferDimsUnconstrained || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions."); + Buffer buf(data, 1); buf.slice(0, 0); return buf; } @@ -1807,7 +1810,7 @@ class Buffer { static Buffer 
make_with_shape_of(Buffer src, void *(*allocate_fn)(size_t) = nullptr, void (*deallocate_fn)(void *) = nullptr) { - static_assert(Dims == D2 || Dims == DynamicDims); + static_assert(Dims == D2 || Dims == BufferDimsUnconstrained); const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of::type>(); return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim, allocate_fn, deallocate_fn); @@ -1915,7 +1918,7 @@ class Buffer { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); constexpr int expected_dims = 1 + (int)(sizeof...(rest)); - static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); + static_assert(Dims == BufferDimsUnconstrained || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); assert(!device_dirty()); return *((const not_void_T *)(address_of(first, rest...))); } @@ -1926,7 +1929,7 @@ class Buffer { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); constexpr int expected_dims = 0; - static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); + static_assert(Dims == BufferDimsUnconstrained || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); assert(!device_dirty()); return *((const not_void_T *)(data())); } @@ -1948,7 +1951,7 @@ class Buffer { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); constexpr int expected_dims = 1 + (int)(sizeof...(rest)); - static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); + static_assert(Dims == BufferDimsUnconstrained || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong 
number of coordinates in operator()"); set_host_dirty(); return *((not_void_T *)(address_of(first, rest...))); } @@ -1959,7 +1962,7 @@ class Buffer { static_assert(!T_is_void, "Cannot use operator() on Buffer types"); constexpr int expected_dims = 0; - static_assert(Dims == DynamicDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); + static_assert(Dims == BufferDimsUnconstrained || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()"); set_host_dirty(); return *((not_void_T *)(data())); } diff --git a/test/correctness/halide_buffer.cpp b/test/correctness/halide_buffer.cpp index e0bb1bcac0e1..27ee3ee0c544 100644 --- a/test/correctness/halide_buffer.cpp +++ b/test/correctness/halide_buffer.cpp @@ -151,12 +151,12 @@ int main(int argc, char **argv) { { // Check Buffers with static dimensionality Buffer a(100, 80); - Buffer b(a); // statically safe - Buffer c(a); // checks at runtime (and succeeds) - Buffer::DynamicDims> d(a); // same as previous, just explicit syntax - Buffer e(d); // checks at runtime (and succeeds because d.dims = 2) - // Buffer f(a); // won't compile: static_assert failure - // Buffer g(c); // fails at runtime: c.dims = 2 + Buffer b(a); // statically safe + Buffer c(a); // checks at runtime (and succeeds) + Buffer d(a); // same as previous, just explicit syntax + Buffer e(d); // checks at runtime (and succeeds because d.dims = 2) + // Buffer f(a); // won't compile: static_assert failure + // Buffer g(c); // fails at runtime: c.dims = 2 static_assert(a.has_static_dimensions); static_assert(b.has_static_dimensions); From 9e12a9e48a22b67e1ab9d686a875ac7bb624321f Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 09:23:22 -0800 Subject: [PATCH 06/13] Update fft_aot_test.cpp --- apps/fft/fft_aot_test.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/fft/fft_aot_test.cpp 
b/apps/fft/fft_aot_test.cpp index 042bdb1158d1..4f6ed4dc9c1d 100644 --- a/apps/fft/fft_aot_test.cpp +++ b/apps/fft/fft_aot_test.cpp @@ -19,6 +19,9 @@ const int32_t kSize = 16; using Halide::Runtime::Buffer; +// Note that real_buffer() is 3D (with the 3rd dimension having extent 1) +// because the fft is written generically to require 3D inputs, even when they are real. +// Hence, the resulting buffer must be accessed with buf(i, j, 0). Buffer real_buffer(int32_t y_size = kSize) { return Buffer::make_interleaved(kSize, y_size, 1); } From 55866e4e02441f2b6512a793bb2452ae55c06956 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 09:28:04 -0800 Subject: [PATCH 07/13] Update HalideBuffer.h --- src/runtime/HalideBuffer.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h index 52c0dd7d987f..9415043983ba 100644 --- a/src/runtime/HalideBuffer.h +++ b/src/runtime/HalideBuffer.h @@ -556,10 +556,13 @@ class Buffer { } /** Get the dimensionality of the buffer. */ - // TODO: make constexpr, optimize for const case int dimensions() const { - assert(Dims == BufferDimsUnconstrained || Dims == buf.dimensions); - return buf.dimensions; + if constexpr (has_static_dimensions) { + return Dims; + } else { + assert(Dims == BufferDimsUnconstrained || Dims == buf.dimensions); + return buf.dimensions; + } } /** Get the type of the elements. */ @@ -932,7 +935,6 @@ class Buffer { * take ownership of the data, and does not set the host_dirty flag.
*/ template explicit Buffer(Array (&vals)[N]) { - // TODO: this could probably be made constexpr const int buf_dimensions = dimensionality_of_array(vals); buf.type = scalar_type_of_array(vals); buf.host = (uint8_t *)vals; From 02ff5d972795ea5f415f8b86aca6e492a268b07f Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 09:37:18 -0800 Subject: [PATCH 08/13] Update HalideBuffer.h --- src/runtime/HalideBuffer.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h index 9415043983ba..4783549273f3 100644 --- a/src/runtime/HalideBuffer.h +++ b/src/runtime/HalideBuffer.h @@ -560,7 +560,6 @@ class Buffer { if constexpr (has_static_dimensions) { return Dims; } else { - assert(Dims == BufferDimsUnconstrained || Dims == buf.dimensions); return buf.dimensions; } } From e5d8cd811ecd939bf23d722483e9e2552f76ecf2 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 11:58:32 -0800 Subject: [PATCH 09/13] Fix as<>(), default ctor in HalideBuffer.h - Allow as<> to optionally convert dimensionality as well - The default ctor shouldn't always assume Dims == 0 - make_static_shape_storage() can be more constrained --- src/runtime/HalideBuffer.h | 61 +++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h index 4783549273f3..0aad75b478d6 100644 --- a/src/runtime/HalideBuffer.h +++ b/src/runtime/HalideBuffer.h @@ -297,7 +297,12 @@ class Buffer { "Number of arguments to Buffer() does not match static dimensionality"); buf.dimensions = DimsSpecified; if constexpr (Dims == BufferDimsUnconstrained) { - buf.dim = (DimsSpecified <= InClassDimStorage) ? 
shape : new halide_dimension_t[DimsSpecified]; + if constexpr (DimsSpecified <= InClassDimStorage) { + buf.dim = shape; + } else { + static_assert(DimsSpecified >= 1); + buf.dim = new halide_dimension_t[DimsSpecified]; + } } else { static_assert(InClassDimStorage >= Dims); buf.dim = shape; @@ -597,7 +602,10 @@ class Buffer { Buffer() : shape() { buf.type = static_halide_type(); - make_static_shape_storage<0>(); + // If Dims is statically known, we must create storage for that many dimensions; + // otherwise, make a zero-dimensional buffer. + constexpr int buf_dimensions = (Dims == BufferDimsUnconstrained) ? 0 : Dims; + make_static_shape_storage(); } /** Make a Buffer from a halide_buffer_t */ @@ -1068,29 +1076,40 @@ class Buffer { /** Return a typed reference to this Buffer. Useful for converting * a reference to a Buffer to a reference to, for example, a * Buffer, or converting a Buffer& to Buffer&. - * Does a runtime assert if the source buffer type is void. */ - template - HALIDE_ALWAYS_INLINE Buffer &as() & { - Buffer::assert_can_convert_from(*this); - return *((Buffer *)this); + * You can also optionally specify a new value for Dims; this is useful + * mainly for removing the dimensionality constraint on a Buffer with + * explicit dimensionality. Does a runtime assert if the source buffer type + * is void or the new dimensionality is incompatible. */ + template + HALIDE_ALWAYS_INLINE Buffer &as() & { + Buffer::assert_can_convert_from(*this); + return *((Buffer *)this); } - /** Return a const typed reference to this Buffer. Useful for - * converting a conference reference to one Buffer type to a const - * reference to another Buffer type. Does a runtime assert if the - * source buffer type is void. */ - template - HALIDE_ALWAYS_INLINE const Buffer &as() const & { - Buffer::assert_can_convert_from(*this); - return *((const Buffer *)this); + /** Return a const typed reference to this Buffer. 
Useful for converting + * a reference to a Buffer to a reference to, for example, a + * Buffer, or converting a Buffer& to Buffer&. + * You can also optionally specify a new value for Dims; this is useful + * mainly for removing the dimensionality constraint on a Buffer with + * explicit dimensionality. Does a runtime assert if the source buffer type + * is void or the new dimensionality is incompatible. */ + template + HALIDE_ALWAYS_INLINE const Buffer &as() const & { + Buffer::assert_can_convert_from(*this); + return *((const Buffer *)this); } - /** Returns this rval Buffer with a different type attached. Does - * a dynamic type check if the source type is void. */ - template - HALIDE_ALWAYS_INLINE Buffer as() && { - Buffer::assert_can_convert_from(*this); - return *((Buffer *)this); + /** Return an rval reference to this Buffer. Useful for converting + * a reference to a Buffer to a reference to, for example, a + * Buffer, or converting a Buffer& to Buffer&. + * You can also optionally specify a new value for Dims; this is useful + * mainly for removing the dimensionality constraint on a Buffer with + * explicit dimensionality. Does a runtime assert if the source buffer type + * is void or the new dimensionality is incompatible. 
*/ + template + HALIDE_ALWAYS_INLINE Buffer as() && { + Buffer::assert_can_convert_from(*this); + return *((Buffer *)this); } /** as_const() is syntactic sugar for .as(), to avoid the need From beda5a9f0cd0a3a3c50a8d43b78762312817a75f Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 11:58:55 -0800 Subject: [PATCH 10/13] Add extra template arg to Buffer<>::as<> also --- src/Buffer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Buffer.h b/src/Buffer.h index 7bcb00c2038f..504ed59649db 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -532,13 +532,13 @@ class Buffer { return contents->buf.type(); } - template + template Buffer as() const { - return Buffer(*this); + return Buffer(*this); } Buffer copy() const { - return Buffer(std::move(contents->buf.as().copy())); + return Buffer(std::move(contents->buf.as().copy())); } template From 67f57ab9a1996779a01a97189a4d9573784f2ec4 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 11:59:18 -0800 Subject: [PATCH 11/13] Realization::operator Buffer() needs to know about the extra template parameter too --- src/Realization.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Realization.h b/src/Realization.h index d2ed848ee54d..29596d0b3218 100644 --- a/src/Realization.h +++ b/src/Realization.h @@ -31,18 +31,19 @@ class Realization { Buffer &operator[](size_t x); /** Single-element realizations are implicitly castable to Buffers. */ - template - operator Buffer() const { - return images[0]; + template + operator Buffer() const { + return images[0].as(); } /** Construct a Realization that acts as a reference to some * existing Buffers. The element type of the Buffers may not be * const. 
*/ template, Args...>::value>::type> - Realization(Buffer &a, Args &&...args) { + Realization(Buffer &a, Args &&...args) { images = std::vector>({a, args...}); } From 63f8094267cbbceeecccd82c83cfd109a9785b7f Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 11:59:35 -0800 Subject: [PATCH 12/13] halide_image_io.h needs some attention --- tools/halide_image_io.h | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tools/halide_image_io.h b/tools/halide_image_io.h index db99050fc142..3ce0ac195e4e 100644 --- a/tools/halide_image_io.h +++ b/tools/halide_image_io.h @@ -1949,16 +1949,22 @@ bool save_tiff(ImageType &im, const std::string &filename) { return true; } -// Given something like ImageType, produce typedef ImageType -template +// Given something like ImageType, produce typedef ImageType +template +struct ImageTypeWithDynamicDims { + using type = decltype(std::declval().template as()); +}; + +// Given something like ImageType, produce typedef ImageType +template struct ImageTypeWithElemType { - using type = decltype(std::declval().template as()); + using type = decltype(std::declval().template as()); }; -// Given something like ImageType, produce typedef ImageType +// Given something like ImageType, produce typedef ImageType template struct ImageTypeWithConstElemType { - using type = decltype(std::declval().template as::type>()); + using type = decltype(std::declval().template as::type, -1>()); }; template @@ -2154,7 +2160,8 @@ struct ImageTypeConversion { return convert_image(src); default: assert(false && "Unsupported type"); - return ImageType(); + using RetImageType = typename Internal::ImageTypeWithDynamicDims::type; + return RetImageType(); } } @@ -2201,7 +2208,8 @@ struct ImageTypeConversion { return convert_image(src.template as(), dst_type); default: assert(false && "Unsupported type"); - return ImageType(); + using RetImageType = typename Internal::ImageTypeWithDynamicDims::type; + return 
RetImageType(); } } }; @@ -2335,7 +2343,7 @@ class load_and_convert_image { // a runtime error will occur. template void save_image(ImageType &im, const std::string &filename) { - (void)save(im, filename); + (void)save::type, check>(im, filename); } // Like save_image, but quietly convert the saved image to a type that the @@ -2347,11 +2355,12 @@ void convert_and_save_image(ImageType &im, const std::string &filename) { im.copy_to_host(); std::set info; - (void)save_query(filename, &info); + (void)save_query::type, check>(filename, &info); const FormatInfo best = Internal::best_save_format(im, info); if (best.type == im.type() && best.dimensions == im.dimensions()) { // It's an exact match, we can save as-is. - (void)save(im, filename); + using DynamicImageDims = typename Internal::ImageTypeWithDynamicDims::type; + (void)save(im.template as(), filename); } else { using DynamicImageType = typename Internal::ImageTypeWithElemType::type; DynamicImageType im_converted = ImageTypeConversion::convert_image(im, best.type); From d58a09de9748338dd671135feaa93a239d25627a Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 26 Jan 2022 12:01:14 -0800 Subject: [PATCH 13/13] Convert apps/ to use static Buffer dims where useful Convert most of the apps to use static Buffer dimensions where it makes sense. (This uncovered a few glitches in https://github.com/halide/Halide/pull/6574.) 
--- apps/HelloAndroid/jni/hello_generator.cpp | 4 +- .../jni/deinterleave_generator.cpp | 2 +- .../jni/edge_detect_generator.cpp | 4 +- apps/HelloMatlab/iir_blur.cpp | 4 +- apps/HelloWasm/Makefile | 2 +- apps/HelloWasm/core.cpp | 12 ++-- .../reaction_diffusion_generator.cpp | 10 +-- .../reaction_diffusion_2_generator.cpp | 10 +-- apps/auto_viz/auto_viz_demo.cpp | 4 +- apps/auto_viz/auto_viz_demo_generator.cpp | 4 +- apps/bgu/bgu_generator.cpp | 8 +-- apps/bgu/filter.cpp | 8 +-- .../bilateral_grid_generator.cpp | 5 +- apps/bilateral_grid/filter.cpp | 4 +- apps/blur/halide_blur_generator.cpp | 4 +- apps/blur/test.cpp | 22 +++--- apps/c_backend/pipeline_cpp_generator.cpp | 4 +- apps/c_backend/pipeline_generator.cpp | 4 +- apps/c_backend/run.cpp | 6 +- apps/c_backend/run_cpp.cpp | 6 +- apps/camera_pipe/camera_pipe_generator.cpp | 9 ++- apps/camera_pipe/process.cpp | 6 +- apps/conv_layer/conv_layer_generator.cpp | 9 ++- apps/conv_layer/process.cpp | 8 +-- apps/cuda_mat_mul/mat_mul_generator.cpp | 6 +- apps/cuda_mat_mul/runner.cpp | 4 +- .../depthwise_separable_conv_generator.cpp | 10 +-- apps/depthwise_separable_conv/process.cpp | 10 +-- apps/fft/fft_generator.cpp | 4 +- apps/fft/main.cpp | 16 ++--- apps/hannk/halide/common_halide_test.cpp | 12 ++-- apps/hannk/halide/conv_generator.cpp | 12 ++-- apps/hannk/halide/copy_generator.cpp | 6 +- .../hannk/halide/depthwise_conv_generator.cpp | 12 ++-- apps/hannk/halide/elementwise_generator.cpp | 18 ++--- apps/hannk/halide/fill_generator.cpp | 3 +- .../hannk/halide/normalizations_generator.cpp | 8 +-- apps/hannk/halide/pool_generator.cpp | 8 +-- apps/hannk/halide/reductions_generator.cpp | 4 +- .../hannk/interpreter/elementwise_program.cpp | 2 +- apps/hannk/interpreter/elementwise_program.h | 4 +- apps/hannk/interpreter/ops.cpp | 16 ++--- apps/hannk/interpreter/ops.h | 6 +- apps/hannk/interpreter/transforms.cpp | 2 +- apps/hannk/tflite/tflite_parser.cpp | 2 +- apps/hannk/util/buffer_util.h | 4 +- apps/harris/filter.cpp | 4 +- 
apps/harris/harris_generator.cpp | 4 +- apps/hexagon_benchmarks/conv3x3_generator.cpp | 6 +- .../dilate3x3_generator.cpp | 4 +- .../gaussian5x5_generator.cpp | 4 +- .../median3x3_generator.cpp | 4 +- apps/hexagon_benchmarks/process.h | 16 ++--- apps/hexagon_benchmarks/sobel_generator.cpp | 4 +- .../pipeline_raw_linear_interleaved_basic.cpp | 4 +- .../process_raw_linear_interleaved_basic.cpp | 4 +- apps/hexagon_dma/process_yuv_linear_basic.cpp | 14 ++-- apps/hist/filter.cpp | 4 +- apps/hist/hist_generator.cpp | 4 +- apps/iir_blur/filter.cpp | 4 +- apps/iir_blur/iir_blur_generator.cpp | 4 +- apps/interpolate/filter.cpp | 4 +- apps/interpolate/interpolate_generator.cpp | 4 +- apps/lens_blur/lens_blur_generator.cpp | 6 +- apps/lens_blur/process.cpp | 6 +- .../benchmarks/halide_benchmarks.cpp | 12 ++-- .../linear_algebra/src/blas_l1_generators.cpp | 16 ++--- .../linear_algebra/src/blas_l2_generators.cpp | 14 ++-- .../linear_algebra/src/blas_l3_generators.cpp | 12 ++-- apps/linear_algebra/src/halide_blas.cpp | 22 +++--- apps/linear_blur/linear_blur_generator.cpp | 4 +- apps/linear_blur/run_linear_blur.cpp | 4 +- .../local_laplacian_generator.cpp | 5 +- apps/local_laplacian/process.cpp | 4 +- apps/max_filter/filter.cpp | 4 +- apps/max_filter/max_filter_generator.cpp | 4 +- apps/nl_means/nl_means_generator.cpp | 4 +- apps/nl_means/process.cpp | 4 +- apps/onnx/onnx_converter_generator_test.cc | 6 +- apps/onnx/onnx_converter_test.cc | 62 ++++++++--------- apps/openglcompute/jni/oglc_run.cpp | 42 ++++++------ .../jni/oglc_two_kernels_run.cpp | 6 +- apps/resize/resize_generator.cpp | 4 +- apps/resnet_50/Resnet50Generator.cpp | 66 +++++++++--------- apps/resnet_50/process.cpp | 68 ++++++++++--------- apps/stencil_chain/process.cpp | 6 +- .../stencil_chain/stencil_chain_generator.cpp | 4 +- apps/unsharp/filter.cpp | 4 +- apps/unsharp/unsharp_generator.cpp | 4 +- apps/wavelet/daubechies_x_generator.cpp | 4 +- apps/wavelet/haar_x_generator.cpp | 4 +- 
.../inverse_daubechies_x_generator.cpp | 4 +- apps/wavelet/inverse_haar_x_generator.cpp | 4 +- apps/wavelet/wavelet.cpp | 12 ++-- 94 files changed, 419 insertions(+), 422 deletions(-) diff --git a/apps/HelloAndroid/jni/hello_generator.cpp b/apps/HelloAndroid/jni/hello_generator.cpp index f77d059534f8..a4d5e5e9b6f4 100644 --- a/apps/HelloAndroid/jni/hello_generator.cpp +++ b/apps/HelloAndroid/jni/hello_generator.cpp @@ -6,8 +6,8 @@ namespace { class Hello : public Generator { public: - Input> input{"input", 2}; - Output> result{"result", 2}; + Input> input{"input"}; + Output> result{"result"}; void generate() { tone_curve(x) = cast(pow(cast(x) / 256.0f, 1.8f) * 256.0f); diff --git a/apps/HelloAndroidCamera2/jni/deinterleave_generator.cpp b/apps/HelloAndroidCamera2/jni/deinterleave_generator.cpp index d5e2612ede19..c94509337bfe 100644 --- a/apps/HelloAndroidCamera2/jni/deinterleave_generator.cpp +++ b/apps/HelloAndroidCamera2/jni/deinterleave_generator.cpp @@ -4,7 +4,7 @@ namespace { class Deinterleave : public Halide::Generator { public: - Input> uvInterleaved{"uvInterleaved", 2}; + Input> uvInterleaved{"uvInterleaved"}; // There is no way to declare a Buffer, so we must use Output instead Output result{"result", {UInt(8), UInt(8)}, 2}; diff --git a/apps/HelloAndroidCamera2/jni/edge_detect_generator.cpp b/apps/HelloAndroidCamera2/jni/edge_detect_generator.cpp index 1944daaf1aea..c80f1913a61e 100644 --- a/apps/HelloAndroidCamera2/jni/edge_detect_generator.cpp +++ b/apps/HelloAndroidCamera2/jni/edge_detect_generator.cpp @@ -4,8 +4,8 @@ namespace { class EdgeDetect : public Halide::Generator { public: - Input> input{"input", 2}; - Output> result{"result", 2}; + Input> input{"input"}; + Output> result{"result"}; void generate() { Var x, y; diff --git a/apps/HelloMatlab/iir_blur.cpp b/apps/HelloMatlab/iir_blur.cpp index 3507ad38c2b5..c7bc494801ac 100644 --- a/apps/HelloMatlab/iir_blur.cpp +++ b/apps/HelloMatlab/iir_blur.cpp @@ -59,12 +59,12 @@ class IirBlur : public 
Generator { public: // This is the input image: a 3D (color) image with 32 bit float // pixels. - Input> input{"input", 3}; + Input> input{"input"}; // The filter coefficient, alpha is the weight of the input to the // filter. Input alpha{"alpha"}; - Output> output{"output", 3}; + Output> output{"output"}; void generate() { Expr width = input.width(); diff --git a/apps/HelloWasm/Makefile b/apps/HelloWasm/Makefile index 4de5b2ea6c43..d8e89769f3bd 100644 --- a/apps/HelloWasm/Makefile +++ b/apps/HelloWasm/Makefile @@ -2,7 +2,7 @@ include ../support/Makefile.inc # The emscripten compiler EMCC ?= emcc -EMCC_FLAGS ?= -s WASM=1 -s USE_SDL=2 -s TOTAL_MEMORY=512MB -O3 -I $(HALIDE_DISTRIB_PATH)/include +EMCC_FLAGS ?= -std=c++17 -s WASM=1 -s USE_SDL=2 -s TOTAL_MEMORY=512MB -O3 -I $(HALIDE_DISTRIB_PATH)/include EMCC_THREADS_FLAGS ?= $(EMCC_FLAGS) -pthread -matomics # the output dir for the .js products must be fixed, because that's what index.html looks for diff --git a/apps/HelloWasm/core.cpp b/apps/HelloWasm/core.cpp index 30dd77d1c14f..ae952e188bc6 100644 --- a/apps/HelloWasm/core.cpp +++ b/apps/HelloWasm/core.cpp @@ -24,9 +24,9 @@ struct Context { double smoothed_blit_time = 0; double last_frame_time = 0; - Halide::Runtime::Buffer buf1; - Halide::Runtime::Buffer buf2; - Halide::Runtime::Buffer pixel_buf; + Halide::Runtime::Buffer buf1; + Halide::Runtime::Buffer buf2; + Halide::Runtime::Buffer pixel_buf; }; void mainloop(void *arg) { @@ -102,9 +102,9 @@ int main() { Context ctx; ctx.renderer = renderer; - ctx.buf1 = Halide::Runtime::Buffer(W, H, 3); - ctx.buf2 = Halide::Runtime::Buffer(W, H, 3); - ctx.pixel_buf = Halide::Runtime::Buffer(W, H); + ctx.buf1 = Halide::Runtime::Buffer(W, H, 3); + ctx.buf2 = Halide::Runtime::Buffer(W, H, 3); + ctx.pixel_buf = Halide::Runtime::Buffer(W, H); ctx.tex = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, diff --git a/apps/HelloWasm/reaction_diffusion_generator.cpp b/apps/HelloWasm/reaction_diffusion_generator.cpp index 
c363cc397d82..efd7c3c9b95f 100644 --- a/apps/HelloWasm/reaction_diffusion_generator.cpp +++ b/apps/HelloWasm/reaction_diffusion_generator.cpp @@ -4,7 +4,7 @@ namespace { class ReactionDiffusionInit : public Halide::Generator { public: - Output> output{"output", 3}; + Output> output{"output"}; GeneratorParam threads{"threads", true}; void generate() { @@ -24,11 +24,11 @@ class ReactionDiffusionInit : public Halide::Generator { class ReactionDiffusionUpdate : public Halide::Generator { public: - Input> state{"state", 3}; + Input> state{"state"}; Input mouse_x{"mouse_x"}; Input mouse_y{"mouse_y"}; Input frame{"frame"}; - Output> new_state{"new_state", 3}; + Output> new_state{"new_state"}; GeneratorParam threads{"threads", false}; void generate() { @@ -139,8 +139,8 @@ class ReactionDiffusionUpdate : public Halide::Generator { public: - Input> state{"state", 3}; - Output> render{"render", 2}; + Input> state{"state"}; + Output> render{"render"}; GeneratorParam threads{"threads", false}; void generate() { diff --git a/apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp b/apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp index 341549253f19..308ef39344f9 100644 --- a/apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp +++ b/apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp @@ -4,7 +4,7 @@ namespace { class ReactionDiffusion2Init : public Halide::Generator { public: - Output> output{"output", 3}; + Output> output{"output"}; void generate() { output(x, y, c) = Halide::random_float(); @@ -28,11 +28,11 @@ class ReactionDiffusion2Init : public Halide::Generator class ReactionDiffusion2Update : public Halide::Generator { public: - Input> state{"state", 3}; + Input> state{"state"}; Input mouse_x{"mouse_x"}; Input mouse_y{"mouse_y"}; Input frame{"frame"}; - Output> new_state{"new_state", 3}; + Output> new_state{"new_state"}; void generate() { clamped = Halide::BoundaryConditions::repeat_edge(state); @@ -163,10 +163,10 @@ class ReactionDiffusion2Update : 
public Halide::Generator { public: - Input> state{"state", 3}; + Input> state{"state"}; // TODO(srj): should be Input; using Input to work around Issue #1760 Input output_bgra{"output_bgra", 0, 0, 1}; - Output> render{"render", 3}; + Output> render{"render"}; void generate() { Func contour; diff --git a/apps/auto_viz/auto_viz_demo.cpp b/apps/auto_viz/auto_viz_demo.cpp index 82ab118e8181..3129a8ed8667 100644 --- a/apps/auto_viz/auto_viz_demo.cpp +++ b/apps/auto_viz/auto_viz_demo.cpp @@ -49,10 +49,10 @@ void parse_commandline(int argc, char **argv) { int main(int argc, char **argv) { parse_commandline(argc, argv); - Halide::Runtime::Buffer in = Halide::Tools::load_and_convert_image(infile); + Halide::Runtime::Buffer in = Halide::Tools::load_and_convert_image(infile); int out_width = in.width() * scale_factor; int out_height = in.height() * scale_factor; - Halide::Runtime::Buffer out(out_width, out_height, 3); + Halide::Runtime::Buffer out(out_width, out_height, 3); decltype(&auto_viz_demo_naive_up) variants[2][3] = { diff --git a/apps/auto_viz/auto_viz_demo_generator.cpp b/apps/auto_viz/auto_viz_demo_generator.cpp index 3d5aef6eb99f..81082294ac90 100644 --- a/apps/auto_viz/auto_viz_demo_generator.cpp +++ b/apps/auto_viz/auto_viz_demo_generator.cpp @@ -24,9 +24,9 @@ class AutoVizDemo : public Halide::Generator { // resample in x and in y). GeneratorParam upsample{"upsample", false}; - Input> input{"input", 3}; + Input> input{"input"}; Input scale_factor{"scale_factor"}; - Output> output{"output", 3}; + Output> output{"output"}; // Common Vars Var x, y, c, k; diff --git a/apps/bgu/bgu_generator.cpp b/apps/bgu/bgu_generator.cpp index 89e8f41b9e4a..054df3e52ba6 100644 --- a/apps/bgu/bgu_generator.cpp +++ b/apps/bgu/bgu_generator.cpp @@ -265,11 +265,11 @@ class BGU : public Generator { // Size of each spatial bin in the grid. Typically 16. 
Input s_sigma{"s_sigma"}; - Input> splat_loc{"splat_loc", 3}; - Input> values{"values", 3}; - Input> slice_loc{"slice_loc", 3}; + Input> splat_loc{"splat_loc"}; + Input> values{"values"}; + Input> slice_loc{"slice_loc"}; - Output> output{"output", 3}; + Output> output{"output"}; void generate() { // Algorithm diff --git a/apps/bgu/filter.cpp b/apps/bgu/filter.cpp index 1a66a737bcca..c8d987c9be5a 100644 --- a/apps/bgu/filter.cpp +++ b/apps/bgu/filter.cpp @@ -31,14 +31,14 @@ int main(int argc, char **argv) { // BGU will be good at capturing the contrast enhancement and // vignette, and bad at capturing the high-frequency sharpening. - Halide::Runtime::Buffer high_res_in = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer high_res_in = load_and_convert_image(argv[1]); const int W = high_res_in.width(); const int H = high_res_in.height(); const int C = high_res_in.channels(); - Halide::Runtime::Buffer high_res_out(W, H, C); - Halide::Runtime::Buffer low_res_in(W / 8, H / 8, C); - Halide::Runtime::Buffer low_res_out(W / 8, H / 8, C); + Halide::Runtime::Buffer high_res_out(W, H, C); + Halide::Runtime::Buffer low_res_in(W / 8, H / 8, C); + Halide::Runtime::Buffer low_res_out(W / 8, H / 8, C); // Downsample the input with a box filter low_res_in.fill(0.0f); diff --git a/apps/bilateral_grid/bilateral_grid_generator.cpp b/apps/bilateral_grid/bilateral_grid_generator.cpp index 75c2b2f8fb1b..ede57459d5ab 100644 --- a/apps/bilateral_grid/bilateral_grid_generator.cpp +++ b/apps/bilateral_grid/bilateral_grid_generator.cpp @@ -7,10 +7,9 @@ class BilateralGrid : public Halide::Generator { public: GeneratorParam s_sigma{"s_sigma", 8}; - Input> input{"input", 2}; + Input> input{"input"}; Input r_sigma{"r_sigma"}; - - Output> bilateral_grid{"bilateral_grid", 2}; + Output> bilateral_grid{"bilateral_grid"}; void generate() { Var x("x"), y("y"), z("z"), c("c"); diff --git a/apps/bilateral_grid/filter.cpp b/apps/bilateral_grid/filter.cpp index ee87662a6863..6eb95c4643d3 100644 
--- a/apps/bilateral_grid/filter.cpp +++ b/apps/bilateral_grid/filter.cpp @@ -24,8 +24,8 @@ int main(int argc, char **argv) { float r_sigma = (float)atof(argv[3]); int timing_iterations = atoi(argv[4]); - Buffer input = load_and_convert_image(argv[1]); - Buffer output(input.width(), input.height()); + Buffer input = load_and_convert_image(argv[1]); + Buffer output(input.width(), input.height()); bilateral_grid(input, r_sigma, output); diff --git a/apps/blur/halide_blur_generator.cpp b/apps/blur/halide_blur_generator.cpp index c16896641665..c3fd5009689e 100644 --- a/apps/blur/halide_blur_generator.cpp +++ b/apps/blur/halide_blur_generator.cpp @@ -28,8 +28,8 @@ class HalideBlur : public Halide::Generator { GeneratorParam tile_x{"tile_x", 32}; // X tile. GeneratorParam tile_y{"tile_y", 8}; // Y tile. - Input> input{"input", 2}; - Output> blur_y{"blur_y", 2}; + Input> input{"input"}; + Output> blur_y{"blur_y"}; void generate() { Func blur_x("blur_x"); diff --git a/apps/blur/test.cpp b/apps/blur/test.cpp index 6d7e678285e7..3ce299541c8a 100644 --- a/apps/blur/test.cpp +++ b/apps/blur/test.cpp @@ -15,9 +15,9 @@ using namespace Halide::Tools; double t; -Buffer blur(Buffer in) { - Buffer tmp(in.width() - 8, in.height()); - Buffer out(in.width() - 8, in.height() - 2); +Buffer blur(Buffer in) { + Buffer tmp(in.width() - 8, in.height()); + Buffer out(in.width() - 8, in.height() - 2); t = benchmark(10, 1, [&]() { for (int y = 0; y < tmp.height(); y++) @@ -32,8 +32,8 @@ Buffer blur(Buffer in) { return out; } -Buffer blur_fast(Buffer in) { - Buffer out(in.width() - 8, in.height() - 2); +Buffer blur_fast(Buffer in) { + Buffer out(in.width() - 8, in.height() - 2); t = benchmark(10, 1, [&]() { #ifdef __SSE2__ @@ -133,8 +133,8 @@ Buffer blur_fast(Buffer in) { #include "halide_blur.h" -Buffer blur_halide(Buffer in) { - Buffer out(in.width() - 8, in.height() - 2); +Buffer blur_halide(Buffer in) { + Buffer out(in.width() - 8, in.height() - 2); // Call it once to initialize the halide 
runtime stuff halide_blur(in, out); @@ -162,7 +162,7 @@ int main(int argc, char **argv) { const int width = is_hexagon ? 648 : 2568; const int height = is_hexagon ? 482 : 1922; - Buffer input(width, height); + Buffer input(width, height); for (int y = 0; y < input.height(); y++) { for (int x = 0; x < input.width(); x++) { @@ -170,13 +170,13 @@ int main(int argc, char **argv) { } } - Buffer blurry = blur(input); + Buffer blurry = blur(input); double slow_time = t; - Buffer speedy = blur_fast(input); + Buffer speedy = blur_fast(input); double fast_time = t; - Buffer halide = blur_halide(input); + Buffer halide = blur_halide(input); double halide_time = t; printf("times: %f %f %f\n", slow_time, fast_time, halide_time); diff --git a/apps/c_backend/pipeline_cpp_generator.cpp b/apps/c_backend/pipeline_cpp_generator.cpp index db1e140cb7b3..a6fc3746ffb7 100644 --- a/apps/c_backend/pipeline_cpp_generator.cpp +++ b/apps/c_backend/pipeline_cpp_generator.cpp @@ -50,8 +50,8 @@ HalideExtern_2(int, an_extern_c_func, int, float); class PipelineCpp : public Halide::Generator { public: - Input> input{"input", 2}; - Output> output{"output", 2}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x, y; diff --git a/apps/c_backend/pipeline_generator.cpp b/apps/c_backend/pipeline_generator.cpp index a4aacaa6b0d6..c6a28bc477fa 100644 --- a/apps/c_backend/pipeline_generator.cpp +++ b/apps/c_backend/pipeline_generator.cpp @@ -7,8 +7,8 @@ HalideExtern_2(int, an_extern_func, int, int); class Pipeline : public Halide::Generator { public: - Input> input{"input", 2}; - Output> output{"output", 2}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x, y; diff --git a/apps/c_backend/run.cpp b/apps/c_backend/run.cpp index 07e86bf7c882..29abfbcdac20 100644 --- a/apps/c_backend/run.cpp +++ b/apps/c_backend/run.cpp @@ -37,7 +37,7 @@ extern "C" int an_extern_stage(halide_buffer_t *in, halide_buffer_t *out) { } int main(int argc, char **argv) { - Buffer 
in(1432, 324); + Buffer in(1432, 324); for (int y = 0; y < in.height(); y++) { for (int x = 0; x < in.width(); x++) { @@ -45,8 +45,8 @@ int main(int argc, char **argv) { } } - Buffer out_native(423, 633); - Buffer out_c(423, 633); + Buffer out_native(423, 633); + Buffer out_c(423, 633); pipeline_native(in, out_native); diff --git a/apps/c_backend/run_cpp.cpp b/apps/c_backend/run_cpp.cpp index 20659dcaee1b..4490a8a27641 100644 --- a/apps/c_backend/run_cpp.cpp +++ b/apps/c_backend/run_cpp.cpp @@ -66,7 +66,7 @@ int cpp_extern_2(int a1, float a2) { } // namespace namespace_shared_outer int main(int argc, char **argv) { - Buffer in(100, 100); + Buffer in(100, 100); for (int y = 0; y < in.height(); y++) { for (int x = 0; x < in.width(); x++) { @@ -74,8 +74,8 @@ int main(int argc, char **argv) { } } - Buffer out_native(100, 100); - Buffer out_c(100, 100); + Buffer out_native(100, 100); + Buffer out_c(100, 100); pipeline_cpp_native(in, out_native); diff --git a/apps/camera_pipe/camera_pipe_generator.cpp b/apps/camera_pipe/camera_pipe_generator.cpp index 488bcd1a58d5..ec0323676cd4 100644 --- a/apps/camera_pipe/camera_pipe_generator.cpp +++ b/apps/camera_pipe/camera_pipe_generator.cpp @@ -216,17 +216,16 @@ class CameraPipe : public Halide::Generator { // currently allow 8-bit computations GeneratorParam result_type{"result_type", UInt(8)}; - Input> input{"input", 2}; - Input> matrix_3200{"matrix_3200", 2}; - Input> matrix_7000{"matrix_7000", 2}; + Input> input{"input"}; + Input> matrix_3200{"matrix_3200"}; + Input> matrix_7000{"matrix_7000"}; Input color_temp{"color_temp"}; Input gamma{"gamma"}; Input contrast{"contrast"}; Input sharpen_strength{"sharpen_strength"}; Input blackLevel{"blackLevel"}; Input whiteLevel{"whiteLevel"}; - - Output> processed{"processed", 3}; + Output> processed{"processed"}; void generate(); diff --git a/apps/camera_pipe/process.cpp b/apps/camera_pipe/process.cpp index 38dc564150d6..76a737de3022 100644 --- a/apps/camera_pipe/process.cpp +++ 
b/apps/camera_pipe/process.cpp @@ -29,9 +29,9 @@ int main(int argc, char **argv) { #endif fprintf(stderr, "input: %s\n", argv[1]); - Buffer input = load_and_convert_image(argv[1]); + Buffer input = load_and_convert_image(argv[1]); fprintf(stderr, " %d %d\n", input.width(), input.height()); - Buffer output(((input.width() - 32) / 32) * 32, ((input.height() - 24) / 32) * 32, 3); + Buffer output(((input.width() - 32) / 32) * 32, ((input.height() - 24) / 32) * 32, 3); #ifdef HL_MEMINFO info(input, "input"); @@ -48,7 +48,7 @@ int main(int argc, char **argv) { float _matrix_7000[][4] = {{2.2997f, -0.4478f, 0.1706f, -39.0923f}, {-0.3826f, 1.5906f, -0.2080f, -25.4311f}, {-0.0888f, -0.7344f, 2.2832f, -20.0826f}}; - Buffer matrix_3200(4, 3), matrix_7000(4, 3); + Buffer matrix_3200(4, 3), matrix_7000(4, 3); for (int i = 0; i < 3; i++) { for (int j = 0; j < 4; j++) { matrix_3200(j, i) = _matrix_3200[i][j]; diff --git a/apps/conv_layer/conv_layer_generator.cpp b/apps/conv_layer/conv_layer_generator.cpp index f8f93f380652..5b6ff1ee5e10 100644 --- a/apps/conv_layer/conv_layer_generator.cpp +++ b/apps/conv_layer/conv_layer_generator.cpp @@ -6,11 +6,10 @@ using namespace Halide; class ConvolutionLayer : public Halide::Generator { public: - Input> input{"input", 4}; - Input> filter{"filter", 4}; - Input> bias{"bias", 1}; - - Output> relu{"relu", 4}; + Input> input{"input"}; + Input> filter{"filter"}; + Input> bias{"bias"}; + Output> relu{"relu"}; void generate() { const int N = 5, CI = 128, CO = 128, W = 100, H = 80; diff --git a/apps/conv_layer/process.cpp b/apps/conv_layer/process.cpp index 2a33e7a274fc..1a0eecc4d38a 100644 --- a/apps/conv_layer/process.cpp +++ b/apps/conv_layer/process.cpp @@ -13,9 +13,9 @@ using namespace Halide::Runtime; int main(int argc, char **argv) { const int N = 5, CI = 128, CO = 128, W = 100, H = 80; - Buffer input(CI, W + 2, H + 2, N); - Buffer filter(CO, 3, 3, CI); - Buffer bias(CO); + Buffer input(CI, W + 2, H + 2, N); + Buffer filter(CO, 3, 3, CI); + 
Buffer bias(CO); for (int c = 0; c < input.dim(3).extent(); c++) { for (int z = 0; z < input.channels(); z++) { @@ -41,7 +41,7 @@ int main(int argc, char **argv) { bias(x) = rand(); } - Buffer output(CO, W, H, N); + Buffer output(CO, W, H, N); // This is necessary to get the PTX compiler to do a good // job. TODO: This should be a scheduling directive or a runtime diff --git a/apps/cuda_mat_mul/mat_mul_generator.cpp b/apps/cuda_mat_mul/mat_mul_generator.cpp index d0f35bb61199..1214ffbbbe8f 100644 --- a/apps/cuda_mat_mul/mat_mul_generator.cpp +++ b/apps/cuda_mat_mul/mat_mul_generator.cpp @@ -15,10 +15,10 @@ void set_alignment_and_bounds(OutputImageParam p, int size) { class MatMul : public Halide::Generator { public: GeneratorParam size{"size", 1024}; - Input> A{"A", 2}; - Input> B{"B", 2}; + Input> A{"A"}; + Input> B{"B"}; - Output> out{"out", 2}; + Output> out{"out"}; void generate() { // 688 us on an RTX 2060 diff --git a/apps/cuda_mat_mul/runner.cpp b/apps/cuda_mat_mul/runner.cpp index 99d2a7a85bb1..898496632802 100644 --- a/apps/cuda_mat_mul/runner.cpp +++ b/apps/cuda_mat_mul/runner.cpp @@ -30,7 +30,7 @@ int main(int argc, char **argv) { // Check correctness using small-integer matrices if (1) { - Buffer A(size, size), B(size, size), C(size, size); + Buffer A(size, size), B(size, size), C(size, size); A.for_each_value([](float &v) { v = (rand() & 3) - 1; }); B.for_each_value([](float &v) { v = (rand() & 3) - 1; }); A.set_host_dirty(); @@ -54,7 +54,7 @@ int main(int argc, char **argv) { // Benchmark it { - Buffer A(size, size), B(size, size), C(size, size); + Buffer A(size, size), B(size, size), C(size, size); double t = Halide::Tools::benchmark(5, 5, [&]() { mat_mul(A, B, C); C.device_sync(); diff --git a/apps/depthwise_separable_conv/depthwise_separable_conv_generator.cpp b/apps/depthwise_separable_conv/depthwise_separable_conv_generator.cpp index a7c56be4eef3..d560a8bea376 100644 --- a/apps/depthwise_separable_conv/depthwise_separable_conv_generator.cpp +++ 
b/apps/depthwise_separable_conv/depthwise_separable_conv_generator.cpp @@ -8,19 +8,19 @@ using namespace Halide::BoundaryConditions; class DepthwiseSeparableConvolution : public Generator { public: // [in_channels, width, height, batch_size] - Input> input{"input", 4}; + Input> input{"input"}; // [channel_multiplier, in_channels, filter_width, filter_height] - Input> depthwise_filter{"depthwise_filter", 4}; + Input> depthwise_filter{"depthwise_filter"}; // [out_channels, channel_multiplier * in_channels] - Input> pointwise_filter{"pointwise_filter", 2}; + Input> pointwise_filter{"pointwise_filter"}; // [out_channels] - Input> bias{"bias", 1}; + Input> bias{"bias"}; // [out_channels, width, height, batch_size] - Output> output{"output", 4}; + Output> output{"output"}; void generate() { // The algorithm. It will be a generic depthwise convolution, diff --git a/apps/depthwise_separable_conv/process.cpp b/apps/depthwise_separable_conv/process.cpp index 78c696bc8cdb..39860cf9fbef 100644 --- a/apps/depthwise_separable_conv/process.cpp +++ b/apps/depthwise_separable_conv/process.cpp @@ -13,10 +13,10 @@ int main(int argc, char **argv) { // Second layer of MobileNet v2 const int N = 4, CI = 32, CO = 16, CM = 1, W = 112, H = 112; - Buffer input(CI, W, H, N); - Buffer depthwise_filter(CM, CI, 3, 3); - Buffer pointwise_filter(CO, CI * CM); - Buffer bias(CO); + Buffer input(CI, W, H, N); + Buffer depthwise_filter(CM, CI, 3, 3); + Buffer pointwise_filter(CO, CI * CM); + Buffer bias(CO); for (int c = 0; c < input.dim(3).extent(); c++) { for (int z = 0; z < input.channels(); z++) { @@ -48,7 +48,7 @@ int main(int argc, char **argv) { bias(x) = rand(); } - Buffer output(CO, W, H, N); + Buffer output(CO, W, H, N); output.fill(0.0f); // Manually-tuned version diff --git a/apps/fft/fft_generator.cpp b/apps/fft/fft_generator.cpp index be2e3ca11209..e24481f90d2f 100644 --- a/apps/fft/fft_generator.cpp +++ b/apps/fft/fft_generator.cpp @@ -81,8 +81,8 @@ class FFTGenerator : public 
Halide::Generator { // Dim0: extent = size0, stride = 2 // Dim1: extent = size1, stride = size0 * 2 // Dim2: extent = 2, stride = 1 (real followed by imaginary components) - Input> input{"input", 3}; - Output> output{"output", 3}; + Input> input{"input"}; + Output> output{"output"}; void generate() { _halide_user_assert(size0 > 0) << "FFT must be at least 1D\n"; diff --git a/apps/fft/main.cpp b/apps/fft/main.cpp index e788c45a84a0..49244240524f 100644 --- a/apps/fft/main.cpp +++ b/apps/fft/main.cpp @@ -21,14 +21,14 @@ using namespace Halide::Tools; Var x("x"), y("y"); template -Func make_real(const Buffer &re) { +Func make_real(const Buffer &re) { Func ret; ret(x, y) = re(x, y); return ret; } template -ComplexFunc make_complex(const Buffer &re) { +ComplexFunc make_complex(const Buffer &re) { ComplexFunc ret; ret(x, y) = re(x, y); return ret; @@ -47,7 +47,7 @@ int main(int argc, char **argv) { } // Generate a random image to convolve with. - Buffer in(W, H); + Buffer in(W, H); for (int y = 0; y < H; y++) { for (int x = 0; x < W; x++) { in(x, y) = (float)rand() / (float)RAND_MAX; @@ -56,7 +56,7 @@ int main(int argc, char **argv) { // Construct a box filter kernel centered on the origin. const int box = 3; - Buffer kernel(W, H); + Buffer kernel(W, H); for (int y = 0; y < H; y++) { for (int x = 0; x < W; x++) { int u = x < (W - x) ? 
x : (W - x); @@ -107,8 +107,8 @@ int main(int argc, char **argv) { filtered_r2c = fft2d_c2r(dft_filtered, W, H, target, inv_desc); } - Buffer result_c2c = filtered_c2c.realize({W, H}, target); - Buffer result_r2c = filtered_r2c.realize({W, H}, target); + Buffer result_c2c = filtered_c2c.realize({W, H}, target); + Buffer result_r2c = filtered_r2c.realize({W, H}, target); for (int y = 0; y < H; y++) { for (int x = 0; x < W; x++) { @@ -140,8 +140,8 @@ int main(int argc, char **argv) { Var rep("rep"); - Buffer re_in = lambda(x, y, 0.0f).realize({W, H}); - Buffer im_in = lambda(x, y, 0.0f).realize({W, H}); + Buffer re_in = lambda(x, y, 0.0f).realize({W, H}); + Buffer im_in = lambda(x, y, 0.0f).realize({W, H}); printf("%12s %5s%11s%5s %5s%11s%5s\n", "", "", "Halide", "", "", "FFTW", ""); printf("%12s %10s %10s %10s %10s %10s\n", "DFT type", "Time (us)", "MFLOP/s", "Time (us)", "MFLOP/s", "Ratio"); diff --git a/apps/hannk/halide/common_halide_test.cpp b/apps/hannk/halide/common_halide_test.cpp index 7ed274b41da7..bad17fc89f2e 100644 --- a/apps/hannk/halide/common_halide_test.cpp +++ b/apps/hannk/halide/common_halide_test.cpp @@ -33,7 +33,7 @@ bool test_approx_log2() { auto results = test.realize({extent}); const int log2_precisions_size = sizeof(log2_precisions) / sizeof(log2_precisions[0]); for (int z = 0; z < log2_precisions_size; z++) { - Buffer result = results[z]; + Buffer result = results[z]; const int log2_precision = log2_precisions[z]; const double precision = 1 << log2_precision; for (int x = 0; x < result.width(); x++) { @@ -76,7 +76,7 @@ bool test_approx_exp2() { const int log2_precision_results_size = sizeof(log2_precision_results) / sizeof(log2_precision_results[0]); for (int z = 0; z < log2_precision_results_size; z++) { - Buffer result = results[z]; + Buffer result = results[z]; const int log2_precision_result = log2_precision_results[z]; const double precision_result = 1 << log2_precision_result; for (int y = 0; y < result.height(); y++) { @@ -122,7 
+122,7 @@ bool test_approx_log2p1_exp2() { const int log2_precision_results_size = sizeof(log2_precision_results) / sizeof(log2_precision_results[0]); for (int z = 0; z < log2_precision_results_size; z++) { - Buffer result = results[z]; + Buffer result = results[z]; const int log2_precision_result = log2_precision_results[z]; const double precision_result = 1 << log2_precision_result; for (int y = 0; y < result.height(); y++) { @@ -163,7 +163,7 @@ bool test_approx_log2m1_exp2() { const int log2_precision_results_size = sizeof(log2_precision_results) / sizeof(log2_precision_results[0]); for (int z = 0; z < log2_precision_results_size; z++) { - Buffer result = results[z]; + Buffer result = results[z]; const int log2_precision_result = log2_precision_results[z]; const double precision_result = 1 << log2_precision_result; for (int y = 0; y < result.height(); y++) { @@ -204,7 +204,7 @@ bool test_approx_logistic() { const int log2_precision_results_size = sizeof(log2_precision_results) / sizeof(log2_precision_results[0]); for (int z = 0; z < log2_precision_results_size; z++) { - Buffer result = results[z]; + Buffer result = results[z]; const int log2_precision_result = log2_precision_results[z]; const double precision_result = 1 << log2_precision_result; const double absolute_tolerance = precision_result / 128; @@ -250,7 +250,7 @@ bool test_approx_tanh() { const int log2_precision_results_size = sizeof(log2_precision_results) / sizeof(log2_precision_results[0]); for (int z = 0; z < log2_precision_results_size; z++) { - Buffer result = results[z]; + Buffer result = results[z]; const int log2_precision_result = log2_precision_results[z]; const double precision_result = 1 << log2_precision_result; const double absolute_tolerance = std::max(3.0, precision_result / 512); diff --git a/apps/hannk/halide/conv_generator.cpp b/apps/hannk/halide/conv_generator.cpp index 6c408452f40b..a0dde6d64d25 100644 --- a/apps/hannk/halide/conv_generator.cpp +++ 
b/apps/hannk/halide/conv_generator.cpp @@ -43,17 +43,17 @@ class Conv : public Generator { GeneratorParam unroll_reduction_{"unroll_reduction", 4}; // Unsigned 8-bit input tensor, indexed by c, x, y, b. - Input> input_{"input", 4}; + Input> input_{"input"}; Input input_zero_{"input_zero"}; // A 6D array of filter coefficients indexed by ci % n, co % k, ci / n, co / k, x, y, // where n = vector_reduction and k = accum_vector_size (below). - Input> filter_{"filter", 6}; + Input> filter_{"filter"}; Input filter_zero_{"filter_zero"}; // A 1D array of 32-bit biases. The bias should be added to the c // dimension of the output. - Input> bias_{"bias", 1}; + Input> bias_{"bias"}; // The stride specifies how the input [x, y] is sub-subsampled. For every // spatial location [x, y] in the output buffer, the input buffer is sampled @@ -70,7 +70,7 @@ class Conv : public Generator { Input output_min_{"output_min"}; Input output_max_{"output_max"}; - Output> output_{"output", 4}; + Output> output_{"output"}; void configure() { if (use_8bit_multiply(target)) { @@ -321,13 +321,13 @@ class Conv : public Generator { // The above generator expects the filter to already be tiled into class TileConvFilter : public Generator { public: - Input> input_{"input", 4}; + Input> input_{"input"}; Input input_zero_{"input_zero"}; Input output_zero_{"output_zero"}; // 6D array of filter coefficients indexed by ci % n, co % k, ci / n, co / k, x, y, // where n = vector_reduction and k = accum_vector_size (below). - Output> output_{"output", 6}; + Output> output_{"output"}; void configure() { if (use_8bit_multiply(target)) { diff --git a/apps/hannk/halide/copy_generator.cpp b/apps/hannk/halide/copy_generator.cpp index 45aadbdf3db7..dd48a5ea094d 100644 --- a/apps/hannk/halide/copy_generator.cpp +++ b/apps/hannk/halide/copy_generator.cpp @@ -9,11 +9,9 @@ namespace hannk { // TODO: It might be better to implement this in C++ and not Halide. It's a trivial pipeline. 
class Copy : public Generator { public: - Input> input_{"input", 4}; - + Input> input_{"input"}; Input pad_value_{"pad_value"}; - - Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { Var c("c"), x("x"), y("y"), b("b"); diff --git a/apps/hannk/halide/depthwise_conv_generator.cpp b/apps/hannk/halide/depthwise_conv_generator.cpp index 3ede2a8a053a..9000c89873d8 100644 --- a/apps/hannk/halide/depthwise_conv_generator.cpp +++ b/apps/hannk/halide/depthwise_conv_generator.cpp @@ -20,15 +20,15 @@ class DepthwiseConv : public Generator { GeneratorParam shallow_{"shallow", false}; // Unsigned 8-bit input tensor, indexed by ci, x, y, b. - Input> input_{"input", 4}; + Input> input_{"input"}; Input input_zero_{"input_zero"}; // A 3D array of 8-bit filter coefficients indexed by co, x, y. - Input> filter_{"filter", 3}; + Input> filter_{"filter"}; Input filter_zero_{"filter_zero"}; // A 1D array of 32-bit biases indexed by co. - Input> bias_{"bias", 1}; + Input> bias_{"bias"}; // The stride specifies how the input [x, y] are sub-subsampled. For every // spatial location [x, y] in the output buffer, the input buffer is sampled @@ -51,7 +51,7 @@ class DepthwiseConv : public Generator { Input output_min_{"output_min"}; Input output_max_{"output_max"}; - Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { // The algorithm. @@ -244,13 +244,13 @@ class DepthwiseConv : public Generator { class UpsampleChannels : public Generator { public: // Unsigned 8-bit input tensor, indexed by ci, x, y, b. - Input> input_{"input", 4}; + Input> input_{"input"}; // The depth multiplier specifies the ratio between co and ci. Input factor_{"factor"}; // Unsigned 8-bit output tensor, indexed by co, x, y, b. 
- Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { Var x("x"), y("y"), c("c"), b("b"); diff --git a/apps/hannk/halide/elementwise_generator.cpp b/apps/hannk/halide/elementwise_generator.cpp index 376c1a866762..c393c8ed063a 100644 --- a/apps/hannk/halide/elementwise_generator.cpp +++ b/apps/hannk/halide/elementwise_generator.cpp @@ -11,11 +11,11 @@ namespace hannk { class Add : public Generator { public: // Input buffers and quantization parameters. - Input> input1_{"input1", 2}; + Input> input1_{"input1"}; Input input1_zero_{"input1_zero"}; Input input1_multiplier_{"input1_multiplier"}; - Input> input2_{"input2", 2}; + Input> input2_{"input2"}; Input input2_zero_{"input2_zero"}; Input input2_multiplier_{"input2_multiplier"}; @@ -23,7 +23,7 @@ class Add : public Generator { Input output_min_{"output_min"}; Input output_max_{"output_max"}; - Output> output_{"output", 2}; + Output> output_{"output"}; void generate() { Var x("x"), y("y"); @@ -61,10 +61,10 @@ class Add : public Generator { class Mul : public Generator { public: - Input> input1_{"input1", 2}; + Input> input1_{"input1"}; Input input1_zero_{"input1_zero"}; - Input> input2_{"input2", 2}; + Input> input2_{"input2"}; Input input2_zero_{"input2_zero"}; Input output_zero_{"output_zero"}; @@ -73,7 +73,7 @@ class Mul : public Generator { Input output_min_{"output_min"}; Input output_max_{"output_max"}; - Output> output_{"output", 2}; + Output> output_{"output"}; void generate() { Var x("x"), y("y"); @@ -122,13 +122,13 @@ class Elementwise : public Generator { GeneratorParam output3_type_{"output3_type", Int(0)}; // An array of inputs. - Input[]> inputs_ { "inputs", 2 }; + Input[]> inputs_ { "inputs" }; // The program to run. See elementwise_program.h for a description of // this buffer. - Input> program_{"program", 2}; + Input> program_{"program"}; // Type is determined by the GeneratorParams specified. 
- Output> output_{"output", 2}; + Output> output_{"output"}; void generate() { Var x("x"), y("y"), u("u"); diff --git a/apps/hannk/halide/fill_generator.cpp b/apps/hannk/halide/fill_generator.cpp index 53ef76149228..9f45b2dc17a9 100644 --- a/apps/hannk/halide/fill_generator.cpp +++ b/apps/hannk/halide/fill_generator.cpp @@ -10,8 +10,7 @@ class Fill : public Generator { public: // Value to fill the output with. Input value_{"value"}; - - Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { Var c("c"), x("x"), y("y"), b("b"); diff --git a/apps/hannk/halide/normalizations_generator.cpp b/apps/hannk/halide/normalizations_generator.cpp index fdd90dfd5762..29fd4727a581 100644 --- a/apps/hannk/halide/normalizations_generator.cpp +++ b/apps/hannk/halide/normalizations_generator.cpp @@ -9,10 +9,10 @@ namespace hannk { class L2Normalization : public Generator { public: - Input> input_{"input", 2}; + Input> input_{"input"}; Input input_zero_{"input_zero"}; - Output> output_{"output", 2}; + Output> output_{"output"}; void generate() { Var x("x"), y("y"); @@ -61,7 +61,7 @@ class L2Normalization : public Generator { class Softmax : public Generator { public: - Input> input_{"input", 2}; + Input> input_{"input"}; // The beta multiplier and shift should have an extra factor of log2(e). Input beta_multiplier_{"beta_multiplier"}; Input beta_shift_{"beta_shift"}; @@ -69,7 +69,7 @@ class Softmax : public Generator { Input output_zero_{"output_zero"}; Input output_multiplier_{"output_multiplier"}; Input output_shift_{"output_shift"}; - Output> output_{"output", 2}; + Output> output_{"output"}; void generate() { // The algorithm. 
diff --git a/apps/hannk/halide/pool_generator.cpp b/apps/hannk/halide/pool_generator.cpp index f58acc9d843c..1733569f3ea1 100644 --- a/apps/hannk/halide/pool_generator.cpp +++ b/apps/hannk/halide/pool_generator.cpp @@ -10,7 +10,7 @@ namespace hannk { class AveragePool : public Generator { public: // Unsigned 8-bit input tensor, indexed by c, x, y, b. - Input> input_{"input", 4}; + Input> input_{"input"}; // The stride specifies how the input [x, y] are sub-subsampled. For every // spatial location [x, y] in the output buffer, the input buffer is sampled @@ -23,7 +23,7 @@ class AveragePool : public Generator { Input output_min_{"output_min"}; Input output_max_{"output_max"}; - Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { // The algorithm. @@ -96,7 +96,7 @@ class AveragePool : public Generator { class MaxPool : public Generator { public: // Unsigned 8-bit input tensor, indexed by c, x, y, b. - Input> input_{"input", 4}; + Input> input_{"input"}; // The stride specifies how the input [x, y] are sub-subsampled. For every // spatial location [x, y] in the output buffer, the input buffer is sampled @@ -109,7 +109,7 @@ class MaxPool : public Generator { Input output_min_{"output_min"}; Input output_max_{"output_max"}; - Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { // The algorithm. diff --git a/apps/hannk/halide/reductions_generator.cpp b/apps/hannk/halide/reductions_generator.cpp index d16d9166d8d1..5efab9d10f68 100644 --- a/apps/hannk/halide/reductions_generator.cpp +++ b/apps/hannk/halide/reductions_generator.cpp @@ -8,7 +8,7 @@ namespace hannk { class Mean : public Generator { public: - Input> input_{"input", 4}; + Input> input_{"input"}; // The bounds of the region to reduce. This pipeline is // implemented as a stencil over this reach at each output. 
@@ -23,7 +23,7 @@ class Mean : public Generator { Input b_min_{"b_min"}; Input b_extent_{"b_extent"}; - Output> output_{"output", 4}; + Output> output_{"output"}; void generate() { // The algorithm. diff --git a/apps/hannk/interpreter/elementwise_program.cpp b/apps/hannk/interpreter/elementwise_program.cpp index 86c0764fddf7..6588799914e7 100644 --- a/apps/hannk/interpreter/elementwise_program.cpp +++ b/apps/hannk/interpreter/elementwise_program.cpp @@ -86,7 +86,7 @@ ElementwiseAssembler::ElementwiseAssembler(int16_t *buffer, int buffer_size) : instructions(buffer, InstructionSize, buffer_size / InstructionSize) { } -Halide::Runtime::Buffer ElementwiseAssembler::assemble(std::initializer_list outputs) { +Halide::Runtime::Buffer ElementwiseAssembler::assemble(std::initializer_list outputs) { // Check if the outputs are in the right place already. bool in_order = true; int needed_index = size - (int)outputs.size() + 1; diff --git a/apps/hannk/interpreter/elementwise_program.h b/apps/hannk/interpreter/elementwise_program.h index 68806fb2916c..7c1f20b3cb96 100644 --- a/apps/hannk/interpreter/elementwise_program.h +++ b/apps/hannk/interpreter/elementwise_program.h @@ -52,7 +52,7 @@ class ElementwiseAssembler { }; private: - Halide::Runtime::Buffer instructions; + Halide::Runtime::Buffer instructions; int size = 0; Slot add_instruction(OpCode op, Slot op1, Slot op2, int16_t op3, int16_t op4 = 0); @@ -67,7 +67,7 @@ class ElementwiseAssembler { // Assemble the current program. The return value is the buffer // fromt his assembler cropped to the region needed for the program. - Halide::Runtime::Buffer assemble(std::initializer_list outputs); + Halide::Runtime::Buffer assemble(std::initializer_list outputs); // Write the current program to the given stream. 
void disassemble(std::ostream &output); diff --git a/apps/hannk/interpreter/ops.cpp b/apps/hannk/interpreter/ops.cpp index 3f6d03c5dc65..b931e473fbd5 100644 --- a/apps/hannk/interpreter/ops.cpp +++ b/apps/hannk/interpreter/ops.cpp @@ -836,10 +836,10 @@ void call_conv2d(halide_buffer_t *input, halide_buffer_t *filter, halide_buffer_ bool ConvOp::prepare() { // Pass minimal sized buffers to learn about the alignment requirements. // TODO: need to adapt this to the types of in, filt, out once we support multiple variants - HalideBuffer input_buf(nullptr, 1, 1, 1, 1); - HalideBuffer bias_buf(nullptr, 1); - HalideBuffer filter_buf(filter_type(), nullptr, 1, 1, 1, 1, 1, 1); - HalideBuffer output_buf(nullptr, 1, 1, 1, 1); + HalideBuffer input_buf(nullptr, 1, 1, 1, 1); + HalideBuffer bias_buf(nullptr, 1); + HalideBuffer filter_buf(filter_type(), nullptr, 1, 1, 1, 1, 1, 1); + HalideBuffer output_buf(nullptr, 1, 1, 1, 1); if (conv_u8_u8_u8(input_buf, 0, filter_buf, 0, bias_buf, 1, 1, 1, 1, 0, 0, 0, 0, 0, output_buf) != 0) { return false; } @@ -975,10 +975,10 @@ BoundsMap DepthwiseConv2DOp::map_bounds(int input_idx, int output_idx) const { bool DepthwiseConv2DOp::prepare() { // Pass minimal sized buffers to learn about the alignment requirements. 
// TODO: need to adapt this to the types of in, filt, out once we support multiple variants - HalideBuffer input_buf(nullptr, 1, 1, 1, 1); - HalideBuffer bias_buf(nullptr, 1); - HalideBuffer filter_buf(nullptr, 1, 1, 1); - HalideBuffer output_buf(nullptr, 1, 1, 1, 1); + HalideBuffer input_buf(nullptr, 1, 1, 1, 1); + HalideBuffer bias_buf(nullptr, 1); + HalideBuffer filter_buf(nullptr, 1, 1, 1); + HalideBuffer output_buf(nullptr, 1, 1, 1, 1); if (depthwise_conv_uint8(input_buf, 0, filter_buf, 0, bias_buf, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, output_buf) != 0) { return false; } diff --git a/apps/hannk/interpreter/ops.h b/apps/hannk/interpreter/ops.h index e20d2c5ab71c..7115d428c929 100644 --- a/apps/hannk/interpreter/ops.h +++ b/apps/hannk/interpreter/ops.h @@ -212,13 +212,13 @@ class DepthwiseConv2DOp : public Op { class ElementwiseProgramOp : public ElementwiseOp { private: - Halide::Runtime::Buffer program_; + Halide::Runtime::Buffer program_; public: - ElementwiseProgramOp(std::vector inputs, const TensorPtr &output, HalideBuffer program) + ElementwiseProgramOp(std::vector inputs, const TensorPtr &output, HalideBuffer program) : ElementwiseOp(std::move(inputs), {output}), program_(program) { } - ElementwiseProgramOp(std::vector inputs, std::vector outputs, HalideBuffer program) + ElementwiseProgramOp(std::vector inputs, std::vector outputs, HalideBuffer program) : ElementwiseOp(std::move(inputs), std::move(outputs)), program_(program) { } diff --git a/apps/hannk/interpreter/transforms.cpp b/apps/hannk/interpreter/transforms.cpp index 709577704129..160c1c84fe45 100644 --- a/apps/hannk/interpreter/transforms.cpp +++ b/apps/hannk/interpreter/transforms.cpp @@ -335,7 +335,7 @@ class PadForOps : public OpMutator { TensorPtr padded = std::make_shared(input->name() + ".padded", input->type(), required, input->quantization()); - HalideBuffer padding_data(2, input->rank()); + HalideBuffer padding_data(2, input->rank()); // Center the crop, except for the channel dimension. 
// TODO: Is this always correct? const int r = input->rank(); diff --git a/apps/hannk/tflite/tflite_parser.cpp b/apps/hannk/tflite/tflite_parser.cpp index 19edcbe58a31..725d5ab96499 100644 --- a/apps/hannk/tflite/tflite_parser.cpp +++ b/apps/hannk/tflite/tflite_parser.cpp @@ -274,7 +274,7 @@ class Parser { shape_tensor = tensors_[op->inputs()->Get(1)]; } else if (options) { size_t size = options->new_shape()->size(); - HalideBuffer shape_data(size); + HalideBuffer shape_data(size); for (size_t i = 0; i < size; i++) { shape_data(i) = options->new_shape()->Get(i); } diff --git a/apps/hannk/util/buffer_util.h b/apps/hannk/util/buffer_util.h index da9f780efcf7..eae2fdc01797 100644 --- a/apps/hannk/util/buffer_util.h +++ b/apps/hannk/util/buffer_util.h @@ -13,8 +13,8 @@ namespace hannk { // Using a Buffer with space for max_rank dimensions is a meaningful // win for some corner cases (when adding dimensions to > 4). -template -using HalideBuffer = Halide::Runtime::Buffer; +template +using HalideBuffer = Halide::Runtime::Buffer; // dynamic_type_dispatch is a utility for functors that want to be able // to dynamically dispatch a halide_type_t to type-specialized code. 
diff --git a/apps/harris/filter.cpp b/apps/harris/filter.cpp index 43bb2ff3a38a..6668e2b08b5b 100644 --- a/apps/harris/filter.cpp +++ b/apps/harris/filter.cpp @@ -19,10 +19,10 @@ int main(int argc, char **argv) { return 1; } - Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); // The harris app doesn't use a boundary condition - Halide::Runtime::Buffer output(input.width() - 6, input.height() - 6); + Halide::Runtime::Buffer output(input.width() - 6, input.height() - 6); output.set_min(3, 3); double best_manual = benchmark([&]() { diff --git a/apps/harris/harris_generator.cpp b/apps/harris/harris_generator.cpp index 02f3afb51d39..feb16d1d7170 100644 --- a/apps/harris/harris_generator.cpp +++ b/apps/harris/harris_generator.cpp @@ -12,8 +12,8 @@ Expr sum3x3(Func f, Var x, Var y) { class Harris : public Halide::Generator { public: - Input> input{"input", 3}; - Output> output{"output", 2}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x("x"), y("y"), c("c"); diff --git a/apps/hexagon_benchmarks/conv3x3_generator.cpp b/apps/hexagon_benchmarks/conv3x3_generator.cpp index 12b759c05fbc..2c32a10d8a1e 100644 --- a/apps/hexagon_benchmarks/conv3x3_generator.cpp +++ b/apps/hexagon_benchmarks/conv3x3_generator.cpp @@ -6,10 +6,10 @@ class Conv3x3 : public Generator { public: GeneratorParam accumulator_type{"accumulator_type", Int(16)}; // Takes an 8 bit image; one channel. - Input> input{"input", 2}; - Input> mask{"mask", 2}; + Input> input{"input"}; + Input> mask{"mask"}; // Outputs an 8 bit image; one channel. 
- Output> output{"output", 2}; + Output> output{"output"}; GeneratorParam use_parallel_sched{"use_parallel_sched", true}; GeneratorParam use_prefetch_sched{"use_prefetch_sched", true}; diff --git a/apps/hexagon_benchmarks/dilate3x3_generator.cpp b/apps/hexagon_benchmarks/dilate3x3_generator.cpp index d380658f7e98..2d7e13075d3d 100644 --- a/apps/hexagon_benchmarks/dilate3x3_generator.cpp +++ b/apps/hexagon_benchmarks/dilate3x3_generator.cpp @@ -5,9 +5,9 @@ using namespace Halide; class Dilate3x3 : public Generator { public: // Takes an 8 bit image; one channel. - Input> input{"input", 2}; + Input> input{"input"}; // Outputs an 8 bit image; one channel. - Output> output{"output", 2}; + Output> output{"output"}; GeneratorParam use_parallel_sched{"use_parallel_sched", true}; GeneratorParam use_prefetch_sched{"use_prefetch_sched", true}; diff --git a/apps/hexagon_benchmarks/gaussian5x5_generator.cpp b/apps/hexagon_benchmarks/gaussian5x5_generator.cpp index 252a27a528d7..84bb071cc5d3 100644 --- a/apps/hexagon_benchmarks/gaussian5x5_generator.cpp +++ b/apps/hexagon_benchmarks/gaussian5x5_generator.cpp @@ -4,8 +4,8 @@ using namespace Halide; class Gaussian5x5 : public Generator { public: - Input> input{"input", 2}; - Output> output{"output", 2}; + Input> input{"input"}; + Output> output{"output"}; GeneratorParam use_parallel_sched{"use_parallel_sched", true}; GeneratorParam use_prefetch_sched{"use_prefetch_sched", true}; diff --git a/apps/hexagon_benchmarks/median3x3_generator.cpp b/apps/hexagon_benchmarks/median3x3_generator.cpp index b7cbabb1166d..47dcc4cce0e3 100644 --- a/apps/hexagon_benchmarks/median3x3_generator.cpp +++ b/apps/hexagon_benchmarks/median3x3_generator.cpp @@ -10,9 +10,9 @@ class Median3x3 : public Generator { public: // Takes an 8 bit image; one channel. - Input> input{"input", 2}; + Input> input{"input"}; // Outputs an 8 bit image; one channel. 
- Output> output{"output", 2}; + Output> output{"output"}; GeneratorParam use_parallel_sched{"use_parallel_sched", true}; GeneratorParam use_prefetch_sched{"use_prefetch_sched", true}; diff --git a/apps/hexagon_benchmarks/process.h b/apps/hexagon_benchmarks/process.h index 839ed6514305..f72db31f13f3 100644 --- a/apps/hexagon_benchmarks/process.h +++ b/apps/hexagon_benchmarks/process.h @@ -46,8 +46,8 @@ struct PipelineDescriptorBase { }; class Conv3x3a16Descriptor : public PipelineDescriptorBase { - Halide::Runtime::Buffer u8_in, u8_out; - Halide::Runtime::Buffer i8_mask; + Halide::Runtime::Buffer u8_in, u8_out; + Halide::Runtime::Buffer i8_mask; public: Conv3x3a16Descriptor(int W, int H) @@ -131,7 +131,7 @@ class Conv3x3a16Descriptor : public PipelineDescriptorBase { }; class Dilate3x3Descriptor : public PipelineDescriptorBase { - Halide::Runtime::Buffer u8_in, u8_out; + Halide::Runtime::Buffer u8_in, u8_out; private: static uint8_t max3(uint8_t a, uint8_t b, uint8_t c) { @@ -207,7 +207,7 @@ class Dilate3x3Descriptor : public PipelineDescriptorBase { }; class Median3x3Descriptor : public PipelineDescriptorBase { - Halide::Runtime::Buffer u8_in, u8_out; + Halide::Runtime::Buffer u8_in, u8_out; public: Median3x3Descriptor(int W, int H) @@ -276,7 +276,7 @@ class Median3x3Descriptor : public PipelineDescriptorBase { }; class Gaussian5x5Descriptor : public PipelineDescriptorBase { - Halide::Runtime::Buffer u8_in, u8_out; + Halide::Runtime::Buffer u8_in, u8_out; public: Gaussian5x5Descriptor(int W, int H) @@ -347,7 +347,7 @@ class Gaussian5x5Descriptor : public PipelineDescriptorBase { }; class SobelDescriptor : public PipelineDescriptorBase { - Halide::Runtime::Buffer u8_in, u8_out; + Halide::Runtime::Buffer u8_in, u8_out; public: SobelDescriptor(int W, int H) @@ -423,8 +423,8 @@ class SobelDescriptor : public PipelineDescriptorBase { }; class Conv3x3a32Descriptor : public PipelineDescriptorBase { - Halide::Runtime::Buffer u8_in, u8_out; - Halide::Runtime::Buffer 
i8_mask; + Halide::Runtime::Buffer u8_in, u8_out; + Halide::Runtime::Buffer i8_mask; public: Conv3x3a32Descriptor(int W, int H) diff --git a/apps/hexagon_benchmarks/sobel_generator.cpp b/apps/hexagon_benchmarks/sobel_generator.cpp index 460d2e506478..14ab54ec295f 100644 --- a/apps/hexagon_benchmarks/sobel_generator.cpp +++ b/apps/hexagon_benchmarks/sobel_generator.cpp @@ -4,8 +4,8 @@ using namespace Halide; class Sobel : public Generator { public: - Input> input{"input", 2}; - Output> output{"output", 2}; + Input> input{"input"}; + Output> output{"output"}; GeneratorParam use_parallel_sched{"use_parallel_sched", true}; GeneratorParam use_prefetch_sched{"use_prefetch_sched", true}; diff --git a/apps/hexagon_dma/pipeline_raw_linear_interleaved_basic.cpp b/apps/hexagon_dma/pipeline_raw_linear_interleaved_basic.cpp index f2bb63f0a3d5..ffc1b97d019c 100644 --- a/apps/hexagon_dma/pipeline_raw_linear_interleaved_basic.cpp +++ b/apps/hexagon_dma/pipeline_raw_linear_interleaved_basic.cpp @@ -6,8 +6,8 @@ using namespace Halide; // 2, and (optionally) writes it back via DMA. 
class DmaPipeline : public Generator { public: - Input> input{"input", 3}; - Output> output{"output", 3}; + Input> input{"input"}; + Output> output{"output"}; enum class Schedule { Basic, Fold, diff --git a/apps/hexagon_dma/process_raw_linear_interleaved_basic.cpp b/apps/hexagon_dma/process_raw_linear_interleaved_basic.cpp index 32e27ef85e7d..483066b83ad3 100644 --- a/apps/hexagon_dma/process_raw_linear_interleaved_basic.cpp +++ b/apps/hexagon_dma/process_raw_linear_interleaved_basic.cpp @@ -94,10 +94,10 @@ int main(int argc, char **argv) { } // Setup Halide input buffer with the test buffer - auto input = Halide::Runtime::Buffer::make_interleaved(width, height, 4); + auto input = Halide::Runtime::Buffer::make_interleaved(width, height, 4); // Setup Halide output buffer - auto output = Halide::Runtime::Buffer::make_interleaved(width, height, 4); + auto output = Halide::Runtime::Buffer::make_interleaved(width, height, 4); // DMA_step 1: Assign buffer to DMA interface input.device_wrap_native(halide_hexagon_dma_device_interface(), reinterpret_cast(data_in)); diff --git a/apps/hexagon_dma/process_yuv_linear_basic.cpp b/apps/hexagon_dma/process_yuv_linear_basic.cpp index 329d245ef6e6..bfaf1e0207a9 100644 --- a/apps/hexagon_dma/process_yuv_linear_basic.cpp +++ b/apps/hexagon_dma/process_yuv_linear_basic.cpp @@ -117,10 +117,10 @@ inline int process_pipeline(T const &type, const int width, const int height, } // Setup Halide input buffer with the test buffer - Halide::Runtime::Buffer input_validation(data_in, width, height, 2); - Halide::Runtime::Buffer input(nullptr, width, (3 * height) / 2); - Halide::Runtime::Buffer input_y = input.cropped(1, 0, height); // Luma plane only - Halide::Runtime::Buffer input_uv = input.cropped(1, height, height / 2); // Chroma plane only, with reduced height + Halide::Runtime::Buffer input_validation(data_in, width, height, 2); + Halide::Runtime::Buffer input(nullptr, width, (3 * height) / 2); + Halide::Runtime::Buffer input_y = 
input.cropped(1, 0, height); // Luma plane only + Halide::Runtime::Buffer input_uv = input.cropped(1, height, height / 2); // Chroma plane only, with reduced height // describe the UV interleaving for 4:2:0 format input_uv.embed(2, 0); @@ -130,9 +130,9 @@ inline int process_pipeline(T const &type, const int width, const int height, input_uv.raw_buffer()->dim[0].extent = width / 2; // Setup Halide output buffer - Halide::Runtime::Buffer output(width, (3 * height) / 2); - Halide::Runtime::Buffer output_y = output.cropped(1, 0, height); // Luma plane only - Halide::Runtime::Buffer output_uv = output.cropped(1, height, (height / 2)); // Chroma plane only, with reduced height + Halide::Runtime::Buffer output(width, (3 * height) / 2); + Halide::Runtime::Buffer output_y = output.cropped(1, 0, height); // Luma plane only + Halide::Runtime::Buffer output_uv = output.cropped(1, height, (height / 2)); // Chroma plane only, with reduced height // describe the UV interleaving for 4:2:0 format output_uv.embed(2, 0); diff --git a/apps/hist/filter.cpp b/apps/hist/filter.cpp index 49cb0c2bb613..125f562233be 100644 --- a/apps/hist/filter.cpp +++ b/apps/hist/filter.cpp @@ -19,8 +19,8 @@ int main(int argc, char **argv) { return 1; } - Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); - Halide::Runtime::Buffer output(input.width(), input.height(), 3); + Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer output(input.width(), input.height(), 3); double best_manual = benchmark([&]() { hist(input, output); diff --git a/apps/hist/hist_generator.cpp b/apps/hist/hist_generator.cpp index ba8a32a35f5b..e3d5de7f5737 100644 --- a/apps/hist/hist_generator.cpp +++ b/apps/hist/hist_generator.cpp @@ -6,8 +6,8 @@ using namespace Halide::ConciseCasts; class Hist : public Halide::Generator { public: - Input> input{"input", 3}; - Output> output{"output", 3}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x("x"), y("y"), 
c("c"); diff --git a/apps/iir_blur/filter.cpp b/apps/iir_blur/filter.cpp index 57ac4236e99c..fe0abd45ff79 100644 --- a/apps/iir_blur/filter.cpp +++ b/apps/iir_blur/filter.cpp @@ -19,8 +19,8 @@ int main(int argc, char **argv) { return 1; } - Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); - Halide::Runtime::Buffer output(input.width(), input.height(), input.channels()); + Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer output(input.width(), input.height(), input.channels()); double best_manual = benchmark([&]() { iir_blur(input, 0.5f, output); diff --git a/apps/iir_blur/iir_blur_generator.cpp b/apps/iir_blur/iir_blur_generator.cpp index e9467579ac98..59ef065e79e6 100644 --- a/apps/iir_blur/iir_blur_generator.cpp +++ b/apps/iir_blur/iir_blur_generator.cpp @@ -133,12 +133,12 @@ class IirBlur : public Generator { public: // This is the input image: a 3D (color) image with 32 bit float // pixels. - Input> input{"input", 3}; + Input> input{"input"}; // The filter coefficient, alpha is the weight of the input to the // filter. 
Input alpha{"alpha"}; - Output> output{"output", 3}; + Output> output{"output"}; void generate() { Expr width = input.width(); diff --git a/apps/interpolate/filter.cpp b/apps/interpolate/filter.cpp index 5481e89bdd80..0a093d2462a5 100644 --- a/apps/interpolate/filter.cpp +++ b/apps/interpolate/filter.cpp @@ -19,8 +19,8 @@ int main(int argc, char **argv) { return 1; } - Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); - Halide::Runtime::Buffer output(input.width(), input.height(), 3); + Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer output(input.width(), input.height(), 3); double best_manual = benchmark([&]() { interpolate(input, output); diff --git a/apps/interpolate/interpolate_generator.cpp b/apps/interpolate/interpolate_generator.cpp index 3a47a8f9d6bc..58d6d65374eb 100644 --- a/apps/interpolate/interpolate_generator.cpp +++ b/apps/interpolate/interpolate_generator.cpp @@ -14,8 +14,8 @@ class Interpolate : public Halide::Generator { public: GeneratorParam levels{"levels", 10}; - Input> input{"input", 3}; - Output> output{"output", 3}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x("x"), y("y"), c("c"); diff --git a/apps/lens_blur/lens_blur_generator.cpp b/apps/lens_blur/lens_blur_generator.cpp index 62df9c365509..52fad46cb82b 100644 --- a/apps/lens_blur/lens_blur_generator.cpp +++ b/apps/lens_blur/lens_blur_generator.cpp @@ -8,8 +8,8 @@ using namespace Halide; class LensBlur : public Halide::Generator { public: - Input> left_im{"left_im", 3}; - Input> right_im{"right_im", 3}; + Input> left_im{"left_im"}; + Input> right_im{"right_im"}; // The number of displacements to consider Input slices{"slices", 32, 1, 64}; // The depth to focus on @@ -19,7 +19,7 @@ class LensBlur : public Halide::Generator { // The number of samples of the aperture to use Input aperture_samples{"aperture_samples", 32, 1, 64}; - Output> final{"final", 3}; + Output> final{"final"}; void generate() { /* 
THE ALGORITHM */ diff --git a/apps/lens_blur/process.cpp b/apps/lens_blur/process.cpp index 8002751705e6..e82df286ba06 100644 --- a/apps/lens_blur/process.cpp +++ b/apps/lens_blur/process.cpp @@ -23,13 +23,13 @@ int main(int argc, char **argv) { // them. cuMemAlloc/cuMemFree is slower than the algorithm! halide_reuse_device_allocations(nullptr, true); - Buffer left_im = load_image(argv[1]); - Buffer right_im = load_image(argv[1]); + Buffer left_im = load_image(argv[1]); + Buffer right_im = load_image(argv[1]); uint32_t slices = atoi(argv[2]); uint32_t focus_depth = atoi(argv[3]); float blur_radius_scale = atof(argv[4]); uint32_t aperture_samples = atoi(argv[5]); - Buffer output(left_im.width(), left_im.height(), 3); + Buffer output(left_im.width(), left_im.height(), 3); int timing_iterations = atoi(argv[6]); lens_blur(left_im, right_im, slices, focus_depth, blur_radius_scale, diff --git a/apps/linear_algebra/benchmarks/halide_benchmarks.cpp b/apps/linear_algebra/benchmarks/halide_benchmarks.cpp index 4b2e629f2f1b..e150fa840253 100644 --- a/apps/linear_algebra/benchmarks/halide_benchmarks.cpp +++ b/apps/linear_algebra/benchmarks/halide_benchmarks.cpp @@ -22,8 +22,8 @@ template struct BenchmarksBase { typedef T Scalar; - typedef Halide::Runtime::Buffer Vector; - typedef Halide::Runtime::Buffer Matrix; + typedef Halide::Runtime::Buffer Vector; + typedef Halide::Runtime::Buffer Matrix; std::random_device rand_dev; std::default_random_engine rand_eng{rand_dev()}; @@ -102,10 +102,10 @@ struct BenchmarksBase { struct BenchmarksFloat : public BenchmarksBase { BenchmarksFloat(std::string n) : BenchmarksBase(n), - result(Halide::Runtime::Buffer::make_scalar()) { + result(Halide::Runtime::Buffer::make_scalar()) { } - Halide::Runtime::Buffer result; + Halide::Runtime::Buffer result; L1Benchmark(copy, "s", halide_scopy(x.raw_buffer(), y.raw_buffer())); L1Benchmark(scal, "s", halide_sscal(alpha, x.raw_buffer())); @@ -131,10 +131,10 @@ struct BenchmarksFloat : public 
BenchmarksBase { struct BenchmarksDouble : public BenchmarksBase { BenchmarksDouble(std::string n) : BenchmarksBase(n), - result(Halide::Runtime::Buffer::make_scalar()) { + result(Halide::Runtime::Buffer::make_scalar()) { } - Halide::Runtime::Buffer result; + Halide::Runtime::Buffer result; L1Benchmark(copy, "d", halide_dcopy(x.raw_buffer(), y.raw_buffer())); L1Benchmark(scal, "d", halide_dscal(alpha, x.raw_buffer())); diff --git a/apps/linear_algebra/src/blas_l1_generators.cpp b/apps/linear_algebra/src/blas_l1_generators.cpp index 713ac8cdf6e1..3b6efd5fc410 100644 --- a/apps/linear_algebra/src/blas_l1_generators.cpp +++ b/apps/linear_algebra/src/blas_l1_generators.cpp @@ -25,10 +25,10 @@ class AXPYGenerator : public Generator> { // Standard ordering of parameters in AXPY functions. Input a_{"a", 1}; - Input> x_{"x", 1}; - Input> y_{"y", 1}; + Input> x_{"x"}; + Input> y_{"y"}; - Output> result_{"result", 1}; + Output> result_{"result"}; template Expr calc(Arg i) { @@ -86,10 +86,10 @@ class DotGenerator : public Generator> { GeneratorParam parallel_{"parallel", true}; GeneratorParam block_size_{"block_size", 1024}; - Input> x_{"x", 1}; - Input> y_{"y", 1}; + Input> x_{"x"}; + Input> y_{"y"}; - Output> result_{"result", 0}; + Output> result_{"result"}; void generate() { assert(get_target().has_feature(Target::NoBoundsQuery)); @@ -140,9 +140,9 @@ class AbsSumGenerator : public Generator> { GeneratorParam parallel_{"parallel", true}; GeneratorParam block_size_{"block_size", 1024}; - Input> x_{"x", 1}; + Input> x_{"x"}; - Output> result_{"result", 0}; + Output> result_{"result"}; void generate() { assert(get_target().has_feature(Target::NoBoundsQuery)); diff --git a/apps/linear_algebra/src/blas_l2_generators.cpp b/apps/linear_algebra/src/blas_l2_generators.cpp index d6daea5bea62..511af2bbbf66 100644 --- a/apps/linear_algebra/src/blas_l2_generators.cpp +++ b/apps/linear_algebra/src/blas_l2_generators.cpp @@ -25,12 +25,12 @@ class GEMVGenerator : public Generator> { // 
Standard ordering of parameters in GEMV functions. Input a_{"a", 1}; - Input> A_{"A", 2}; - Input> x_{"x", 1}; + Input> A_{"A"}; + Input> x_{"x"}; Input b_{"b", 1}; - Input> y_{"y", 1}; + Input> y_{"y"}; - Output> output_{"output", 1}; + Output> output_{"output"}; void generate() { assert(get_target().has_feature(Target::NoBoundsQuery)); @@ -215,10 +215,10 @@ class GERGenerator : public Generator> { // Standard ordering of parameters in GEMV functions. Input a_{"a", 1}; - Input> x_{"x", 1}; - Input> y_{"y", 1}; + Input> x_{"x"}; + Input> y_{"y"}; - Output> result_{"result", 2}; + Output> result_{"result"}; void generate() { const int vec_size = vectorize_ ? natural_vector_size(type_of()) : 1; diff --git a/apps/linear_algebra/src/blas_l3_generators.cpp b/apps/linear_algebra/src/blas_l3_generators.cpp index 951dbcbc47d1..bcf082ef1e9f 100644 --- a/apps/linear_algebra/src/blas_l3_generators.cpp +++ b/apps/linear_algebra/src/blas_l3_generators.cpp @@ -23,12 +23,12 @@ class GEMMGenerator : public Generator> { // Standard ordering of parameters in GEMM functions. Input a_{"a_", 1}; - Input> A_{"A_", 2}; - Input> B_{"B_", 2}; + Input> A_{"A_"}; + Input> B_{"B_"}; Input b_{"b_", 1}; - Input> C_{"C_", 2}; + Input> C_{"C_"}; - Output> result_{"result", 2}; + Output> result_{"result"}; void generate() { // Matrices are interpreted as column-major by default. The @@ -41,8 +41,8 @@ class GEMMGenerator : public Generator> { const int vec = std::max(4, natural_vector_size(a_.type())); const int s = vec * 2; - Input> *A_in = &A_; - Input> *B_in = &B_; + Input> *A_in = &A_; + Input> *B_in = &B_; // If they're both transposed, then reverse the order and transpose the result instead. 
const bool transpose_AB = (bool)transpose_A_ && (bool)transpose_B_; diff --git a/apps/linear_algebra/src/halide_blas.cpp b/apps/linear_algebra/src/halide_blas.cpp index 8625b92c0272..605bc57d6d1e 100644 --- a/apps/linear_algebra/src/halide_blas.cpp +++ b/apps/linear_algebra/src/halide_blas.cpp @@ -5,28 +5,30 @@ using Halide::Runtime::Buffer; -#define assert_no_error(func) \ - if (func != 0) { \ - std::cerr << "ERROR! Halide kernel returned non-zero value.\n"; \ - } +#define assert_no_error(func) \ + do { \ + if (func != 0) { \ + std::cerr << "ERROR! Halide kernel returned non-zero value.\n"; \ + } \ + } while (0) namespace { template -Buffer init_scalar_buffer(T *x) { - return Buffer::make_scalar(x); +Buffer init_scalar_buffer(T *x) { + return Buffer::make_scalar(x); } template -Buffer init_vector_buffer(const int N, T *x, const int incx) { +Buffer init_vector_buffer(const int N, T *x, const int incx) { halide_dimension_t shape = {0, N, incx}; - return Buffer(x, 1, &shape); + return Buffer(x, 1, &shape); } template -Buffer init_matrix_buffer(const int M, const int N, T *A, const int lda) { +Buffer init_matrix_buffer(const int M, const int N, T *A, const int lda) { halide_dimension_t shape[] = {{0, M, 1}, {0, N, lda}}; - return Buffer(A, 2, shape); + return Buffer(A, 2, shape); } } // namespace diff --git a/apps/linear_blur/linear_blur_generator.cpp b/apps/linear_blur/linear_blur_generator.cpp index 326827199f67..9b18e4b4bd3d 100644 --- a/apps/linear_blur/linear_blur_generator.cpp +++ b/apps/linear_blur/linear_blur_generator.cpp @@ -6,8 +6,8 @@ namespace { struct LinearBlur : public Halide::Generator { - Input> input{"input", 3}; - Output> output{"output", 3}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x("x"), y("y"), c("c"); diff --git a/apps/linear_blur/run_linear_blur.cpp b/apps/linear_blur/run_linear_blur.cpp index 42715b63bd1e..8f44ffd542bf 100644 --- a/apps/linear_blur/run_linear_blur.cpp +++ 
b/apps/linear_blur/run_linear_blur.cpp @@ -19,8 +19,8 @@ int main(int argc, char **argv) { int use_linear = atoi(argv[1]); - Buffer input = load_and_convert_image(argv[2]); - Buffer output = Buffer::make_with_shape_of(input); + Buffer input = load_and_convert_image(argv[2]); + Buffer output = Buffer::make_with_shape_of(input); // Call either the simple or linear-corrected blur at runtime, // mainly to demonstrate how simple_blur can be used either standalone diff --git a/apps/local_laplacian/local_laplacian_generator.cpp b/apps/local_laplacian/local_laplacian_generator.cpp index 7aded18b1e61..b1c697a2a3b7 100644 --- a/apps/local_laplacian/local_laplacian_generator.cpp +++ b/apps/local_laplacian/local_laplacian_generator.cpp @@ -9,12 +9,11 @@ class LocalLaplacian : public Halide::Generator { public: GeneratorParam pyramid_levels{"pyramid_levels", 8, 1, maxJ}; - Input> input{"input", 3}; + Input> input{"input"}; Input levels{"levels"}; Input alpha{"alpha"}; Input beta{"beta"}; - - Output> output{"output", 3}; + Output> output{"output"}; void generate() { /* THE ALGORITHM */ diff --git a/apps/local_laplacian/process.cpp b/apps/local_laplacian/process.cpp index bddfa415adc3..389ddbfb1b1d 100644 --- a/apps/local_laplacian/process.cpp +++ b/apps/local_laplacian/process.cpp @@ -21,11 +21,11 @@ int main(int argc, char **argv) { } // Input may be a PNG8 - Buffer input = load_and_convert_image(argv[1]); + Buffer input = load_and_convert_image(argv[1]); int levels = atoi(argv[2]); float alpha = atof(argv[3]), beta = atof(argv[4]); - Buffer output(input.width(), input.height(), 3); + Buffer output(input.width(), input.height(), 3); int timing = atoi(argv[5]); local_laplacian(input, levels, alpha / (levels - 1), beta, output); diff --git a/apps/max_filter/filter.cpp b/apps/max_filter/filter.cpp index e673e73c0f41..1d0a2e90cfca 100644 --- a/apps/max_filter/filter.cpp +++ b/apps/max_filter/filter.cpp @@ -19,8 +19,8 @@ int main(int argc, char **argv) { return 1; } - 
Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); - Halide::Runtime::Buffer output(input.width(), input.height(), 3); + Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer output(input.width(), input.height(), 3); double best_manual = benchmark([&]() { max_filter(input, output); diff --git a/apps/max_filter/max_filter_generator.cpp b/apps/max_filter/max_filter_generator.cpp index 9f60bccf82f2..02856a5e4604 100644 --- a/apps/max_filter/max_filter_generator.cpp +++ b/apps/max_filter/max_filter_generator.cpp @@ -8,8 +8,8 @@ using namespace Halide::BoundaryConditions; class Max : public Halide::Generator { public: GeneratorParam radius_{"radius", 26}; - Input> input_{"input", 3}; - Output> output_{"output", 3}; + Input> input_{"input"}; + Output> output_{"output"}; void generate() { Var x("x"), y("y"), c("c"), t("t"); diff --git a/apps/nl_means/nl_means_generator.cpp b/apps/nl_means/nl_means_generator.cpp index 31e00572f371..ec51844119ed 100644 --- a/apps/nl_means/nl_means_generator.cpp +++ b/apps/nl_means/nl_means_generator.cpp @@ -6,12 +6,12 @@ using namespace Halide; class NonLocalMeans : public Halide::Generator { public: - Input> input{"input", 3}; + Input> input{"input"}; Input patch_size{"patch_size"}; Input search_area{"search_area"}; Input sigma{"sigma"}; - Output> non_local_means{"non_local_means", 3}; + Output> non_local_means{"non_local_means"}; void generate() { /* THE ALGORITHM */ diff --git a/apps/nl_means/process.cpp b/apps/nl_means/process.cpp index 34cc7d923fbd..4cf015b6db02 100644 --- a/apps/nl_means/process.cpp +++ b/apps/nl_means/process.cpp @@ -18,11 +18,11 @@ int main(int argc, char **argv) { return 0; } - Buffer input = load_and_convert_image(argv[1]); + Buffer input = load_and_convert_image(argv[1]); int patch_size = atoi(argv[2]); int search_area = atoi(argv[3]); float sigma = atof(argv[4]); - Buffer output(input.width(), input.height(), 3); + Buffer output(input.width(), input.height(), 
3); int timing_iterations = atoi(argv[5]); nl_means(input, patch_size, search_area, sigma, output); diff --git a/apps/onnx/onnx_converter_generator_test.cc b/apps/onnx/onnx_converter_generator_test.cc index 5b87f842b0ce..c0f1077de0c1 100644 --- a/apps/onnx/onnx_converter_generator_test.cc +++ b/apps/onnx/onnx_converter_generator_test.cc @@ -6,9 +6,9 @@ int main(int argc, char **argv) { std::cout << "Running onnx_converter_generator_test...\n"; - Halide::Runtime::Buffer A(3, 4); - Halide::Runtime::Buffer B(3, 4); - Halide::Runtime::Buffer C(3, 4); + Halide::Runtime::Buffer A(3, 4); + Halide::Runtime::Buffer B(3, 4); + Halide::Runtime::Buffer C(3, 4); std::mt19937 rnd(123); A.for_each_value([&](float &v) { diff --git a/apps/onnx/onnx_converter_test.cc b/apps/onnx/onnx_converter_test.cc index 6742658cff90..f361117fd016 100644 --- a/apps/onnx/onnx_converter_test.cc +++ b/apps/onnx/onnx_converter_test.cc @@ -21,7 +21,7 @@ static void test_abs() { std::vector node_inputs; node_inputs.resize(1); node_inputs[0].shape = {200}; - Halide::Buffer input(200); + Halide::Buffer input(200); std::uniform_real_distribution dis(-1.0, 1.0); std::mt19937 rnd; input.for_each_value([&](float &f) { f = dis(rnd); }); @@ -31,7 +31,7 @@ static void test_abs() { Node converted = convert_node(abs_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({200}); + Halide::Buffer output = converted.outputs[0].rep.realize({200}); for (int i = 0; i < 200; ++i) { EXPECT_EQ(output(i), std::abs(input(i))); } @@ -47,7 +47,7 @@ static void test_activation_function() { std::vector node_inputs; node_inputs.resize(1); node_inputs[0].shape = {200}; - Halide::Buffer input(200); + Halide::Buffer input(200); std::mt19937 rnd; std::uniform_real_distribution dis(-1.0, 1.0); input.for_each_value([&](float &f) { f = dis(rnd); }); @@ -57,7 +57,7 @@ static void test_activation_function() { Node converted = convert_node(relu_node, node_inputs); 
GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({200}); + Halide::Buffer output = converted.outputs[0].rep.realize({200}); for (int i = 0; i < 200; ++i) { EXPECT_EQ(output(i), std::max(input(i), 0.0f)); } @@ -76,7 +76,7 @@ static void test_cast() { attr->set_i(onnx::TensorProto_DataType_FLOAT); node_inputs.resize(1); node_inputs[0].shape = {200}; - Halide::Buffer input(200); + Halide::Buffer input(200); std::mt19937 rnd; std::uniform_int_distribution dis(-100, 100); input.for_each_value([&](int &f) { f = dis(rnd); }); @@ -86,7 +86,7 @@ static void test_cast() { Node converted = convert_node(cast_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({200}); + Halide::Buffer output = converted.outputs[0].rep.realize({200}); for (int i = 0; i < 200; ++i) { EXPECT_EQ(output(i), static_cast(input(i))); } @@ -104,12 +104,12 @@ static void test_add() { node_inputs.resize(2); node_inputs[0].shape = {200}; node_inputs[1].shape = node_inputs[0].shape; - Halide::Buffer in1(200); + Halide::Buffer in1(200); std::mt19937 rnd; std::uniform_real_distribution dis(-1.0, 1.0); std::uniform_real_distribution dis10(-10.0, 10.0); in1.for_each_value([&](float &f) { f = dis(rnd); }); - Halide::Buffer in2(200); + Halide::Buffer in2(200); in2.for_each_value([&](float &f) { f = dis10(rnd); }); Halide::Var index; node_inputs[0].rep(index) = in1(index); @@ -118,7 +118,7 @@ static void test_add() { Node converted = convert_node(add_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({200}); + Halide::Buffer output = converted.outputs[0].rep.realize({200}); for (int i = 0; i < 200; ++i) { EXPECT_NEAR(output(i), in1(i) + in2(i), 1e-6); } @@ -145,7 +145,7 @@ static void test_constant() { Node converted = convert_node(add_node, {}); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output 
= converted.outputs[0].rep.realize({3, 7}); + Halide::Buffer output = converted.outputs[0].rep.realize({3, 7}); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 7; ++j) { EXPECT_EQ(output(i, j), value.float_data(j + 7 * i)); @@ -172,11 +172,11 @@ static void test_gemm() { std::uniform_real_distribution dis10(-10.0, 10.0); std::mt19937 rnd; - Halide::Buffer in1(32, 100); + Halide::Buffer in1(32, 100); in1.for_each_value([&](float &f) { f = dis(rnd); }); - Halide::Buffer in2(100, 64); + Halide::Buffer in2(100, 64); in2.for_each_value([&](float &f) { f = dis10(rnd); }); - Halide::Buffer in3(32, 64); + Halide::Buffer in3(32, 64); in3.for_each_value([&](float &f) { f = dis(rnd); }); Halide::Var i1, j1; node_inputs[0].rep(i1, j1) = in1(i1, j1); @@ -187,7 +187,7 @@ static void test_gemm() { Node converted = convert_node(add_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({32, 64}); + Halide::Buffer output = converted.outputs[0].rep.realize({32, 64}); for (int i = 0; i < 32; ++i) { for (int j = 0; j < 64; ++j) { @@ -217,7 +217,7 @@ static void test_conv() { std::uniform_real_distribution dis10(-10.0, 10.0); std::mt19937 rnd; - Halide::Buffer weights(7, 5, 3, 3); + Halide::Buffer weights(7, 5, 3, 3); weights.for_each_value([&](float &f) { f = dis10(rnd); }); Halide::Var i2, j2, k2, l2; node_inputs[1].rep(i2, j2, k2, l2) = weights(i2, j2, k2, l2); @@ -231,7 +231,7 @@ static void test_conv() { node_inputs[0].shape[dim] = in_shape[trial][dim]; } - Halide::Buffer in(in_shape[trial]); + Halide::Buffer in(in_shape[trial]); in.for_each_value([&](float &f) { f = dis(rnd); }); Halide::Var i1, j1, k1, l1; node_inputs[0].rep = Halide::Func(); @@ -240,7 +240,7 @@ static void test_conv() { Node converted = convert_node(add_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = + Halide::Buffer output = converted.outputs[0].rep.realize(out_shape[trial]); for (int i = 0; i < 
3; ++i) { @@ -278,7 +278,7 @@ static void test_sum() { std::vector node_inputs; node_inputs.resize(1); node_inputs[0].shape = {7, 3, 5, 11}; - Halide::Buffer in1(7, 3, 5, 11); + Halide::Buffer in1(7, 3, 5, 11); std::uniform_real_distribution dis(-1.0, 1.0); std::mt19937 rnd; in1.for_each_value([&](float &f) { f = dis(rnd); }); @@ -288,7 +288,7 @@ static void test_sum() { Node converted = convert_node(sum_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({1, 3, 1, 11}); + Halide::Buffer output = converted.outputs[0].rep.realize({1, 3, 1, 11}); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 11; ++j) { float expected = 0.0f; @@ -316,11 +316,11 @@ static void test_where_broadcast() { node_inputs[0].shape = {2, 2, 2}; node_inputs[1].shape = {2}; node_inputs[2].shape = {2, 2}; - Halide::Buffer in_c(2, 2, 2); + Halide::Buffer in_c(2, 2, 2); in_c.for_each_element( [&](int x, int y, int z) { in_c(x, y, z) = (x == y && x == z); }); - Halide::Buffer in_x(2); - Halide::Buffer in_y(2, 2); + Halide::Buffer in_x(2); + Halide::Buffer in_y(2, 2); std::uniform_real_distribution dis(-1.0, 1.0); std::mt19937 rnd; in_x.for_each_value([&](float &f) { f = dis(rnd); }); @@ -332,7 +332,7 @@ static void test_where_broadcast() { Node converted = convert_node(where_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({2, 2, 2}); + Halide::Buffer output = converted.outputs[0].rep.realize({2, 2, 2}); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 2; ++j) { @@ -361,7 +361,7 @@ static void test_concat() { std::vector node_inputs; node_inputs.resize(2); node_inputs[0].shape = {7, 3}; - Halide::Buffer in1(7, 3); + Halide::Buffer in1(7, 3); std::uniform_real_distribution dis(-1.0, 1.0); std::mt19937 rnd; in1.for_each_value([&](float &f) { f = dis(rnd); }); @@ -369,14 +369,14 @@ static void test_concat() { node_inputs[0].rep(i, j) = in1(i, j); 
node_inputs[1].shape = {5, 3}; - Halide::Buffer in2(5, 3); + Halide::Buffer in2(5, 3); in2.for_each_value([&](float &f) { f = dis(rnd); }); node_inputs[1].rep(i, j) = in2(i, j); Node converted = convert_node(concat_node, node_inputs); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({7 + 5, 3}); + Halide::Buffer output = converted.outputs[0].rep.realize({7 + 5, 3}); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 7; ++j) { EXPECT_EQ(in1(j, i), output(j, i)); @@ -406,7 +406,7 @@ static void test_constant_fill() { Node converted = convert_node(concat_node, {}); GOOGLE_CHECK_EQ(1, converted.outputs.size()); - Halide::Buffer output = converted.outputs[0].rep.realize({3, 4}); + Halide::Buffer output = converted.outputs[0].rep.realize({3, 4}); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 4; ++j) { EXPECT_EQ(2u, output(i, j)); @@ -469,7 +469,7 @@ static void test_model() { std::unordered_map dummy; Model converted = convert_model(model, dummy, IOLayout::Native); - Halide::Buffer input_values(3, 7); + Halide::Buffer input_values(3, 7); std::uniform_real_distribution dis(-1.0, 1.0); std::mt19937 rnd; input_values.for_each_value([&](float &f) { f = dis(rnd); }); @@ -477,7 +477,7 @@ static void test_model() { Halide::ImageParam &input = converted.inputs.at("model_input"); input.set(input_values); Tensor node = converted.outputs.at("model_output"); - Halide::Buffer output_values = node.rep.realize({3, 7}); + Halide::Buffer output_values = node.rep.realize({3, 7}); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 7; ++j) { @@ -489,11 +489,11 @@ static void test_model() { } Tensor size = converted.outputs.at("output_size"); - Halide::Buffer output_size = size.rep.realize(); + Halide::Buffer output_size = size.rep.realize(); EXPECT_EQ(21, output_size()); Tensor shape = converted.outputs.at("output_shape"); - Halide::Buffer output_shape = shape.rep.realize({2}); + Halide::Buffer output_shape = shape.rep.realize({2}); 
EXPECT_EQ(3, output_shape(0)); EXPECT_EQ(7, output_shape(1)); } diff --git a/apps/openglcompute/jni/oglc_run.cpp b/apps/openglcompute/jni/oglc_run.cpp index e95e025c4261..3378ab555dd1 100644 --- a/apps/openglcompute/jni/oglc_run.cpp +++ b/apps/openglcompute/jni/oglc_run.cpp @@ -28,7 +28,7 @@ struct timing { int best_rep = 0; template - timing(filter_t filter, Buffer *input, Buffer *output) + timing(filter_t filter, Buffer *input, Buffer *output) : filter(filter), input(&input->template as()), output(&output->template as()) { } @@ -66,17 +66,17 @@ class Tester; template bool doBlur(Tester *tester, - Buffer bt_input, - Buffer bt_output, - Buffer bt_output_arm) { + Buffer bt_input, + Buffer bt_output, + Buffer bt_output_arm) { return false; // This abstract implementation should never be called } template bool doCopy(Tester *tester, - Buffer bt_input, - Buffer bt_output, - Buffer bt_output_arm) { + Buffer bt_input, + Buffer bt_output, + Buffer bt_output_arm) { return false; // This abstract implementation should never be called } @@ -90,7 +90,7 @@ class Tester { } private: - bool validate(Buffer actual, Buffer expected) { + bool validate(Buffer actual, Buffer expected) { int count_mismatches = 0; actual.for_each_element([&](int x, int y, int c) { T actual_value = actual(x, y, c); @@ -112,7 +112,7 @@ class Tester { return count_mismatches == 0; } - void print(Buffer buf) { + void print(Buffer buf) { for (int j = 0; j < std::min(buf.height(), 10); j++) { std::stringstream oss; for (int i = 0; i < std::min(buf.width(), 10); i++) { @@ -131,9 +131,9 @@ class Tester { } public: - bool test(Buffer input, - Buffer output, - Buffer output_arm, + bool test(Buffer input, + Buffer output, + Buffer output_arm, filter_t avg_filter, filter_t avg_filter_arm) { @@ -191,7 +191,7 @@ class Tester { int height = 2048; int channels = 4; - auto input = Buffer::make_interleaved(width, height, channels); + auto input = Buffer::make_interleaved(width, height, channels); LOGI("Allocated memory 
for %dx%dx%d image", width, height, channels); input.for_each_element([&](int i, int j, int k) { @@ -201,8 +201,8 @@ class Tester { LOGI("Input :\n"); print(input); - auto output = Buffer::make_interleaved(width, height, channels); - auto output_arm = Buffer::make_interleaved(width, height, channels); + auto output = Buffer::make_interleaved(width, height, channels); + auto output_arm = Buffer::make_interleaved(width, height, channels); doBlur(this, input, output, output_arm); } @@ -210,9 +210,9 @@ class Tester { template<> bool doBlur(Tester *tester, - Buffer bt_input, - Buffer bt_output, - Buffer bt_output_arm) { + Buffer bt_input, + Buffer bt_output, + Buffer bt_output_arm) { return tester->test(bt_input, bt_output, bt_output_arm, avg_filter_float, @@ -221,9 +221,9 @@ bool doBlur(Tester *tester, template<> bool doBlur(Tester *tester, - Buffer bt_input, - Buffer bt_output, - Buffer bt_output_arm) { + Buffer bt_input, + Buffer bt_output, + Buffer bt_output_arm) { return tester->test(bt_input, bt_output, bt_output_arm, avg_filter_uint32t, diff --git a/apps/openglcompute/jni/oglc_two_kernels_run.cpp b/apps/openglcompute/jni/oglc_two_kernels_run.cpp index 69333593b77a..6574de25ae39 100644 --- a/apps/openglcompute/jni/oglc_two_kernels_run.cpp +++ b/apps/openglcompute/jni/oglc_two_kernels_run.cpp @@ -12,7 +12,7 @@ #define LOGE(...) 
__android_log_print(ANDROID_LOG_ERROR, "oglc_run", __VA_ARGS__) template -void print(Halide::Runtime::Buffer buf) { +void print(Halide::Runtime::Buffer buf) { for (int j = 0; j < std::min(buf.height(), 10); j++) { std::stringstream oss; for (int i = 0; i < std::min(buf.width(), 10); i++) { @@ -37,7 +37,7 @@ int main(int argc, char **argv) { int height = 128; int channels = 4; - auto input = Halide::Runtime::Buffer::make_interleaved(width, height, channels); + auto input = Halide::Runtime::Buffer::make_interleaved(width, height, channels); LOGI("Allocated memory for %dx%dx%d image", width, height, channels); input.for_each_element([&](int i, int j, int k) { @@ -47,7 +47,7 @@ int main(int argc, char **argv) { LOGI("Input :\n"); print(input); - auto output = Halide::Runtime::Buffer::make_interleaved(width, height, channels); + auto output = Halide::Runtime::Buffer::make_interleaved(width, height, channels); two_kernels_filter(input, output); LOGI("Filter is done."); diff --git a/apps/resize/resize_generator.cpp b/apps/resize/resize_generator.cpp index 9494ed75f1bd..18d2a5c4fef2 100644 --- a/apps/resize/resize_generator.cpp +++ b/apps/resize/resize_generator.cpp @@ -63,9 +63,9 @@ class Resize : public Halide::Generator { // resample in x and in y). 
GeneratorParam upsample{"upsample", false}; - Input> input{"input", 3}; + Input> input{"input"}; Input scale_factor{"scale_factor"}; - Output> output{"output", 3}; + Output> output{"output"}; // Common Vars Var x, y, c, k; diff --git a/apps/resnet_50/Resnet50Generator.cpp b/apps/resnet_50/Resnet50Generator.cpp index dddc55aa0e20..7320a80b7e1d 100644 --- a/apps/resnet_50/Resnet50Generator.cpp +++ b/apps/resnet_50/Resnet50Generator.cpp @@ -28,42 +28,42 @@ int find_index(int value, std::vector vec) { class Resnet50Generator : public Halide::Generator { public: - Input> input{"input", 3}; + Input> input{"input"}; /** parameter values for scaling layers **/ - Input> conv1_gamma{"conv1_gamma", 1}; - Input[4]> br1_gamma { "br1_gamma", 1 }; - Input[16]> br2a_gamma { "br2a_gamma", 1 }; - Input[16]> br2b_gamma { "br2b_gamma", 1 }; - Input[16]> br2c_gamma { "br2c_gamma", 1 }; - - Input> conv1_beta{"conv1_beta", 1}; - Input[4]> br1_beta { "br1_beta", 1 }; - Input[16]> br2a_beta { "br2a_beta", 1 }; - Input[16]> br2b_beta { "br2b_beta", 1 }; - Input[16]> br2c_beta { "br2c_beta", 1 }; - - Input> conv1_mu{"conv1_mu", 1}; - Input[4]> br1_mu { "br1_mu", 1 }; - Input[16]> br2a_mu { "br2a_mu", 1 }; - Input[16]> br2b_mu { "br2b_mu", 1 }; - Input[16]> br2c_mu { "br2c_mu", 1 }; - - Input> conv1_sig{"conv1_sig", 1}; - Input[4]> br1_sig { "br1_sig", 1 }; - Input[16]> br2a_sig { "br2a_sig", 1 }; - Input[16]> br2b_sig { "br2b_sig", 1 }; - Input[16]> br2c_sig { "br2c_sig", 1 }; + Input> conv1_gamma{"conv1_gamma"}; + Input[4]> br1_gamma { "br1_gamma" }; + Input[16]> br2a_gamma { "br2a_gamma" }; + Input[16]> br2b_gamma { "br2b_gamma" }; + Input[16]> br2c_gamma { "br2c_gamma" }; + + Input> conv1_beta{"conv1_beta"}; + Input[4]> br1_beta { "br1_beta" }; + Input[16]> br2a_beta { "br2a_beta" }; + Input[16]> br2b_beta { "br2b_beta" }; + Input[16]> br2c_beta { "br2c_beta" }; + + Input> conv1_mu{"conv1_mu"}; + Input[4]> br1_mu { "br1_mu" }; + Input[16]> br2a_mu { "br2a_mu" }; + Input[16]> br2b_mu { 
"br2b_mu" }; + Input[16]> br2c_mu { "br2c_mu" }; + + Input> conv1_sig{"conv1_sig"}; + Input[4]> br1_sig { "br1_sig" }; + Input[16]> br2a_sig { "br2a_sig" }; + Input[16]> br2b_sig { "br2b_sig" }; + Input[16]> br2c_sig { "br2c_sig" }; /** weights and biases for convolutions **/ - Input> conv1_weights{"conv1_weights", 4}; - Input[4]> br1_conv_weights { "br1_conv_weights", 4 }; - Input[16]> br2a_conv_weights { "br2a_conv_weights", 4 }; - Input[16]> br2b_conv_weights { "br2b_conv_weights", 4 }; - Input[16]> br2c_conv_weights { "br2c_conv_weights", 4 }; + Input> conv1_weights{"conv1_weights"}; + Input[4]> br1_conv_weights { "br1_conv_weights" }; + Input[16]> br2a_conv_weights { "br2a_conv_weights" }; + Input[16]> br2b_conv_weights { "br2b_conv_weights" }; + Input[16]> br2c_conv_weights { "br2c_conv_weights" }; - Input> fc1000_weights{"fc1000_weights", 2}; - Input> fc1000_bias{"fc1000_bias", 1}; - Output> final_output{"final_output", 1}; + Input> fc1000_weights{"fc1000_weights"}; + Input> fc1000_bias{"fc1000_bias"}; + Output> final_output{"final_output"}; /** list out shapes of each layers weights **/ // weight shapes: out channels, kernel_w, kernel_h, pad, stride. 
In channels infered by input tensor shape @@ -378,6 +378,6 @@ class Resnet50Generator : public Halide::Generator { return output; } }; -} //namespace +} // namespace HALIDE_REGISTER_GENERATOR(Resnet50Generator, resnet50) diff --git a/apps/resnet_50/process.cpp b/apps/resnet_50/process.cpp index aaede81f0fa7..0cd829ce70e9 100644 --- a/apps/resnet_50/process.cpp +++ b/apps/resnet_50/process.cpp @@ -53,13 +53,15 @@ std::vector load_shape(const std::string &shapefile) { return dims; } -void write_buffer_to_file(const Buffer &buf, const std::string &filename) { +void write_buffer_to_file(const Buffer &buf, const std::string &filename) { std::ofstream o(filename, std::ios_base::trunc | std::ios_base::binary); o.write((const char *)(buf.data()), buf.size_in_bytes()); o.close(); assert(!o.fail()); } +// Deliberately unconstrained dims here; caller will +// convert with an implicit runtime check Buffer load_buffer_from_file(const std::string &filename, std::vector &shape) { Buffer buffer(shape); std::ifstream infile(filename, std::ios::binary); @@ -69,25 +71,25 @@ Buffer load_buffer_from_file(const std::string &filename, std::vector load_conv_params(std::string shapefile, std::string datafile) { +Buffer load_conv_params(std::string shapefile, std::string datafile) { std::vector shape = load_shape(shapefile); assert(shape.size() == 4); return load_buffer_from_file(datafile, shape); } -Buffer load_batch_norm_params(std::string shapefile, std::string datafile) { +Buffer load_batch_norm_params(std::string shapefile, std::string datafile) { std::vector shape = load_shape(shapefile); assert(shape.size()); return load_buffer_from_file(datafile, shape); } -Buffer load_fc_weight(std::string shapefile, std::string datafile) { +Buffer load_fc_weight(std::string shapefile, std::string datafile) { std::vector shape = load_shape(shapefile); assert(shape.size() == 2); return load_buffer_from_file(datafile, shape); } -Buffer load_fc_bias(std::string shapefile, std::string datafile) { 
+Buffer load_fc_bias(std::string shapefile, std::string datafile) { std::vector shape = load_shape(shapefile); assert(shape.size() == 1); return load_buffer_from_file(datafile, shape); @@ -103,39 +105,39 @@ int main(int argc, char **argv) { int seed = atoi(argv[3]); std::string output_file = argv[4]; - Buffer input(3, 224, 224); - Buffer output(1000); + Buffer input(3, 224, 224); + Buffer output(1000); - Buffer conv1_weights; - Buffer conv1_mu; - Buffer conv1_sig; - Buffer conv1_gamma; - Buffer conv1_beta; + Buffer conv1_weights; + Buffer conv1_mu; + Buffer conv1_sig; + Buffer conv1_gamma; + Buffer conv1_beta; - Buffer br2a_conv_weights[16]; - Buffer br2b_conv_weights[16]; - Buffer br2c_conv_weights[16]; - Buffer br1_conv_weights[4]; + Buffer br2a_conv_weights[16]; + Buffer br2b_conv_weights[16]; + Buffer br2c_conv_weights[16]; + Buffer br1_conv_weights[4]; - Buffer br2a_gamma[16]; - Buffer br2b_gamma[16]; - Buffer br2c_gamma[16]; - Buffer br1_gamma[4]; + Buffer br2a_gamma[16]; + Buffer br2b_gamma[16]; + Buffer br2c_gamma[16]; + Buffer br1_gamma[4]; - Buffer br2a_beta[16]; - Buffer br2b_beta[16]; - Buffer br2c_beta[16]; - Buffer br1_beta[4]; + Buffer br2a_beta[16]; + Buffer br2b_beta[16]; + Buffer br2c_beta[16]; + Buffer br1_beta[4]; - Buffer br2a_mu[16]; - Buffer br2b_mu[16]; - Buffer br2c_mu[16]; - Buffer br1_mu[4]; + Buffer br2a_mu[16]; + Buffer br2b_mu[16]; + Buffer br2c_mu[16]; + Buffer br1_mu[4]; - Buffer br2a_sig[16]; - Buffer br2b_sig[16]; - Buffer br2c_sig[16]; - Buffer br1_sig[4]; + Buffer br2a_sig[16]; + Buffer br2b_sig[16]; + Buffer br2c_sig[16]; + Buffer br1_sig[4]; /** load parameters for first section **/ std::string conv1_w_shapefile = weight_dir + "conv1_weight_shape.data"; @@ -236,7 +238,7 @@ int main(int argc, char **argv) { std::string bias_shapefile = weight_dir + "fc_bias_shape.data"; std::string bias_datafile = weight_dir + "fc_bias.data"; - Buffer fc1000_weights = load_fc_weight(weight_shapefile, weight_datafile); + Buffer fc1000_weights = 
load_fc_weight(weight_shapefile, weight_datafile); Buffer fc1000_bias = load_fc_bias(bias_shapefile, bias_datafile); std::mt19937 e2(seed); diff --git a/apps/stencil_chain/process.cpp b/apps/stencil_chain/process.cpp index a6c63ac2e670..65385e39804c 100644 --- a/apps/stencil_chain/process.cpp +++ b/apps/stencil_chain/process.cpp @@ -21,11 +21,11 @@ int main(int argc, char **argv) { } // Input may be a PNG8 - Buffer input = load_and_convert_image(argv[1]); + Buffer input_rgb = load_and_convert_image(argv[1]); // Just take the red channel - input.slice(2, 0); + Buffer input = input_rgb.sliced(2, 0); - Buffer output(input.width(), input.height()); + Buffer output(input.width(), input.height()); int timing = atoi(argv[2]); stencil_chain(input, output); diff --git a/apps/stencil_chain/stencil_chain_generator.cpp b/apps/stencil_chain/stencil_chain_generator.cpp index b067b8499c4f..ebe07d51bdba 100644 --- a/apps/stencil_chain/stencil_chain_generator.cpp +++ b/apps/stencil_chain/stencil_chain_generator.cpp @@ -6,8 +6,8 @@ class StencilChain : public Halide::Generator { public: GeneratorParam stencils{"stencils", 32, 1, 100}; - Input> input{"input", 2}; - Output> output{"output", 2}; + Input> input{"input"}; + Output> output{"output"}; void generate() { diff --git a/apps/unsharp/filter.cpp b/apps/unsharp/filter.cpp index feb714e411de..8a245c224568 100644 --- a/apps/unsharp/filter.cpp +++ b/apps/unsharp/filter.cpp @@ -19,8 +19,8 @@ int main(int argc, char **argv) { return 1; } - Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); - Halide::Runtime::Buffer output(input.width(), input.height(), 3); + Halide::Runtime::Buffer input = load_and_convert_image(argv[1]); + Halide::Runtime::Buffer output(input.width(), input.height(), 3); double best_manual = benchmark([&]() { unsharp(input, output); diff --git a/apps/unsharp/unsharp_generator.cpp b/apps/unsharp/unsharp_generator.cpp index 3c065f5d738f..d68702bf1e20 100644 --- a/apps/unsharp/unsharp_generator.cpp +++ 
b/apps/unsharp/unsharp_generator.cpp @@ -6,8 +6,8 @@ class Unsharp : public Halide::Generator { public: GeneratorParam sigma{"sigma", 1.5f}; - Input> input{"input", 3}; - Output> output{"output", 3}; + Input> input{"input"}; + Output> output{"output"}; void generate() { Var x("x"), y("y"), c("c"); diff --git a/apps/wavelet/daubechies_x_generator.cpp b/apps/wavelet/daubechies_x_generator.cpp index 8d43d503f740..ff7861aa3907 100644 --- a/apps/wavelet/daubechies_x_generator.cpp +++ b/apps/wavelet/daubechies_x_generator.cpp @@ -8,8 +8,8 @@ Halide::Var x("x"), y("y"), c("c"); class daubechies_x : public Halide::Generator { public: - Input> in_{"in", 2}; - Output> out_{"out", 3}; + Input> in_{"in"}; + Output> out_{"out"}; void generate() { Func in = Halide::BoundaryConditions::repeat_edge(in_); diff --git a/apps/wavelet/haar_x_generator.cpp b/apps/wavelet/haar_x_generator.cpp index ee89caebb5da..641a5b4118c2 100644 --- a/apps/wavelet/haar_x_generator.cpp +++ b/apps/wavelet/haar_x_generator.cpp @@ -8,8 +8,8 @@ Halide::Var x("x"), y("y"), c("c"); class haar_x : public Halide::Generator { public: - Input> in_{"in", 2}; - Output> out_{"out", 3}; + Input> in_{"in"}; + Output> out_{"out"}; void generate() { Func in = Halide::BoundaryConditions::repeat_edge(in_); diff --git a/apps/wavelet/inverse_daubechies_x_generator.cpp b/apps/wavelet/inverse_daubechies_x_generator.cpp index 43fcfefd02fa..31eb46498641 100644 --- a/apps/wavelet/inverse_daubechies_x_generator.cpp +++ b/apps/wavelet/inverse_daubechies_x_generator.cpp @@ -8,8 +8,8 @@ Halide::Var x("x"), y("y"), c("c"); class inverse_daubechies_x : public Halide::Generator { public: - Input> in_{"in", 3}; - Output> out_{"out", 2}; + Input> in_{"in"}; + Output> out_{"out"}; void generate() { Func in = Halide::BoundaryConditions::repeat_edge(in_); diff --git a/apps/wavelet/inverse_haar_x_generator.cpp b/apps/wavelet/inverse_haar_x_generator.cpp index 774561a8763a..8643f810d018 100644 --- a/apps/wavelet/inverse_haar_x_generator.cpp 
+++ b/apps/wavelet/inverse_haar_x_generator.cpp @@ -8,8 +8,8 @@ Halide::Var x("x"), y("y"), c("c"); class inverse_haar_x : public Halide::Generator { public: - Input> in_{"in", 3}; - Output> out_{"out", 2}; + Input> in_{"in"}; + Output> out_{"out"}; void generate() { Func in = Halide::BoundaryConditions::repeat_edge(in_); diff --git a/apps/wavelet/wavelet.cpp b/apps/wavelet/wavelet.cpp index f72f0c8ad669..03b58f3c5c19 100644 --- a/apps/wavelet/wavelet.cpp +++ b/apps/wavelet/wavelet.cpp @@ -31,14 +31,14 @@ T clamp(T x, T min, T max) { } template -void save_untransformed(Buffer t, const std::string &filename) { +void save_untransformed(Buffer t, const std::string &filename) { convert_and_save_image(t, filename); printf("Saved %s\n", filename.c_str()); } template -void save_transformed(Buffer t, const std::string &filename) { - Buffer rearranged(t.width() * 2, t.height(), 1); +void save_transformed(Buffer t, const std::string &filename) { + Buffer rearranged(t.width() * 2, t.height(), 1); for (int y = 0; y < t.height(); y++) { for (int x = 0; x < t.width(); x++) { rearranged(x, y, 0) = clamp(t(x, y, 0), 0.0f, 1.0f); @@ -57,9 +57,9 @@ int main(int argc, char **argv) { const std::string src_image = argv[1]; const std::string dirname = argv[2]; - Buffer input = load_and_convert_image(src_image); - Buffer transformed(input.width() / 2, input.height(), 2); - Buffer inverse_transformed(input.width(), input.height()); + Buffer input = load_and_convert_image(src_image); + Buffer transformed(input.width() / 2, input.height(), 2); + Buffer inverse_transformed(input.width(), input.height()); _assert(haar_x(input, transformed) == 0, "haar_x failed"); save_transformed(transformed, dirname + "/haar_x.png");