diff --git a/.ci/azure-buildtest-awkward.yml b/.ci/azure-buildtest-awkward.yml index 2abefd6820..4d3688e3aa 100644 --- a/.ci/azure-buildtest-awkward.yml +++ b/.ci/azure-buildtest-awkward.yml @@ -10,6 +10,7 @@ trigger: - .ci/azure-deploy-awkward.yml - .ci/linux-build.sh - docs/* + - studies/* pr: branches: diff --git a/.gitignore b/.gitignore index a9a42fbfcb..9c35d92a21 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +studies/**/sample-* + ############################################################# IDEs # ... diff --git a/.gitmodules b/.gitmodules index b547d6bc97..27b03d4421 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "pybind11"] path = pybind11 url = https://github.com/pybind/pybind11.git -[submodule "simdjson"] - path = simdjson - url = https://github.com/lemire/simdjson.git +[submodule "rapidjson"] + path = rapidjson + url = https://github.com/Tencent/rapidjson.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 2aa177297b..9d32778c1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,8 +19,9 @@ add_definitions(-DVERSION_INFO="${VERSION_INFO}") set(CMAKE_MACOSX_RPATH 1) file(GLOB CPU_KERNEL_SOURCES "src/cpu-kernels/*.cpp") -file(GLOB LIBAWKWARD_SOURCES "src/libawkward/*.cpp" "src/libawkward/array/*.cpp" "src/libawkward/fillable/*.cpp" "src/libawkward/type/*.cpp") +file(GLOB LIBAWKWARD_SOURCES "src/libawkward/*.cpp" "src/libawkward/array/*.cpp" "src/libawkward/fillable/*.cpp" "src/libawkward/type/*.cpp" "src/libawkward/io/*.cpp") include_directories(include) +include_directories(rapidjson/include) add_subdirectory(pybind11) @@ -44,6 +45,7 @@ add_library(awkward SHARED $) target_link_libraries(awkward-static PRIVATE awkward-cpu-kernels-static) target_link_libraries(awkward PRIVATE awkward-cpu-kernels-static) addtest(PR016 tests/test_PR016_finish_getitem_for_rawarray.cpp) +addtest(PR019 tests/test_PR019_use_json_library.cpp) pybind11_add_module(layout src/pyawkward.cpp) set_target_properties(layout PROPERTIES 
CXX_VISIBILITY_PRESET default) diff --git a/README.md b/README.md index 7e666a8037..1bec86c127 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit- * [ ] Translation to and from Apache Arrow and Parquet in C++. * [ ] Persistence to any medium that stores named binary blobs, as before, but accessible via C++ (especially for writing). The persistence format might differ slightly from the existing one (break backward compatibility, if needed). * [ ] Universal `array.get[...]` as a softer form of `array[...]` that skips non-existent indexes, rather than raising errors. + * [ ] Explicit interface with [NumExpr](https://numexpr.readthedocs.io/en/latest/index.html). ### At some point in the future diff --git a/awkward1/__init__.py b/awkward1/__init__.py index 1b2e64c890..80f4ecee1a 100644 --- a/awkward1/__init__.py +++ b/awkward1/__init__.py @@ -2,6 +2,7 @@ import awkward1.layout import awkward1._numba + from awkward1.operations.convert import * __version__ = awkward1.layout.__version__ diff --git a/awkward1/operations/convert.py b/awkward1/operations/convert.py index 3c7cf2bfb1..b6e4963258 100644 --- a/awkward1/operations/convert.py +++ b/awkward1/operations/convert.py @@ -1,12 +1,23 @@ # BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE import numbers +import json +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable import numpy import awkward1.util import awkward1.layout +def fromiter(iterable, initial=1024, resize=2.0): + out = awkward1.layout.FillableArray(initial=initial, resize=resize) + for x in iterable: + out.fill(x) + return out.snapshot() + def tolist(array): if array is None or isinstance(array, (bool, str, bytes, numbers.Number)): return array @@ -26,4 +37,22 @@ def tolist(array): else: raise TypeError("unrecognized array type: {0}".format(repr(array))) -__all__ = [x for x in list(globals()) 
if not x.startswith("_") and x not in ("awkward1", "numpy")] +fromjson = awkward1.layout.fromjson + +def tojson(array, *args, **kwargs): + if array is None or isinstance(array, (bool, str, bytes, numbers.Number)): + return json.dumps(array) + + elif isinstance(array, numpy.ndarray): + return awkward1.layout.NumpyArray(array).tojson(*args, **kwargs) + + elif isinstance(array, awkward1.layout.FillableArray): + return array.snapshot().tojson(*args, **kwargs) + + elif isinstance(array, awkward1.layout.Content): + return array.tojson(*args, **kwargs) + + else: + raise TypeError("unrecognized array type: {0}".format(repr(array))) + +__all__ = [x for x in list(globals()) if not x.startswith("_") and x not in ("numbers", "json", "Iterable", "numpy", "awkward1")] diff --git a/include/awkward/Content.h b/include/awkward/Content.h index d8381c4197..ef3cbe46a8 100644 --- a/include/awkward/Content.h +++ b/include/awkward/Content.h @@ -3,9 +3,12 @@ #ifndef AWKWARD_CONTENT_H_ #define AWKWARD_CONTENT_H_ +#include + #include "awkward/cpu-kernels/util.h" #include "awkward/Identity.h" #include "awkward/Slice.h" +#include "awkward/io/json.h" namespace awkward { class Content { @@ -17,6 +20,7 @@ namespace awkward { virtual void setid() = 0; virtual void setid(const std::shared_ptr id) = 0; virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; + virtual void tojson_part(ToJson& builder) const = 0; virtual int64_t length() const = 0; virtual const std::shared_ptr shallow_copy() const = 0; virtual void checksafe() const = 0; @@ -30,6 +34,8 @@ namespace awkward { virtual const std::pair minmax_depth() const = 0; const std::string tostring() const; + const std::string tojson(bool pretty, int64_t maxdecimals) const; + void tojson(FILE* destination, bool pretty, int64_t maxdecimals, int64_t buffersize) const; const std::shared_ptr getitem_ellipsis(const Slice& tail, const Index64& advanced) const; const std::shared_ptr 
getitem_newaxis(const Slice& tail, const Index64& advanced) const; }; diff --git a/include/awkward/Index.h b/include/awkward/Index.h index d0e460d629..8b740e84af 100644 --- a/include/awkward/Index.h +++ b/include/awkward/Index.h @@ -35,6 +35,7 @@ namespace awkward { const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; T getitem_at(int64_t at) const; T getitem_at_unsafe(int64_t at) const; + void setitem_at_unsafe(int64_t at, T value) const; IndexOf getitem_range(int64_t start, int64_t stop) const; IndexOf getitem_range_unsafe(int64_t start, int64_t stop) const; virtual const std::shared_ptr shallow_copy() const; diff --git a/include/awkward/Slice.h b/include/awkward/Slice.h index f88792550b..3014767ce2 100644 --- a/include/awkward/Slice.h +++ b/include/awkward/Slice.h @@ -36,7 +36,7 @@ namespace awkward { class SliceRange: public SliceItem { public: - SliceRange(int64_t start, int64_t stop, int64_t step): start_(start), stop_(stop), step_(step) { + SliceRange(int64_t start, int64_t stop, int64_t step): start_(start), stop_(stop), step_(step == none() ? 
1 : step) { assert(step_ != 0); } int64_t start() const { return start_; } @@ -114,6 +114,12 @@ namespace awkward { const Slice tail() const; const std::string tostring() const; void append(const std::shared_ptr& item); + void append(const SliceAt& item); + void append(const SliceRange& item); + void append(const SliceEllipsis& item); + void append(const SliceNewAxis& item); + template + void append(const SliceArrayOf& item); void become_sealed(); bool isadvanced() const; diff --git a/include/awkward/array/ListArray.h b/include/awkward/array/ListArray.h index d30e60e1ea..b5d9ac92fc 100644 --- a/include/awkward/array/ListArray.h +++ b/include/awkward/array/ListArray.h @@ -29,6 +29,7 @@ namespace awkward { virtual void setid(); virtual void setid(const std::shared_ptr id); virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + virtual void tojson_part(ToJson& builder) const; virtual int64_t length() const; virtual const std::shared_ptr shallow_copy() const; virtual void checksafe() const; diff --git a/include/awkward/array/ListOffsetArray.h b/include/awkward/array/ListOffsetArray.h index c3dd023950..f09b7cf8a6 100644 --- a/include/awkward/array/ListOffsetArray.h +++ b/include/awkward/array/ListOffsetArray.h @@ -27,6 +27,7 @@ namespace awkward { virtual void setid(); virtual void setid(const std::shared_ptr id); virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + virtual void tojson_part(ToJson& builder) const; virtual int64_t length() const; virtual const std::shared_ptr shallow_copy() const; virtual void checksafe() const; diff --git a/include/awkward/array/NumpyArray.h b/include/awkward/array/NumpyArray.h index 53e8255e0f..59d1512bc4 100644 --- a/include/awkward/array/NumpyArray.h +++ b/include/awkward/array/NumpyArray.h @@ -37,6 +37,7 @@ namespace awkward { bool isscalar() const; bool isempty() const; void* byteptr() const; + 
void* byteptr(ssize_t at) const; ssize_t bytelength() const; uint8_t getbyte(ssize_t at) const; @@ -45,6 +46,7 @@ namespace awkward { virtual void setid(); virtual void setid(const std::shared_ptr id); virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + virtual void tojson_part(ToJson& builder) const; virtual int64_t length() const; virtual const std::shared_ptr shallow_copy() const; virtual void checksafe() const; diff --git a/include/awkward/array/RawArray.h b/include/awkward/array/RawArray.h index d6bd651f4d..229bb57732 100644 --- a/include/awkward/array/RawArray.h +++ b/include/awkward/array/RawArray.h @@ -20,6 +20,26 @@ #include "awkward/Content.h" namespace awkward { + void tojson_boolean(ToJson& builder, bool* array, int64_t length) { + for (int i = 0; i < length; i++) { + builder.boolean((bool)array[i]); + } + } + + template + void tojson_integer(ToJson& builder, T* array, int64_t length) { + for (int i = 0; i < length; i++) { + builder.integer((int64_t)array[i]); + } + } + + template + void tojson_real(ToJson& builder, T* array, int64_t length) { + for (int i = 0; i < length; i++) { + builder.real((double)array[i]); + } + } + template class RawArrayOf: public Content { public: @@ -123,6 +143,42 @@ namespace awkward { return out.str(); } + virtual void tojson_part(ToJson& builder) const { + if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if 
(std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (std::is_same::value) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else { + throw std::invalid_argument(std::string("cannot convert RawArrayOf<") + typeid(T).name() + std::string("> into JSON")); + } + } + virtual int64_t length() const { return length_; } virtual const std::shared_ptr shallow_copy() const { return std::shared_ptr(new RawArrayOf(id_, ptr_, offset_, length_, itemsize_)); } @@ -232,7 +288,7 @@ namespace awkward { throw std::runtime_error("array.ndim != 1"); } Index64 flathead = array->ravel(); - Error err = awkward_regularize_arrayslice_64( + struct Error err = awkward_regularize_arrayslice_64( flathead.ptr().get(), flathead.length(), length_); @@ -247,7 +303,7 @@ namespace awkward { virtual const std::shared_ptr carry(const Index64& carry) const { std::shared_ptr ptr(new T[(size_t)carry.length()], awkward::util::array_deleter()); - Error err = awkward_numpyarray_getitem_next_null_64( + struct Error err = awkward_numpyarray_getitem_next_null_64( reinterpret_cast(ptr.get()), reinterpret_cast(ptr_.get()), carry.length(), diff --git a/include/awkward/cpu-kernels/getitem.h b/include/awkward/cpu-kernels/getitem.h index 9e7dccac10..4f9dcf9ffa 100644 --- a/include/awkward/cpu-kernels/getitem.h +++ b/include/awkward/cpu-kernels/getitem.h @@ -7,49 +7,49 @@ extern "C" { void awkward_regularize_rangeslice(int64_t* start, int64_t* stop, bool posstep, bool hasstart, bool hasstop, int64_t length); - Error awkward_regularize_arrayslice_64(int64_t* flatheadptr, int64_t lenflathead, int64_t length); + struct Error awkward_regularize_arrayslice_64(int64_t* flatheadptr, int64_t lenflathead, int64_t length); - Error 
awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides); + struct Error awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides); - Error awkward_carry_arange_64(int64_t* toptr, int64_t length); + struct Error awkward_carry_arange_64(int64_t* toptr, int64_t length); - Error awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); - Error awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); + struct Error awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); + struct Error awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); - Error awkward_numpyarray_contiguous_init_64(int64_t* toptr, int64_t skip, int64_t stride); - Error awkward_numpyarray_contiguous_copy_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); - Error awkward_numpyarray_contiguous_next_64(int64_t* topos, const int64_t* frompos, int64_t len, int64_t skip, int64_t stride); + struct Error awkward_numpyarray_contiguous_init_64(int64_t* toptr, int64_t skip, int64_t stride); + struct Error awkward_numpyarray_contiguous_copy_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); + struct Error awkward_numpyarray_contiguous_next_64(int64_t* topos, const int64_t* frompos, int64_t len, int64_t skip, int64_t stride); - Error 
awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); - Error awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at); - Error awkward_numpyarray_getitem_next_range_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); - Error awkward_numpyarray_getitem_next_range_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); - Error awkward_numpyarray_getitem_next_array_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip); - Error awkward_numpyarray_getitem_next_array_advanced_64(int64_t* nextcarryptr, const int64_t* carryptr, const int64_t* advancedptr, const int64_t* flatheadptr, int64_t lencarry, int64_t skip); + struct Error awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); + struct Error awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at); + struct Error awkward_numpyarray_getitem_next_range_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); + struct Error awkward_numpyarray_getitem_next_range_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); + struct Error awkward_numpyarray_getitem_next_array_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* flatheadptr, 
int64_t lencarry, int64_t lenflathead, int64_t skip); + struct Error awkward_numpyarray_getitem_next_array_advanced_64(int64_t* nextcarryptr, const int64_t* carryptr, const int64_t* advancedptr, const int64_t* flatheadptr, int64_t lencarry, int64_t skip); - Error awkward_listarray32_getitem_next_at_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at); - Error awkward_listarray64_getitem_next_at_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at); + struct Error awkward_listarray32_getitem_next_at_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at); + struct Error awkward_listarray64_getitem_next_at_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at); - Error awkward_listarray32_getitem_next_range_carrylength(int64_t* carrylength, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); - Error awkward_listarray64_getitem_next_range_carrylength(int64_t* carrylength, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + struct Error awkward_listarray32_getitem_next_range_carrylength(int64_t* carrylength, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + struct Error awkward_listarray64_getitem_next_range_carrylength(int64_t* carrylength, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t 
stop, int64_t step); - Error awkward_listarray32_getitem_next_range_64(int32_t* tooffsets, int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); - Error awkward_listarray64_getitem_next_range_64(int64_t* tooffsets, int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + struct Error awkward_listarray32_getitem_next_range_64(int32_t* tooffsets, int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + struct Error awkward_listarray64_getitem_next_range_64(int64_t* tooffsets, int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); - Error awkward_listarray32_getitem_next_range_counts_64(int64_t* total, const int32_t* fromoffsets, int64_t lenstarts); - Error awkward_listarray64_getitem_next_range_counts_64(int64_t* total, const int64_t* fromoffsets, int64_t lenstarts); + struct Error awkward_listarray32_getitem_next_range_counts_64(int64_t* total, const int32_t* fromoffsets, int64_t lenstarts); + struct Error awkward_listarray64_getitem_next_range_counts_64(int64_t* total, const int64_t* fromoffsets, int64_t lenstarts); - Error awkward_listarray32_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int32_t* fromoffsets, int64_t lenstarts); - Error awkward_listarray64_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromoffsets, int64_t lenstarts); + struct Error awkward_listarray32_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int32_t* fromoffsets, 
int64_t lenstarts); + struct Error awkward_listarray64_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromoffsets, int64_t lenstarts); - Error awkward_listarray32_getitem_next_array_64(int32_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); - Error awkward_listarray64_getitem_next_array_64(int64_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + struct Error awkward_listarray32_getitem_next_array_64(int32_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + struct Error awkward_listarray64_getitem_next_array_64(int64_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); - Error awkward_listarray32_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); - Error awkward_listarray64_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + struct Error 
awkward_listarray32_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + struct Error awkward_listarray64_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); - Error awkward_listarray32_getitem_carry_64(int32_t* tostarts, int32_t* tostops, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry); - Error awkward_listarray64_getitem_carry_64(int64_t* tostarts, int64_t* tostops, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry); + struct Error awkward_listarray32_getitem_carry_64(int32_t* tostarts, int32_t* tostops, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry); + struct Error awkward_listarray64_getitem_carry_64(int64_t* tostarts, int64_t* tostops, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry); } #endif // AWKWARDCPU_GETITEM_H_ diff --git a/include/awkward/cpu-kernels/identity.h b/include/awkward/cpu-kernels/identity.h index 3dbe706dac..c6c8506306 100644 --- a/include/awkward/cpu-kernels/identity.h +++ b/include/awkward/cpu-kernels/identity.h @@ -6,14 +6,14 @@ #include "awkward/cpu-kernels/util.h" extern "C" { - Error awkward_new_identity32(int32_t* toptr, int64_t length); - Error 
awkward_new_identity64(int64_t* toptr, int64_t length); + struct Error awkward_new_identity32(int32_t* toptr, int64_t length); + struct Error awkward_new_identity64(int64_t* toptr, int64_t length); - Error awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, int64_t length); + struct Error awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, int64_t length); - Error awkward_identity32_from_listarray32(int32_t* toptr, const int32_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); - Error awkward_identity64_from_listarray32(int64_t* toptr, const int64_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); - Error awkward_identity64_from_listarray64(int64_t* toptr, const int64_t* fromptr, const int64_t* fromstarts, const int64_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); + struct Error awkward_identity32_from_listarray32(int32_t* toptr, const int32_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); + struct Error awkward_identity64_from_listarray32(int64_t* toptr, const int64_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); + struct Error awkward_identity64_from_listarray64(int64_t* toptr, const int64_t* fromptr, const int64_t* fromstarts, const int64_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); } #endif // 
AWKWARDCPU_IDENTITY_H_ diff --git a/include/awkward/cpu-kernels/util.h b/include/awkward/cpu-kernels/util.h index cc3a5af1d5..efc1cc5d79 100644 --- a/include/awkward/cpu-kernels/util.h +++ b/include/awkward/cpu-kernels/util.h @@ -13,11 +13,17 @@ #endif typedef unsigned char uint8_t; typedef signed char int8_t; + typedef unsigned short uint16_t; + typedef signed short int16_t; + typedef unsigned int uint32_t; typedef signed int int32_t; + typedef unsigned __int64 uint64_t; typedef signed __int64 int64_t; + #define ERROR Error #else #include #include + #define ERROR struct Error #endif #include @@ -29,8 +35,8 @@ extern "C" { int64_t attempt; int64_t extra; }; - Error success(); - Error failure(const char* str, int64_t location, int64_t attempt); + struct Error success(); + struct Error failure(const char* str, int64_t location, int64_t attempt); const int8_t kMaxInt8 = 127; // 2**7 - 1 const uint8_t kMaxUInt8 = 255; // 2**8 - 1 diff --git a/include/awkward/fillable/FillableArray.h b/include/awkward/fillable/FillableArray.h index b04ef5937e..be9d22c163 100644 --- a/include/awkward/fillable/FillableArray.h +++ b/include/awkward/fillable/FillableArray.h @@ -23,7 +23,7 @@ namespace awkward { const std::shared_ptr getitem_at(int64_t at) const; const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; const std::shared_ptr getitem(const Slice& where) const; - + void null(); void boolean(bool x); void integer(int64_t x); @@ -31,6 +31,17 @@ namespace awkward { void beginlist(); void endlist(); + template + void fill(const std::vector& vector) { + beginlist(); + for (auto x : vector) { + fill(x); + } + endlist(); + } + void fill(int64_t x) { integer(x); } + void fill(double x) { real(x); } + private: std::shared_ptr fillable_; diff --git a/include/awkward/fillable/GrowableBuffer.h b/include/awkward/fillable/GrowableBuffer.h index 8a6518ffd3..5faecaa475 100644 --- a/include/awkward/fillable/GrowableBuffer.h +++ b/include/awkward/fillable/GrowableBuffer.h @@ -9,6 
+9,7 @@ #include "awkward/cpu-kernels/util.h" #include "awkward/fillable/FillableOptions.h" +#include "awkward/Index.h" namespace awkward { template @@ -87,6 +88,14 @@ namespace awkward { length_++; } + T getitem_at_unsafe(int64_t at) const { + return ptr_.get()[at]; + } + + IndexOf toindex() const { + return IndexOf(ptr_, 0, length_); + } + private: const FillableOptions options_; std::shared_ptr ptr_; diff --git a/include/awkward/io/json.h b/include/awkward/io/json.h new file mode 100644 index 0000000000..07dad54ea6 --- /dev/null +++ b/include/awkward/io/json.h @@ -0,0 +1,151 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_IO_JSON_H_ +#define AWKWARD_IO_JSON_H_ + +#include +#include + +#include "rapidjson/reader.h" +#include "rapidjson/writer.h" +#include "rapidjson/prettywriter.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/error/en.h" + +#include "awkward/fillable/FillableOptions.h" +#include "awkward/cpu-kernels/util.h" +#include "awkward/util.h" + +namespace rj = rapidjson; + +namespace awkward { + class Content; + + const std::shared_ptr FromJsonString(const char* source, const FillableOptions& options); + const std::shared_ptr FromJsonFile(FILE* source, const FillableOptions& options, int64_t buffersize); + + class ToJson { + public: + virtual void null() = 0; + virtual void boolean(bool x) = 0; + virtual void integer(int64_t x) = 0; + virtual void real(double x) = 0; + virtual void beginlist() = 0; + virtual void endlist() = 0; + virtual void beginrec() = 0; + virtual void endrec() = 0; + virtual void fieldname(const char* x) = 0; + virtual void string(const char* x) = 0; + }; + + class ToJsonString: public ToJson { + public: + ToJsonString(int64_t maxdecimals): buffer_(), writer_(buffer_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + + virtual void null() { 
writer_.Null(); } + virtual void boolean(bool x) { writer_.Bool(x); } + virtual void integer(int64_t x) { writer_.Int64(x); } + virtual void real(double x) { writer_.Double(x); } + virtual void beginlist() { writer_.StartArray(); } + virtual void endlist() { writer_.EndArray(); } + virtual void beginrec() { writer_.StartObject(); } + virtual void endrec() { writer_.EndObject(); } + virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void string(const char* x) { writer_.String(x); } + + std::string tostring() { + return std::string(buffer_.GetString()); + } + + private: + rj::StringBuffer buffer_; + rj::Writer writer_; + }; + + class ToJsonPrettyString: public ToJson { + public: + ToJsonPrettyString(int64_t maxdecimals): buffer_(), writer_(buffer_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + + virtual void null() { writer_.Null(); } + virtual void boolean(bool x) { writer_.Bool(x); } + virtual void integer(int64_t x) { writer_.Int64(x); } + virtual void real(double x) { writer_.Double(x); } + virtual void beginlist() { writer_.StartArray(); } + virtual void endlist() { writer_.EndArray(); } + virtual void beginrec() { writer_.StartObject(); } + virtual void endrec() { writer_.EndObject(); } + virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void string(const char* x) { writer_.String(x); } + + std::string tostring() { + return std::string(buffer_.GetString()); + } + + private: + rj::StringBuffer buffer_; + rj::PrettyWriter writer_; + }; + + class ToJsonFile: public ToJson { + public: + ToJsonFile(FILE* destination, int64_t maxdecimals, int64_t buffersize): buffer_(new char[(size_t)buffersize], awkward::util::array_deleter()), stream_(destination, buffer_.get(), ((size_t)buffersize)*sizeof(char)), writer_(stream_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + + virtual void null() { writer_.Null(); } + virtual void boolean(bool x) { writer_.Bool(x); 
} + virtual void integer(int64_t x) { writer_.Int64(x); } + virtual void real(double x) { writer_.Double(x); } + virtual void beginlist() { writer_.StartArray(); } + virtual void endlist() { writer_.EndArray(); } + virtual void beginrec() { writer_.StartObject(); } + virtual void endrec() { writer_.EndObject(); } + virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void string(const char* x) { writer_.String(x); } + + private: + std::shared_ptr buffer_; + rj::FileWriteStream stream_; + rj::Writer writer_; + }; + + class ToJsonPrettyFile: public ToJson { + public: + ToJsonPrettyFile(FILE* destination, int64_t maxdecimals, int64_t buffersize): buffer_(new char[(size_t)buffersize], awkward::util::array_deleter()), stream_(destination, buffer_.get(), ((size_t)buffersize)*sizeof(char)), writer_(stream_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + + virtual void null() { writer_.Null(); } + virtual void boolean(bool x) { writer_.Bool(x); } + virtual void integer(int64_t x) { writer_.Int64(x); } + virtual void real(double x) { writer_.Double(x); } + virtual void beginlist() { writer_.StartArray(); } + virtual void endlist() { writer_.EndArray(); } + virtual void beginrec() { writer_.StartObject(); } + virtual void endrec() { writer_.EndObject(); } + virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void string(const char* x) { writer_.String(x); } + + private: + std::shared_ptr buffer_; + rj::FileWriteStream stream_; + rj::PrettyWriter writer_; + }; + +} + +#endif // AWKWARD_IO_JSON_H_ diff --git a/include/awkward/io/root.h b/include/awkward/io/root.h new file mode 100644 index 0000000000..e19d28c06c --- /dev/null +++ b/include/awkward/io/root.h @@ -0,0 +1,20 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_IO_ROOT_H_ +#define AWKWARD_IO_ROOT_H_ + +#include +#include + +#include "awkward/cpu-kernels/util.h" +#include 
"awkward/util.h" +#include "awkward/Index.h" +#include "awkward/fillable/FillableOptions.h" +#include "awkward/Content.h" +#include "awkward/array/NumpyArray.h" + +namespace awkward { + const std::shared_ptr FromROOT_nestedvector(const Index64& byteoffsets, const NumpyArray& rawdata, int64_t depth, int64_t itemsize, std::string format, const FillableOptions& options); +} + +#endif // AWKWARD_IO_ROOT_H_ diff --git a/include/awkward/util.h b/include/awkward/util.h index c8c5d1cf22..03959bb5ba 100644 --- a/include/awkward/util.h +++ b/include/awkward/util.h @@ -11,7 +11,7 @@ namespace awkward { class Identity; namespace util { - void handle_error(const Error& err, const std::string classname, const Identity* id); + void handle_error(const struct Error& err, const std::string classname, const Identity* id); template class array_deleter { diff --git a/rapidjson b/rapidjson new file mode 160000 index 0000000000..f54b0e47a0 --- /dev/null +++ b/rapidjson @@ -0,0 +1 @@ +Subproject commit f54b0e47a08782a6131cc3d60f94d038fa6e0a51 diff --git a/simdjson b/simdjson deleted file mode 160000 index bd9628df93..0000000000 --- a/simdjson +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bd9628df93851c2baf6316378a91a0b7dff32a22 diff --git a/src/cpu-kernels/getitem.cpp b/src/cpu-kernels/getitem.cpp index b9ec3e8e2a..e33e38665c 100644 --- a/src/cpu-kernels/getitem.cpp +++ b/src/cpu-kernels/getitem.cpp @@ -33,7 +33,7 @@ void awkward_regularize_rangeslice(int64_t* start, int64_t* stop, bool posstep, } template -Error awkward_regularize_arrayslice(T* flatheadptr, int64_t lenflathead, int64_t length) { +ERROR awkward_regularize_arrayslice(T* flatheadptr, int64_t lenflathead, int64_t length) { for (int64_t i = 0; i < lenflathead; i++) { T original = flatheadptr[i]; if (flatheadptr[i] < 0) { @@ -45,12 +45,12 @@ Error awkward_regularize_arrayslice(T* flatheadptr, int64_t lenflathead, int64_t } return success(); } -Error awkward_regularize_arrayslice_64(int64_t* flatheadptr, int64_t lenflathead, 
int64_t length) { +ERROR awkward_regularize_arrayslice_64(int64_t* flatheadptr, int64_t lenflathead, int64_t length) { return awkward_regularize_arrayslice(flatheadptr, lenflathead, length); } template -Error awkward_slicearray_ravel(T* toptr, const T* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides) { +ERROR awkward_slicearray_ravel(T* toptr, const T* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides) { if (ndim == 1) { for (T i = 0; i < shape[0]; i++) { toptr[i] = fromptr[i*strides[0]]; @@ -58,7 +58,7 @@ Error awkward_slicearray_ravel(T* toptr, const T* fromptr, int64_t ndim, const i } else { for (T i = 0; i < shape[0]; i++) { - Error err = awkward_slicearray_ravel(&toptr[i*shape[1]], &fromptr[i*strides[0]], ndim - 1, &shape[1], &strides[1]); + ERROR err = awkward_slicearray_ravel(&toptr[i*shape[1]], &fromptr[i*strides[0]], ndim - 1, &shape[1], &strides[1]); if (err.str != nullptr) { return err; } @@ -66,23 +66,23 @@ Error awkward_slicearray_ravel(T* toptr, const T* fromptr, int64_t ndim, const i } return success(); } -Error awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides) { +ERROR awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides) { return awkward_slicearray_ravel(toptr, fromptr, ndim, shape, strides); } template -Error awkward_carry_arange(T* toptr, int64_t length) { +ERROR awkward_carry_arange(T* toptr, int64_t length) { for (int64_t i = 0; i < length; i++) { toptr[i] = i; } return success(); } -Error awkward_carry_arange_64(int64_t* toptr, int64_t length) { +ERROR awkward_carry_arange_64(int64_t* toptr, int64_t length) { return awkward_carry_arange(toptr, length); } template -Error awkward_identity_getitem_carry(ID* newidentityptr, const ID* identityptr, const T* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { +ERROR 
awkward_identity_getitem_carry(ID* newidentityptr, const ID* identityptr, const T* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { for (int64_t i = 0; i < lencarry; i++) { if (carryptr[i] >= length) { return failure("index out of range", kSliceNone, carryptr[i]); @@ -93,37 +93,37 @@ Error awkward_identity_getitem_carry(ID* newidentityptr, const ID* identityptr, } return success(); } -Error awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { +ERROR awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { return awkward_identity_getitem_carry(newidentityptr, identityptr, carryptr, lencarry, offset, width, length); } -Error awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { +ERROR awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { return awkward_identity_getitem_carry(newidentityptr, identityptr, carryptr, lencarry, offset, width, length); } template -Error awkward_numpyarray_contiguous_init(T* toptr, int64_t skip, int64_t stride) { +ERROR awkward_numpyarray_contiguous_init(T* toptr, int64_t skip, int64_t stride) { for (int64_t i = 0; i < skip; i++) { toptr[i] = i*stride; } return success(); } -Error awkward_numpyarray_contiguous_init_64(int64_t* toptr, int64_t skip, int64_t stride) { +ERROR awkward_numpyarray_contiguous_init_64(int64_t* toptr, int64_t skip, int64_t stride) { return awkward_numpyarray_contiguous_init(toptr, skip, stride); } template -Error awkward_numpyarray_contiguous_copy(uint8_t* toptr, const uint8_t* fromptr, 
int64_t len, int64_t stride, int64_t offset, const T* pos) { +ERROR awkward_numpyarray_contiguous_copy(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const T* pos) { for (int64_t i = 0; i < len; i++) { memcpy(&toptr[i*stride], &fromptr[offset + (int64_t)pos[i]], (size_t)stride); } return success(); } -Error awkward_numpyarray_contiguous_copy_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos) { +ERROR awkward_numpyarray_contiguous_copy_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos) { return awkward_numpyarray_contiguous_copy(toptr, fromptr, len, stride, offset, pos); } template -Error awkward_numpyarray_contiguous_next(T* topos, const T* frompos, int64_t len, int64_t skip, int64_t stride) { +ERROR awkward_numpyarray_contiguous_next(T* topos, const T* frompos, int64_t len, int64_t skip, int64_t stride) { for (int64_t i = 0; i < len; i++) { for (int64_t j = 0; j < skip; j++) { topos[i*skip + j] = frompos[i] + j*stride; @@ -131,34 +131,34 @@ Error awkward_numpyarray_contiguous_next(T* topos, const T* frompos, int64_t len } return success(); } -Error awkward_numpyarray_contiguous_next_64(int64_t* topos, const int64_t* frompos, int64_t len, int64_t skip, int64_t stride) { +ERROR awkward_numpyarray_contiguous_next_64(int64_t* topos, const int64_t* frompos, int64_t len, int64_t skip, int64_t stride) { return awkward_numpyarray_contiguous_next(topos, frompos, len, skip, stride); } template -Error awkward_numpyarray_getitem_next_null(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const T* pos) { +ERROR awkward_numpyarray_getitem_next_null(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const T* pos) { for (int64_t i = 0; i < len; i++) { std::memcpy(&toptr[i*stride], &fromptr[offset + pos[i]*stride], (size_t)stride); } return success(); } -Error 
awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos) { +ERROR awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos) { return awkward_numpyarray_getitem_next_null(toptr, fromptr, len, stride, offset, pos); } template -Error awkward_numpyarray_getitem_next_at(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t skip, int64_t at) { +ERROR awkward_numpyarray_getitem_next_at(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t skip, int64_t at) { for (int64_t i = 0; i < lencarry; i++) { nextcarryptr[i] = skip*carryptr[i] + at; } return success(); } -Error awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at) { +ERROR awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at) { return awkward_numpyarray_getitem_next_at(nextcarryptr, carryptr, lencarry, skip, at); } template -Error awkward_numpyarray_getitem_next_range(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { +ERROR awkward_numpyarray_getitem_next_range(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { for (int64_t i = 0; i < lencarry; i++) { for (int64_t j = 0; j < lenhead; j++) { nextcarryptr[i*lenhead + j] = skip*carryptr[i] + start + j*step; @@ -166,12 +166,12 @@ Error awkward_numpyarray_getitem_next_range(T* nextcarryptr, const T* carryptr, } return success(); } -Error awkward_numpyarray_getitem_next_range_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { +ERROR awkward_numpyarray_getitem_next_range_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t 
lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { return awkward_numpyarray_getitem_next_range(nextcarryptr, carryptr, lencarry, lenhead, skip, start, step); } template -Error awkward_numpyarray_getitem_next_range_advanced(T* nextcarryptr, T* nextadvancedptr, const T* carryptr, const T* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { +ERROR awkward_numpyarray_getitem_next_range_advanced(T* nextcarryptr, T* nextadvancedptr, const T* carryptr, const T* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { for (int64_t i = 0; i < lencarry; i++) { for (int64_t j = 0; j < lenhead; j++) { nextcarryptr[i*lenhead + j] = skip*carryptr[i] + start + j*step; @@ -180,12 +180,12 @@ Error awkward_numpyarray_getitem_next_range_advanced(T* nextcarryptr, T* nextadv } return success(); } -Error awkward_numpyarray_getitem_next_range_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { +ERROR awkward_numpyarray_getitem_next_range_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { return awkward_numpyarray_getitem_next_range_advanced(nextcarryptr, nextadvancedptr, carryptr, advancedptr, lencarry, lenhead, skip, start, step); } template -Error awkward_numpyarray_getitem_next_array(T* nextcarryptr, T* nextadvancedptr, const T* carryptr, const T* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip) { +ERROR awkward_numpyarray_getitem_next_array(T* nextcarryptr, T* nextadvancedptr, const T* carryptr, const T* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip) { for (int64_t i = 0; i < lencarry; i++) { for (int64_t j = 0; j < lenflathead; j++) { nextcarryptr[i*lenflathead + j] = 
skip*carryptr[i] + flatheadptr[j]; @@ -194,23 +194,23 @@ Error awkward_numpyarray_getitem_next_array(T* nextcarryptr, T* nextadvancedptr, } return success(); } -Error awkward_numpyarray_getitem_next_array_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip) { +ERROR awkward_numpyarray_getitem_next_array_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip) { return awkward_numpyarray_getitem_next_array(nextcarryptr, nextadvancedptr, carryptr, flatheadptr, lencarry, lenflathead, skip); } template -Error awkward_numpyarray_getitem_next_array_advanced(T* nextcarryptr, const T* carryptr, const T* advancedptr, const T* flatheadptr, int64_t lencarry, int64_t skip) { +ERROR awkward_numpyarray_getitem_next_array_advanced(T* nextcarryptr, const T* carryptr, const T* advancedptr, const T* flatheadptr, int64_t lencarry, int64_t skip) { for (int64_t i = 0; i < lencarry; i++) { nextcarryptr[i] = skip*carryptr[i] + flatheadptr[advancedptr[i]]; } return success(); } -Error awkward_numpyarray_getitem_next_array_advanced_64(int64_t* nextcarryptr, const int64_t* carryptr, const int64_t* advancedptr, const int64_t* flatheadptr, int64_t lencarry, int64_t skip) { +ERROR awkward_numpyarray_getitem_next_array_advanced_64(int64_t* nextcarryptr, const int64_t* carryptr, const int64_t* advancedptr, const int64_t* flatheadptr, int64_t lencarry, int64_t skip) { return awkward_numpyarray_getitem_next_array_advanced(nextcarryptr, carryptr, advancedptr, flatheadptr, lencarry, skip); } template -Error awkward_listarray_getitem_next_at(T* tocarry, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { +ERROR awkward_listarray_getitem_next_at(T* tocarry, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t 
startsoffset, int64_t stopsoffset, int64_t at) { for (int64_t i = 0; i < lenstarts; i++) { int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; int64_t regular_at = at; @@ -224,15 +224,15 @@ Error awkward_listarray_getitem_next_at(T* tocarry, const C* fromstarts, const C } return success(); } -Error awkward_listarray32_getitem_next_at_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { +ERROR awkward_listarray32_getitem_next_at_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { return awkward_listarray_getitem_next_at(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, at); } -Error awkward_listarray64_getitem_next_at_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { +ERROR awkward_listarray64_getitem_next_at_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { return awkward_listarray_getitem_next_at(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, at); } template -Error awkward_listarray_getitem_next_range_carrylength(int64_t* carrylength, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { +ERROR awkward_listarray_getitem_next_range_carrylength(int64_t* carrylength, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { *carrylength = 0; for (int64_t i = 0; i < lenstarts; i++) { int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; @@ -252,15 +252,15 @@ Error 
awkward_listarray_getitem_next_range_carrylength(int64_t* carrylength, con } return success(); } -Error awkward_listarray32_getitem_next_range_carrylength(int64_t* carrylength, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { +ERROR awkward_listarray32_getitem_next_range_carrylength(int64_t* carrylength, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { return awkward_listarray_getitem_next_range_carrylength(carrylength, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); } -Error awkward_listarray64_getitem_next_range_carrylength(int64_t* carrylength, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { +ERROR awkward_listarray64_getitem_next_range_carrylength(int64_t* carrylength, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { return awkward_listarray_getitem_next_range_carrylength(carrylength, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); } template -Error awkward_listarray_getitem_next_range(C* tooffsets, T* tocarry, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { +ERROR awkward_listarray_getitem_next_range(C* tooffsets, T* tocarry, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { int64_t k = 0; tooffsets[0] = 0; for (int64_t i = 0; i < lenstarts; i++) { @@ -284,30 +284,30 @@ Error awkward_listarray_getitem_next_range(C* tooffsets, T* tocarry, const C* fr } return 
success(); } -Error awkward_listarray32_getitem_next_range_64(int32_t* tooffsets, int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { +ERROR awkward_listarray32_getitem_next_range_64(int32_t* tooffsets, int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { return awkward_listarray_getitem_next_range(tooffsets, tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); } -Error awkward_listarray64_getitem_next_range_64(int64_t* tooffsets, int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { +ERROR awkward_listarray64_getitem_next_range_64(int64_t* tooffsets, int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { return awkward_listarray_getitem_next_range(tooffsets, tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); } template -Error awkward_listarray_getitem_next_range_counts(int64_t* total, const C* fromoffsets, int64_t lenstarts) { +ERROR awkward_listarray_getitem_next_range_counts(int64_t* total, const C* fromoffsets, int64_t lenstarts) { *total = 0; for (int64_t i = 0; i < lenstarts; i++) { *total = *total + fromoffsets[i + 1] - fromoffsets[i]; } return success(); } -Error awkward_listarray32_getitem_next_range_counts_64(int64_t* total, const int32_t* fromoffsets, int64_t lenstarts) { +ERROR awkward_listarray32_getitem_next_range_counts_64(int64_t* total, const int32_t* fromoffsets, int64_t lenstarts) { return awkward_listarray_getitem_next_range_counts(total, fromoffsets, lenstarts); } -Error 
awkward_listarray64_getitem_next_range_counts_64(int64_t* total, const int64_t* fromoffsets, int64_t lenstarts) { +ERROR awkward_listarray64_getitem_next_range_counts_64(int64_t* total, const int64_t* fromoffsets, int64_t lenstarts) { return awkward_listarray_getitem_next_range_counts(total, fromoffsets, lenstarts); } template -Error awkward_listarray_getitem_next_range_spreadadvanced(T* toadvanced, const T* fromadvanced, const C* fromoffsets, int64_t lenstarts) { +ERROR awkward_listarray_getitem_next_range_spreadadvanced(T* toadvanced, const T* fromadvanced, const C* fromoffsets, int64_t lenstarts) { for (int64_t i = 0; i < lenstarts; i++) { C count = fromoffsets[i + 1] - fromoffsets[i]; for (int64_t j = 0; j < count; j++) { @@ -316,15 +316,15 @@ Error awkward_listarray_getitem_next_range_spreadadvanced(T* toadvanced, const T } return success(); } -Error awkward_listarray32_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int32_t* fromoffsets, int64_t lenstarts) { +ERROR awkward_listarray32_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int32_t* fromoffsets, int64_t lenstarts) { return awkward_listarray_getitem_next_range_spreadadvanced(toadvanced, fromadvanced, fromoffsets, lenstarts); } -Error awkward_listarray64_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromoffsets, int64_t lenstarts) { +ERROR awkward_listarray64_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromoffsets, int64_t lenstarts) { return awkward_listarray_getitem_next_range_spreadadvanced(toadvanced, fromadvanced, fromoffsets, lenstarts); } template -Error awkward_listarray_getitem_next_array(C* tooffsets, T* tocarry, T* toadvanced, const C* fromstarts, const C* fromstops, const T* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { 
+ERROR awkward_listarray_getitem_next_array(C* tooffsets, T* tocarry, T* toadvanced, const C* fromstarts, const C* fromstops, const T* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { tooffsets[0] = 0; for (int64_t i = 0; i < lenstarts; i++) { if (fromstops[stopsoffset + i] < fromstarts[startsoffset + i]) { @@ -349,15 +349,15 @@ Error awkward_listarray_getitem_next_array(C* tooffsets, T* tocarry, T* toadvanc } return success(); } -Error awkward_listarray32_getitem_next_array_64(int32_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { +ERROR awkward_listarray32_getitem_next_array_64(int32_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { return awkward_listarray_getitem_next_array(tooffsets, tocarry, toadvanced, fromstarts, fromstops, fromarray, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); } -Error awkward_listarray64_getitem_next_array_64(int64_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { +ERROR awkward_listarray64_getitem_next_array_64(int64_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { return awkward_listarray_getitem_next_array(tooffsets, tocarry, toadvanced, fromstarts, fromstops, fromarray, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); } 
template -Error awkward_listarray_getitem_next_array_advanced(T* tocarry, T* toadvanced, const C* fromstarts, const C* fromstops, const T* fromarray, const T* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { +ERROR awkward_listarray_getitem_next_array_advanced(T* tocarry, T* toadvanced, const C* fromstarts, const C* fromstops, const T* fromarray, const T* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { for (int64_t i = 0; i < lenstarts; i++) { if (fromstops[stopsoffset + i] < fromstarts[startsoffset + i]) { return failure("stops[i] < starts[i]", i, kSliceNone); @@ -381,15 +381,15 @@ Error awkward_listarray_getitem_next_array_advanced(T* tocarry, T* toadvanced, c } return success(); } -Error awkward_listarray32_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { +ERROR awkward_listarray32_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { return awkward_listarray_getitem_next_array_advanced(tocarry, toadvanced, fromstarts, fromstops, fromarray, fromadvanced, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); } -Error awkward_listarray64_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { +ERROR 
awkward_listarray64_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { return awkward_listarray_getitem_next_array_advanced(tocarry, toadvanced, fromstarts, fromstops, fromarray, fromadvanced, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); } template -Error awkward_listarray_getitem_carry(C* tostarts, C* tostops, const C* fromstarts, const C* fromstops, const T* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry) { +ERROR awkward_listarray_getitem_carry(C* tostarts, C* tostops, const C* fromstarts, const C* fromstops, const T* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry) { for (int64_t i = 0; i < lencarry; i++) { if (fromcarry[i] >= lenstarts) { return failure("index out of range", i, fromcarry[i]); @@ -399,9 +399,9 @@ Error awkward_listarray_getitem_carry(C* tostarts, C* tostops, const C* fromstar } return success(); } -Error awkward_listarray32_getitem_carry_64(int32_t* tostarts, int32_t* tostops, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry) { +ERROR awkward_listarray32_getitem_carry_64(int32_t* tostarts, int32_t* tostops, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry) { return awkward_listarray_getitem_carry(tostarts, tostops, fromstarts, fromstops, fromcarry, startsoffset, stopsoffset, lenstarts, lencarry); } -Error awkward_listarray64_getitem_carry_64(int64_t* tostarts, int64_t* tostops, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t 
lenstarts, int64_t lencarry) { +ERROR awkward_listarray64_getitem_carry_64(int64_t* tostarts, int64_t* tostops, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry) { return awkward_listarray_getitem_carry(tostarts, tostops, fromstarts, fromstops, fromcarry, startsoffset, stopsoffset, lenstarts, lencarry); } diff --git a/src/cpu-kernels/identity.cpp b/src/cpu-kernels/identity.cpp index 753667efd3..a74c4e4ff2 100644 --- a/src/cpu-kernels/identity.cpp +++ b/src/cpu-kernels/identity.cpp @@ -3,20 +3,20 @@ #include "awkward/cpu-kernels/identity.h" template -Error awkward_new_identity(T* toptr, int64_t length) { +ERROR awkward_new_identity(T* toptr, int64_t length) { for (T i = 0; i < length; i++) { toptr[i] = i; } return success(); } -Error awkward_new_identity32(int32_t* toptr, int64_t length) { +ERROR awkward_new_identity32(int32_t* toptr, int64_t length) { return awkward_new_identity(toptr, length); } -Error awkward_new_identity64(int64_t* toptr, int64_t length) { +ERROR awkward_new_identity64(int64_t* toptr, int64_t length) { return awkward_new_identity(toptr, length); } -Error awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, int64_t length) { +ERROR awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, int64_t length) { for (int64_t i = 0; i < length; i++) { toptr[i]= (int64_t)fromptr[i]; } @@ -24,7 +24,7 @@ Error awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, i } template -Error awkward_identity_from_listarray(ID* toptr, const ID* fromptr, const T* fromstarts, const T* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { +ERROR awkward_identity_from_listarray(ID* toptr, const ID* fromptr, const T* fromstarts, const T* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, 
int64_t fromlength, int64_t fromwidth) { for (int64_t k = 0; k < tolength*(fromwidth + 1); k++) { toptr[k] = -1; } @@ -43,12 +43,12 @@ Error awkward_identity_from_listarray(ID* toptr, const ID* fromptr, const T* fro } return success(); } -Error awkward_identity32_from_listarray32(int32_t* toptr, const int32_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { +ERROR awkward_identity32_from_listarray32(int32_t* toptr, const int32_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { return awkward_identity_from_listarray(toptr, fromptr, fromstarts, fromstops, fromptroffset, startsoffset, stopsoffset, tolength, fromlength, fromwidth); } -Error awkward_identity64_from_listarray32(int64_t* toptr, const int64_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { +ERROR awkward_identity64_from_listarray32(int64_t* toptr, const int64_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { return awkward_identity_from_listarray(toptr, fromptr, fromstarts, fromstops, fromptroffset, startsoffset, stopsoffset, tolength, fromlength, fromwidth); } -Error awkward_identity64_from_listarray64(int64_t* toptr, const int64_t* fromptr, const int64_t* fromstarts, const int64_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { +ERROR awkward_identity64_from_listarray64(int64_t* toptr, const int64_t* fromptr, const int64_t* fromstarts, const int64_t* fromstops, int64_t 
fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { return awkward_identity_from_listarray(toptr, fromptr, fromstarts, fromstops, fromptroffset, startsoffset, stopsoffset, tolength, fromlength, fromwidth); } diff --git a/src/cpu-kernels/util.cpp b/src/cpu-kernels/util.cpp index 4bf2b265cd..70cdb3bc6e 100644 --- a/src/cpu-kernels/util.cpp +++ b/src/cpu-kernels/util.cpp @@ -4,8 +4,8 @@ #include "awkward/cpu-kernels/util.h" -Error success() { - Error out; +struct Error success() { + struct Error out; out.str = nullptr; out.location = kSliceNone; out.attempt = kSliceNone; @@ -13,8 +13,8 @@ Error success() { return out; } -Error failure(const char* str, int64_t location, int64_t attempt) { - Error out; +struct Error failure(const char* str, int64_t location, int64_t attempt) { + struct Error out; out.str = str; out.location = location; out.attempt = attempt; diff --git a/src/libawkward/Content.cpp b/src/libawkward/Content.cpp index f3d5640366..89a13827fd 100644 --- a/src/libawkward/Content.cpp +++ b/src/libawkward/Content.cpp @@ -9,6 +9,38 @@ namespace awkward { return tostring_part("", "", ""); } + const std::string Content::tojson(bool pretty, int64_t maxdecimals) const { + if (pretty) { + ToJsonPrettyString builder(maxdecimals); + builder.beginlist(); + tojson_part(builder); + builder.endlist(); + return builder.tostring(); + } + else { + ToJsonString builder(maxdecimals); + builder.beginlist(); + tojson_part(builder); + builder.endlist(); + return builder.tostring(); + } + } + + void Content::tojson(FILE* destination, bool pretty, int64_t maxdecimals, int64_t buffersize) const { + if (pretty) { + ToJsonPrettyFile builder(destination, maxdecimals, buffersize); + builder.beginlist(); + tojson_part(builder); + builder.endlist(); + } + else { + ToJsonFile builder(destination, maxdecimals, buffersize); + builder.beginlist(); + tojson_part(builder); + builder.endlist(); + } + } + const std::shared_ptr 
Content::getitem(const Slice& where) const { Index64 nextstarts(1); Index64 nextstops(1); diff --git a/src/libawkward/Identity.cpp b/src/libawkward/Identity.cpp index 230cb3955c..ce21dd315c 100644 --- a/src/libawkward/Identity.cpp +++ b/src/libawkward/Identity.cpp @@ -105,7 +105,7 @@ namespace awkward { std::shared_ptr out(rawout); if (std::is_same::value) { - Error err = awkward_identity32_getitem_carry_64( + struct Error err = awkward_identity32_getitem_carry_64( reinterpret_cast(rawout->ptr().get()), reinterpret_cast(ptr_.get()), carry.ptr().get(), @@ -116,7 +116,7 @@ namespace awkward { util::handle_error(err, classname(), nullptr); } else if (std::is_same::value) { - Error err = awkward_identity64_getitem_carry_64( + struct Error err = awkward_identity64_getitem_carry_64( reinterpret_cast(rawout->ptr().get()), reinterpret_cast(ptr_.get()), carry.ptr().get(), diff --git a/src/libawkward/Index.cpp b/src/libawkward/Index.cpp index b98af6bc76..0c8d10600e 100644 --- a/src/libawkward/Index.cpp +++ b/src/libawkward/Index.cpp @@ -80,6 +80,12 @@ namespace awkward { return ptr_.get()[(size_t)(offset_ + at)]; } + template + void IndexOf::setitem_at_unsafe(int64_t at, T value) const { + assert(0 <= at && at < length_); + ptr_.get()[(size_t)(offset_ + at)] = value; + } + template IndexOf IndexOf::getitem_range(int64_t start, int64_t stop) const { int64_t regular_start = start; diff --git a/src/libawkward/Slice.cpp b/src/libawkward/Slice.cpp index 1448c06545..9754b34e31 100644 --- a/src/libawkward/Slice.cpp +++ b/src/libawkward/Slice.cpp @@ -171,6 +171,27 @@ namespace awkward { items_.push_back(item); } + void Slice::append(const SliceAt& item) { + items_.push_back(item.shallow_copy()); + } + + void Slice::append(const SliceRange& item) { + items_.push_back(item.shallow_copy()); + } + + void Slice::append(const SliceEllipsis& item) { + items_.push_back(item.shallow_copy()); + } + + void Slice::append(const SliceNewAxis& item) { + items_.push_back(item.shallow_copy()); + } + 
+ template + void Slice::append(const SliceArrayOf& item) { + items_.push_back(item.shallow_copy()); + } + void Slice::become_sealed() { assert(!sealed_); diff --git a/src/libawkward/array/ListArray.cpp b/src/libawkward/array/ListArray.cpp index 95530882da..445cb83689 100644 --- a/src/libawkward/array/ListArray.cpp +++ b/src/libawkward/array/ListArray.cpp @@ -40,7 +40,7 @@ namespace awkward { if (Identity32* rawid = dynamic_cast(bigid.get())) { Identity32* rawsubid = new Identity32(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); std::shared_ptr subid(rawsubid); - Error err = awkward_identity32_from_listarray32( + struct Error err = awkward_identity32_from_listarray32( rawsubid->ptr().get(), rawid->ptr().get(), starts_.ptr().get(), @@ -57,7 +57,7 @@ namespace awkward { else if (Identity64* rawid = dynamic_cast(bigid.get())) { Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); std::shared_ptr subid(rawsubid); - Error err = awkward_identity64_from_listarray32( + struct Error err = awkward_identity64_from_listarray32( rawsubid->ptr().get(), rawid->ptr().get(), starts_.ptr().get(), @@ -91,7 +91,7 @@ namespace awkward { if (Identity64* rawid = dynamic_cast(bigid.get())) { Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); std::shared_ptr subid(rawsubid); - Error err = awkward_identity64_from_listarray64( + struct Error err = awkward_identity64_from_listarray64( rawsubid->ptr().get(), rawid->ptr().get(), starts_.ptr().get(), @@ -117,14 +117,14 @@ namespace awkward { if (length() <= kMaxInt32) { Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_new_identity32(rawid->ptr().get(), length()); + struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, classname(), 
id_.get()); setid(newid); } else { Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_new_identity64(rawid->ptr().get(), length()); + struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } @@ -144,6 +144,15 @@ namespace awkward { return out.str(); } + template + void ListArrayOf::tojson_part(ToJson& builder) const { + for (int64_t i = 0; i < length(); i++) { + builder.beginlist(); + getitem_at_unsafe(i).get()->tojson_part(builder); + builder.endlist(); + } + } + template int64_t ListArrayOf::length() const { return starts_.length(); @@ -232,7 +241,7 @@ namespace awkward { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); Index64 nextcarry(lenstarts); - Error err = awkward_listarray32_getitem_next_at_64( + struct Error err = awkward_listarray32_getitem_next_at_64( nextcarry.ptr().get(), starts_.ptr().get(), stops_.ptr().get(), @@ -255,7 +264,7 @@ namespace awkward { step = 1; } int64_t carrylength; - Error err1 = awkward_listarray32_getitem_next_range_carrylength( + struct Error err1 = awkward_listarray32_getitem_next_range_carrylength( &carrylength, starts_.ptr().get(), stops_.ptr().get(), @@ -270,7 +279,7 @@ namespace awkward { Index32 nextoffsets(lenstarts + 1); Index64 nextcarry(carrylength); - Error err2 = awkward_listarray32_getitem_next_range_64( + struct Error err2 = awkward_listarray32_getitem_next_range_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), starts_.ptr().get(), @@ -289,13 +298,13 @@ namespace awkward { } else { int64_t total; - Error err1 = awkward_listarray32_getitem_next_range_counts_64( + struct Error err1 = awkward_listarray32_getitem_next_range_counts_64( &total, nextoffsets.ptr().get(), lenstarts); util::handle_error(err1, classname(), id_.get()); Index64 nextadvanced(total); - Error err2 = 
awkward_listarray32_getitem_next_range_spreadadvanced_64( + struct Error err2 = awkward_listarray32_getitem_next_range_spreadadvanced_64( nextadvanced.ptr().get(), advanced.ptr().get(), nextoffsets.ptr().get(), @@ -321,7 +330,7 @@ namespace awkward { Index64 nextcarry(lenstarts*flathead.length()); Index64 nextadvanced(lenstarts*flathead.length()); Index32 nextoffsets(lenstarts + 1); - Error err = awkward_listarray32_getitem_next_array_64( + struct Error err = awkward_listarray32_getitem_next_array_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), nextadvanced.ptr().get(), @@ -341,7 +350,7 @@ namespace awkward { else { Index64 nextcarry(lenstarts); Index64 nextadvanced(lenstarts); - Error err = awkward_listarray32_getitem_next_array_advanced_64( + struct Error err = awkward_listarray32_getitem_next_array_advanced_64( nextcarry.ptr().get(), nextadvanced.ptr().get(), starts_.ptr().get(), @@ -380,7 +389,7 @@ namespace awkward { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); Index64 nextcarry(lenstarts); - Error err = awkward_listarray64_getitem_next_at_64( + struct Error err = awkward_listarray64_getitem_next_at_64( nextcarry.ptr().get(), starts_.ptr().get(), stops_.ptr().get(), @@ -403,7 +412,7 @@ namespace awkward { step = 1; } int64_t carrylength; - Error err1 = awkward_listarray64_getitem_next_range_carrylength( + struct Error err1 = awkward_listarray64_getitem_next_range_carrylength( &carrylength, starts_.ptr().get(), stops_.ptr().get(), @@ -418,7 +427,7 @@ namespace awkward { Index64 nextoffsets(lenstarts + 1); Index64 nextcarry(carrylength); - Error err2 = awkward_listarray64_getitem_next_range_64( + struct Error err2 = awkward_listarray64_getitem_next_range_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), starts_.ptr().get(), @@ -437,13 +446,13 @@ namespace awkward { } else { int64_t total; - Error err1 = awkward_listarray64_getitem_next_range_counts_64( + struct Error err1 = awkward_listarray64_getitem_next_range_counts_64( &total, 
nextoffsets.ptr().get(), lenstarts); util::handle_error(err1, classname(), id_.get()); Index64 nextadvanced(total); - Error err2 = awkward_listarray64_getitem_next_range_spreadadvanced_64( + struct Error err2 = awkward_listarray64_getitem_next_range_spreadadvanced_64( nextadvanced.ptr().get(), advanced.ptr().get(), nextoffsets.ptr().get(), @@ -469,7 +478,7 @@ namespace awkward { Index64 nextcarry(lenstarts*flathead.length()); Index64 nextadvanced(lenstarts*flathead.length()); Index64 nextoffsets(lenstarts + 1); - Error err = awkward_listarray64_getitem_next_array_64( + struct Error err = awkward_listarray64_getitem_next_array_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), nextadvanced.ptr().get(), @@ -489,7 +498,7 @@ namespace awkward { else { Index64 nextcarry(lenstarts); Index64 nextadvanced(lenstarts); - Error err = awkward_listarray64_getitem_next_array_advanced_64( + struct Error err = awkward_listarray64_getitem_next_array_advanced_64( nextcarry.ptr().get(), nextadvanced.ptr().get(), starts_.ptr().get(), @@ -520,7 +529,7 @@ namespace awkward { } Index32 nextstarts(carry.length()); Index32 nextstops(carry.length()); - Error err = awkward_listarray32_getitem_carry_64( + struct Error err = awkward_listarray32_getitem_carry_64( nextstarts.ptr().get(), nextstops.ptr().get(), starts_.ptr().get(), @@ -546,7 +555,7 @@ namespace awkward { } Index64 nextstarts(carry.length()); Index64 nextstops(carry.length()); - Error err = awkward_listarray64_getitem_carry_64( + struct Error err = awkward_listarray64_getitem_carry_64( nextstarts.ptr().get(), nextstops.ptr().get(), starts_.ptr().get(), diff --git a/src/libawkward/array/ListOffsetArray.cpp b/src/libawkward/array/ListOffsetArray.cpp index a2697fe199..a1d818ba3b 100644 --- a/src/libawkward/array/ListOffsetArray.cpp +++ b/src/libawkward/array/ListOffsetArray.cpp @@ -52,7 +52,7 @@ namespace awkward { if (Identity32* rawid = dynamic_cast(bigid.get())) { Identity32* rawsubid = new Identity32(Identity::newref(), 
rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); std::shared_ptr subid(rawsubid); - Error err = awkward_identity32_from_listarray32( + struct Error err = awkward_identity32_from_listarray32( rawsubid->ptr().get(), rawid->ptr().get(), starts.ptr().get(), @@ -69,7 +69,7 @@ namespace awkward { else if (Identity64* rawid = dynamic_cast(bigid.get())) { Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); std::shared_ptr subid(rawsubid); - Error err = awkward_identity64_from_listarray32( + struct Error err = awkward_identity64_from_listarray32( rawsubid->ptr().get(), rawid->ptr().get(), starts.ptr().get(), @@ -105,7 +105,7 @@ namespace awkward { if (Identity64* rawid = dynamic_cast(bigid.get())) { Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); std::shared_ptr subid(rawsubid); - Error err = awkward_identity64_from_listarray64( + struct Error err = awkward_identity64_from_listarray64( rawsubid->ptr().get(), rawid->ptr().get(), starts.ptr().get(), @@ -131,14 +131,14 @@ namespace awkward { if (length() <= kMaxInt32) { Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_new_identity32(rawid->ptr().get(), length()); + struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } else { Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_new_identity64(rawid->ptr().get(), length()); + struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } @@ -157,6 +157,15 @@ namespace awkward { return out.str(); } + template + void ListOffsetArrayOf::tojson_part(ToJson& builder) const { + for 
(int64_t i = 0; i < length(); i++) { + builder.beginlist(); + getitem_at_unsafe(i).get()->tojson_part(builder); + builder.endlist(); + } + } + template int64_t ListOffsetArrayOf::length() const { return offsets_.length() - 1; @@ -235,7 +244,7 @@ namespace awkward { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); Index64 nextcarry(lenstarts); - Error err = awkward_listarray32_getitem_next_at_64( + struct Error err = awkward_listarray32_getitem_next_at_64( nextcarry.ptr().get(), starts.ptr().get(), stops.ptr().get(), @@ -260,7 +269,7 @@ namespace awkward { step = 1; } int64_t carrylength; - Error err1 = awkward_listarray32_getitem_next_range_carrylength( + struct Error err1 = awkward_listarray32_getitem_next_range_carrylength( &carrylength, starts.ptr().get(), stops.ptr().get(), @@ -275,7 +284,7 @@ namespace awkward { Index32 nextoffsets(lenstarts + 1); Index64 nextcarry(carrylength); - Error err2 = awkward_listarray32_getitem_next_range_64( + struct Error err2 = awkward_listarray32_getitem_next_range_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), starts.ptr().get(), @@ -294,13 +303,13 @@ namespace awkward { } else { int64_t total; - Error err1 = awkward_listarray32_getitem_next_range_counts_64( + struct Error err1 = awkward_listarray32_getitem_next_range_counts_64( &total, nextoffsets.ptr().get(), lenstarts); util::handle_error(err1, classname(), id_.get()); Index64 nextadvanced(total); - Error err2 = awkward_listarray32_getitem_next_range_spreadadvanced_64( + struct Error err2 = awkward_listarray32_getitem_next_range_spreadadvanced_64( nextadvanced.ptr().get(), advanced.ptr().get(), nextoffsets.ptr().get(), @@ -329,7 +338,7 @@ namespace awkward { Index64 nextadvanced(lenstarts*flathead.length()); Index32 nextoffsets(lenstarts + 1); Index32 nextstops(lenstarts); - Error err = awkward_listarray32_getitem_next_array_64( + struct Error err = awkward_listarray32_getitem_next_array_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), 
nextadvanced.ptr().get(), @@ -349,7 +358,7 @@ namespace awkward { else { Index64 nextcarry(lenstarts); Index64 nextadvanced(lenstarts); - Error err = awkward_listarray32_getitem_next_array_advanced_64( + struct Error err = awkward_listarray32_getitem_next_array_advanced_64( nextcarry.ptr().get(), nextadvanced.ptr().get(), starts.ptr().get(), @@ -387,7 +396,7 @@ namespace awkward { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); Index64 nextcarry(lenstarts); - Error err = awkward_listarray64_getitem_next_at_64( + struct Error err = awkward_listarray64_getitem_next_at_64( nextcarry.ptr().get(), starts.ptr().get(), stops.ptr().get(), @@ -412,7 +421,7 @@ namespace awkward { step = 1; } int64_t carrylength; - Error err1 = awkward_listarray64_getitem_next_range_carrylength( + struct Error err1 = awkward_listarray64_getitem_next_range_carrylength( &carrylength, starts.ptr().get(), stops.ptr().get(), @@ -427,7 +436,7 @@ namespace awkward { Index64 nextoffsets(lenstarts + 1); Index64 nextcarry(carrylength); - Error err2 = awkward_listarray64_getitem_next_range_64( + struct Error err2 = awkward_listarray64_getitem_next_range_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), starts.ptr().get(), @@ -446,13 +455,13 @@ namespace awkward { } else { int64_t total; - Error err1 = awkward_listarray64_getitem_next_range_counts_64( + struct Error err1 = awkward_listarray64_getitem_next_range_counts_64( &total, nextoffsets.ptr().get(), lenstarts); util::handle_error(err1, classname(), id_.get()); Index64 nextadvanced(total); - Error err2 = awkward_listarray64_getitem_next_range_spreadadvanced_64( + struct Error err2 = awkward_listarray64_getitem_next_range_spreadadvanced_64( nextadvanced.ptr().get(), advanced.ptr().get(), nextoffsets.ptr().get(), @@ -481,7 +490,7 @@ namespace awkward { Index64 nextadvanced(lenstarts*flathead.length()); Index64 nextoffsets(lenstarts + 1); Index64 nextstops(lenstarts); - Error err = awkward_listarray64_getitem_next_array_64( + 
struct Error err = awkward_listarray64_getitem_next_array_64( nextoffsets.ptr().get(), nextcarry.ptr().get(), nextadvanced.ptr().get(), @@ -501,7 +510,7 @@ namespace awkward { else { Index64 nextcarry(lenstarts); Index64 nextadvanced(lenstarts); - Error err = awkward_listarray64_getitem_next_array_advanced_64( + struct Error err = awkward_listarray64_getitem_next_array_advanced_64( nextcarry.ptr().get(), nextadvanced.ptr().get(), starts.ptr().get(), @@ -530,7 +539,7 @@ namespace awkward { Index32 stops = make_stops(offsets_); Index32 nextstarts(carry.length()); Index32 nextstops(carry.length()); - Error err = awkward_listarray32_getitem_carry_64( + struct Error err = awkward_listarray32_getitem_carry_64( nextstarts.ptr().get(), nextstops.ptr().get(), starts.ptr().get(), @@ -554,7 +563,7 @@ namespace awkward { Index64 stops = make_stops(offsets_); Index64 nextstarts(carry.length()); Index64 nextstops(carry.length()); - Error err = awkward_listarray64_getitem_carry_64( + struct Error err = awkward_listarray64_getitem_carry_64( nextstarts.ptr().get(), nextstops.ptr().get(), starts.ptr().get(), diff --git a/src/libawkward/array/NumpyArray.cpp b/src/libawkward/array/NumpyArray.cpp index f42a758a8e..b68b40fe97 100644 --- a/src/libawkward/array/NumpyArray.cpp +++ b/src/libawkward/array/NumpyArray.cpp @@ -29,6 +29,10 @@ namespace awkward { return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_); } + void* NumpyArray::byteptr(ssize_t at) const { + return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_ + at); + } + ssize_t NumpyArray::bytelength() const { if (isscalar()) { return itemsize_; @@ -56,14 +60,14 @@ namespace awkward { if (length() <= kMaxInt32) { Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_new_identity32(rawid->ptr().get(), length()); + struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, 
classname(), id_.get()); setid(newid); } else { Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_new_identity64(rawid->ptr().get(), length()); + struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } @@ -177,6 +181,87 @@ namespace awkward { return out.str(); } + void tojson_boolean(ToJson& builder, bool* array, int64_t length) { + for (int i = 0; i < length; i++) { + builder.boolean(array[i]); + } + } + + template + void tojson_integer(ToJson& builder, T* array, int64_t length) { + for (int i = 0; i < length; i++) { + builder.integer(array[i]); + } + } + + template + void tojson_real(ToJson& builder, T* array, int64_t length) { + for (int i = 0; i < length; i++) { + builder.real(array[i]); + } + } + + void NumpyArray::tojson_part(ToJson& builder) const { + if (ndim() == 1) { + if (format_.compare("d") == 0) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (format_.compare("f") == 0) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } +#ifdef _MSC_VER + else if (format_.compare("q") == 0) { +#else + else if (format_.compare("l") == 0) { +#endif + tojson_integer(builder, reinterpret_cast(byteptr()), length()); + } +#ifdef _MSC_VER + else if (format_.compare("Q") == 0) { +#else + else if (format_.compare("L") == 0) { +#endif + tojson_integer(builder, reinterpret_cast(byteptr()), length()); + } +#ifdef _MSC_VER + else if (format_.compare("l") == 0) { +#else + else if (format_.compare("i") == 0) { +#endif + tojson_integer(builder, reinterpret_cast(byteptr()), length()); + } +#ifdef _MSC_VER + else if (format_.compare("L") == 0) { +#else + else if (format_.compare("I") == 0) { +#endif + tojson_integer(builder, reinterpret_cast(byteptr()), length()); + } + else if (format_.compare("h") == 0) { + tojson_real(builder, reinterpret_cast(byteptr()), 
length()); + } + else if (format_.compare("H") == 0) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (format_.compare("b") == 0) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else if (format_.compare("B") == 0 || format_.compare("c") == 0) { + tojson_real(builder, reinterpret_cast(byteptr()), length()); + } + else { + throw std::invalid_argument(std::string("cannot convert Numpy format \"") + format_ + std::string("\" into JSON")); + } + } + else { + for (int64_t i = 0; i < length(); i++) { + builder.beginlist(); + getitem_at_unsafe(i).get()->tojson_part(builder); + builder.endlist(); + } + } + } + int64_t NumpyArray::length() const { if (isscalar()) { return -1; @@ -289,7 +374,7 @@ namespace awkward { const std::shared_ptr NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { assert(!isscalar()); Index64 carry(shape_[0]); - Error err = awkward_carry_arange_64(carry.ptr().get(), shape_[0]); + struct Error err = awkward_carry_arange_64(carry.ptr().get(), shape_[0]); util::handle_error(err, classname(), id_.get()); return getitem_next(head, tail, carry, advanced, shape_[0], strides_[0], false).shallow_copy(); } @@ -298,7 +383,7 @@ namespace awkward { assert(!isscalar()); std::shared_ptr ptr(new uint8_t[(size_t)(carry.length()*strides_[0])], awkward::util::array_deleter()); - Error err = awkward_numpyarray_getitem_next_null_64( + struct Error err = awkward_numpyarray_getitem_next_null_64( reinterpret_cast(ptr.get()), reinterpret_cast(ptr_.get()), carry.length(), @@ -367,7 +452,7 @@ namespace awkward { } else { Index64 bytepos(shape_[0]); - Error err = awkward_numpyarray_contiguous_init_64(bytepos.ptr().get(), shape_[0], strides_[0]); + struct Error err = awkward_numpyarray_contiguous_init_64(bytepos.ptr().get(), shape_[0], strides_[0]); util::handle_error(err, classname(), id_.get()); return contiguous_next(bytepos); } @@ -376,7 +461,7 @@ namespace awkward { 
const NumpyArray NumpyArray::contiguous_next(Index64 bytepos) const { if (iscontiguous()) { std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*strides_[0])], awkward::util::array_deleter()); - Error err = awkward_numpyarray_contiguous_copy_64( + struct Error err = awkward_numpyarray_contiguous_copy_64( reinterpret_cast(ptr.get()), reinterpret_cast(ptr_.get()), bytepos.length(), @@ -389,7 +474,7 @@ namespace awkward { else if (shape_.size() == 1) { std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*itemsize_)], awkward::util::array_deleter()); - Error err = awkward_numpyarray_contiguous_copy_64( + struct Error err = awkward_numpyarray_contiguous_copy_64( reinterpret_cast(ptr.get()), reinterpret_cast(ptr_.get()), bytepos.length(), @@ -405,7 +490,7 @@ namespace awkward { NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); Index64 nextbytepos(bytepos.length()*shape_[1]); - Error err = awkward_numpyarray_contiguous_next_64( + struct Error err = awkward_numpyarray_contiguous_next_64( nextbytepos.ptr().get(), bytepos.ptr().get(), bytepos.length(), @@ -522,7 +607,7 @@ namespace awkward { const NumpyArray NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const { if (head.get() == nullptr) { std::shared_ptr ptr(new uint8_t[(size_t)(carry.length()*stride)], awkward::util::array_deleter()); - Error err = awkward_numpyarray_getitem_next_null_64( + struct Error err = awkward_numpyarray_getitem_next_null_64( reinterpret_cast(ptr.get()), reinterpret_cast(ptr_.get()), carry.length(), @@ -564,7 +649,7 @@ namespace awkward { } Index64 nextcarry(carry.length()); - Error err = awkward_numpyarray_getitem_next_at_64( + struct Error err = awkward_numpyarray_getitem_next_at_64( nextcarry.ptr().get(), carry.ptr().get(), carry.length(), @@ -604,7 +689,7 @@ namespace awkward { if (advanced.length() == 0) { 
Index64 nextcarry(carry.length()*lenhead); - Error err = awkward_numpyarray_getitem_next_range_64( + struct Error err = awkward_numpyarray_getitem_next_range_64( nextcarry.ptr().get(), carry.ptr().get(), carry.length(), @@ -625,7 +710,7 @@ namespace awkward { else { Index64 nextcarry(carry.length()*lenhead); Index64 nextadvanced(carry.length()*lenhead); - Error err = awkward_numpyarray_getitem_next_range_advanced_64( + struct Error err = awkward_numpyarray_getitem_next_range_advanced_64( nextcarry.ptr().get(), nextadvanced.ptr().get(), carry.ptr().get(), @@ -688,7 +773,7 @@ namespace awkward { Slice nexttail = tail.tail(); Index64 flathead = array->ravel(); - Error err = awkward_regularize_arrayslice_64( + struct Error err = awkward_regularize_arrayslice_64( flathead.ptr().get(), flathead.length(), shape_[1]); @@ -697,7 +782,7 @@ namespace awkward { if (advanced.length() == 0) { Index64 nextcarry(carry.length()*flathead.length()); Index64 nextadvanced(carry.length()*flathead.length()); - Error err = awkward_numpyarray_getitem_next_array_64( + struct Error err = awkward_numpyarray_getitem_next_array_64( nextcarry.ptr().get(), nextadvanced.ptr().get(), carry.ptr().get(), @@ -725,7 +810,7 @@ namespace awkward { else { Index64 nextcarry(carry.length()); - Error err = awkward_numpyarray_getitem_next_array_advanced_64( + struct Error err = awkward_numpyarray_getitem_next_array_advanced_64( nextcarry.ptr().get(), carry.ptr().get(), advanced.ptr().get(), diff --git a/src/libawkward/io/json.cpp b/src/libawkward/io/json.cpp new file mode 100644 index 0000000000..f2543ca2b2 --- /dev/null +++ b/src/libawkward/io/json.cpp @@ -0,0 +1,83 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include "awkward/fillable/FillableArray.h" +#include "awkward/Content.h" + +#include "awkward/io/json.h" + +namespace awkward { + class Handler: public rj::BaseReaderHandler, Handler> { + public: + Handler(const FillableOptions& options): 
array_(options), depth_(0) { } + + const std::shared_ptr snapshot() const { + return array_.snapshot(); + } + + bool Null() { array_.null(); return true; } + bool Bool(bool x) { array_.boolean(x); return true; } + bool Int(int x) { array_.integer((int64_t)x); return true; } + bool Uint(unsigned int x) { array_.integer((int64_t)x); return true; } + bool Int64(int64_t x) { array_.integer(x); return true; } + bool Uint64(uint64_t x) { array_.integer((int64_t)x); return true; } + bool Double(double x) { array_.real(x); return true; } + + bool StartArray() { + if (depth_ != 0) { + array_.beginlist(); + } + depth_++; + return true; + } + bool EndArray(rj::SizeType numfields) { + depth_--; + if (depth_ != 0) { + array_.endlist(); + } + return true; + } + + bool StartObject() { + throw std::runtime_error("not implemented: Handler::StartObject"); + } + bool EndObject(rj::SizeType numfields) { + throw std::runtime_error("not implemented: Handler::EndObject"); + } + bool Key(const char* str, rj::SizeType length, bool copy) { + throw std::runtime_error("not implemented: Handler::Key"); + } + bool String(const char* str, rj::SizeType length, bool copy) { + throw std::runtime_error("not implemented: Handler::String"); + } + + private: + FillableArray array_; + int64_t depth_; + }; + + const std::shared_ptr FromJsonString(const char* source, const FillableOptions& options) { + Handler handler(options); + rj::Reader reader; + rj::StringStream stream(source); + if (reader.Parse(stream, handler)) { + return handler.snapshot(); + } + else { + throw std::invalid_argument(std::string("JSON error at char ") + std::to_string(reader.GetErrorOffset()) + std::string(": ") + std::string(rj::GetParseError_En(reader.GetParseErrorCode()))); + } + } + + const std::shared_ptr FromJsonFile(FILE* source, const FillableOptions& options, int64_t buffersize) { + Handler handler(options); + rj::Reader reader; + std::shared_ptr buffer(new char[(size_t)buffersize], awkward::util::array_deleter()); + 
rj::FileReadStream stream(source, buffer.get(), ((size_t)buffersize)*sizeof(char)); + if (reader.Parse(stream, handler)) { + return handler.snapshot(); + } + else { + throw std::invalid_argument(std::string("JSON error at char ") + std::to_string(reader.GetErrorOffset()) + std::string(": ") + std::string(rj::GetParseError_En(reader.GetParseErrorCode()))); + } + return handler.snapshot(); + } +} diff --git a/src/libawkward/io/root.cpp b/src/libawkward/io/root.cpp new file mode 100644 index 0000000000..be9e9902e4 --- /dev/null +++ b/src/libawkward/io/root.cpp @@ -0,0 +1,77 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include + +#include "awkward/Content.h" +#include "awkward/Identity.h" +#include "awkward/array/ListOffsetArray.h" +#include "awkward/fillable/GrowableBuffer.h" + +#include "awkward/io/root.h" + +namespace awkward { + void FromROOT_nestedvector_fill(std::vector>& levels, GrowableBuffer& bytepos_tocopy, int64_t& bytepos, const NumpyArray& rawdata, int64_t whichlevel, int64_t itemsize) { + if (whichlevel == levels.size()) { + bytepos_tocopy.append(bytepos); + bytepos += itemsize; + } + + else { + uint32_t bigendian = *reinterpret_cast(rawdata.byteptr((ssize_t)bytepos)); + + // FIXME: check native endianness + uint32_t length = ((bigendian >> 24) & 0xff) | // move byte 3 to byte 0 + ((bigendian << 8) & 0xff0000) | // move byte 1 to byte 2 + ((bigendian >> 8) & 0xff00) | // move byte 2 to byte 1 + ((bigendian << 24) & 0xff000000); // byte 0 to byte 3 + + bytepos += sizeof(int32_t); + for (uint32_t i = 0; i < length; i++) { + FromROOT_nestedvector_fill(levels, bytepos_tocopy, bytepos, rawdata, whichlevel + 1, itemsize); + } + int64_t previous = levels[(unsigned int)whichlevel].getitem_at_unsafe(levels[(unsigned int)whichlevel].length() - 1); + levels[(unsigned int)whichlevel].append(previous + length); + } + } + + const std::shared_ptr FromROOT_nestedvector(const Index64& byteoffsets, const NumpyArray& 
rawdata, int64_t depth, int64_t itemsize, std::string format, const FillableOptions& options) { + assert(depth > 0); + assert(rawdata.ndim() == 1); + + Index64 level0(byteoffsets.length()); + level0.setitem_at_unsafe(0, 0); + + std::vector> levels; + for (int64_t i = 0; i < depth; i++) { + levels.push_back(GrowableBuffer(options)); + levels[(size_t)i].append(0); + } + + GrowableBuffer bytepos_tocopy(options); + + for (int64_t i = 0; i < byteoffsets.length() - 1; i++) { + int64_t bytepos = byteoffsets.getitem_at_unsafe(i); + FromROOT_nestedvector_fill(levels, bytepos_tocopy, bytepos, rawdata, 0, itemsize); + level0.setitem_at_unsafe(i + 1, levels[0].length()); + } + + std::shared_ptr ptr(new uint8_t[(size_t)(bytepos_tocopy.length()*itemsize)], awkward::util::array_deleter()); + ssize_t offset = rawdata.byteoffset(); + uint8_t* toptr = reinterpret_cast(ptr.get()); + uint8_t* fromptr = reinterpret_cast(rawdata.ptr().get()); + for (int64_t i = 0; i < bytepos_tocopy.length(); i++) { + ssize_t bytepos = (ssize_t)bytepos_tocopy.getitem_at_unsafe(i); + std::memcpy(&toptr[(ssize_t)(i*itemsize)], &fromptr[offset + bytepos], (size_t)itemsize); + } + + std::vector shape = { (ssize_t)bytepos_tocopy.length() }; + std::vector strides = { (ssize_t)itemsize }; + std::shared_ptr out(new NumpyArray(Identity::none(), ptr, shape, strides, 0, (ssize_t)itemsize, format)); + + for (int64_t i = depth - 1; i >= 0; i--) { + out = std::shared_ptr(new ListOffsetArray64(Identity::none(), levels[(size_t)i].toindex(), out)); + } + return out; + } + +} diff --git a/src/libawkward/util.cpp b/src/libawkward/util.cpp index 6fe5fa813b..3f51fe6c1e 100644 --- a/src/libawkward/util.cpp +++ b/src/libawkward/util.cpp @@ -8,7 +8,7 @@ namespace awkward { namespace util { - void handle_error(const Error& err, const std::string classname, const Identity* id) { + void handle_error(const struct Error& err, const std::string classname, const Identity* id) { if (err.str != nullptr) { std::stringstream out; out << 
"in " << classname; diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index 114b21553f..9bedf1b1d8 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -1,5 +1,6 @@ // BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE +#include #include #include @@ -23,6 +24,8 @@ #include "awkward/type/ListType.h" #include "awkward/type/OptionType.h" #include "awkward/type/UnionType.h" +#include "awkward/io/json.h" +#include "awkward/io/root.h" namespace py = pybind11; namespace ak = awkward; @@ -457,6 +460,31 @@ py::object getitem(T& self, py::object obj) { return box(self.getitem(toslice(obj))); } +void fillable_fill(ak::FillableArray& self, py::object obj) { + if (py::isinstance(obj)) { + self.boolean(obj.cast()); + } + else if (py::isinstance(obj)) { + self.integer(obj.cast()); + } + else if (py::isinstance(obj)) { + self.real(obj.cast()); + } + // FIXME: strings, dicts... + else if (py::isinstance(obj)) { + py::sequence seq = obj.cast(); + self.beginlist(); + for (auto x : seq) { + fillable_fill(self, x); + } + self.endlist(); + return; + } + else { + throw std::invalid_argument(std::string("cannot convert ") + obj.attr("__repr__")().cast() + std::string(" to an array element")); + } +} + py::class_ make_FillableArray(py::handle m, std::string name) { return (py::class_(m, name.c_str()) .def(py::init([](int64_t initial, double resize) -> ak::FillableArray { @@ -478,6 +506,7 @@ py::class_ make_FillableArray(py::handle m, std::string name) .def("real", &ak::FillableArray::real) .def("beginlist", &ak::FillableArray::beginlist) .def("endlist", &ak::FillableArray::endlist) + .def("fill", &fillable_fill) ); } @@ -602,6 +631,44 @@ ak::Iterator iter(T& self) { return ak::Iterator(self.shallow_copy()); } +int64_t check_maxdecimals(py::object maxdecimals) { + if (maxdecimals.is(py::none())) { + return -1; + } + try { + return maxdecimals.cast(); + } + catch (py::cast_error err) { + throw std::invalid_argument("maxdecimals must be None or an 
integer"); + } +} + +template +std::string tojson_string(T& self, bool pretty, py::object maxdecimals) { + return self.tojson(pretty, check_maxdecimals(maxdecimals)); +} + +template +void tojson_file(T& self, std::string destination, bool pretty, py::object maxdecimals, int64_t buffersize) { +#ifdef _MSC_VER + FILE* file; + if (fopen_s(&file, destination.c_str(), "wb") != 0) { +#else + FILE* file = fopen(destination.c_str(), "wb"); + if (file == nullptr) { +#endif + throw std::invalid_argument(std::string("file \"") + destination + std::string("\" could not be opened for writing")); + } + try { + self.tojson(file, pretty, check_maxdecimals(maxdecimals), buffersize); + } + catch (...) { + fclose(file); + throw; + } + fclose(file); +} + template py::class_ content(py::class_& x) { return x.def("__repr__", &repr) @@ -614,7 +681,9 @@ py::class_ content(py::class_& x) { }) .def("__len__", &len) .def("__getitem__", &getitem) - .def("__iter__", &iter); + .def("__iter__", &iter) + .def("tojson", &tojson_string, py::arg("pretty") = false, py::arg("maxdecimals") = py::none()) + .def("tojson", &tojson_file, py::arg("destination"), py::arg("pretty") = false, py::arg("maxdecimals") = py::none(), py::arg("buffersize") = 65536); } py::class_ make_Content(py::handle m, std::string name) { @@ -738,4 +807,45 @@ PYBIND11_MODULE(layout, m) { make_ListOffsetArrayOf(m, "ListOffsetArray32"); make_ListOffsetArrayOf(m, "ListOffsetArray64"); + + m.def("fromjson", [](std::string source, int64_t initial, double resize, int64_t buffersize) -> py::object { + bool isarray = false; + for (char const &x: source) { + if (x != 9 && x != 10 && x != 13 && x != 32) { // whitespace + if (x == 91) { // opening square bracket + isarray = true; + } + break; + } + } + if (isarray) { + return box(ak::FromJsonString(source.c_str(), ak::FillableOptions(initial, resize))); + } + else { +#ifdef _MSC_VER + FILE* file; + if (fopen_s(&file, source.c_str(), "rb") != 0) { +#else + FILE* file = fopen(source.c_str(), 
"rb"); + if (file == nullptr) { +#endif + throw std::invalid_argument(std::string("file \"") + source + std::string("\" could not be opened for reading")); + } + std::shared_ptr out(nullptr); + try { + out = FromJsonFile(file, ak::FillableOptions(initial, resize), buffersize); + } + catch (...) { + fclose(file); + throw; + } + fclose(file); + return box(out); + } + }, py::arg("source"), py::arg("initial") = 1024, py::arg("resize") = 2.0, py::arg("buffersize") = 65536); + + m.def("fromroot_nestedvector", [](ak::Index64& byteoffsets, ak::NumpyArray& rawdata, int64_t depth, int64_t itemsize, std::string format, int64_t initial, double resize) -> py::object { + return box(FromROOT_nestedvector(byteoffsets, rawdata, depth, itemsize, format, ak::FillableOptions(initial, resize))); + }, py::arg("byteoffsets"), py::arg("rawdata"), py::arg("depth"), py::arg("itemsize"), py::arg("format"), py::arg("initial") = 1024, py::arg("resize") = 2.0); + } diff --git a/studies/chep-2019/chep2019-plot.gnuplot b/studies/chep-2019/chep2019-plot.gnuplot new file mode 100644 index 0000000000..9ab7f66be2 --- /dev/null +++ b/studies/chep-2019/chep2019-plot.gnuplot @@ -0,0 +1,10 @@ +set term svg +set output "read_ttree.svg" + +set nokey + +set size ratio 0.5 + +set logscale y + +plot [-0.5:3.5][0.1:2000] "read_rntuple.dat" t "RNTuple + new awkward" with lines, "read_ttree_old.dat" t "TTree in uproot + old awkward" with lines, "read_ttree_new.dat" t "TTree in uproot + new awkward" with lines, "read_pyobj_old.dat" t "Python objects in old awkward" with lines, "read_pyobj_new.dat" t "Python objects in new awkward" with lines, "read_json_old.dat" t "JSON in old awkward" with lines, "read_json_new.dat" t "JSON in new awkward" with lines diff --git a/studies/chep-2019/chep2019-studies-2.py b/studies/chep-2019/chep2019-studies-2.py new file mode 100644 index 0000000000..8bce27feb0 --- /dev/null +++ b/studies/chep-2019/chep2019-studies-2.py @@ -0,0 +1,111 @@ +import time + +import numpy + +import ROOT 
+import root_numpy +import awkward +import awkward1 +import uproot +from uproot import asgenobj, asdtype, STLVector + +num = 0 +starttime = time.time() +branch = uproot.open("data/sample-jagged0.root")["jagged0"]["branch"] +for i in range(branch.numbaskets): + result = branch.basket(i) + num += len(result) +walltime = time.time() - starttime +print("TTree uproot jagged0\t", walltime, "sec;\t", num/walltime/1e6, "million floats/sec") + +file = ROOT.TFile("data/sample-jagged0.root") +tree = file.Get("jagged0") +starttime = time.time() +for i in range(branch.numbaskets): + result = root_numpy.tree2array(tree, "branch", start=branch.basket_entrystart(i), stop=branch.basket_entrystop(i)) +walltime = time.time() - starttime +print("TTree root_numpy jagged0\t", walltime, "sec;\t", num/walltime/1e6, "million floats/sec") + +num = 0 +starttime = time.time() +branch = uproot.open("data/sample-jagged1.root")["jagged1"]["branch"] +for i in range(branch.numbaskets): + result = branch.basket(i) + num += len(result.content) +walltime = time.time() - starttime +print("TTree OLD jagged1\t", walltime, "sec;\t", num/walltime/1e6, "million floats/sec") + +num = 0 +starttime = time.time() +branch = uproot.open("data/sample-jagged1.root")["jagged1"]["branch"] +for i in range(branch.numbaskets): + jagged = branch.basket(i, uproot.asdebug) + byteoffsets = awkward1.layout.Index64(jagged.offsets) + rawdata = awkward1.layout.NumpyArray(jagged.content[6:]) + result = awkward1.layout.fromroot_nestedvector(byteoffsets, rawdata, 1, numpy.dtype(">f").itemsize, ">f") + q = numpy.asarray(result.content).astype("f").itemsize, ">f") + q = numpy.asarray(result.content.content).astype("f4")))), branch._context, 6)) + q = awkward.fromiter(jagged) + num += len(q.content.content.content) +walltime = time.time() - starttime +print("TTree OLD jagged3\t", walltime, "sec;\t", num/walltime/1e6, "million floats/sec") + +num = 0 +starttime = time.time() +branch = 
uproot.open("data/sample-jagged3.root")["jagged3"]["branch"] +for i in range(5): + jagged = branch.basket(i, uproot.asdebug) + byteoffsets = awkward1.layout.Index64(jagged.offsets) + rawdata = awkward1.layout.NumpyArray(jagged.content[6:]) + result = awkward1.layout.fromroot_nestedvector(byteoffsets, rawdata, 3, numpy.dtype(">f").itemsize, ">f") + q = numpy.asarray(result.content.content.content).astype(" +#include +#include + +#include "awkward/Identity.h" +#include "awkward/array/RawArray.h" +#include "awkward/array/ListOffsetArray.h" + +#include "ROOT/RNTupleModel.hxx" +#include "ROOT/RNTupleMetrics.hxx" +#include "ROOT/RNTupleOptions.hxx" +#include "ROOT/RNTupleUtil.hxx" +#include "ROOT/RNTuple.hxx" +#include "ROOT/RNTupleView.hxx" +#include "ROOT/RNTupleDS.hxx" +#include "ROOT/RNTupleDescriptor.hxx" + +#define LENJAGGED0 1073741824 +#define LENJAGGED1 134217728 +#define LENJAGGED2 16777216 +#define LENJAGGED3 2097152 + +#define kDefaultClusterSizeEntries 64000 + +namespace ak = awkward; + +template +void fillpages(T* array, V& view, int64_t& offset, int64_t length, int64_t shift) { + int64_t current = 0; + while (current < length) { + T* data = (T*)view.fField.Map(offset + current); + int32_t num = view.fField.fPrincipalColumn->fCurrentPage.GetNElements(); + int32_t skipped = (offset + current) - view.fField.fPrincipalColumn->fCurrentPage.GetGlobalRangeFirst(); + int32_t remaining = num - skipped; + if (current + remaining > length) { + remaining = length - current; + } + if (remaining > 0) { + memcpy(&array[current + shift], data, remaining*sizeof(T)); + } + current += remaining; + } + offset += current; +} + +double jagged0() { + double total_length = 0.0; + + auto model = ROOT::Experimental::RNTupleModel::Create(); + ROOT::Experimental::RNTupleReadOptions options; + auto ntuple = ROOT::Experimental::RNTupleReader::Open(std::move(model), "jagged0", "data/sample-jagged0.ntuple", options); + auto view0 = ntuple->GetViewCollection("field"); + + int64_t offset0 
= 0; + for (uint64_t entry = 0; entry < LENJAGGED0; entry += kDefaultClusterSizeEntries) { + int64_t length = kDefaultClusterSizeEntries; + if (entry + length > LENJAGGED0) { + length = LENJAGGED0 - entry; + } + ak::RawArrayOf content(ak::Identity::none(), length); + float* rawcontent = content.ptr().get(); + fillpages(rawcontent, view0, offset0, length, 0); + total_length += length; + } + std::cout << total_length << std::endl; + return total_length; +} + +double jagged1() { + double total_length = 0.0; + + auto model = ROOT::Experimental::RNTupleModel::Create(); + ROOT::Experimental::RNTupleReadOptions options; + auto ntuple = ROOT::Experimental::RNTupleReader::Open(std::move(model), "jagged1", "data/sample-jagged1.ntuple", options); + auto view1 = ntuple->GetViewCollection("field"); + auto view0 = view1.GetView("float"); + + int64_t offset1 = 0; + int64_t offset0 = 0; + for (int64_t entry = 0; entry < LENJAGGED1; entry += kDefaultClusterSizeEntries) { + int64_t length = kDefaultClusterSizeEntries; + if (entry + length > LENJAGGED1) { + length = LENJAGGED1 - entry; + } + ak::Index32 offsets1(length + 1); + int32_t* rawoffsets1 = offsets1.ptr().get(); + rawoffsets1[0] = 0; + fillpages(rawoffsets1, view1, offset1, length, 1); + + length = rawoffsets1[length]; + ak::RawArrayOf content(ak::Identity::none(), length); + float* rawcontent = content.borrow(0); + fillpages(rawcontent, view0, offset0, length, 0); + + ak::ListOffsetArray32 done(ak::Identity::none(), offsets1, content.shallow_copy()); + total_length += length; + } + + std::cout << total_length << std::endl; + return total_length; +} + +double jagged2() { + double total_length = 0.0; + + auto model = ROOT::Experimental::RNTupleModel::Create(); + ROOT::Experimental::RNTupleReadOptions options; + auto ntuple = ROOT::Experimental::RNTupleReader::Open(std::move(model), "jagged2", "data/sample-jagged2.ntuple", options); + auto view2 = ntuple->GetViewCollection("field"); + auto view1 = 
view2.GetViewCollection("std::vector"); + auto view0 = view1.GetView("float"); + + int64_t offset2 = 0; + int64_t offset1 = 0; + int64_t offset0 = 0; + for (int64_t entry = 0; entry < LENJAGGED2; entry += kDefaultClusterSizeEntries) { + int64_t length = kDefaultClusterSizeEntries; + if (entry + length > LENJAGGED2) { + length = LENJAGGED2 - entry; + } + ak::Index32 offsets2(length + 1); + int32_t* rawoffsets2 = offsets2.ptr().get(); + rawoffsets2[0] = 0; + fillpages(rawoffsets2, view2, offset2, length, 1); + + length = rawoffsets2[length]; + ak::Index32 offsets1(length + 1); + int32_t* rawoffsets1 = offsets1.ptr().get(); + rawoffsets1[0] = 0; + fillpages(rawoffsets1, view1, offset1, length, 1); + + length = rawoffsets1[length]; + ak::RawArrayOf content(ak::Identity::none(), length); + float* rawcontent = content.borrow(0); + fillpages(rawcontent, view0, offset0, length, 0); + + ak::ListOffsetArray32 tmp(ak::Identity::none(), offsets1, content.shallow_copy()); + ak::ListOffsetArray32 done(ak::Identity::none(), offsets2, tmp.shallow_copy()); + total_length += length; + } + + std::cout << total_length << std::endl; + return total_length; +} + +double jagged3() { + double total_length = 0.0; + + auto model = ROOT::Experimental::RNTupleModel::Create(); + ROOT::Experimental::RNTupleReadOptions options; + auto ntuple = ROOT::Experimental::RNTupleReader::Open(std::move(model), "jagged3", "data/sample-jagged3.ntuple", options); + auto view3 = ntuple->GetViewCollection("field"); + auto view2 = view3.GetViewCollection("std::vector>"); + auto view1 = view2.GetViewCollection("std::vector"); + auto view0 = view1.GetView("float"); + + int64_t offset3 = 0; + int64_t offset2 = 0; + int64_t offset1 = 0; + int64_t offset0 = 0; + for (int64_t entry = 0; entry < LENJAGGED3; entry += kDefaultClusterSizeEntries) { + int64_t length = kDefaultClusterSizeEntries; + if (entry + length > LENJAGGED3) { + length = LENJAGGED3 - entry; + } + ak::Index32 offsets3(length + 1); + int32_t* 
rawoffsets3 = offsets3.ptr().get(); + rawoffsets3[0] = 0; + fillpages(rawoffsets3, view3, offset3, length, 1); + + length = rawoffsets3[length]; + ak::Index32 offsets2(length + 1); + int32_t* rawoffsets2 = offsets2.ptr().get(); + rawoffsets2[0] = 0; + fillpages(rawoffsets2, view2, offset2, length, 1); + + length = rawoffsets2[length]; + ak::Index32 offsets1(length + 1); + int32_t* rawoffsets1 = offsets1.ptr().get(); + rawoffsets1[0] = 0; + fillpages(rawoffsets1, view1, offset1, length, 1); + + length = rawoffsets1[length]; + ak::RawArrayOf content(ak::Identity::none(), length); + float* rawcontent = content.borrow(0); + fillpages(rawcontent, view0, offset0, length, 0); + + ak::ListOffsetArray32 tmp1(ak::Identity::none(), offsets1, content.shallow_copy()); + ak::ListOffsetArray32 tmp2(ak::Identity::none(), offsets2, tmp1.shallow_copy()); + ak::ListOffsetArray32 done(ak::Identity::none(), offsets3, tmp2.shallow_copy()); + total_length += length; + } + + std::cout << total_length << std::endl; + return total_length; +} + +int main() { + { + auto start0 = std::chrono::high_resolution_clock::now(); + double num0 = jagged0(); + auto stop0 = std::chrono::high_resolution_clock::now(); + double walltime0 = std::chrono::duration(stop0 - start0).count(); + std::cout << "jagged0 " << walltime0 << "sec;\t" << num0/walltime0/1e6 << " million floats/sec" << std::endl; + } + { + auto start1 = std::chrono::high_resolution_clock::now(); + double num1 = jagged1(); + auto stop1 = std::chrono::high_resolution_clock::now(); + double walltime1 = std::chrono::duration(stop1 - start1).count(); + std::cout << "jagged1 " << walltime1 << "sec;\t" << num1/walltime1/1e6 << " million floats/sec" << std::endl; + } + { + auto start2 = std::chrono::high_resolution_clock::now(); + double num2 = jagged2(); + auto stop2 = std::chrono::high_resolution_clock::now(); + double walltime2 = std::chrono::duration(stop2 - start2).count(); + std::cout << "jagged2 " << walltime2 << "sec;\t" << 
num2/walltime2/1e6 << " million floats/sec" << std::endl; + } + { + auto start3 = std::chrono::high_resolution_clock::now(); + double num3 = jagged3(); + auto stop3 = std::chrono::high_resolution_clock::now(); + double walltime3 = std::chrono::duration(stop3 - start3).count(); + std::cout << "jagged3 " << walltime3 << "sec;\t" << num3/walltime3/1e6 << " million floats/sec" << std::endl; + } + return 0; +} diff --git a/studies/chep-2019/chep2019-studies.py b/studies/chep-2019/chep2019-studies.py new file mode 100644 index 0000000000..1bbf655e98 --- /dev/null +++ b/studies/chep-2019/chep2019-studies.py @@ -0,0 +1,300 @@ +import time +import json + +import numpy + +import awkward +import awkward1 + +content = numpy.fromfile(open("data/sample-content.float32", "rb"), dtype=numpy.float32) +offsets1 = numpy.fromfile(open("data/sample-offsets1.int64", "rb"), dtype=numpy.int64) +offsets2 = numpy.fromfile(open("data/sample-offsets2.int64", "rb"), dtype=numpy.int64) +offsets3 = numpy.fromfile(open("data/sample-offsets3.int64", "rb"), dtype=numpy.int64) + +array0 = awkward.JaggedArray.fromoffsets(offsets3, + awkward.JaggedArray.fromoffsets(offsets2, + awkward.JaggedArray.fromoffsets(offsets1, + content))) + +array1 = awkward1.layout.ListOffsetArray64( + awkward1.layout.Index64(offsets3), + awkward1.layout.ListOffsetArray64( + awkward1.layout.Index64(offsets2), + awkward1.layout.ListOffsetArray64( + awkward1.layout.Index64(offsets1), + awkward1.layout.NumpyArray(content)))) + +############################# slicing at each depth + +if False: + FRAC = 1 + REPS = 100 + starttime = time.time() + for i in range(REPS): + q = array0[1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array0[1:]\t", walltime, "sec;\t", len(content)/walltime/1e6, "million floats/sec") + + FRAC = 1 + REPS = 100 + starttime = time.time() + for i in range(REPS): + q = array1[1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1[1:]\t", walltime, "sec;\t", 
len(content)/walltime/1e6, "million floats/sec") + + FRAC = 1 + REPS = 10 + starttime = time.time() + for i in range(REPS): + q = array0[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array0[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e6, "million floats/sec") + + FRAC = 1 + REPS = 10 + starttime = time.time() + for i in range(REPS): + q = array1[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e6, "million floats/sec") + + print("array0[:, :, 1:] can't be done") + + FRAC = 1 + REPS = 5 + starttime = time.time() + for i in range(REPS): + q = array1[:, :, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1[:, :, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e6, "million floats/sec") + + print("array0[:, :, :, 1:] can't be done") + + REPS = 2 + FRAC = 2 + tmp = array1[:len(array1) // FRAC] + starttime = time.time() + for i in range(REPS): + q = tmp[:, :, :, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1[:, :, :, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e6, "million floats/sec") + +############################# slicing at first inner depth + +if False: + FRAC = 2 + REPS = 100 + tmp = array0.content.content[len(array0.content.content) // FRAC] + starttime = time.time() + for i in range(REPS): + q = tmp # array0.content.content[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array0.content.content[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e12, "trillion floats/sec") + + FRAC = 2 + REPS = 100 + tmp = array1.content.content[len(array1.content.content) // FRAC] + starttime = time.time() + for i in range(REPS): + q = tmp # array1.content.content[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1.content.content[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e12, "trillion floats/sec") + + FRAC = 2 + REPS = 100 + tmp = array0.content[len(array0.content) // FRAC] 
+ starttime = time.time() + for i in range(REPS): + q = tmp[:, 1:] # array0.content[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array0.content[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e12, "trillion floats/sec") + + FRAC = 2 + REPS = 100 + tmp = array1.content[len(array1.content) // FRAC] + starttime = time.time() + for i in range(REPS): + q = tmp[:, 1:] # array1.content[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1.content[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e12, "trillion floats/sec") + + FRAC = 2 + REPS = 100 + tmp = array0[len(array0) // FRAC] + starttime = time.time() + for i in range(REPS): + q = tmp[:, 1:] # array0[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array0[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e12, "trillion floats/sec") + + FRAC = 2 + REPS = 100 + tmp = array1[len(array1) // FRAC] + starttime = time.time() + for i in range(REPS): + q = tmp[:, 1:] # array1[:, 1:] + walltime = (time.time() - starttime)*FRAC/REPS + print("array1[:, 1:]\t", walltime, "sec;\t", len(content)/walltime/1e12, "trillion floats/sec") + +############################# from Python iterable +print("from Python iterable") + +pyobj0 = awkward1.tolist(array1.content.content.content[:2000000]) # 200000000 takes 4 sec +sizepyobj0 = len(pyobj0) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(pyobj0) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(pyobj0)\t", walltime, "sec;\t", sizepyobj0/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromiter(pyobj0, initial=sizepyobj0+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromiter(pyobj0)\t", walltime, "sec;\t", sizepyobj0/walltime/1e6, "million floats/sec") + +pyobj1 = awkward1.tolist(array1.content.content[:200000]) # 200000 takes 1 sec +sizepyobj1 = sum(len(x) for 
x in pyobj1) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(pyobj1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(pyobj1)\t", walltime, "sec;\t", sizepyobj1/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromiter(pyobj1, initial=sizepyobj1+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromiter(pyobj1)\t", walltime, "sec;\t", sizepyobj1/walltime/1e6, "million floats/sec") + +pyobj2 = awkward1.tolist(array1.content[:200000]) # 20000 takes 1 sec +sizepyobj2 = sum(sum(len(y) for y in x) for x in pyobj2) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(pyobj2) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(pyobj2)\t", walltime, "sec;\t", sizepyobj2/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromiter(pyobj2, initial=sizepyobj2+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromiter(pyobj2)\t", walltime, "sec;\t", sizepyobj2/walltime/1e6, "million floats/sec") + +pyobj3 = awkward1.tolist(array1[:20000]) # 2000 takes 1 sec +sizepyobj3 = sum(sum(sum(len(z) for z in y) for y in x) for x in pyobj3) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(pyobj3) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(pyobj3)\t", walltime, "sec;\t", sizepyobj3/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromiter(pyobj3, initial=sizepyobj3+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromiter(pyobj3)\t", walltime, "sec;\t", sizepyobj3/walltime/1e6, "million floats/sec") + +############################# from JSON +print("from JSON") + +pyobj0 = 
awkward1.tolist(array1.content.content.content[:2000000]) # 200000000 takes 4 sec +sizejobj0 = len(pyobj0) +jobj0 = json.dumps(pyobj0) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(json.loads(jobj0)) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(json.loads(jobj0))\t", walltime, "sec;\t", sizejobj0/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromjson(jobj0, initial=sizejobj0+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromjson(jobj0)\t", walltime, "sec;\t", sizejobj0/walltime/1e6, "million floats/sec") + +pyobj1 = awkward1.tolist(array1.content.content[:200000]) # 200000 takes 1 sec +sizejobj1 = sum(len(x) for x in pyobj1) +jobj1 = json.dumps(pyobj1) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(json.loads(jobj1)) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(json.loads(jobj1))\t", walltime, "sec;\t", sizejobj1/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromjson(jobj1, initial=sizejobj1+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromjson(jobj1)\t", walltime, "sec;\t", sizejobj1/walltime/1e6, "million floats/sec") + +pyobj2 = awkward1.tolist(array1.content[:200000]) # 20000 takes 1 sec +sizejobj2 = sum(sum(len(y) for y in x) for x in pyobj2) +jobj2 = json.dumps(pyobj2) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(json.loads(jobj2)) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(json.loads(jobj2))\t", walltime, "sec;\t", sizejobj2/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromjson(jobj2, initial=sizejobj2+1) +walltime = (time.time() - starttime)*FRAC/REPS 
+print("awkward1.fromjson(jobj2)\t", walltime, "sec;\t", sizejobj2/walltime/1e6, "million floats/sec") + +pyobj3 = awkward1.tolist(array1[:20000]) # 2000 takes 1 sec +sizejobj3 = sum(sum(sum(len(z) for z in y) for y in x) for x in pyobj3) +jobj3 = json.dumps(pyobj3) + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward.fromiter(json.loads(jobj3)) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward.fromiter(json.loads(jobj3))\t", walltime, "sec;\t", sizejobj3/walltime/1e6, "million floats/sec") + +FRAC = 1 +REPS = 2 +starttime = time.time() +for i in range(REPS): + q = awkward1.fromjson(jobj3, initial=sizejobj3+1) +walltime = (time.time() - starttime)*FRAC/REPS +print("awkward1.fromjson(jobj3)\t", walltime, "sec;\t", sizejobj3/walltime/1e6, "million floats/sec") diff --git a/studies/chep-2019/make-numerical-json.py b/studies/chep-2019/make-numerical-json.py new file mode 100644 index 0000000000..ae17bab143 --- /dev/null +++ b/studies/chep-2019/make-numerical-json.py @@ -0,0 +1,23 @@ +import numpy + +maxbytes = 4*1024**3 # 4 GB + +content = numpy.empty(maxbytes // numpy.dtype(numpy.float32).itemsize, dtype=numpy.float32) +for i in range(0, len(content), len(content) // 8): + content[i : i + len(content) // 8] = numpy.random.normal(0, 1, len(content) // 8) +content.tofile(open("sample-content.float32", "wb")) + +offsets1 = numpy.arange(0, len(content) + 8, 8, dtype=numpy.int64) +offsets1[1:-1] += numpy.random.randint(0, 8, len(offsets1) - 2) +offsets1.tofile(open("sample-offsets1.int64", "wb")) + +offsets2 = numpy.arange(0, len(offsets1) - 1 + 8, 8, dtype=numpy.int64) +offsets2[1:-1] += numpy.random.randint(0, 8, len(offsets2) - 2) +offsets2.tofile(open("sample-offsets2.int64", "wb")) + +offsets3 = numpy.arange(0, len(offsets2) - 1 + 8, 8, dtype=numpy.int64) +offsets3[1:-1] += numpy.random.randint(0, 8, len(offsets3) - 2) +offsets3.tofile(open("sample-offsets3.int64", "wb")) + +# import awkward +# a = 
awkward.JaggedArray.fromoffsets(offsets3, awkward.JaggedArray.fromoffsets(offsets2, awkward.JaggedArray.fromoffsets(offsets1, content))) diff --git a/studies/chep-2019/read-numerical-json.py b/studies/chep-2019/read-numerical-json.py new file mode 100644 index 0000000000..9246865d2f --- /dev/null +++ b/studies/chep-2019/read-numerical-json.py @@ -0,0 +1,24 @@ +import os +os.chdir("..") +print(os.getcwd()) + +import numpy +import awkward1 + +content = numpy.fromfile(open("studies/sample-content.float32", "rb"), dtype=numpy.float32) +offsets1 = numpy.fromfile(open("studies/sample-offsets1.int64", "rb"), dtype=numpy.int64) +offsets2 = numpy.fromfile(open("studies/sample-offsets2.int64", "rb"), dtype=numpy.int64) +offsets3 = numpy.fromfile(open("studies/sample-offsets3.int64", "rb"), dtype=numpy.int64) + +array = awkward1.layout.ListOffsetArray64( + awkward1.layout.Index64(offsets3), + awkward1.layout.ListOffsetArray64( + awkward1.layout.Index64(offsets2), + awkward1.layout.ListOffsetArray64( + awkward1.layout.Index64(offsets1), + awkward1.layout.NumpyArray(content)))) + +array.tojson("studies/sample-jagged3.json", maxdecimals=5) +array.content.tojson("studies/sample-jagged2.json", maxdecimals=5) +array.content.content.tojson("studies/sample-jagged1.json", maxdecimals=5) +array.content.content.content.tojson("studies/sample-jagged0.json", maxdecimals=5) diff --git a/studies/chep-2019/read_json_new.dat b/studies/chep-2019/read_json_new.dat new file mode 100644 index 0000000000..0fc29dea18 --- /dev/null +++ b/studies/chep-2019/read_json_new.dat @@ -0,0 +1,4 @@ +0 14.376991625169566 +1 12.329521232171187 +2 11.059646572719467 +3 10.472477631753778 diff --git a/studies/chep-2019/read_json_old.dat b/studies/chep-2019/read_json_old.dat new file mode 100644 index 0000000000..8661d998dc --- /dev/null +++ b/studies/chep-2019/read_json_old.dat @@ -0,0 +1,4 @@ +0 0.5298200665542955 +1 0.4031612656516802 +2 0.3715163987964306 +3 0.3775490542293258 diff --git 
a/studies/chep-2019/read_pyobj_new.dat b/studies/chep-2019/read_pyobj_new.dat new file mode 100644 index 0000000000..eb4cb0f24e --- /dev/null +++ b/studies/chep-2019/read_pyobj_new.dat @@ -0,0 +1,4 @@ +0 3.748414200133741 +1 13.546941380017987 +2 19.87186089758016 +3 19.443585721788086 diff --git a/studies/chep-2019/read_pyobj_old.dat b/studies/chep-2019/read_pyobj_old.dat new file mode 100644 index 0000000000..f8a86bbd6e --- /dev/null +++ b/studies/chep-2019/read_pyobj_old.dat @@ -0,0 +1,4 @@ +0 0.6758153738223585 +1 0.48330834850132 +2 0.4687868015176278 +3 0.462845896680771 diff --git a/studies/chep-2019/read_rntuple.dat b/studies/chep-2019/read_rntuple.dat new file mode 100644 index 0000000000..7537122f11 --- /dev/null +++ b/studies/chep-2019/read_rntuple.dat @@ -0,0 +1,4 @@ +0 540.276 +1 1257.33 +2 902.673 +3 450.659 diff --git a/studies/chep-2019/read_ttree.pdf b/studies/chep-2019/read_ttree.pdf new file mode 100644 index 0000000000..511429903c Binary files /dev/null and b/studies/chep-2019/read_ttree.pdf differ diff --git a/studies/chep-2019/read_ttree.png b/studies/chep-2019/read_ttree.png new file mode 100644 index 0000000000..5cbcd1aa51 Binary files /dev/null and b/studies/chep-2019/read_ttree.png differ diff --git a/studies/chep-2019/read_ttree.svg b/studies/chep-2019/read_ttree.svg new file mode 100644 index 0000000000..065f7f777e --- /dev/null +++ b/studies/chep-2019/read_ttree.svg @@ -0,0 +1,536 @@ + + + + + + image/svg+xml + + Gnuplot + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Gnuplot + Produced by GNUPLOT 5.2 patchlevel 2 + + + + 0.1 + + + + 1 + + + + 10 + + + + 100 + + + + 1000 + + + + + + + + + + + + + + + + + + flatNumpy + jaggedarray + doublyjagged + triplyjagged + fromiter(pyobj) in old Awkward + fromiter(JSON) in old Awkward + fromiter(pyobj) in new Awkward + fromiter(JSON) in new Awkward + read TTree from uproot + new Awkward + read TTree from uproot + old Awkward + read new ROOT RNTuple into Awkward + rate in millions 
of floats/sec (higher is better) + diff --git a/studies/chep-2019/read_ttree_new.dat b/studies/chep-2019/read_ttree_new.dat new file mode 100644 index 0000000000..f1b1673f4e --- /dev/null +++ b/studies/chep-2019/read_ttree_new.dat @@ -0,0 +1,4 @@ +0 226.8612387 +1 61.80874402 +2 49.95179812 +3 20.59638241 diff --git a/studies/chep-2019/read_ttree_old.dat b/studies/chep-2019/read_ttree_old.dat new file mode 100644 index 0000000000..3cf36b1257 --- /dev/null +++ b/studies/chep-2019/read_ttree_old.dat @@ -0,0 +1,4 @@ +0 226.8612387 +1 24.90447011 +2 0.2915979096 +3 0.3016240853 diff --git a/studies/chep-2019/read_ttree_rootnumpy.dat b/studies/chep-2019/read_ttree_rootnumpy.dat new file mode 100644 index 0000000000..b8282a7f18 --- /dev/null +++ b/studies/chep-2019/read_ttree_rootnumpy.dat @@ -0,0 +1,3 @@ +0 13.82373862 +1 11.99450228 +2 13.68735568 diff --git a/studies/chep-2019/write_rntuple.cpp b/studies/chep-2019/write_rntuple.cpp new file mode 100644 index 0000000000..cfe69666e6 --- /dev/null +++ b/studies/chep-2019/write_rntuple.cpp @@ -0,0 +1,47 @@ +// c++ --std=c++14 -I../../jblomer-root-build/include -L../../jblomer-root-build/lib -L/home/pivarski/miniconda3/lib -L/usr/lib/x86_64-linux-gnu -lc -lpcre -llz4 -lzstd -lsqlite3 -lssl -ltbb -lcairo -lrt -lCore -lImt -lRIO -lNet -lHist -lGraf -lGraf3d -lGpad -lROOTVecOps -lTree -lTreePlayer -lRint -lPostscript -lMatrix -lPhysics -lMathCore -lThread -lMultiProc -lROOTDataFrame -pthread -lm -ldl -rdynamic -lROOTNTuple write_rntuple.cpp -o write_rntuple && ./write_rntuple + +// g++ `../../jblomer-root-build/bin/root-config --cflags --libdir --incdir --libs` write_rntuple.cpp -o write_rntuple && ./write_rntuple + +// g++ -pthread -std=c++14 -m64 -I../../jblomer-root-build/include -L../../jblomer-root-build/lib -lCore -lImt -lRIO -lNet -lHist -lGraf -lGraf3d -lGpad -lROOTVecOps -lTree -lTreePlayer -lRint -lPostscript -lMatrix -lPhysics -lMathCore -lThread -lMultiProc -lROOTDataFrame -lROOTNTuple -pthread -lm -ldl -rdynamic 
write_rntuple.cpp -o write_rntuple && ./write_rntuple
+
+#include "ROOT/RNTupleModel.hxx"
+#include "ROOT/RNTupleMetrics.hxx"
+#include "ROOT/RNTupleOptions.hxx"
+#include "ROOT/RNTupleUtil.hxx"
+#include "ROOT/RNTuple.hxx"
+#include "ROOT/RNTupleView.hxx"
+#include "ROOT/RNTupleDS.hxx"
+#include "ROOT/RNTupleDescriptor.hxx"
+
+#include <iostream>
+
+// Minimal RNTuple writing check: builds a model with one field named "data",
+// opens "sample-jagged0.ntuple" with compression level 0 and fills one entry.
+// A marker is printed (and flushed by std::endl) before each step.
+int main() {
+  std::cout << "ONE" << std::endl;
+
+  auto model = ROOT::Experimental::RNTupleModel::Create();
+
+  std::cout << "TWO" << std::endl;
+
+  // NOTE(review): the field's element type was lost from this listing; float is
+  // assumed because the companion TTree study writes float32 samples -- confirm.
+  auto fldData = model->MakeField<float>("data");
+
+  std::cout << "THREE" << std::endl;
+
+  ROOT::Experimental::RNTupleWriteOptions options;
+  options.SetCompression(0);
+
+  std::cout << "FOUR" << std::endl;
+
+  auto ntuple = ROOT::Experimental::RNTupleWriter::Recreate(std::move(model), "ntuple", "sample-jagged0.ntuple", options);
+
+  std::cout << "FIVE" << std::endl;
+
+  *fldData = 3.14;
+
+  std::cout << "SIX" << std::endl;
+
+  ntuple->Fill();
+
+  std::cout << "SEVEN" << std::endl;
+
+  return 0;
+}
diff --git a/studies/chep-2019/write_ttree.cpp b/studies/chep-2019/write_ttree.cpp
new file mode 100644
index 0000000000..2ea02e25be
--- /dev/null
+++ b/studies/chep-2019/write_ttree.cpp
@@ -0,0 +1,158 @@
+#include "TInterpreter.h"
+#include "TFile.h"
+#include "TTree.h"
+#include "TBranch.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+
+// Number of items stored in each sample file read by getdata().
+#define LENCONTENT 1073741824
+#define LENOFFSETS1 134217729
+#define LENOFFSETS2 16777217
+#define LENOFFSETS3 2097153
+
+// Reads exactly `count` items of `itemsize` bytes from the file at `path` into `dest`.
+// Prints a diagnostic to stderr and exits with EXIT_FAILURE if the file cannot be
+// opened or holds fewer items than requested, rather than handing a null FILE* to
+// fread or carrying on with a partly filled buffer.
+static void readsample(const char* path, void* dest, size_t itemsize, size_t count) {
+  FILE* f = fopen(path, "rb");
+  if (f == nullptr) {
+    std::cerr << "cannot open " << path << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+  const size_t numread = fread(dest, itemsize, count, f);
+  fclose(f);
+  if (numread != count) {
+    std::cerr << "short read from " << path << ": expected " << count << " items, got " << numread << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+}
+
+// Fills the four caller-allocated buffers from the "sample-*" files in the current
+// directory and prints the first and last element of each one.
+// Each buffer must have room for the corresponding LEN* number of items.
+void getdata(float* content, int64_t* offsets1, int64_t* offsets2, int64_t* offsets3) {
+  readsample("sample-content.float32", content, sizeof(float), LENCONTENT);
+  std::cout << "content " << content[0] << " ... " << content[LENCONTENT - 1] << std::endl;
+
+  readsample("sample-offsets1.int64", offsets1, sizeof(int64_t), LENOFFSETS1);
+  std::cout << "offsets1 " << offsets1[0] << " ... " << offsets1[LENOFFSETS1 - 1] << std::endl;
+
+  readsample("sample-offsets2.int64", offsets2, sizeof(int64_t), LENOFFSETS2);
+  std::cout << "offsets2 " << offsets2[0] << " ... " << offsets2[LENOFFSETS2 - 1] << std::endl;
+
+  readsample("sample-offsets3.int64", offsets3, sizeof(int64_t), LENOFFSETS3);
+  std::cout << "offsets3 " << offsets3[0] << " ... " << offsets3[LENOFFSETS3 - 1] << std::endl;
+}
+
+// Writes "sample-jagged0.root": one entry per element of `content`, stored in a
+// scalar float branch with compression level 0. The offsets arguments are unused.
+void ttree0(float* content, int64_t* offsets1, int64_t* offsets2, int64_t* offsets3) {
+  std::cout << "starting tree0" << std::endl;
+  float jagged0;
+  TFile* file0 = new TFile("sample-jagged0.root", "RECREATE");
+  file0->SetCompressionLevel(0);
+  TTree* tree0 = new TTree("jagged0", "");
+  // 10485760 = 10 * 1024 * 1024; presumably the branch buffer size in bytes.
+  tree0->Branch("branch", &jagged0, 10485760);
+  for (int64_t i0 = 0; i0 < LENCONTENT; i0++) {
+    jagged0 = content[i0];
+    tree0->Fill();
+  }
+  tree0->Write();
+  file0->Close();
+}
+
+// Writes "sample-jagged1.root": entry i1 holds content[offsets1[i1]] up to (not
+// including) content[offsets1[i1 + 1]] as a std::vector<float>, compression level 0.
+void ttree1(float* content, int64_t* offsets1, int64_t* offsets2, int64_t* offsets3) {
+  std::cout << "starting tree1" << std::endl;
+  gInterpreter->GenerateDictionary("vector<float>", "vector");
+  std::vector<float> jagged1;
+  TFile* file1 = new TFile("sample-jagged1.root", "RECREATE");
+  file1->SetCompressionLevel(0);
+  TTree* tree1 = new TTree("jagged1", "");
+  tree1->Branch("branch", &jagged1, 10485760);
+  for (int64_t i1 = 0; i1 < LENOFFSETS1 - 1; i1++) {
+    jagged1.clear();
+    int64_t start1 = offsets1[i1];
+    int64_t stop1 = offsets1[i1 + 1];
+    for (int64_t i0 = start1; i0 < stop1; i0++) {
+      jagged1.push_back(content[i0]);
+    }
+    tree1->Fill();
+  }
+  tree1->Write();
+  file1->Close();
+}
+
+// Writes "sample-jagged2.root": entry i2 is a vector of vectors of float, nested
+// through offsets2 and then offsets1, compression level 0.
+void ttree2(float* content, int64_t* offsets1, int64_t* offsets2, int64_t* offsets3) {
+  std::cout << "starting tree2" << std::endl;
+  gInterpreter->GenerateDictionary("vector<vector<float> >", "vector");
+  std::vector<std::vector<float>> jagged2;
+  TFile* file2 = new TFile("sample-jagged2.root", "RECREATE");
+  file2->SetCompressionLevel(0);
+  TTree* tree2 = new TTree("jagged2", "");
+  
tree2->Branch("branch", &jagged2, 10485760);
+  for (int64_t i2 = 0; i2 < LENOFFSETS2 - 1; i2++) {
+    jagged2.clear();
+    int64_t start2 = offsets2[i2];
+    int64_t stop2 = offsets2[i2 + 1];
+    for (int64_t i1 = start2; i1 < stop2; i1++) {
+      std::vector<float> tmp1;
+      int64_t start1 = offsets1[i1];
+      int64_t stop1 = offsets1[i1 + 1];
+      for (int64_t i0 = start1; i0 < stop1; i0++) {
+        tmp1.push_back(content[i0]);
+      }
+      jagged2.push_back(tmp1);
+    }
+    tree2->Fill();
+  }
+  tree2->Write();
+  file2->Close();
+}
+
+// Writes "sample-jagged3.root": entry i3 is a vector of vectors of vectors of
+// float, nested through offsets3, then offsets2, then offsets1.
+void ttree3(float* content, int64_t* offsets1, int64_t* offsets2, int64_t* offsets3) {
+  std::cout << "starting tree3" << std::endl;
+  gInterpreter->GenerateDictionary("vector<vector<vector<float> > >", "vector");
+  std::vector<std::vector<std::vector<float>>> jagged3;
+  TFile* file3 = new TFile("sample-jagged3.root", "RECREATE");
+  // Compression level 0, as in ttree0, ttree1 and ttree2, so that all four
+  // sample files are written under the same settings.
+  file3->SetCompressionLevel(0);
+  TTree* tree3 = new TTree("jagged3", "");
+  tree3->Branch("branch", &jagged3, 10485760);
+  for (int64_t i3 = 0; i3 < LENOFFSETS3 - 1; i3++) {
+    jagged3.clear();
+    int64_t start3 = offsets3[i3];
+    int64_t stop3 = offsets3[i3 + 1];
+    for (int64_t i2 = start3; i2 < stop3; i2++) {
+      std::vector<std::vector<float>> tmp2;
+      int64_t start2 = offsets2[i2];
+      int64_t stop2 = offsets2[i2 + 1];
+      for (int64_t i1 = start2; i1 < stop2; i1++) {
+        std::vector<float> tmp1;
+        int64_t start1 = offsets1[i1];
+        int64_t stop1 = offsets1[i1 + 1];
+        for (int64_t i0 = start1; i0 < stop1; i0++) {
+          tmp1.push_back(content[i0]);
+        }
+        tmp2.push_back(tmp1);
+      }
+      jagged3.push_back(tmp2);
+    }
+    tree3->Fill();
+  }
+  tree3->Write();
+  file3->Close();
+}
+
+
+// Allocates the four sample buffers, loads them with getdata() and writes the
+// four ROOT files in order of increasing nesting depth.
+int main() {
+  float* content = new float[LENCONTENT];
+  int64_t* offsets1 = new int64_t[LENOFFSETS1];
+  int64_t* offsets2 = new int64_t[LENOFFSETS2];
+  int64_t* offsets3 = new int64_t[LENOFFSETS3];
+
+  getdata(content, offsets1, offsets2, offsets3);
+
+  ttree0(content, offsets1, offsets2, offsets3);
+  ttree1(content, offsets1, offsets2, offsets3);
+  ttree2(content, offsets1, offsets2, offsets3);
+  ttree3(content, offsets1, offsets2, offsets3);
+
+  return 0;
+}
+
+// python3 -i -c 'import uproot; 
from uproot import asgenobj, asdtype, STLVector; t0 = uproot.open("data/sample-jagged0.root")["jagged0"]; t1 = uproot.open("data/sample-jagged1.root")["jagged1"]; t2 = uproot.open("data/sample-jagged2.root")["jagged2"]; t3 = uproot.open("data/sample-jagged3.root")["jagged3"]'
+// t0["branch"].array(entrystart=-100)
+// t1["branch"].array(entrystart=-100)
+// t2["branch"].array(entrystart=-100)
+// t3["branch"].array(asgenobj(STLVector(STLVector(STLVector(asdtype(">f4")))), t3["branch"]._context, 6), entrystart=-100)
diff --git a/studies/small-example.json b/studies/small-example.json
new file mode 100644
index 0000000000..17139c77aa
--- /dev/null
+++ b/studies/small-example.json
@@ -0,0 +1 @@
+[[[1.1, 2.2, 3.3], [], [4.4, 5.5]], [], [[6.6, 7.7], [8.8, 9.9]]]
diff --git a/studies/use-rapidjson.cpp b/studies/use-rapidjson.cpp
new file mode 100644
index 0000000000..c87078c1e5
--- /dev/null
+++ b/studies/use-rapidjson.cpp
@@ -0,0 +1,48 @@
+// c++ --std=c++11 use-rapidjson.cpp -o use-rapidjson && ./use-rapidjson
+
+#include <cstdio>
+#include <iostream>
+
+#include "../rapidjson/include/rapidjson/reader.h"
+#include "../rapidjson/include/rapidjson/filereadstream.h"
+
+namespace rj = rapidjson;
+
+// SAX handler that prints one line per event reported by rj::Reader.
+// Every callback returns true, which tells the reader to continue parsing
+// (returning false would stop the parse).
+class MyHandler: public rj::BaseReaderHandler<rj::UTF8<>, MyHandler> {
+ public:
+  bool Null() { std::cout << "Null()" << std::endl; return true; }
+  bool Bool(bool b) { std::cout << "Bool(" << std::boolalpha << b << ")" << std::endl; return true; }
+  bool Int(int i) { std::cout << "Int(" << i << ")" << std::endl; return true; }
+  bool Uint(unsigned u) { std::cout << "Uint(" << u << ")" << std::endl; return true; }
+  bool Int64(int64_t i) { std::cout << "Int64(" << i << ")" << std::endl; return true; }
+  bool Uint64(uint64_t u) { std::cout << "Uint64(" << u << ")" << std::endl; return true; }
+  bool Double(double d) { std::cout << "Double(" << d << ")" << std::endl; return true; }
+  bool String(const char* str, rj::SizeType length, bool copy) {
+    std::cout << "String(" << str << ", " << length << ", " << 
std::boolalpha << copy << ")" << std::endl;
+    return true;
+  }
+  bool StartObject() { std::cout << "StartObject()" << std::endl; return true; }
+  // Object member names are reported through Key() rather than String().
+  bool Key(const char* str, rj::SizeType length, bool copy) {
+    std::cout << "Key(" << str << ", " << length << ", " << std::boolalpha << copy << ")" << std::endl;
+    return true;
+  }
+  bool EndObject(rj::SizeType memberCount) { std::cout << "EndObject(" << memberCount << ")" << std::endl; return true; }
+  bool StartArray() { std::cout << "StartArray()" << std::endl; return true; }
+  bool EndArray(rj::SizeType elementCount) { std::cout << "EndArray(" << elementCount << ")" << std::endl; return true; }
+};
+
+// Parses a hard-coded JSON object with rj::Reader and lets MyHandler print each
+// event. The result of Parse() is not inspected.
+int main() {
+  const char json[] = " { \"hel\\u2012lo\" : \"wo\\u2012rld\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } ";
+
+  MyHandler handler;
+  rj::Reader reader;
+
+  rj::StringStream ss(json);
+  // Alternative input, left disabled: stream "small-example.json" from disk.
+  // FILE* fp = fopen("small-example.json", "rb");
+  // char readBuffer[65536];
+  // rj::FileReadStream ss(fp, readBuffer, sizeof(readBuffer));
+
+  reader.Parse(ss, handler);
+
+  return 0;
+}
diff --git a/studies/use-simdjson.cpp b/studies/use-simdjson.cpp
new file mode 100644
index 0000000000..68ee7b4931
--- /dev/null
+++ b/studies/use-simdjson.cpp
@@ -0,0 +1,10 @@
+#include "../simdjson/singleheader/simdjson.h"
+#include "../simdjson/singleheader/simdjson.cpp"
+
+using namespace simdjson;
+
+// Loads "small-example.json" and builds a ParsedJson from it; the parse result
+// is not examined and the command-line arguments are unused.
+int main(int argc, char *argv[]) {
+  padded_string unparsed = get_corpus("small-example.json");
+  ParsedJson parsed = build_parsed_json(unparsed);
+  return 0;
+}
diff --git a/tests/test_PR019_use_json_library.cpp b/tests/test_PR019_use_json_library.cpp
new file mode 100644
index 0000000000..76c6709165
--- /dev/null
+++ b/tests/test_PR019_use_json_library.cpp
@@ -0,0 +1,26 @@
+// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE
+
+#include "awkward/Slice.h"
+#include "awkward/fillable/FillableArray.h"
+#include "awkward/fillable/FillableOptions.h"
+
+namespace ak = awkward;
+
+// Builds a three-level nested array through FillableArray, slices it with
+// [::-1, ::2, 1:] and compares the JSON text of the result with the expected
+// string. Returns 0 on success and -1 on mismatch.
+int main(int, char**) {
+  std::vector<std::vector<std::vector<double>>> vector =
+    {{{0.0, 1.1, 2.2}, {}, {3.3, 4.4}}, {{5.5}}, {}, {{6.6, 7.7, 8.8, 9.9}}};
+
+  ak::FillableArray builder(ak::FillableOptions(1024, 2.0));
+  for (auto x : vector) builder.fill(x);
+  // NOTE(review): pointee type restored as ak::Content -- confirm against
+  // the declaration of FillableArray::snapshot().
+  std::shared_ptr<ak::Content> array = builder.snapshot();
+
+  ak::Slice slice;
+  slice.append(ak::SliceRange(ak::Slice::none(), ak::Slice::none(), -1));
+  slice.append(ak::SliceRange(ak::Slice::none(), ak::Slice::none(), 2));
+  slice.append(ak::SliceRange(1, ak::Slice::none(), ak::Slice::none()));
+
+  if (array.get()->getitem(slice).get()->tojson(false, 1) !=
+      "[[[7.7,8.8,9.9]],[],[[]],[[1.1,2.2],[4.4]]]")
+    return -1;
+  return 0;
+}
diff --git a/tests/test_PR019_use_json_library.py b/tests/test_PR019_use_json_library.py
new file mode 100644
index 0000000000..46c2c24191
--- /dev/null
+++ b/tests/test_PR019_use_json_library.py
@@ -0,0 +1,85 @@
+# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE
+
+import sys
+import os
+import json
+
+import pytest
+import numpy
+
+import awkward1
+
+# fromjson on JSON text: valid input converts to the expected lists, malformed
+# input raises ValueError.
+def test_fromstring():
+    a = awkward1.fromjson("[[1.1, 2.2, 3], [], [4, 5.5]]")
+    assert awkward1.tolist(a) == [[1.1, 2.2, 3.0], [], [4.0, 5.5]]
+
+    with pytest.raises(ValueError):
+        awkward1.fromjson("[[1.1, 2.2, 3], [blah], [4, 5.5]]")
+
+# fromjson on a file name: a valid file is read, while a missing file and a file
+# with unbalanced brackets both raise ValueError.
+def test_fromfile(tmp_path):
+    with open(os.path.join(str(tmp_path), "tmp1.json"), "w") as f:
+        f.write("[[1.1, 2.2, 3], [], [4, 5.5]]")
+
+    a = awkward1.fromjson(os.path.join(str(tmp_path), "tmp1.json"))
+    assert awkward1.tolist(a) == [[1.1, 2.2, 3.0], [], [4.0, 5.5]]
+
+    with pytest.raises(ValueError):
+        awkward1.fromjson("nonexistent.json")
+
+    with open(os.path.join(str(tmp_path), "tmp2.json"), "w") as f:
+        f.write("[[1.1, 2.2, 3], []], [4, 5.5]]")
+
+    with pytest.raises(ValueError):
+        awkward1.fromjson(os.path.join(str(tmp_path), "tmp2.json"))
+
+def test_tostring():
+    content = awkward1.layout.NumpyArray(numpy.arange(2*3*5*7).reshape(-1, 7))
+    offsetsA = 
numpy.arange(0, 2*3*5 + 5, 5)
+    offsetsB = numpy.arange(0, 2*3 + 3, 3)
+    startsA, stopsA = offsetsA[:-1], offsetsA[1:]
+    startsB, stopsB = offsetsB[:-1], offsetsB[1:]
+
+    listoffsetarrayA32 = awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(offsetsA), content)
+    listarrayA32 = awkward1.layout.ListArray32(awkward1.layout.Index32(startsA), awkward1.layout.Index32(stopsA), content)
+    modelA = numpy.arange(2*3*5*7).reshape(2*3, 5, 7)
+
+    listoffsetarrayB32 = awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(offsetsB), listoffsetarrayA32)
+    listarrayB32 = awkward1.layout.ListArray32(awkward1.layout.Index32(startsB), awkward1.layout.Index32(stopsB), listarrayA32)
+    modelB = numpy.arange(2*3*5*7).reshape(2, 3, 5, 7)
+
+    assert content.tojson() == json.dumps(awkward1.tolist(content), separators=(",", ":"))
+    assert listoffsetarrayA32.tojson() == json.dumps(modelA.tolist(), separators=(",", ":"))
+    assert listoffsetarrayB32.tojson() == json.dumps(modelB.tolist(), separators=(",", ":"))
+    # This round trip used to be a bare comparison whose result was discarded.
+    # It is asserted on the parsed value so that the check does not depend on
+    # whether the writer prints a whole-number float as "3" or "3.0".
+    text = "[[1.1,2.2,3],[],[4,5.5]]"
+    assert json.loads(awkward1.tojson(awkward1.fromjson(text))) == json.loads(text)
+
+# tojson with a destination path: the file written must parse back to the
+# values that were converted.
+def test_tofile(tmp_path):
+    awkward1.tojson(awkward1.fromjson("[[1.1,2.2,3],[],[4,5.5]]"), os.path.join(str(tmp_path), "tmp1.json"))
+
+    with open(os.path.join(str(tmp_path), "tmp1.json"), "r") as f:
+        # Previously a bare comparison with no assert, so nothing was verified.
+        assert json.loads(f.read()) == json.loads("[[1.1,2.2,3],[],[4,5.5]]")
+
+def test_root_nestedvector():
+    # fromcounts([3, 2], fromcounts([1, 0, 2, 2, 1], [123, 99, 123, 99, 123, 123]))
+    #
+
+    # outer offsets: [0, 3, 5]
+    # inner offsets: [0, 1, 1, 3, 5, 6]
+
+    byteoffsets = awkward1.layout.Index64(numpy.array([0, 28, 52], dtype=numpy.int64))
+    rawdata = awkward1.layout.NumpyArray(numpy.array([
+        0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 99, 0, 0, 0, 123,
+        0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 99, 0, 0, 0, 123, 0, 0, 0, 1, 0, 0, 0, 123
+        ], dtype=numpy.uint8))
+
+    result = awkward1.layout.fromroot_nestedvector(byteoffsets, rawdata, 2, numpy.dtype(">i").itemsize, ">i")
+    assert 
numpy.asarray(result.offsets).tolist() == [0, 3, 5]
+    assert numpy.asarray(result.content.offsets).tolist() == [0, 1, 1, 3, 5, 6]
+    assert numpy.asarray(result.content.content).tolist() == [123, 99, 123, 99, 123, 123]
+    assert awkward1.tolist(result) == [[[123], [], [99, 123]], [[99, 123], [123]]]
+
+# fromiter followed by tolist must give back the input for booleans, integers,
+# mixed integers and floats (compared as floats), and lists nested one and two
+# levels deep, including empty lists.
+def test_fromiter():
+    assert awkward1.tolist(awkward1.fromiter([True, True, False, False, True])) == [True, True, False, False, True]
+    assert awkward1.tolist(awkward1.fromiter([5, 4, 3, 2, 1])) == [5, 4, 3, 2, 1]
+    assert awkward1.tolist(awkward1.fromiter([5, 4, 3.14, 2.22, 1.23])) == [5.0, 4.0, 3.14, 2.22, 1.23]
+    assert awkward1.tolist(awkward1.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])) == [[1.1, 2.2, 3.3], [], [4.4, 5.5]]
+    assert awkward1.tolist(awkward1.fromiter([[[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]])) == [[[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]]