From ec09c74e5c84a2d8cedc1164e4cfda3c31559bf2 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Sun, 4 Oct 2020 17:37:36 -0400 Subject: [PATCH] Use arrow C++ for CSV parsing in JS --- cmake/arrow.txt.in | 2 +- cmake/arrow/CMakeLists.txt | 31 +- cmake/arrow/config.h | 6 +- cmake/modules/FindFlatbuffers.cmake | 1 - cmake/modules/FindPyArrow.cmake | 29 +- cpp/perspective/CMakeLists.txt | 12 +- cpp/perspective/src/cpp/arrow_csv.cpp | 60 ++ cpp/perspective/src/cpp/arrow_loader.cpp | 162 +++-- cpp/perspective/src/cpp/arrow_writer.cpp | 18 + cpp/perspective/src/cpp/emscripten.cpp | 89 ++- .../vendor/arrow_single_threaded_reader.cpp | 623 ++++++++++++++++++ cpp/perspective/src/cpp/view.cpp | 26 +- .../src/include/perspective/arrow_csv.h | 28 + .../src/include/perspective/arrow_loader.h | 17 +- .../src/include/perspective/binding.h | 1 + .../vendor/arrow_single_threaded_reader.h | 61 ++ .../test/results/linux.docker.json | 234 +++---- .../test/results/linux.docker.json | 6 +- .../test/results/linux.docker.json | 46 +- packages/perspective/README.md | 3 - packages/perspective/package.json | 4 +- packages/perspective/src/js/perspective.js | 40 +- .../perspective/src/js/view_formatters.js | 31 +- scripts/bench.js | 2 +- 24 files changed, 1269 insertions(+), 263 deletions(-) create mode 100644 cpp/perspective/src/cpp/arrow_csv.cpp create mode 100644 cpp/perspective/src/cpp/vendor/arrow_single_threaded_reader.cpp create mode 100644 cpp/perspective/src/include/perspective/arrow_csv.h create mode 100644 cpp/perspective/src/include/perspective/vendor/arrow_single_threaded_reader.h diff --git a/cmake/arrow.txt.in b/cmake/arrow.txt.in index a8ec274da3..208d4a1eb2 100644 --- a/cmake/arrow.txt.in +++ b/cmake/arrow.txt.in @@ -5,7 +5,7 @@ project(arrow-download NONE) include(ExternalProject) ExternalProject_Add(apachearrow GIT_REPOSITORY https://github.com/apache/arrow.git - GIT_TAG apache-arrow-0.16.0 + GIT_TAG apache-arrow-1.0.1 SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src" BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build" CONFIGURE_COMMAND "" diff --git a/cmake/arrow/CMakeLists.txt b/cmake/arrow/CMakeLists.txt index ce6d131c3a..51c11ac7f9 100644 --- a/cmake/arrow/CMakeLists.txt +++ b/cmake/arrow/CMakeLists.txt @@ -1,9 +1,14 @@ set(CMAKE_SHARED_LIBRARY_SUFFIX .so) set(ARROW_SRCS - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/builder.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_base.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_binary.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_decimal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_dict.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_nested.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_primitive.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_adaptive.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_base.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_binary.cc @@ -13,11 +18,14 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_primitive.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_union.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/concatenate.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/dict_internal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/data.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/diff.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/util.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/validate.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/buffer.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/chunked_array.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compare.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/device.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/extension_type.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/memory_pool.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc @@ -34,9 +42,9 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/converter.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/chunker.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_builder.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_decoder.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/options.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/parser.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/filesystem.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/localfs.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/mockfs.cc @@ -50,17 +58,22 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/json/reader.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/buffered.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/compressed.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/file.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/interfaces.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/memory.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/testing/util.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/basic_decimal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_block_counter.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_util.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_builders.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_ops.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/compression.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/decimal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/future.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/delimiting.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/int_util.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/io_util.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/iterator.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/logging.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/key_value_metadata.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/memory.cc @@ -70,11 +83,17 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/thread_pool.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/trie.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/utf8.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/value_parsing.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/double-conversion.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/cached-powers.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/diy-fp.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/bignum.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/strtod.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/datetime/tz.cpp ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/dictionary.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/feather.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc + # ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc + # ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_simple.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/message.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/metadata_internal.cc diff --git a/cmake/arrow/config.h b/cmake/arrow/config.h index 2d63504e59..33ccf6f1cd 100644 --- a/cmake/arrow/config.h +++ b/cmake/arrow/config.h @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#define ARROW_VERSION_MAJOR 0 -#define ARROW_VERSION_MINOR 16 -#define ARROW_VERSION_PATCH 0 +#define ARROW_VERSION_MAJOR 1 +#define ARROW_VERSION_MINOR 0 +#define ARROW_VERSION_PATCH 1 #define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH /* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */ diff --git a/cmake/modules/FindFlatbuffers.cmake b/cmake/modules/FindFlatbuffers.cmake index 2f7b4d7278..42cfe75438 100644 --- a/cmake/modules/FindFlatbuffers.cmake +++ b/cmake/modules/FindFlatbuffers.cmake @@ -42,7 +42,6 @@ if(NOT ${FLATBUFFERS_INCLUDE_DIR}) set(FLATBUFFERS_INCLUDE_DIR /usr/local/include) endif() -message("${FLATBUFFERS_COMPILER}") include(FindPackageHandleStandardArgs) find_package_handle_standard_args(FLATBUFFERS REQUIRED_VARS FLATBUFFERS_INCLUDE_DIR FLATBUFFERS_COMPILER) \ No newline at end of file diff --git a/cmake/modules/FindPyArrow.cmake b/cmake/modules/FindPyArrow.cmake index 5b978ea829..743331c1eb 100644 --- a/cmake/modules/FindPyArrow.cmake +++ b/cmake/modules/FindPyArrow.cmake @@ -24,7 +24,7 @@ execute_process( "from __future__ import print_function\ntry: import pyarrow; print(' '.join(pyarrow.get_libraries()), end='')\nexcept:pass" OUTPUT_VARIABLE __pyarrow_libraries) -# And the version + # And the version execute_process( COMMAND "${Python_EXECUTABLE}" -c "from __future__ import print_function\ntry: import pyarrow; print(pyarrow.__version__, end='')\nexcept:pass" @@ -45,23 +45,38 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") # windows its just "arrow.dll" set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY "arrow_python") set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY "arrow") - set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) +elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND ${PYARROW_VERSION_MAJOR} EQUAL "1") + # Link against pre-built libarrow on MacOS + set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.100.dylib) + set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.100.dylib) elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") # Link against pre-built libarrow on MacOS set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MINOR}.dylib) set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MINOR}.dylib) - set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) -else() +elseif (${PYARROW_VERSION_MAJOR} EQUAL "1") # linux - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) - set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) + set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.100) + set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.100) +else() + set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) + set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) endif() +set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) + if(PYTHON_PYARROW_INCLUDE_DIR AND PYTHON_PYARROW_LIBRARIES) set(PYTHON_PYARROW_FOUND 1 CACHE INTERNAL "Python pyarrow found") endif() + +# set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) +# else() +# # linux +# set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) +# set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) + + + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PyArrow REQUIRED_VARS PYTHON_PYARROW_INCLUDE_DIR PYTHON_PYARROW_LIBRARIES PYTHON_PYARROW_LIBRARY_DIR VERSION_VAR __pyarrow_version) diff --git a/cpp/perspective/CMakeLists.txt b/cpp/perspective/CMakeLists.txt index 19e74d1b47..383eb5299d 100644 --- a/cpp/perspective/CMakeLists.txt +++ b/cpp/perspective/CMakeLists.txt @@ -548,7 +548,13 @@ set (SOURCE_FILES set(PYTHON_SOURCE_FILES ${SOURCE_FILES} ${PSP_PYTHON_SRC}/src/column.cpp - ) +) + +set(WASM_SOURCE_FILES ${SOURCE_FILES} + ${PSP_CPP_SRC}/src/cpp/arrow_csv.cpp + ${PSP_CPP_SRC}/src/cpp/vendor/arrow_single_threaded_reader.cpp +) + set (PYTHON_BINDING_SOURCE_FILES ${PSP_PYTHON_SRC}/src/accessor.cpp @@ -570,7 +576,7 @@ else() endif() if (PSP_WASM_BUILD) - add_library(psp ${SOURCE_FILES}) + add_library(psp ${WASM_SOURCE_FILES}) target_compile_definitions(psp PRIVATE PSP_ENABLE_WASM=1) set_target_properties(psp PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") target_link_libraries(psp arrow) @@ -663,7 +669,7 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) endif() ######################## else() - add_library(psp SHARED ${SOURCE_FILES}) + add_library(psp SHARED ${WASM_SOURCE_FILES}) # Link perspective against custom-built minimal arrow target_link_libraries(psp arrow) diff --git a/cpp/perspective/src/cpp/arrow_csv.cpp b/cpp/perspective/src/cpp/arrow_csv.cpp new file mode 100644 index 0000000000..2df5e0a296 --- /dev/null +++ b/cpp/perspective/src/cpp/arrow_csv.cpp @@ -0,0 +1,60 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + */ + +#include +#include +#include +#include + +// This causes build warnings +// https://github.com/emscripten-core/emscripten/issues/8574 +#include + +namespace perspective { +namespace apachearrow { + + std::shared_ptr<::arrow::Table> + csvToTable(std::string& csv, bool is_update, + std::unordered_map>& + schema) { + arrow::MemoryPool* pool = arrow::default_memory_pool(); + auto input = std::make_shared(csv); + auto read_options = arrow::csv::ReadOptions::Defaults(); + auto parse_options = arrow::csv::ParseOptions::Defaults(); + auto convert_options = arrow::csv::ConvertOptions::Defaults(); + + read_options.use_threads = false; + convert_options.timestamp_parsers + = std::vector>{ + arrow::TimestampParser::MakeISO8601(), + arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"), + arrow::TimestampParser::MakeStrptime("%m-%d-%Y"), + arrow::TimestampParser::MakeStrptime("%m/%d/%Y"), + arrow::TimestampParser::MakeStrptime("%d %m %Y"), + arrow::TimestampParser::MakeStrptime("%H:%M:%S.%f"), + }; + + if (is_update) { + convert_options.column_types = std::move(schema); + } + + auto maybe_reader = arrow::csv::TableReader::Make( + pool, input, read_options, parse_options, convert_options); + + std::shared_ptr reader = *maybe_reader; + + auto maybe_table = reader->Read(); + if (!maybe_table.ok()) { + PSP_COMPLAIN_AND_ABORT(maybe_table.status().ToString()); + } + return *maybe_table; + } + +} // namespace apachearrow +} // namespace perspective \ No newline at end of file diff --git a/cpp/perspective/src/cpp/arrow_loader.cpp b/cpp/perspective/src/cpp/arrow_loader.cpp index 51ba586c99..bc97702247 100644 --- a/cpp/perspective/src/cpp/arrow_loader.cpp +++ b/cpp/perspective/src/cpp/arrow_loader.cpp @@ -12,6 +12,104 @@ namespace perspective { namespace apachearrow { + void + load_stream(const uintptr_t ptr, const uint32_t length, std::shared_ptr& table) { + arrow::io::BufferReader buffer_reader(reinterpret_cast(ptr), length); +#if ARROW_VERSION_MAJOR < 1 + std::shared_ptr batch_reader; + arrow::Status status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader, &batch_reader); + if (!status.ok()) { + std::stringstream ss; + ss << "Failed to open RecordBatchStreamReader: " << status.message() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } else { + status = batch_reader->ReadAll(&table); + if (!status.ok()) { + std::stringstream ss; + ss << "Failed to read stream record batch: " << status.message() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + }; + } +#else + auto status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader); + if (!status.ok()) { + std::stringstream ss; + ss << "Failed to open RecordBatchStreamReader: " << status.status().ToString() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } else { + auto batch_reader = *status; + auto status5 = batch_reader->ReadAll(&table); + if (!status5.ok()) { + std::stringstream ss; + ss << "Failed to read stream record batch: " << status5.ToString() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + }; + } +#endif + } + + void + load_file(const uintptr_t ptr, const uint32_t length, std::shared_ptr& table) { + arrow::io::BufferReader buffer_reader(reinterpret_cast(ptr), length); +#if ARROW_VERSION_MAJOR < 1 + std::shared_ptr batch_reader; + arrow::Status status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader, &batch_reader); + if (!status.ok()) { + std::stringstream ss; + ss << "Failed to open RecordBatchFileReader: " << status.message() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } else { + std::vector> batches; + auto num_batches = batch_reader->num_record_batches(); + for (int i = 0; i < num_batches; ++i) { + std::shared_ptr chunk; + status = batch_reader->ReadRecordBatch(i, &chunk); + if (!status.ok()) { + PSP_COMPLAIN_AND_ABORT( + "Failed to read file record batch: " + status.message()); + } + batches.push_back(chunk); + } + status = arrow::Table::FromRecordBatches(batches, &table); + if (!status.ok()) { + std::stringstream ss; + ss << "Failed to create Table from RecordBatches: " + << status.message() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + }; + }; +#else + auto status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader); + if (!status.ok()) { + std::stringstream ss; + ss << "Failed to open RecordBatchFileReader: " << status.status().ToString() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } else { + std::shared_ptr batch_reader = *status; + std::vector> batches; + auto num_batches = batch_reader->num_record_batches(); + for (int i = 0; i < num_batches; ++i) { + + auto status2 = batch_reader->ReadRecordBatch(i); + if (!status2.ok()) { + PSP_COMPLAIN_AND_ABORT( + "Failed to read file record batch: " + status2.status().ToString()); + } + std::shared_ptr chunk = *status2; + batches.push_back(chunk); + } + auto status3 = arrow::Table::FromRecordBatches(batches); + if (!status3.ok()) { + std::stringstream ss; + ss << "Failed to create Table from RecordBatches: " + << status3.status().ToString() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + }; + table = *status3; + }; +#endif + } + using namespace perspective; ArrowLoader::ArrowLoader() {} @@ -47,6 +145,8 @@ namespace apachearrow { return DTYPE_TIME; } else if (src == "date32" || src == "date64") { return DTYPE_DATE; + } else if (src == "null") { + return DTYPE_STR; } std::stringstream ss; ss << "Could not load arrow column of type `" << src << "`" << std::endl; @@ -58,47 +158,9 @@ namespace apachearrow { ArrowLoader::initialize(const uintptr_t ptr, const uint32_t length) { arrow::io::BufferReader buffer_reader(reinterpret_cast(ptr), length); if (std::memcmp("ARROW1", (const void *)ptr, 6) == 0) { - std::shared_ptr batch_reader; - arrow::Status status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader, &batch_reader); - if (!status.ok()) { - std::stringstream ss; - ss << "Failed to open RecordBatchFileReader: " << status.message() << std::endl; - PSP_COMPLAIN_AND_ABORT(ss.str()); - } else { - std::vector> batches; - auto num_batches = batch_reader->num_record_batches(); - for (int i = 0; i < num_batches; ++i) { - std::shared_ptr chunk; - status = batch_reader->ReadRecordBatch(i, &chunk); - if (!status.ok()) { - PSP_COMPLAIN_AND_ABORT( - "Failed to read file record batch: " + status.message()); - } - batches.push_back(chunk); - } - status = arrow::Table::FromRecordBatches(batches, &m_table); - if (!status.ok()) { - std::stringstream ss; - ss << "Failed to create Table from RecordBatches: " - << status.message() << std::endl; - PSP_COMPLAIN_AND_ABORT(ss.str()); - }; - }; + load_file(ptr, length, m_table); } else { - std::shared_ptr batch_reader; - arrow::Status status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader, &batch_reader); - if (!status.ok()) { - std::stringstream ss; - ss << "Failed to open RecordBatchStreamReader: " << status.message() << std::endl; - PSP_COMPLAIN_AND_ABORT(ss.str()); - } else { - status = batch_reader->ReadAll(&m_table); - if (!status.ok()) { - std::stringstream ss; - ss << "Failed to read stream record batch: " << status.message() << std::endl; - PSP_COMPLAIN_AND_ABORT(ss.str()); - }; - } + load_stream(ptr, length, m_table); } std::shared_ptr schema = m_table->schema(); @@ -110,6 +172,21 @@ namespace apachearrow { } } +#ifdef PSP_ENABLE_WASM + void + ArrowLoader::init_csv(std::string& csv, bool is_update, std::unordered_map>& psp_schema) { + m_table = csvToTable(csv, is_update, psp_schema); + + std::shared_ptr schema = m_table->schema(); + std::vector> fields = schema->fields(); + + for (auto field : fields) { + m_names.push_back(field->name()); + m_types.push_back(convert_type(field->type()->name())); + } + } +#endif + void ArrowLoader::fill_table( t_data_table& tbl, @@ -383,6 +460,11 @@ namespace apachearrow { dest->set_nth(offset + i, v); } } break; + case arrow::NullType::type_id: { + for (uint32_t i = 0; i < len; ++i) { + dest->set_valid(i, false); + } + } break; default: { std::stringstream ss; std::string arrow_type = src->type()->ToString(); diff --git a/cpp/perspective/src/cpp/arrow_writer.cpp b/cpp/perspective/src/cpp/arrow_writer.cpp index 043ae11a94..334f38dd4c 100644 --- a/cpp/perspective/src/cpp/arrow_writer.cpp +++ b/cpp/perspective/src/cpp/arrow_writer.cpp @@ -263,11 +263,29 @@ namespace apachearrow { auto dictionary_type = arrow::dictionary(arrow::int32(), arrow::utf8()); +#if ARROW_VERSION_MAJOR < 1 std::shared_ptr dictionary_array; PSP_CHECK_ARROW_STATUS(arrow::DictionaryArray::FromArrays( dictionary_type, indices_array, values_array, &dictionary_array)); return dictionary_array; +#else + arrow::Result> result = arrow::DictionaryArray::FromArrays( + dictionary_type, + indices_array, + values_array + ); + + if (!result.ok()) { + std::stringstream ss; + ss << "Could not write values for dictionary array: " + << result.status().message() + << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } + + return *result; +#endif } } // namespace arrow diff --git a/cpp/perspective/src/cpp/emscripten.cpp b/cpp/perspective/src/cpp/emscripten.cpp index 92ece3faf2..d80ec6e60e 100644 --- a/cpp/perspective/src/cpp/emscripten.cpp +++ b/cpp/perspective/src/cpp/emscripten.cpp @@ -10,6 +10,7 @@ #include #include #include +#include using namespace emscripten; using namespace perspective; @@ -1021,6 +1022,7 @@ namespace binding { t_op op, bool is_update, bool is_arrow, + bool is_csv, t_uindex port_id) { bool table_initialized = has_value(table); std::shared_ptr pool; @@ -1044,24 +1046,79 @@ namespace binding { // Determine metadata bool is_delete = op == OP_DELETE; + if (is_arrow && !is_delete) { - t_val constructor = accessor["constructor"]; - std::int32_t length = accessor["byteLength"].as(); - - // Allocate memory - ptr = reinterpret_cast(malloc(length)); - if (ptr == NULL) { - std::cout << "Unable to load arrow of size 0" << std::endl; - return nullptr; - } + if (is_csv) { + std::string s = accessor.as(); + auto map = std::unordered_map>(); + if (is_update) { + auto gnode_output_schema = gnode->get_output_schema(); + auto schema = gnode_output_schema.drop({"psp_okey"}); + auto column_names = schema.columns(); + auto data_types = schema.types(); + + for (auto idx = 0; idx < column_names.size(); ++idx) { + const std::string& name = column_names[idx]; + const t_dtype& type = data_types[idx]; + switch (type) { + case DTYPE_FLOAT32: + map[name] = std::make_shared(); + break; + case DTYPE_FLOAT64: + map[name] = std::make_shared(); + break; + case DTYPE_STR: + map[name] = std::make_shared(); + break; + case DTYPE_BOOL: + map[name] = std::make_shared(); + break; + case DTYPE_UINT32: + map[name] = std::make_shared(); + break; + case DTYPE_UINT64: + map[name] = std::make_shared(); + break; + case DTYPE_INT32: + map[name] = std::make_shared(); + break; + case DTYPE_INT64: + map[name] = std::make_shared(); + break; + case DTYPE_TIME: + map[name] = std::make_shared(); + break; + case DTYPE_DATE: + map[name] = std::make_shared(); + break; + default: + std::stringstream ss; + ss << "Error loading arrow type " << dtype_to_str(type) << " for column " << name << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()) + break; + } + } + } + arrow_loader.init_csv(s, is_update, map); + } else { + t_val constructor = accessor["constructor"]; + std::int32_t length = accessor["byteLength"].as(); + + // Allocate memory + ptr = reinterpret_cast(malloc(length)); + if (ptr == NULL) { + std::cout << "Unable to load arrow of size 0" << std::endl; + return nullptr; + } - // Write to the C++ heap where we allocated the space - t_val memory = t_val::module_property("HEAP8")["buffer"]; - t_val memoryView = constructor.new_(memory, ptr, length); - memoryView.call("set", accessor); + // Write to the C++ heap where we allocated the space + t_val memory = t_val::module_property("HEAP8")["buffer"]; + t_val memoryView = constructor.new_(memory, ptr, length); + memoryView.call("set", accessor); - // Parse the arrow and get its metadata - arrow_loader.initialize(ptr, length); + // Parse the arrow and get its metadata + arrow_loader.initialize(ptr, length); + } // Always use the `Table` column names and data types on up if (table_initialized && is_update) { @@ -1167,7 +1224,7 @@ namespace binding { _fill_data(data_table, accessor, input_schema, index, offset, limit, is_update); } - if (is_arrow) { + if (is_arrow && !is_csv) { free((void *)ptr); } diff --git a/cpp/perspective/src/cpp/vendor/arrow_single_threaded_reader.cpp b/cpp/perspective/src/cpp/vendor/arrow_single_threaded_reader.cpp new file mode 100644 index 0000000000..898a17280c --- /dev/null +++ b/cpp/perspective/src/cpp/vendor/arrow_single_threaded_reader.cpp @@ -0,0 +1,623 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + * Originally forked from + * https://github.com/apache/arrow/blob/apache-arrow-1.0.1/cpp/src/arrow/csv/reader.cc + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* * * * WARNING * * * + * + * This file and respective header is a fork of + * https://github.com/apache/arrow/blob/apache-arrow-1.0.1/cpp/src/arrow/csv/reader.cc + * which removes references to `std::thread` such that compilation under + * Emscripten is possible. It should not be modified directly. + * + * TODO Pending a better solution or upstream fix .. + * +*/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/csv/chunker.h" +#include "arrow/csv/column_builder.h" +#include "arrow/csv/column_decoder.h" +#include "arrow/csv/options.h" +#include "arrow/csv/parser.h" +#include "arrow/io/interfaces.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/type.h" +#include "arrow/util/iterator.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" +#include "arrow/util/optional.h" +#include "arrow/util/task_group.h" +#include "arrow/util/utf8.h" + +namespace arrow { + +class MemoryPool; + +namespace io { + + class InputStream; + +} // namespace io + +namespace csv { + + // using internal::GetCpuThreadPool; + // using internal::ThreadPool; + + struct ConversionSchema { + struct Column { + std::string name; + // Physical column index in CSV file + int32_t index; + // If true, make a column of nulls + bool is_missing; + // If set, convert the CSV column to this type + // If unset (and is_missing is false), infer the type from the CSV + // column + std::shared_ptr type; + }; + + static Column + NullColumn(std::string col_name, std::shared_ptr type) { + return Column{std::move(col_name), -1, true, std::move(type)}; + } + + static Column + TypedColumn(std::string col_name, int32_t col_index, + std::shared_ptr type) { + return Column{ + std::move(col_name), col_index, false, std::move(type)}; + } + + static Column + InferredColumn(std::string col_name, int32_t col_index) { + return Column{std::move(col_name), col_index, false, nullptr}; + } + + std::vector columns; + }; + + // An iterator of Buffers that makes sure there is no straddling CRLF + // sequence. + class CSVBufferIterator { + public: + explicit CSVBufferIterator( + Iterator> buffer_iterator) + : buffer_iterator_(std::move(buffer_iterator)) {} + + static Iterator> + Make(Iterator> buffer_iterator) { + CSVBufferIterator it(std::move(buffer_iterator)); + return Iterator>(std::move(it)); + } + + Result> + Next() { + ARROW_ASSIGN_OR_RAISE(auto buf, buffer_iterator_.Next()); + if (buf == nullptr) { + // EOF + return nullptr; + } + + int64_t offset = 0; + if (first_buffer_) { + ARROW_ASSIGN_OR_RAISE( + auto data, util::SkipUTF8BOM(buf->data(), buf->size())); + offset += data - buf->data(); + DCHECK_GE(offset, 0); + first_buffer_ = false; + } + + if (trailing_cr_ && buf->data()[offset] == '\n') { + // Skip '\r\n' line separator that started at the end of + // previous buffer + ++offset; + } + + trailing_cr_ = (buf->data()[buf->size() - 1] == '\r'); + buf = SliceBuffer(buf, offset); + if (buf->size() == 0) { + // EOF + return nullptr; + } else { + return buf; + } + } + + protected: + Iterator> buffer_iterator_; + bool first_buffer_ = true; + // Whether there was a trailing CR at the end of last received buffer + bool trailing_cr_ = false; + }; + + struct CSVBlock { + // (partial + completion + buffer) is an entire delimited CSV buffer. + std::shared_ptr partial; + std::shared_ptr completion; + std::shared_ptr buffer; + int64_t block_index; + bool is_final; + std::function consume_bytes; + }; + + class BlockReader { + public: + BlockReader(std::unique_ptr chunker, + Iterator> buffer_iterator, + std::shared_ptr first_buffer) + : chunker_(std::move(chunker)) + , buffer_iterator_(std::move(buffer_iterator)) + , partial_(std::make_shared("")) + , buffer_(std::move(first_buffer)) {} + + protected: + std::unique_ptr chunker_; + Iterator> buffer_iterator_; + + std::shared_ptr partial_, buffer_; + int64_t block_index_ = 0; + // Whether there was a trailing CR at the end of last received buffer + bool trailing_cr_ = false; + }; + + // An object that reads delimited CSV blocks for serial use. + // The number of bytes consumed should be notified after each read, + // using CSVBlock::consume_bytes. + class SerialBlockReader : public BlockReader { + public: + using BlockReader::BlockReader; + + Result> + Next() { + if (buffer_ == nullptr) { + // EOF + return util::optional(); + } + + std::shared_ptr next_buffer, completion; + ARROW_ASSIGN_OR_RAISE(next_buffer, buffer_iterator_.Next()); + bool is_final = (next_buffer == nullptr); + + if (is_final) { + // End of file reached => compute completion from penultimate + // block + RETURN_NOT_OK(chunker_->ProcessFinal( + partial_, buffer_, &completion, &buffer_)); + } else { + // Get completion of partial from previous block. + RETURN_NOT_OK(chunker_->ProcessWithPartial( + partial_, buffer_, &completion, &buffer_)); + } + int64_t bytes_before_buffer = partial_->size() + completion->size(); + + auto consume_bytes = [this, bytes_before_buffer, next_buffer]( + int64_t nbytes) -> Status { + DCHECK_GE(nbytes, 0); + auto offset = nbytes - bytes_before_buffer; + if (offset < 0) { + // Should not happen + return Status::Invalid( + "CSV parser got out of sync with chunker"); + } + partial_ = SliceBuffer(buffer_, offset); + buffer_ = next_buffer; + return Status::OK(); + }; + + return CSVBlock{partial_, completion, buffer_, block_index_++, + is_final, std::move(consume_bytes)}; + } + }; + + + ///////////////////////////////////////////////////////////////////////// + // Base class for common functionality + + class ReaderMixin { + public: + ReaderMixin(MemoryPool* pool, std::shared_ptr input, + const ReadOptions& read_options, const ParseOptions& parse_options, + const ConvertOptions& convert_options) + : pool_(pool) + , read_options_(read_options) + , parse_options_(parse_options) + , convert_options_(convert_options) + , input_(std::move(input)) {} + + protected: + // Read header and column names from buffer, create column builders + Status + ProcessHeader( + const std::shared_ptr& buf, std::shared_ptr* rest) { + const uint8_t* data = buf->data(); + const auto data_end = data + buf->size(); + DCHECK_GT(data_end - data, 0); + + if (read_options_.skip_rows) { + // Skip initial rows (potentially invalid CSV data) + auto num_skipped_rows + = SkipRows(data, static_cast(data_end - data), + read_options_.skip_rows, &data); + if (num_skipped_rows < read_options_.skip_rows) { + return Status::Invalid("Could not skip initial ", + read_options_.skip_rows, + " rows from CSV file, " + "either file is too short or header is larger than " + "block size"); + } + } + + if (read_options_.column_names.empty()) { + // Parse one row (either to read column names or to know the + // number of columns) + BlockParser parser(pool_, parse_options_, num_csv_cols_, 1); + uint32_t parsed_size = 0; + RETURN_NOT_OK(parser.Parse( + util::string_view( + reinterpret_cast(data), data_end - data), + &parsed_size)); + if (parser.num_rows() != 1) { + return Status::Invalid( + "Could not read first row from CSV file, either " + "file is too short or header is larger than block " + "size"); + } + if (parser.num_cols() == 0) { + return Status::Invalid("No columns in CSV file"); + } + + if (read_options_.autogenerate_column_names) { + column_names_ = GenerateColumnNames(parser.num_cols()); + } else { + // Read column names from header row + auto visit = [&](const uint8_t* data, uint32_t size, + bool quoted) -> Status { + column_names_.emplace_back( + reinterpret_cast(data), size); + return Status::OK(); + }; + RETURN_NOT_OK(parser.VisitLastRow(visit)); + DCHECK_EQ(static_cast(parser.num_cols()), + column_names_.size()); + // Skip parsed header row + data += parsed_size; + } + } else { + column_names_ = read_options_.column_names; + } + *rest = SliceBuffer(buf, data - buf->data()); + + num_csv_cols_ = static_cast(column_names_.size()); + DCHECK_GT(num_csv_cols_, 0); + + return MakeConversionSchema(); + } + + std::vector + GenerateColumnNames(int32_t num_cols) { + std::vector res; + res.reserve(num_cols); + for (int32_t i = 0; i < num_cols; ++i) { + std::stringstream ss; + ss << "f" << i; + res.push_back(ss.str()); + } + return res; + } + + // Make conversion schema from options and parsed CSV header + Status + MakeConversionSchema() { + // Append a column converted from CSV data + auto append_csv_column + = [&](std::string col_name, int32_t col_index) { + // Does the named column have a fixed type? + auto it = convert_options_.column_types.find(col_name); + if (it == convert_options_.column_types.end()) { + conversion_schema_.columns.push_back( + ConversionSchema::InferredColumn( + std::move(col_name), col_index)); + } else { + conversion_schema_.columns.push_back( + ConversionSchema::TypedColumn( + std::move(col_name), col_index, it->second)); + } + }; + + // Append a column of nulls + auto append_null_column = [&](std::string col_name) { + // If the named column has a fixed type, use it, otherwise use + // null() + std::shared_ptr type; + auto it = convert_options_.column_types.find(col_name); + if (it == convert_options_.column_types.end()) { + type = null(); + } else { + type = it->second; + } + conversion_schema_.columns.push_back( + ConversionSchema::NullColumn( + std::move(col_name), std::move(type))); + }; + + if (convert_options_.include_columns.empty()) { + // Include all columns in CSV file order + for (int32_t col_index = 0; col_index < num_csv_cols_; + ++col_index) { + append_csv_column(column_names_[col_index], col_index); + } + } else { + // Include columns from `include_columns` (in that order) + // Compute indices of columns in the CSV file + std::unordered_map col_indices; + col_indices.reserve(column_names_.size()); + for (int32_t i = 0; + i < static_cast(column_names_.size()); ++i) { + col_indices.emplace(column_names_[i], i); + } + + for (const auto& col_name : convert_options_.include_columns) { + auto it = col_indices.find(col_name); + if (it != col_indices.end()) { + append_csv_column(col_name, it->second); + } else if (convert_options_.include_missing_columns) { + append_null_column(col_name); + } else { + return Status::KeyError("Column '", col_name, + "' in include_columns " + "does not exist in CSV file"); + } + } + } + return Status::OK(); + } + + struct ParseResult { + std::shared_ptr parser; + int64_t parsed_bytes; + }; + + Result + Parse(const std::shared_ptr& partial, + const std::shared_ptr& completion, + const std::shared_ptr& block, int64_t block_index, + bool is_final) { + static constexpr int32_t max_num_rows + = std::numeric_limits::max(); + auto parser = std::make_shared( + pool_, parse_options_, num_csv_cols_, max_num_rows); + + std::shared_ptr straddling; + std::vector views; + if (partial->size() != 0 || completion->size() != 0) { + if (partial->size() == 0) { + straddling = completion; + } else if (completion->size() == 0) { + straddling = partial; + } else { + ARROW_ASSIGN_OR_RAISE(straddling, + ConcatenateBuffers({partial, completion}, pool_)); + } + views = { + util::string_view(*straddling), util::string_view(*block)}; + } else { + views = {util::string_view(*block)}; + } + uint32_t parsed_size; + if (is_final) { + RETURN_NOT_OK(parser->ParseFinal(views, &parsed_size)); + } else { + RETURN_NOT_OK(parser->Parse(views, &parsed_size)); + } + return ParseResult{ + std::move(parser), static_cast(parsed_size)}; + } + + MemoryPool* pool_; + ReadOptions read_options_; + ParseOptions parse_options_; + ConvertOptions convert_options_; + + // Number of columns in the CSV file + int32_t num_csv_cols_ = -1; + // Column names in the CSV file + std::vector column_names_; + ConversionSchema conversion_schema_; + + std::shared_ptr input_; + Iterator> buffer_iterator_; + std::shared_ptr task_group_; + }; + + ///////////////////////////////////////////////////////////////////////// + // Base class for one-shot table readers + + class BaseTableReader : public ReaderMixin, public csv::TableReader { + public: + using ReaderMixin::ReaderMixin; + + virtual Status Init() = 0; + + protected: + // Make column builders from conversion schema + Status + MakeColumnBuilders() { + for (const auto& column : conversion_schema_.columns) { + std::shared_ptr builder; + if (column.is_missing) { + ARROW_ASSIGN_OR_RAISE(builder, + ColumnBuilder::MakeNull( + pool_, column.type, task_group_)); + } else if (column.type != nullptr) { + ARROW_ASSIGN_OR_RAISE(builder, + ColumnBuilder::Make(pool_, column.type, column.index, + convert_options_, task_group_)); + } else { + ARROW_ASSIGN_OR_RAISE(builder, + ColumnBuilder::Make(pool_, column.index, + convert_options_, task_group_)); + } + column_builders_.push_back(std::move(builder)); + } + return Status::OK(); + } + + Result + ParseAndInsert(const std::shared_ptr& partial, + const std::shared_ptr& completion, + const std::shared_ptr& block, int64_t block_index, + bool is_final) { + ARROW_ASSIGN_OR_RAISE(auto result, + Parse(partial, completion, block, block_index, is_final)); + RETURN_NOT_OK(ProcessData(result.parser, block_index)); + return result.parsed_bytes; + } + + // Trigger conversion of parsed block data + Status + ProcessData( + const std::shared_ptr& parser, int64_t block_index) { + for (auto& builder : column_builders_) { + builder->Insert(block_index, parser); + } + return Status::OK(); + } + + Result> + MakeTable() { + DCHECK_EQ( + column_builders_.size(), conversion_schema_.columns.size()); + + std::vector> fields; + std::vector> columns; + + for (int32_t i = 0; + i < static_cast(column_builders_.size()); ++i) { + const auto& column = conversion_schema_.columns[i]; + ARROW_ASSIGN_OR_RAISE( + auto array, column_builders_[i]->Finish()); + fields.push_back(::arrow::field(column.name, array->type())); + columns.emplace_back(std::move(array)); + } + return Table::Make(schema(fields), columns); + } + + // Column builders for target Table (in ConversionSchema order) + std::vector> column_builders_; + }; + + ///////////////////////////////////////////////////////////////////////// + // Base class for streaming readers + + ///////////////////////////////////////////////////////////////////////// + // Serial TableReader implementation + + class SerialTableReader : public BaseTableReader { + public: + using BaseTableReader::BaseTableReader; + + Status + Init() override { + ARROW_ASSIGN_OR_RAISE(auto istream_it, + io::MakeInputStreamIterator(input_, read_options_.block_size)); + + buffer_iterator_ = CSVBufferIterator::Make(std::move(istream_it)); + return Status::OK(); + } + + Result> + Read() override { + task_group_ = internal::TaskGroup::MakeSerial(); + + // First block + ARROW_ASSIGN_OR_RAISE(auto first_buffer, buffer_iterator_.Next()); + if (first_buffer == nullptr) { + return Status::Invalid("Empty CSV file"); + } + RETURN_NOT_OK(ProcessHeader(first_buffer, &first_buffer)); + RETURN_NOT_OK(MakeColumnBuilders()); + + SerialBlockReader block_reader(MakeChunker(parse_options_), + std::move(buffer_iterator_), std::move(first_buffer)); + + while (true) { + ARROW_ASSIGN_OR_RAISE(auto maybe_block, block_reader.Next()); + if (!maybe_block.has_value()) { + // EOF + break; + } + ARROW_ASSIGN_OR_RAISE(int64_t parsed_bytes, + ParseAndInsert(maybe_block->partial, + maybe_block->completion, maybe_block->buffer, + maybe_block->block_index, maybe_block->is_final)); + RETURN_NOT_OK(maybe_block->consume_bytes(parsed_bytes)); + } + // Finish conversion, create schema and table + RETURN_NOT_OK(task_group_->Finish()); + return MakeTable(); + } + }; + + ///////////////////////////////////////////////////////////////////////// + // Factory functions + + Result> + TableReader::Make(MemoryPool* pool, std::shared_ptr input, + const ReadOptions& read_options, const ParseOptions& parse_options, + const ConvertOptions& convert_options) { + std::shared_ptr reader; + // if (read_options.use_threads) { + // reader = std::make_shared( + // pool, input, read_options, parse_options, convert_options, + // GetCpuThreadPool()); + // } else { + reader = std::make_shared( + pool, input, read_options, parse_options, convert_options); + + RETURN_NOT_OK(reader->Init()); + return reader; + } + +} // namespace csv +} // namespace arrow diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index 449ce8fb9d..efd7d9d0bf 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -567,23 +567,33 @@ View::data_slice_to_arrow( } std::shared_ptr buffer; + +#if ARROW_VERSION_MAJOR < 1 auto allocated = arrow::AllocateResizableBuffer(0, &buffer); if (!allocated.ok()) { std::stringstream ss; ss << "Failed to allocate buffer: " << allocated.message() << std::endl; PSP_COMPLAIN_AND_ABORT(ss.str()); } - - arrow::io::BufferOutputStream sink(buffer); - - auto options = arrow::ipc::IpcOptions::Defaults(); - // options.allow_64bit = true; - // options.write_legacy_ipc_format = true; - // options.alignment = 64; + arrow::io::BufferOutputStream sink(buffer); + auto options = arrow::ipc::IpcOptions::Defaults(); auto res = arrow::ipc::RecordBatchStreamWriter::Open(&sink, arrow_schema, options); - std::shared_ptr writer = *res; +#else + arrow::Result> allocated = arrow::AllocateResizableBuffer(0); + if (!allocated.ok()) { + std::stringstream ss; + ss << "Failed to allocate buffer: " << allocated.status().message() << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } + + buffer = *allocated; + arrow::io::BufferOutputStream sink(buffer); + auto options = arrow::ipc::IpcWriteOptions::Defaults(); + auto res = arrow::ipc::NewStreamWriter(&sink, arrow_schema, options); +#endif + std::shared_ptr writer = *res; PSP_CHECK_ARROW_STATUS(writer->WriteRecordBatch(*batches)); PSP_CHECK_ARROW_STATUS(writer->Close()); return std::make_shared(buffer->ToString()); diff --git a/cpp/perspective/src/include/perspective/arrow_csv.h b/cpp/perspective/src/include/perspective/arrow_csv.h new file mode 100644 index 0000000000..8aab6b9693 --- /dev/null +++ b/cpp/perspective/src/include/perspective/arrow_csv.h @@ -0,0 +1,28 @@ +/****************************************************************************** + * + * Copyright (c) 2017, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + */ + +#pragma once +#include +#include +#include + +namespace perspective { +namespace apachearrow { + + /** + * @brief Initialize the arrow loader with a CSV. + * + * @param ptr + */ + std::shared_ptr<::arrow::Table> csvToTable(std::string& csv, bool is_update, + std::unordered_map>& + schema); + +} // namespace apachearrow +} // namespace perspective \ No newline at end of file diff --git a/cpp/perspective/src/include/perspective/arrow_loader.h b/cpp/perspective/src/include/perspective/arrow_loader.h index 7f6fe47fe0..03369ea506 100644 --- a/cpp/perspective/src/include/perspective/arrow_loader.h +++ b/cpp/perspective/src/include/perspective/arrow_loader.h @@ -14,14 +14,16 @@ #include #include #include - +#include +#include #include #include #include #include -#include -#include +#if ARROW_VERSION_MAJOR >= 1 +#include +#endif namespace perspective { namespace apachearrow { @@ -38,6 +40,15 @@ namespace apachearrow { */ void initialize(uintptr_t ptr, std::uint32_t); +#ifdef PSP_ENABLE_WASM + /** + * @brief Initialize the arrow loader with a CSV. + * + * @param ptr + */ + void init_csv(std::string& csv, bool is_update, std::unordered_map>& schema); +#endif + /** * @brief Given an arrow binary and a data table, load the arrow into * Perspective. If updating an existing table, use the `input_schema` diff --git a/cpp/perspective/src/include/perspective/binding.h b/cpp/perspective/src/include/perspective/binding.h index 717b065d97..b9a1bc54b5 100644 --- a/cpp/perspective/src/include/perspective/binding.h +++ b/cpp/perspective/src/include/perspective/binding.h @@ -237,6 +237,7 @@ namespace binding { t_op op, bool is_update, bool is_arrow, + bool is_csv, t_uindex port_id); /****************************************************************************** diff --git a/cpp/perspective/src/include/perspective/vendor/arrow_single_threaded_reader.h b/cpp/perspective/src/include/perspective/vendor/arrow_single_threaded_reader.h new file mode 100644 index 0000000000..ceaa2ae4a6 --- /dev/null +++ b/cpp/perspective/src/include/perspective/vendor/arrow_single_threaded_reader.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + * Originally forked from + * https://github.com/apache/arrow/blob/apache-arrow-1.0.1/cpp/src/arrow/csv/reader.h + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include +#include "arrow/csv/options.h" // IWYU pragma: keep +#include "arrow/record_batch.h" +#include "arrow/result.h" +#include "arrow/type.h" +#include "arrow/type_fwd.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +namespace io { + class InputStream; +} // namespace io + +namespace csv { + + /// A class that reads an entire CSV file into a Arrow Table + class ARROW_EXPORT TableReader { + public: + virtual ~TableReader() = default; + + /// Read the entire CSV file and convert it to a Arrow Table + virtual Result> Read() = 0; + + /// Create a TableReader instance + static Result> Make(MemoryPool* pool, + std::shared_ptr input, const ReadOptions&, + const ParseOptions&, const ConvertOptions&); + }; + +} // namespace csv +} // namespace arrow diff --git a/packages/perspective-viewer-d3fc/test/results/linux.docker.json b/packages/perspective-viewer-d3fc/test/results/linux.docker.json index 0cae398fbd..96b76db7c2 100644 --- a/packages/perspective-viewer-d3fc/test/results/linux.docker.json +++ b/packages/perspective-viewer-d3fc/test/results/linux.docker.json @@ -1,9 +1,9 @@ { "candlestick_filter_by_a_single_instrument_": "f988ca6494d7a36bada09928cd1a544e", - "candlestick_filter_to_date_range_": "8ca4da0a6229d4f9db4a845d5d415c20", - "__GIT_COMMIT__": "47a2e5afad7e33e7fff76fd872999b5cf48d2c1c", + "candlestick_filter_to_date_range_": "a08755a828b3ab52426cb306c04ad0d7", + "__GIT_COMMIT__": "ffcc3be9fb137f23a21e3737e07e809e55e71e49", "ohlc_filter_by_a_single_instrument_": "0110fac1f2befac1b97a9d33f0022acf", - "ohlc_filter_to_date_range_": "3ec466996be47e2c8df135a4303bf383", + "ohlc_filter_to_date_range_": "96db579b0cb0fa173ed7e6d4d9539fa2", "scatter_shows_a_grid_without_any_settings_applied_": "8677946ab48f16a376c421500d59e6c0", "scatter_pivots_by_a_row_": "a4dd9b7daff4e639fb7a56b01d2f4990", "scatter_pivots_by_two_rows_": "cf59f774c819f3f38a202af3cb95f4bf", @@ -18,128 +18,128 @@ "scatter_sorts_by_an_alpha_column_": "e69fca4d7f53c86d49e96b060dd726d6", "scatter_displays_visible_columns_": "fd50cc20ca70e0a4dd8f2012d9b6737a", "yscatter_shows_a_grid_without_any_settings_applied_": "a7d598f0b351ec2aa44c30773ab2646a", - "yscatter_pivots_by_a_row_": "35e5de5b94de6566b8b52fdc5bf9ff46", - "yscatter_pivots_by_two_rows_": "6ac7168cf400b96582db9ecef67469b7", - "yscatter_pivots_by_a_column_": "e2207ba56085c40ccd8b2bfd955bd408", - "yscatter_pivots_by_a_row_and_a_column_": "f41548f0ab914b486b9511f6a5de518a", - "yscatter_pivots_by_two_rows_and_two_columns_": "b15b78c0fa616db67049f71379272752", - "yscatter_sorts_by_a_hidden_column_": "c5344780bfbf4c27483f12f31a308f2f", - "yscatter_sorts_by_a_numeric_column_": "95d1620a19f8e20a6fc9865813b78ab9", - "yscatter_filters_by_a_numeric_column_": "54809011faa507984498d8777ded9cba", - "yscatter_filters_by_a_datetime_column_": "2d3e320e67d69646482c747dfecc6be8", - "yscatter_highlights_invalid_filter_": "922c2a4448fbed3a5ce12e5d3b35a6a6", - "yscatter_sorts_by_an_alpha_column_": "35df9e17f92161800ea2c06d27197ea1", - "yscatter_displays_visible_columns_": "71133a51f69c896c65f615b1f8e212c3", + "yscatter_pivots_by_a_row_": "87377e265c9f018b08d6db096f37aa6b", + "yscatter_pivots_by_two_rows_": "91adf3850cdd8cdb68823d30f40bf40f", + "yscatter_pivots_by_a_column_": "7d969179d543ee63b4872b4324416546", + "yscatter_pivots_by_a_row_and_a_column_": "a184f6f082cb5638e807c99789b80108", + "yscatter_pivots_by_two_rows_and_two_columns_": "bb2a6b52c851d1e4b529da952a6d07c1", + "yscatter_sorts_by_a_hidden_column_": "bdf965a704840456d49d80bfc9b4b475", + "yscatter_sorts_by_a_numeric_column_": "3ece41f11ed3bd8858b1f2666d912b1e", + "yscatter_filters_by_a_numeric_column_": "af788156d6492bd57d45f87f1a7efa16", + "yscatter_filters_by_a_datetime_column_": "49e574cb65fc1332c150dc8d5a516eb0", + "yscatter_highlights_invalid_filter_": "563fdede28e49d6af664a3dd7dbfb118", + "yscatter_sorts_by_an_alpha_column_": "b921d09668edf3fa154a68421674b39e", + "yscatter_displays_visible_columns_": "8a729aa2222726b8eba7c10b430e72d5", "heatmap_shows_a_grid_without_any_settings_applied_": "69789d9f32a5de2e5735e163fd9039e0", - "heatmap_pivots_by_a_row_": "435628da62b71b29241fe0568c15e9cf", - "heatmap_pivots_by_two_rows_": "a65b4d3755bdfcf2877a9f34a8bec88d", - "heatmap_pivots_by_a_column_": "33170a814e3a6e6681c822dbcdfd4b9a", - "heatmap_pivots_by_a_row_and_a_column_": "c8f48db37893f6154f2ca57e893bf4ad", - "heatmap_pivots_by_two_rows_and_two_columns_": "3f96a6cd4d3e9d5287af50c244c45924", - "heatmap_sorts_by_a_hidden_column_": "0afd7be4decd0bf5ecb7affd647ba4c0", - "heatmap_sorts_by_a_numeric_column_": "f10ee7c8e2c3107773b5d2794087355a", - "heatmap_filters_by_a_numeric_column_": "fc671e14ec4857bd0c352b4b36b3be0e", - "heatmap_filters_by_a_datetime_column_": "55aa976d76f0a16de9e98c16e02d7996", - "heatmap_highlights_invalid_filter_": "d8005b25ae066a57d5734e889d2c5d1c", - "heatmap_sorts_by_an_alpha_column_": "250283f61cf38d83a294259932c29deb", - "heatmap_displays_visible_columns_": "f2e2ebf9f315da1a2184b41993627af5", + "heatmap_pivots_by_a_row_": "cfc030e2db313e6f33e12e8f8094b7e5", + "heatmap_pivots_by_two_rows_": "e24d30e8b75627f5311052a56c04e0bc", + "heatmap_pivots_by_a_column_": "3c67d50401d66b65504e1174e3ed6638", + "heatmap_pivots_by_a_row_and_a_column_": "15e8f0ae253a8a0fdea0a63dbebc61fc", + "heatmap_pivots_by_two_rows_and_two_columns_": "b0cf765e03ede65fe7bea698faf87c52", + "heatmap_sorts_by_a_hidden_column_": "b759567f949b9883f038855b1133fcae", + "heatmap_sorts_by_a_numeric_column_": "754f7aa144fcb44075a82f4ad1840ef1", + "heatmap_filters_by_a_numeric_column_": "10fb59395285f3db03640f34c54eea93", + "heatmap_filters_by_a_datetime_column_": "a4f16b723ebabfbc0150dbaa1ba27655", + "heatmap_highlights_invalid_filter_": "4e3b1dd60d51047e1c48cb32edb4440d", + "heatmap_sorts_by_an_alpha_column_": "76138dec98e73589e2aa71c460eea0a3", + "heatmap_displays_visible_columns_": "ae8bfa24a35ff7a89bfc54ddc2afa3fd", "bar_shows_a_grid_without_any_settings_applied_": "5498ffae2e1fd52f8efb46684109de4b", - "bar_pivots_by_a_row_": "d345ed4d9cbfd6233f67f1b4989a7c2d", - "bar_pivots_by_two_rows_": "e8fac03ea69acab22f329e4c1d2e2f2a", - "bar_pivots_by_a_column_": "da7f30db8df953e5b000821ed307d45c", - "bar_pivots_by_a_row_and_a_column_": "5b1734bbadc5ba6e305b71b02fde2b13", - "bar_pivots_by_two_rows_and_two_columns_": "d1edae61877385dd69f82b46d901cdf4", - "bar_sorts_by_a_hidden_column_": "f725062490ccaf95764f77f5644d8a69", - "bar_sorts_by_a_numeric_column_": "586b79e1b5b8c3b36ed5df0d9c17d74c", - "bar_filters_by_a_numeric_column_": "c7a836ba66e34d72677e8e84d641b842", - "bar_filters_by_a_datetime_column_": "ed63e946afea78138bf3050261b7b3cc", - "bar_highlights_invalid_filter_": "a0df83307c00ca783884683328e3e86e", - "bar_sorts_by_an_alpha_column_": "cabaf3916280bf843c8a890ee5e92982", - "bar_displays_visible_columns_": "92c0e217121544ff7b0343bd536d1534", + "bar_pivots_by_a_row_": "fa91a51c74bab76568ee1ae108527e02", + "bar_pivots_by_two_rows_": "87c73b86eb25c6d4422e18d0c55a5a44", + "bar_pivots_by_a_column_": "91e33514db6c28d70a29f2c75e72ee82", + "bar_pivots_by_a_row_and_a_column_": "156f1ac9c7df901901873a2e444c373c", + "bar_pivots_by_two_rows_and_two_columns_": "140bdd48b583c12796112a917462e3a0", + "bar_sorts_by_a_hidden_column_": "5960c3d30e12688b64d5576fa2de484f", + "bar_sorts_by_a_numeric_column_": "bb3fe6d353f0df2311bd2be015ff937b", + "bar_filters_by_a_numeric_column_": "ac07ab406687cd11ce9c849a7cfa50e4", + "bar_filters_by_a_datetime_column_": "bee4ffc15a7ddebd1bc69235d500ef0d", + "bar_highlights_invalid_filter_": "651f010cb2c66573f1cfae4ec83401b6", + "bar_sorts_by_an_alpha_column_": "22cd2411fc77a51b59174251f1b0c9d4", + "bar_displays_visible_columns_": "b97bf6c716f91c708b6f350e3fc7b05a", "bar-x_shows_a_grid_without_any_settings_applied_": "4af73cc0325585e5da8e93674c5ec616", - "bar-x_pivots_by_a_row_": "ff0307f0fb5e91edd32de74dbd77ab63", - "bar-x_pivots_by_two_rows_": "9845db17eec9ff45ec4717b9ff3177df", - "bar-x_pivots_by_a_column_": "86d72e8ab1216b273154d8524aad19a3", - "bar-x_pivots_by_a_row_and_a_column_": "52101f9c2b6a6bf431b4830dba42d2a8", - "bar-x_pivots_by_two_rows_and_two_columns_": "bfb30e0054507f11f3d0be843be440f4", - "bar-x_sorts_by_a_hidden_column_": "bf6a6f9a5f9001ba36bd8ee0949c8400", - "bar-x_sorts_by_a_numeric_column_": "803e48a5d0eea90df6883bb3779abd56", - "bar-x_filters_by_a_numeric_column_": "8a3a41cbb43ccd9cfc3058e3e4c7a8df", - "bar-x_filters_by_a_datetime_column_": "440e3ca2e2c4a4146d133cea3f82c174", - "bar-x_highlights_invalid_filter_": "6baac728457305da2b3fabb9035fabaf", - "bar-x_sorts_by_an_alpha_column_": "102f30a5793f44a3cac4035578a4fcd3", - "bar-x_displays_visible_columns_": "a2dbd3de7d724220f2c8cb8866231428", + "bar-x_pivots_by_a_row_": "89684df5e4440adfcd4b97f79e7641d1", + "bar-x_pivots_by_two_rows_": "f61dff243c768fc9f73e5bc92c0c6b17", + "bar-x_pivots_by_a_column_": "3c330c1fe7ebf6163e47793e9b660c5d", + "bar-x_pivots_by_a_row_and_a_column_": "865efa693f278180b82bc158316dbf86", + "bar-x_pivots_by_two_rows_and_two_columns_": "046420577b5ab5002b6b434dee44cfba", + "bar-x_sorts_by_a_hidden_column_": "46e0004f6b85211b95068e02118dc9a8", + "bar-x_sorts_by_a_numeric_column_": "b24a7e8ea3d1e2962917a923e2a7310e", + "bar-x_filters_by_a_numeric_column_": "94c31a1e38b474404d171e04e5262240", + "bar-x_filters_by_a_datetime_column_": "a9c23ffd03362217b5d5dba5c3f1dcfd", + "bar-x_highlights_invalid_filter_": "e57d4009f0e42368e3c0526b3b05ebf7", + "bar-x_sorts_by_an_alpha_column_": "32d512fe6289f43b2b8c10421224de75", + "bar-x_displays_visible_columns_": "bd45cdb98e61a51a2066e3cf7c48c1d4", "bar-themed_shows_a_grid_without_any_settings_applied_": "47a633099c7cd24242a96db3afdc1925", - "bar-themed_pivots_by_a_row_": "39d0642663ee269b8c9a9917dbce6f75", - "bar-themed_pivots_by_two_rows_": "3357644018e40d78fbbeb418c5ac4524", - "bar-themed_pivots_by_a_column_": "268d244bcc4d51c24b23e9e976b7cabb", - "bar-themed_pivots_by_a_row_and_a_column_": "e80fe5f107828ab7aa1cc98098eddb50", - "bar-themed_pivots_by_two_rows_and_two_columns_": "9ba6f40563a25a559ead44c1be4a7507", - "bar-themed_sorts_by_a_hidden_column_": "62067dd4a56df865eeabfca75dd9bf68", - "bar-themed_sorts_by_a_numeric_column_": "2ef8142b6e0413a626e27d7319655692", - "bar-themed_filters_by_a_numeric_column_": "3fce631e6c3636a569d9d73c126c2f30", - "bar-themed_filters_by_a_datetime_column_": "7898c099b4a3f4a1cd01c24681dbce65", - "bar-themed_highlights_invalid_filter_": "b82c7b5f73118201f2189d599d0722a0", - "bar-themed_sorts_by_an_alpha_column_": "e2a90eab3c5d3801319948a347ac187a", - "bar-themed_displays_visible_columns_": "6767acabfd9bce8078005e3c05943635", + "bar-themed_pivots_by_a_row_": "9cdf3dc2c0d04d7412cce2b000413c1e", + "bar-themed_pivots_by_two_rows_": "81ec4e9bb95db440cde4b1bdaaee658e", + "bar-themed_pivots_by_a_column_": "698eb6b0db13f7feebe6bb8d122e1816", + "bar-themed_pivots_by_a_row_and_a_column_": "080b891c62c27d54b6515102c1b1c760", + "bar-themed_pivots_by_two_rows_and_two_columns_": "852006774a5ca34dd0981226bb24ff68", + "bar-themed_sorts_by_a_hidden_column_": "77cbc52d9f24838f8398d19cb1a160fe", + "bar-themed_sorts_by_a_numeric_column_": "f9eca6d6962a72453df4f491a379b990", + "bar-themed_filters_by_a_numeric_column_": "23a5366b2beaf7d20ac52810ec27bdea", + "bar-themed_filters_by_a_datetime_column_": "3f6e1f326f3f1b8db02a5d0bcd35df09", + "bar-themed_highlights_invalid_filter_": "f23edaf1a83fe19d31dfce2b85884b42", + "bar-themed_sorts_by_an_alpha_column_": "442f2f81a2d096d8cc8bf36197bc6ad0", + "bar-themed_displays_visible_columns_": "c69705b6fbe782cc7808f6810f09a75a", "line_shows_a_grid_without_any_settings_applied_": "7eeb0105d0aabb0eec68bb997e0c4e3c", - "line_pivots_by_a_row_": "9c4f5c68d4a9e2c875c6cf61977efefe", - "line_pivots_by_two_rows_": "f92243f2ab1e4ba608aad31423d1e092", - "line_pivots_by_a_column_": "852f8c36b1abdb07019dd7838cec93b1", - "line_pivots_by_a_row_and_a_column_": "9fe1536f521b9977aae9390c04791ec2", - "line_pivots_by_two_rows_and_two_columns_": "d4f58b5079a03b7df15bf9e8b5abecc5", - "line_sorts_by_a_hidden_column_": "57ad4f52dc974602ad73e940258a344c", - "line_sorts_by_a_numeric_column_": "efb5c9fb7d4ddc4403c6206fee181b85", - "line_filters_by_a_numeric_column_": "c72ef823fc99767c461330da40117185", - "line_filters_by_a_datetime_column_": "3257ccf5821d2d6a1eb7da08f3ee9c58", - "line_highlights_invalid_filter_": "9ef07dde3e12651050c642546657e3b0", - "line_sorts_by_an_alpha_column_": "afb8c2eef43a6ea16fc10ecfb8979e20", - "line_displays_visible_columns_": "a2d08e3ebffcf45279d9655d4fbcd17f", + "line_pivots_by_a_row_": "27d5827dc2f25bb7cc766af54e947a20", + "line_pivots_by_two_rows_": "b39b40ee08b31751397f0846b66d50c2", + "line_pivots_by_a_column_": "82fa1c50a36cd33b84c54834bbbc7848", + "line_pivots_by_a_row_and_a_column_": "766c8f8cedbe569e1b2cae7e4f2bcfb2", + "line_pivots_by_two_rows_and_two_columns_": "59322589673eb4bbde98d047884b28c2", + "line_sorts_by_a_hidden_column_": "ab0f7707aae419aa6e7a8a02374e91d1", + "line_sorts_by_a_numeric_column_": "3dc3e7a148851215a5b688289246480b", + "line_filters_by_a_numeric_column_": "cc0c2c1badf59b0467b2168a98e389f4", + "line_filters_by_a_datetime_column_": "d4fe46e52a7b8951b5cf04bddb057860", + "line_highlights_invalid_filter_": "b1485c7cfcc7b394d3ad23890bdd4690", + "line_sorts_by_an_alpha_column_": "4e474d8d7a082b8cb8ab00bb425797f0", + "line_displays_visible_columns_": "43ba319bf29e2336f1f4ac960e7619bd", "area_shows_a_grid_without_any_settings_applied_": "93f61e4a4c43a3adfaf0bbb68207a7d8", - "area_pivots_by_a_row_": "8c9881bb0163b6aa48209d199f82ed47", - "area_pivots_by_two_rows_": "8b5ce19705814510a22ace87cb957a0f", - "area_pivots_by_a_column_": "37969915321c9caf81706a02147b2718", - "area_pivots_by_a_row_and_a_column_": "ad1e4fd193ef4f5e7fbe91cad0917fc4", - "area_pivots_by_two_rows_and_two_columns_": "05cced5930ae09a5927bab7368b82bd6", - "area_sorts_by_a_hidden_column_": "ae5581f7c1f19d1b745d98e9072e85ee", - "area_sorts_by_a_numeric_column_": "f9b64fc3bd374fc0bc9361e7db9cb6c8", - "area_filters_by_a_numeric_column_": "91dea755e8d895bc626e8cd0289abf2e", - "area_filters_by_a_datetime_column_": "5865eff1de56eed8d4891fe584532b97", - "area_highlights_invalid_filter_": "7200dfdb095fd8758e6b683c245742b1", - "area_sorts_by_an_alpha_column_": "5e037514b5f188588949e746613f2c01", - "area_displays_visible_columns_": "1121a9c51f6667d92d753780549dc970", + "area_pivots_by_a_row_": "0f82beded4a0c44715889a476456f24e", + "area_pivots_by_two_rows_": "2659ecbd6ce786c579346190a7b4a65e", + "area_pivots_by_a_column_": "3a8b5659bb5e8c16be3f8afcafc36350", + "area_pivots_by_a_row_and_a_column_": "d38c3f66783f477836dbb9e5e95ec7b5", + "area_pivots_by_two_rows_and_two_columns_": "26076c5c191a524e4f4240a68b7813af", + "area_sorts_by_a_hidden_column_": "43e01b4b72d1019ac5e3f81616df66a7", + "area_sorts_by_a_numeric_column_": "b068d1828856db2a69d294019f4ec1f8", + "area_filters_by_a_numeric_column_": "16d9498b1fa2c99e4bb265447865cbd7", + "area_filters_by_a_datetime_column_": "5d31b814eb91fe38bde814d3d34c9694", + "area_highlights_invalid_filter_": "52be3280aaba807523f429e0758921f2", + "area_sorts_by_an_alpha_column_": "80dc5dabb6c4383e3b0d54ed756f44ca", + "area_displays_visible_columns_": "b4d734622f197c72ec6209ebf83f1df7", "scatter_tooltips_with_no_color_and_size_": "9a9667ba1c2e22f17611e3aefd8f6d78", - "scatter_tooltip_columns_works": "b59a3ea8b011760748e9b6733c52e8d3", - "scatter_tooltip_columns_works_when_color_column_is_null": "ccc4c40761f7f991ff86fd2c2a5389d4", + "scatter_tooltip_columns_works": "45237a2a00f1f0a4f645c841c932c448", + "scatter_tooltip_columns_works_when_color_column_is_null": "279ce46543716ce6a4db04a5fbee477e", "treemap_shows_a_grid_without_any_settings_applied_": "e61a53b560304cfbe3a6464218910cb0", - "treemap_pivots_by_a_row_": "43ae270c3a6e7ed5c19bf71aa3a35e9c", - "treemap_pivots_by_two_rows_": "1e1679e4b43672360c903055f5694b23", - "treemap_pivots_by_a_column_": "31229e560fb4a9ddba475d8da60fcef4", - "treemap_pivots_by_a_row_and_a_column_": "83151e882eaf24bb53ae7836f7b127aa", - "treemap_pivots_by_two_rows_and_two_columns_": "617ba2b4c3feb1e3d43f3cbedd149a2c", - "treemap_sorts_by_a_hidden_column_": "3c1cbbe9d52aaf10c29234f62fbbae09", - "treemap_sorts_by_a_numeric_column_": "320b033284dbb109f1c6cc63bab03f05", - "treemap_filters_by_a_numeric_column_": "3a847b92ff685756d93c9f073e58f331", - "treemap_filters_by_a_datetime_column_": "1bfe00e99610bc336d676a567f63e07d", - "treemap_highlights_invalid_filter_": "a397780b30b0e7bf990232472ebe9176", - "treemap_sorts_by_an_alpha_column_": "046d040908811f04c15f7646d9d70733", - "treemap_displays_visible_columns_": "62996aa87b1237b0be568a339a700bdf", - "treemap_with_column_position_1_set_to_null_": "c23aeb23156dc3cbe5df1a30729b7a3d", - "treemap_tooltip_columns_works": "6db99d38579a74ff0a1a2b7625866cab", - "line_Sets_a_category_axis_when_pivoted_by_a_computed_datetime": "eb1c86dc44988ad9a65fdd5a335850b8", + "treemap_pivots_by_a_row_": "567ffebc01e3849aabd04750529dd5a9", + "treemap_pivots_by_two_rows_": "1960a32fc891a6ee3345c627e99f8807", + "treemap_pivots_by_a_column_": "9cf2283ff8d50d5925d3067e3a92f383", + "treemap_pivots_by_a_row_and_a_column_": "f5249f2acf77519bb58bdb6a3f977d3f", + "treemap_pivots_by_two_rows_and_two_columns_": "8fcd74e58b004a32a8c225fd66f42731", + "treemap_sorts_by_a_hidden_column_": "222bb23c02489c22300de0ea03d32f88", + "treemap_sorts_by_a_numeric_column_": "e69944af74526797c821817757d97c33", + "treemap_filters_by_a_numeric_column_": "39b9185e077bee99ee8f6a2e7b535f53", + "treemap_filters_by_a_datetime_column_": "b792d5349dd91e1ce3cafa970f099af7", + "treemap_highlights_invalid_filter_": "952bf2840ffbd400e15a6a9c22fc5a64", + "treemap_sorts_by_an_alpha_column_": "ca9b4554d557278f7ddf295f4fa9100f", + "treemap_displays_visible_columns_": "8bb7edeafde1b360e69a3c82311f6bd4", + "treemap_with_column_position_1_set_to_null_": "f50a1857b0ccee3a397e3eacc91931e9", + "treemap_tooltip_columns_works": "654484647891ebbc8e0e9ec41ceddfcd", + "line_Sets_a_category_axis_when_pivoted_by_a_computed_datetime": "b35442f81409b8c269f1e03b859cb09f", "sunburst_sunburst_label_shows_formatted_date": "590f474e076fd49ce10eb5e97bfc66d3", - "treemap_treemap_label_shows_formatted_date": "5286b0c316c4b4b4d8f95edc41166578", + "treemap_treemap_label_shows_formatted_date": "75b8779f329f3197699c97ee5202f7a3", "xyline_shows_a_grid_without_any_settings_applied_": "6d4bdd941a04d6e39fe14c2ea001886e", - "xyline_pivots_by_a_row_": "14c3bc345abb892ccab5ac2dad1f777a", - "xyline_pivots_by_two_rows_": "5ea6481c8e465b87a1fcaca2e8b2a759", - "xyline_pivots_by_a_column_": "665a82227ede8b9129c6c88574097eae", - "xyline_pivots_by_a_row_and_a_column_": "29f56130b2d178def3a3feb24304d779", - "xyline_pivots_by_two_rows_and_two_columns_": "70fc58b149bb28dd571fce92616eec02", - "xyline_sorts_by_a_hidden_column_": "c38fc638ae076df1f0caf51214bf7938", - "xyline_sorts_by_a_numeric_column_": "eb6022557a889eafce59eda986600f0c", - "xyline_filters_by_a_numeric_column_": "076a949ff4297a506463a0182159fc8b", - "xyline_filters_by_a_datetime_column_": "e43eb7c8d45ff150e8c4c8713b3bafd5", - "xyline_highlights_invalid_filter_": "f8f312392516ed4dfa070d436f6ede30", - "xyline_sorts_by_an_alpha_column_": "9cc9fc4d49b0671eeca80ad272664256", - "xyline_displays_visible_columns_": "533a7f651187eb77e32b2e0690b0b7b9" + "xyline_pivots_by_a_row_": "cc7bd9ed73adab5da553a918daabd1bb", + "xyline_pivots_by_two_rows_": "c6e57809b071e1db5eb545e1847bc09a", + "xyline_pivots_by_a_column_": "21a49e82868104a4262c09342e5c8af1", + "xyline_pivots_by_a_row_and_a_column_": "75a3a097de1d1be932b395aaca1882d2", + "xyline_pivots_by_two_rows_and_two_columns_": "efb94dc0c9c84f07747bd4b9a3caf533", + "xyline_sorts_by_a_hidden_column_": "5781668342ee488374c6789f500a034c", + "xyline_sorts_by_a_numeric_column_": "399e47b6ec4f1f2ad4813530e2603043", + "xyline_filters_by_a_numeric_column_": "a267179fad6f20beb16ff4399c9bd89f", + "xyline_filters_by_a_datetime_column_": "686bf3d9a7ae981505826700d023a61a", + "xyline_highlights_invalid_filter_": "824c12f472d7df047c4c221d0168dadd", + "xyline_sorts_by_an_alpha_column_": "e2410b6def6a5916e9a9f96063ad1352", + "xyline_displays_visible_columns_": "dcd473a0a6f0dfe49b8a0b8781757a7c" } \ No newline at end of file diff --git a/packages/perspective-viewer-datagrid/test/results/linux.docker.json b/packages/perspective-viewer-datagrid/test/results/linux.docker.json index dd7da37a40..53bc258675 100644 --- a/packages/perspective-viewer-datagrid/test/results/linux.docker.json +++ b/packages/perspective-viewer-datagrid/test/results/linux.docker.json @@ -5,14 +5,14 @@ "superstore_pivots_by_a_column_": "8f064a78e0b77f505632df053317a806", "superstore_pivots_by_a_row_and_a_column_": "c3cd66b434ae463f556fd3ab429d4f2a", "superstore_pivots_by_two_rows_and_two_columns_": "ddbfbcb81e6fda2fbba7405f5ab00443", - "superstore_sorts_by_a_hidden_column_": "910bdbc2579b1e8f24282356fae5d317", + "superstore_sorts_by_a_hidden_column_": "a4b2f7179bdb07728465f5f2a510c512", "superstore_sorts_by_a_numeric_column_": "880ea680df9bc48415450553735c2a6c", "superstore_filters_by_a_numeric_column_": "45d824114c57a56b990fe637f5514330", "superstore_filters_by_a_datetime_column_": "4ebf21eead875f46e1470714ef79ba3d", "superstore_highlights_invalid_filter_": "452e3d6d1fb9fcb18bfc6a3b6ef021ff", "superstore_sorts_by_an_alpha_column_": "7d4a5cde8d795e020eec5e27763eacbd", - "superstore_displays_visible_columns_": "91b906e38d0e3115c929cfa5e15cb5fb", + "superstore_displays_visible_columns_": "e2f92ee6e81b832b526e4d262577eb4e", "superstore_resets_viewable_area_when_the_logical_size_expands_": "e0dcc4db517a7ff5471f27301aaceb29", "superstore_resets_viewable_area_when_the_physical_size_expands_": "7e269a544b300a204b82806f32bef31b", - "__GIT_COMMIT__": "8256bc9394fe07d15b2d7856f8f94630c1df6a87" + "__GIT_COMMIT__": "ffcc3be9fb137f23a21e3737e07e809e55e71e49" } \ No newline at end of file diff --git a/packages/perspective-viewer/test/results/linux.docker.json b/packages/perspective-viewer/test/results/linux.docker.json index 9793d81ea4..29baab00be 100644 --- a/packages/perspective-viewer/test/results/linux.docker.json +++ b/packages/perspective-viewer/test/results/linux.docker.json @@ -1,11 +1,11 @@ { "Computed_Expressions_click_on_add_column_button_opens_the_computed_expression_UI_": "787046935085a24c7537df4932edc360", - "Computed_Expressions_click_on_close_button_closes_the_computed_expression_UI_": "6bb59f8783bc095f18245153a7a8309f", + "Computed_Expressions_click_on_close_button_closes_the_computed_expression_UI_": "1494dcc679123ecc1f739d250122530e", "Computed_Expressions_An_expression_that_doesn_t_reach_max-width_should_undock_the_autocomplete": "f69452acb9374ff54c2d2efc02f2e756", "Computed_Expressions_Typing_a_partial_expression_should_search_by_expression_label_and_value": "f8fe9572b3e91bca257080accb8a204e", "Computed_Expressions_Typing_a_column_name_followed_by_a_partial_function_should_not_show_autocomplete": "41f895894f53f4e88aee8516fc4dc823", "Computed_Expressions_Typing_an_alias_should_not_show_autocomplete": "93882c206fb320ffc1b25f24edede902", - "Computed_Expressions_Typing_a_numeric_function_should_show_autocomplete_for_numeric_columns": "3049062e1f7ae5467b612c3ec3ddbe38", + "Computed_Expressions_Typing_a_numeric_function_should_show_autocomplete_for_numeric_columns": "d02fa621e93cf99b9de1e41ea87774ff", "Computed_Expressions_Typing_a_string_function_should_show_autocomplete_for_string_columns": "d7228918383dad5572179f46101608bf", "Computed_Expressions_Typing_a_datetime_function_should_show_autocomplete_for_datetime_columns": "cb6f3d0b24f214f02ca5b44dbd61471e", "Computed_Expressions_Typing_a_partial_column_name_should_show_autocomplete": "dc6bc237afed37845241281d9e7013d7", @@ -20,32 +20,32 @@ "Computed_Expressions_Pressing_arrow_up_from_the_first_item_should_select_the_last_autocomplete_item": "d5ac2a245dd20f99d8b1bbab292c66c6", "Computed_Expressions_Pressing_arrow_down_on_an_undocked_autocomplete_should_select_the_next_autocomplete_item": "32cc425a0c1426805776307befa4808c", "Computed_Expressions_Pressing_arrow_down_on_the_last_item_on_an_undocked_autocomplete_should_select_the_first_autocomplete_item": "a87b2a4caeb89462218838e0aa4b1c99", - "Computed_Expressions_Pressing_arrow_up_on_an_undocked_autocomplete_should_select_the_previous_autocomplete_item": "9fb08a9f61a7624be9c5440c15d0e959", - "Computed_Expressions_Pressing_arrow_up_from_the_first_item_on_an_undocked_autocomplete_should_select_the_last_autocomplete_item": "9fb08a9f61a7624be9c5440c15d0e959", + "Computed_Expressions_Pressing_arrow_up_on_an_undocked_autocomplete_should_select_the_previous_autocomplete_item": "d933230508af2bdb2c1ba6ac92ec6d02", + "Computed_Expressions_Pressing_arrow_up_from_the_first_item_on_an_undocked_autocomplete_should_select_the_last_autocomplete_item": "d933230508af2bdb2c1ba6ac92ec6d02", "Computed_Expressions_Pressing_enter_should_apply_the_autocomplete_item": "41d3ce99b43fbb426bc0082a35a8a398", "Computed_Expressions_Pressing_enter_should_apply_the_selected_column": "8195926349fe8e79798e5986f106d0eb", "Computed_Expressions_Column_replace_should_work_for_a_fragment": "db8a6e6c1c6d7ccb84c91fb438c84c33", - "Computed_Expressions_Removing_computed_columns_should_reset_active_columns,_pivots,_sort,_and_filter_": "e274afe52fbb2cc0f41afb9ffb5214e2", + "Computed_Expressions_Removing_computed_columns_should_reset_active_columns,_pivots,_sort,_and_filter_": "762f323aac929b8203392612327dfb15", "Computed_Expressions_Resetting_the_viewer_with_computed_columns_should_place_columns_in_the_inactive_list_": "5ee83390a075ca717aed3301d4edeb84", "Computed_Expressions_Resetting_the_viewer_with_computed_columns_in_active_columns_should_reset_columns_but_not_delete_columns_": "779b8e16d0572daf15fc1f9948396071", "Computed_Expressions_Resetting_the_viewer_with_computed_columns_set_as_pivots_should_reset_pivots_but_not_delete_columns_": "779b8e16d0572daf15fc1f9948396071", "Computed_Expressions_Resetting_the_viewer_with_computed_columns_set_as_sort_should_reset_sort_but_not_delete_columns_": "779b8e16d0572daf15fc1f9948396071", "Computed_Expressions_Resetting_the_viewer_with_computed_columns_set_as_filters_should_reset_filters_but_not_delete_columns_": "779b8e16d0572daf15fc1f9948396071", "Computed_Expressions_saving_without_an_expression_should_fail_as_button_is_disabled_": "787046935085a24c7537df4932edc360", - "Computed_Expressions_saving_a_single_computed_expression_should_add_it_to_inactive_columns_": "e00cd2bd0896f8bd3b9d24f5cd65dc48", - "Computed_Expressions_saving_a_single_computed_expression_with_dependencies_should_add_all_columns_to_inactive_columns_": "05727090e59408a6bc980815bf77219c", - "Computed_Expressions_aggregates_by_computed_expression_column_": "045f9897dae281c99dfb753ba4b6571e", - "Computed_Expressions_computed_expression_column_aggregates_should_persist_": "7ef5c1843d3a809ee4c98f8e5ae46d4b", - "Computed_Expressions_Computed_expression_columns_should_persist_when_new_views_are_created_": "a15bde5601988f3675f9609cd1d32cea", - "Computed_Expressions_Computed_expression_columns_should_persist_when_new_computed_columns_are_added_": "f6b27f5d4a316fea6f7fa2e2cb2a6695", - "Computed_Expressions_sorts_by_computed_expression_column_": "3a20e3e392fd51df0d1080222bb3ec00", - "Computed_Expressions_filters_by_computed_expression_column_": "4c61a87c0d8a97299f8beb45a1b100bb", - "Computed_Expressions_row_pivots_by_computed_expression_column_": "7f1193c0d5263875d64608eb64b6a5b5", - "Computed_Expressions_column_pivots_by_computed_expression_column_": "d61ecec2fb1ffb2300b7c24628992c7f", - "Computed_Expressions_row_and_column_pivots_by_computed_expression_column_": "6c6fc5add16563e94159a94fac0a0259", - "Computed_Expressions_adds_computed_expression_via_attribute": "61823f0bc691ac151934b31effb46d8a", - "Computed_Expressions_adds_computed_expression_via_attribute_in_classic_syntax": "41af6c36a9d04595cdbd4d21b48cc75f", - "Computed_Expressions_Computed_expressions_are_saved_without_changes": "563f8c45e0ec32bc4b07d30ed5f0086e", + "Computed_Expressions_saving_a_single_computed_expression_should_add_it_to_inactive_columns_": "d9f64695081f8a824f66fd7285a6b085", + "Computed_Expressions_saving_a_single_computed_expression_with_dependencies_should_add_all_columns_to_inactive_columns_": "929920ddfe72148dcb4893537501de85", + "Computed_Expressions_aggregates_by_computed_expression_column_": "a52caa73f331fa70d926ab52fc688a9d", + "Computed_Expressions_computed_expression_column_aggregates_should_persist_": "9c6ef43bbf5f984b367377006fe85d9c", + "Computed_Expressions_Computed_expression_columns_should_persist_when_new_views_are_created_": "ff62caedc89894e96516a4e252e1a3a4", + "Computed_Expressions_Computed_expression_columns_should_persist_when_new_computed_columns_are_added_": "97d775dcd2ef680bcfa15962dc40df6d", + "Computed_Expressions_sorts_by_computed_expression_column_": "af7faf71d363f0de30cee5265b73c9e7", + "Computed_Expressions_filters_by_computed_expression_column_": "b2a740ee7201d70539cd0ee3ec04de01", + "Computed_Expressions_row_pivots_by_computed_expression_column_": "92d9b2f00eac70dbcd8bdffbc2fa6054", + "Computed_Expressions_column_pivots_by_computed_expression_column_": "6b2010e262f1980ddfd5d41c9731ceb4", + "Computed_Expressions_row_and_column_pivots_by_computed_expression_column_": "7c32f25dede9ce764c70e230ff45de6d", + "Computed_Expressions_adds_computed_expression_via_attribute": "2c95b846c47240435c577d7d28e8b2e9", + "Computed_Expressions_adds_computed_expression_via_attribute_in_classic_syntax": "31c624a22709d460c350a55f751df209", + "Computed_Expressions_Computed_expressions_are_saved_without_changes": "ba6eba3b8e6275619fc9117df92f26ba", "Computed_Expressions_Computed_expressions_are_restored_without_changes": "69f4a6ff3b226eacb9e09b8828b2025b", "Computed_Expressions_On_restore,_computed_expressions_in_the_active_columns_list_are_restored_correctly_": "563f8c45e0ec32bc4b07d30ed5f0086e", "Computed_Expressions_On_restore,_computed_expressions_in_pivots_are_restored_correctly_": "d2ea399d6c4b1ee2051f054a6d474b7a", @@ -54,7 +54,7 @@ "Computed_Expressions_On_restore,_computed_expressions_in_classic_syntax_are_parsed_correctly_": "563f8c45e0ec32bc4b07d30ed5f0086e", "superstore_adds_computed_column_via_attribute": "f2fa526d6f0c168a47ab272bb7ae5771", "superstore_user_defined_aggregates_maintained_on_computed_columns": "c7aa2902feb512ceb1e1487de80878ff", - "__GIT_COMMIT__": "2df957aa0feae17b14622bcd2cc489b5421215fd", + "__GIT_COMMIT__": "ffcc3be9fb137f23a21e3737e07e809e55e71e49", "blank_Handles_reloading_with_a_schema_": "9d171ff4c95a7f31eff2e7127b8caedf", "superstore_doesn_t_leak_tables_": "5bb762b2860eb5af067bb83afbca3264", "superstore_doesn_t_leak_elements_": "5bb762b2860eb5af067bb83afbca3264", @@ -66,14 +66,14 @@ "superstore_pivots_by_a_column_": "4ad247aa09460d938c15608e51a83625", "superstore_pivots_by_a_row_and_a_column_": "fcf835ffb19bef22a7c427dd9bc125bf", "superstore_pivots_by_two_rows_and_two_columns_": "b5262c6cfe8cc79caa6a6fda3b6d32c7", - "superstore_sorts_by_a_hidden_column_": "bf9991d79cb9c3c9cb3a8f25cea2e77e", + "superstore_sorts_by_a_hidden_column_": "3213055bb3f02e8a889a6b5dd3888e7d", "superstore_sorts_by_a_numeric_column_": "052052c4cbbb787417ffcd3066b7fd53", "superstore_filters_by_a_numeric_column_": "1ca66d76cb980d704b02c49f15b63b8f", "superstore_filters_by_a_datetime_column_": "9f09bdba0a81968a6b8a776c16053797", "superstore_highlights_invalid_filter_": "08bcfe85294601215c7a3ff615aa528b", "superstore_sorts_by_an_alpha_column_": "405370214ee975d003def318787868d1", - "superstore_displays_visible_columns_": "7a7118d6cdb7ce751d5fbb5a36141e6a", - "superstore_Responsive_Layout_shows_horizontal_columns_on_small_vertical_viewports_": "d90b1544ef1efa405b6eb68ee136e4af", + "superstore_displays_visible_columns_": "a19d6196607e9846a1abdf2d163e100c", + "superstore_Responsive_Layout_shows_horizontal_columns_on_small_vertical_viewports_": "be8fad6e926895793b7d0b80cae0fe8d", "superstore_Responsive_Layout_shows_horizontal_columns_on_small_vertical_and_horizontal_viewports_": "e51afee0ec45554b3a27d1ec313305f6", "superstore_replaces_all_rows_": "03995f23192c53d36160ab55127ff5c0", "blank_When_transferables_are_enabled,_transfers_an_arrow_in_load_": "dcfab9d5536933ba80b87b503985f398", diff --git a/packages/perspective/README.md b/packages/perspective/README.md index 3a400d72df..c2b4774724 100644 --- a/packages/perspective/README.md +++ b/packages/perspective/README.md @@ -281,9 +281,6 @@ serialize. serialize. - .end_col number - The ending column index from which to serialize. - - .config Object - A config object for the Papaparse -[https://www.papaparse.com/docs#json-to-csv](https://www.papaparse.com/docs#json-to-csv) config object. - * * * diff --git a/packages/perspective/package.json b/packages/perspective/package.json index 383eaf915d..a38298536a 100644 --- a/packages/perspective/package.json +++ b/packages/perspective/package.json @@ -59,13 +59,13 @@ "flatbuffers": "^1.10.2", "lodash": "^4.17.4", "moment": "^2.19.1", - "papaparse": "^5.2.0", "text-encoding-utf-8": "^1.0.2", "tslib": "^1.9.3", "ws": "^6.1.2" }, "devDependencies": { "@finos/perspective-webpack-plugin": "^0.5.5", - "jsverify": "^0.8.4" + "jsverify": "^0.8.4", + "papaparse": "^5.2.0" } } diff --git a/packages/perspective/src/js/perspective.js b/packages/perspective/src/js/perspective.js index 7a1ae295f4..8264203b76 100644 --- a/packages/perspective/src/js/perspective.js +++ b/packages/perspective/src/js/perspective.js @@ -16,7 +16,6 @@ import {bindall, get_column_type} from "./utils.js"; import {Server} from "./api/server.js"; import formatters from "./view_formatters"; -import papaparse from "papaparse"; // IE fix - chrono::steady_clock depends on performance.now() which does not // exist in IE workers @@ -91,8 +90,8 @@ export default function(Module) { * @private * @returns {Table} An `std::shared_ptr` to a `Table` inside C++. */ - function make_table(accessor, _Table, index, limit, op, is_update, is_arrow, port_id) { - _Table = __MODULE__.make_table(_Table, accessor, limit || 4294967295, index, op, is_update, is_arrow, port_id); + function make_table(accessor, _Table, index, limit, op, is_update, is_arrow, is_csv, port_id) { + _Table = __MODULE__.make_table(_Table, accessor, limit || 4294967295, index, op, is_update, is_arrow, is_csv, port_id); const pool = _Table.get_pool(); const table_id = _Table.get_id(); @@ -594,9 +593,6 @@ export default function(Module) { * serialize. * @param {number} options.end_col The ending column index from which to * serialize. - * @param {Object} options.config A config object for the Papaparse - * {@link https://www.papaparse.com/docs#json-to-csv} config object. - * * @returns {Promise} A Promise resolving to a string in CSV format * representing the rows of this {@link module:perspective~view}. If this * {@link module:perspective~view} had a "row_pivots" config parameter @@ -1440,6 +1436,7 @@ export default function(Module) { let schema = this._Table.get_schema(); let types = schema.types(); let is_arrow = false; + let is_csv = false; pdata = accessor; @@ -1450,13 +1447,9 @@ export default function(Module) { if (data[0] === ",") { data = "_" + data; } - accessor.init(papaparse.parse(data.trim(), {header: true}).data); - accessor.names = cols.concat(accessor.names.filter(x => x === "__INDEX__")); - accessor.types = extract_vector(types).slice(0, accessor.names.length); - - if (meter) { - meter(accessor.row_count); - } + is_csv = true; + is_arrow = true; + pdata = data; } else { accessor.init(data); accessor.names = cols.concat(accessor.names.filter(x => x === "__INDEX__")); @@ -1491,7 +1484,7 @@ export default function(Module) { const op = __MODULE__.t_op.OP_INSERT; // update the Table in C++, but don't keep the returned Table // reference as it is identical - make_table(pdata, this._Table, this.index || "", this.limit, op, true, is_arrow, options.port_id); + make_table(pdata, this._Table, this.index || "", this.limit, op, true, is_arrow, is_csv, options.port_id); this.initialized = true; } catch (e) { console.error(`Update failed: ${e}`); @@ -1533,7 +1526,7 @@ export default function(Module) { const op = __MODULE__.t_op.OP_DELETE; // update the Table in C++, but don't keep the returned Table // reference as it is identical - make_table(pdata, this._Table, this.index || "", this.limit, op, false, is_arrow, options.port_id); + make_table(pdata, this._Table, this.index || "", this.limit, op, false, is_arrow, false, options.port_id); this.initialized = true; } catch (e) { console.error(`Remove failed`, e); @@ -1655,18 +1648,19 @@ export default function(Module) { let data_accessor; let is_arrow = false; let overridden_types = {}; + let is_csv = false; if (data instanceof ArrayBuffer || (typeof Buffer !== "undefined" && data instanceof Buffer)) { data_accessor = new Uint8Array(data); is_arrow = true; - } else { - if (typeof data === "string") { - if (data[0] === ",") { - data = "_" + data; - } - data = papaparse.parse(data.trim(), {dynamicTyping: true, header: true}).data; + } else if (typeof data === "string") { + if (data[0] === ",") { + data = "_" + data; } - + is_csv = true; + is_arrow = true; + data_accessor = data; + } else { accessor.clean(); overridden_types = accessor.init(data); data_accessor = accessor; @@ -1681,7 +1675,7 @@ export default function(Module) { try { const op = __MODULE__.t_op.OP_INSERT; // Always create new tables using port 0 - _Table = make_table(data_accessor, undefined, options.index, options.limit, op, false, is_arrow, 0); + _Table = make_table(data_accessor, undefined, options.index, options.limit, op, false, is_arrow, is_csv, 0); return new table(_Table, options.index, undefined, options.limit, overridden_types); } catch (e) { if (_Table) { diff --git a/packages/perspective/src/js/view_formatters.js b/packages/perspective/src/js/view_formatters.js index d165974b10..d562ee23ac 100644 --- a/packages/perspective/src/js/view_formatters.js +++ b/packages/perspective/src/js/view_formatters.js @@ -7,8 +7,6 @@ * */ -import papaparse from "papaparse"; - const jsonFormatter = { initDataValue: () => [], initRowValue: () => ({}), @@ -23,7 +21,34 @@ const jsonFormatter = { const csvFormatter = Object.assign({}, jsonFormatter, { addColumnValue: (data, row, colName, value) => row[colName.split("|").join(",")].unshift(value), setColumnValue: (data, row, colName, value) => (row[colName.split("|").join(",")] = value), - formatData: (data, config) => papaparse.unparse(data, config) + formatData: function(data, {delimiter = ","} = {}) { + if (data.length === 0) { + return ""; + } + + const format = function(x) { + if (x === null) { + return ""; + } + switch (typeof x) { + case "object": + case "string": + return x.indexOf(delimiter) > -1 ? `"${x}"` : x.toString(); + case "number": + return x; + case "boolean": + return x.toString(); + } + }; + + const columns = Object.keys(data[0]); + let csv = columns.map(format).join(delimiter); + for (let x = 0; x < data.length; x++) { + csv += "\r\n" + columns.map(column => format(data[x][column])).join(delimiter); + } + + return csv; + } }); const jsonTableFormatter = { diff --git a/scripts/bench.js b/scripts/bench.js index 7be9cfca8c..b7a07a1074 100644 --- a/scripts/bench.js +++ b/scripts/bench.js @@ -13,7 +13,7 @@ const args = process.argv.slice(2); const LIMIT = args.indexOf("--limit"); const IS_DELTA = args.indexOf("--delta"); -if (process.env.PSP_PROJECT === undefined || process.env.PSP_PROJECT === "javascript") { +if (process.env.PSP_PROJECT === undefined || process.env.PSP_PROJECT === "js") { function docker() { console.log("Creating puppeteer docker image"); let cmd = "docker run -it --rm --shm-size=2g --cap-add=SYS_NICE -u root -e PACKAGE=${PACKAGE} -e HTTPS_PROXY -e HTTPS_PROXY -v $(pwd):/src -w /src";