Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade WebAssembly build to Arrow 1.0.1 #1207

Merged
merged 1 commit into from
Oct 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/arrow.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ project(arrow-download NONE)
include(ExternalProject)
ExternalProject_Add(apachearrow
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-0.16.0
GIT_TAG apache-arrow-1.0.1
SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build"
CONFIGURE_COMMAND ""
Expand Down
31 changes: 25 additions & 6 deletions cmake/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
set(CMAKE_SHARED_LIBRARY_SUFFIX .so)

set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/builder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_base.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_binary.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_dict.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_nested.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_primitive.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_adaptive.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_base.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_binary.cc
Expand All @@ -13,11 +18,14 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_primitive.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_union.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/concatenate.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/dict_internal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/data.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/diff.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/validate.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/buffer.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/chunked_array.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compare.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/device.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/extension_type.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/memory_pool.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc
Expand All @@ -34,9 +42,9 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/chunker.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_builder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_decoder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/options.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/parser.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/filesystem.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/localfs.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/mockfs.cc
Expand All @@ -50,17 +58,22 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/json/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/buffered.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/compressed.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/file.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/interfaces.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/memory.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/testing/util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/basic_decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_block_counter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_builders.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_ops.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/compression.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/future.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/delimiting.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/int_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/io_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/iterator.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/logging.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/key_value_metadata.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/memory.cc
Expand All @@ -70,11 +83,17 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/thread_pool.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/trie.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/utf8.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/value_parsing.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/double-conversion.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/cached-powers.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/diy-fp.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/bignum.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/strtod.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/datetime/tz.cpp
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/dictionary.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/feather.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_simple.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/message.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/metadata_internal.cc
Expand Down
6 changes: 3 additions & 3 deletions cmake/arrow/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

#define ARROW_VERSION_MAJOR 0
#define ARROW_VERSION_MINOR 16
#define ARROW_VERSION_PATCH 0
#define ARROW_VERSION_MAJOR 1
#define ARROW_VERSION_MINOR 0
#define ARROW_VERSION_PATCH 1
// Single integer encoding of the version: MAJOR * 1000000 + MINOR * 1000 + PATCH.
// The whole expansion is parenthesized so the macro behaves as one operand
// inside larger expressions (e.g. `2 * ARROW_VERSION`, `ARROW_VERSION % 1000`).
#define ARROW_VERSION (((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH)

/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
Expand Down
1 change: 0 additions & 1 deletion cmake/modules/FindFlatbuffers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ if(NOT ${FLATBUFFERS_INCLUDE_DIR})
set(FLATBUFFERS_INCLUDE_DIR /usr/local/include)
endif()

message("${FLATBUFFERS_COMPILER}")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FLATBUFFERS REQUIRED_VARS
FLATBUFFERS_INCLUDE_DIR FLATBUFFERS_COMPILER)
29 changes: 22 additions & 7 deletions cmake/modules/FindPyArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ execute_process(
"from __future__ import print_function\ntry: import pyarrow; print(' '.join(pyarrow.get_libraries()), end='')\nexcept:pass"
OUTPUT_VARIABLE __pyarrow_libraries)

# And the version
# And the version
execute_process(
COMMAND "${Python_EXECUTABLE}" -c
texodus marked this conversation as resolved.
Show resolved Hide resolved
"from __future__ import print_function\ntry: import pyarrow; print(pyarrow.__version__, end='')\nexcept:pass"
Expand All @@ -45,23 +45,38 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
# On Windows it's just "arrow.dll"
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY "arrow_python")
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY "arrow")
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND ${PYARROW_VERSION_MAJOR} EQUAL "1")
# Link against pre-built libarrow on MacOS
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.100.dylib)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't hardcode the version, just use "arrow_python.dylib" etc

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are going to have to cope with the conditional naming for these libraries, as they are not disted in this version under this name (and don't seem to exhibit any consistent version convention at all).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's link this into an apache arrow issue, for pyarrow < 1 they had symlinks and we'll need those for some platforms

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wesm this makes linking against these libraries super ugly. Since the version is managed by python anyway (e.g. in the version of the python package), wouldn't it be better to ship just the unversioned binary (e.g. libarrow.dylib)?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See http://arrow.apache.org/docs/python/extending.html#building-extensions-against-pypi-wheels. We had to make changes to prevent shared libraries from being duplicated in the wheels. It might be possible to change things to ship unversioned shared libraries instead, but it will require some surgery on the Cython modules to get them to link with the unversioned libs (because they look for the libraries with the ABI version when they link). Feel free to open a JIRA issue in Apache Arrow with a proposal -- I have definitely hit the limit for how much time I can personally spend on it

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @wesm

@timkpaine I am not particularly bothered by this, it is easy to fix and this file must be manually updated for new Arrow versions regardless.

set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.100.dylib)
texodus marked this conversation as resolved.
Show resolved Hide resolved
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
# Link against pre-built libarrow on MacOS
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MINOR}.dylib)
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MINOR}.dylib)
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
else()
elseif (${PYARROW_VERSION_MAJOR} EQUAL "1")
# linux
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.100)
texodus marked this conversation as resolved.
Show resolved Hide resolved
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.100)
texodus marked this conversation as resolved.
Show resolved Hide resolved
else()
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
endif()

set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})

if(PYTHON_PYARROW_INCLUDE_DIR AND PYTHON_PYARROW_LIBRARIES)
set(PYTHON_PYARROW_FOUND 1 CACHE INTERNAL "Python pyarrow found")
endif()


# set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
# else()
# # linux
# set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
# set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})



include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PyArrow REQUIRED_VARS PYTHON_PYARROW_INCLUDE_DIR PYTHON_PYARROW_LIBRARIES PYTHON_PYARROW_LIBRARY_DIR
VERSION_VAR __pyarrow_version)
12 changes: 9 additions & 3 deletions cpp/perspective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,13 @@ set (SOURCE_FILES

set(PYTHON_SOURCE_FILES ${SOURCE_FILES}
${PSP_PYTHON_SRC}/src/column.cpp
)
)

set(WASM_SOURCE_FILES ${SOURCE_FILES}
${PSP_CPP_SRC}/src/cpp/arrow_csv.cpp
${PSP_CPP_SRC}/src/cpp/vendor/arrow_single_threaded_reader.cpp
)


set (PYTHON_BINDING_SOURCE_FILES
${PSP_PYTHON_SRC}/src/accessor.cpp
Expand All @@ -570,7 +576,7 @@ else()
endif()

if (PSP_WASM_BUILD)
add_library(psp ${SOURCE_FILES})
add_library(psp ${WASM_SOURCE_FILES})
target_compile_definitions(psp PRIVATE PSP_ENABLE_WASM=1)
set_target_properties(psp PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}")
target_link_libraries(psp arrow)
Expand Down Expand Up @@ -663,7 +669,7 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
endif()
########################
else()
add_library(psp SHARED ${SOURCE_FILES})
add_library(psp SHARED ${WASM_SOURCE_FILES})

# Link perspective against custom-built minimal arrow
target_link_libraries(psp arrow)
Expand Down
60 changes: 60 additions & 0 deletions cpp/perspective/src/cpp/arrow_csv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/******************************************************************************
*
* Copyright (c) 2019, the Perspective Authors.
*
* This file is part of the Perspective library, distributed under the terms of
* the Apache License 2.0. The full license can be found in the LICENSE file.
*
*/

#include <perspective/base.h>
#include <perspective/arrow_csv.h>
#include <arrow/util/value_parsing.h>
#include <arrow/io/memory.h>

// This causes build warnings
// https://github.com/emscripten-core/emscripten/issues/8574
#include <perspective/vendor/arrow_single_threaded_reader.h>

namespace perspective {
namespace apachearrow {

/**
 * @brief Parse an in-memory CSV string into an Arrow table.
 *
 * Reads `csv` through an `arrow::io::BufferReader` using Arrow's default
 * read/parse/convert options, with two adjustments: threading is disabled and
 * an explicit list of timestamp parsers is installed.
 *
 * @param csv       The CSV text to parse.
 *                  NOTE(review): the reader presumably views this buffer
 *                  rather than copying it - confirm `csv` outlives the call.
 * @param is_update When true, `schema` is installed as the explicit column
 *                  types for conversion instead of letting Arrow infer them.
 * @param schema    Column name -> Arrow data type. When `is_update` is true
 *                  this map is MOVED FROM, so the caller's map is left in a
 *                  valid but unspecified state after the call.
 * @return The parsed table. Any failure to construct the reader or to read
 *         the table is reported through `PSP_COMPLAIN_AND_ABORT`.
 */
std::shared_ptr<::arrow::Table>
csvToTable(std::string& csv, bool is_update,
    std::unordered_map<std::string, std::shared_ptr<arrow::DataType>>&
        schema) {
    arrow::MemoryPool* pool = arrow::default_memory_pool();
    auto input = std::make_shared<arrow::io::BufferReader>(csv);
    auto read_options = arrow::csv::ReadOptions::Defaults();
    auto parse_options = arrow::csv::ParseOptions::Defaults();
    auto convert_options = arrow::csv::ConvertOptions::Defaults();

    // Force the single-threaded read path.
    // NOTE(review): presumably required because the WebAssembly target has no
    // usable thread pool - confirm.
    read_options.use_threads = false;

    // Timestamp formats that are tried when converting a column.
    convert_options.timestamp_parsers
        = std::vector<std::shared_ptr<arrow::TimestampParser>>{
            arrow::TimestampParser::MakeISO8601(),
            arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"),
            arrow::TimestampParser::MakeStrptime("%m-%d-%Y"),
            arrow::TimestampParser::MakeStrptime("%m/%d/%Y"),
            arrow::TimestampParser::MakeStrptime("%d %m %Y"),
            arrow::TimestampParser::MakeStrptime("%H:%M:%S.%f"),
        };

    if (is_update) {
        // Updates must conform to the existing table's column types.
        convert_options.column_types = std::move(schema);
    }

    auto maybe_reader = arrow::csv::TableReader::Make(
        pool, input, read_options, parse_options, convert_options);

    // Check the Result before dereferencing it: unwrapping a failed Result is
    // not valid, and the error text would otherwise be lost.
    if (!maybe_reader.ok()) {
        PSP_COMPLAIN_AND_ABORT(maybe_reader.status().ToString());
    }
    std::shared_ptr<arrow::csv::TableReader> reader = *maybe_reader;

    auto maybe_table = reader->Read();
    if (!maybe_table.ok()) {
        PSP_COMPLAIN_AND_ABORT(maybe_table.status().ToString());
    }
    return *maybe_table;
}

} // namespace apachearrow
} // namespace perspective
Loading