diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index e8416c1378a9a..eb189841cd344 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -65,6 +65,7 @@ RUN latest_system_llvm=14 && \ RUN apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ autoconf \ + bzip2 \ ca-certificates \ ccache \ cmake \ @@ -115,10 +116,20 @@ RUN apt-get update -y -q && \ rapidjson-dev \ rsync \ tzdata \ - wget && \ + wget \ + xz-utils && \ apt-get clean && \ rm -rf /var/lib/apt/lists* +# install emscripten using EMSDK +ARG emscripten_version="3.1.45" +RUN cd ~ && git clone https://github.com/emscripten-core/emsdk.git && \ + cd emsdk && \ + ./emsdk install ${emscripten_version} && \ + ./emsdk activate ${emscripten_version} && \ + echo "Installed emsdk to:" ~/emsdk + + ARG gcc_version="" RUN if [ "${gcc_version}" = "" ]; then \ apt-get update -y -q && \ @@ -151,6 +162,9 @@ RUN if [ "${gcc_version}" = "" ]; then \ update-alternatives --set c++ /usr/bin/g++; \ fi +# make sure zlib is cached in the EMSDK folder +RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib + COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 1e09924a5e576..52c89acb9a76a 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -92,112 +92,132 @@ esac mkdir -p ${build_dir} pushd ${build_dir} -cmake \ - -Dabsl_SOURCE=${absl_SOURCE:-} \ - -DARROW_ACERO=${ARROW_ACERO:-OFF} \ - -DARROW_AZURE=${ARROW_AZURE:-OFF} \ - -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ - -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ - -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ - -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ - -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ - -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ - 
-DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ - -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ - -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ - -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \ - -DARROW_CSV=${ARROW_CSV:-ON} \ - -DARROW_CUDA=${ARROW_CUDA:-OFF} \ - -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ - -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \ - -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \ - -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \ - -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \ - -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \ - -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \ - -DARROW_DATASET=${ARROW_DATASET:-OFF} \ - -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ - -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ - -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ - -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ - -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \ - -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ - -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \ - -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \ - -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \ - -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ - -DARROW_GCS=${ARROW_GCS:-OFF} \ - -DARROW_HDFS=${ARROW_HDFS:-ON} \ - -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ - -DARROW_JSON=${ARROW_JSON:-ON} \ - -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ - -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ - -DARROW_ORC=${ARROW_ORC:-OFF} \ - -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ - -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ - -DARROW_S3=${ARROW_S3:-OFF} \ - -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ - -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \ - -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ - 
-DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ - -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ - -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ - -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ - -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \ - -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \ - -DARROW_USE_MOLD=${ARROW_USE_MOLD:-OFF} \ - -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \ - -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \ - -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \ - -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \ - -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \ - -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \ - -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ - -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ - -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \ - -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \ - -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ - -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \ - -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ - -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ - -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ - -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ - -DAzure_SOURCE=${Azure_SOURCE:-} \ - -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ - -DBOOST_SOURCE=${BOOST_SOURCE:-} \ - -DBrotli_SOURCE=${Brotli_SOURCE:-} \ - -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \ - -Dc-ares_SOURCE=${cares_SOURCE:-} \ - -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ - -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ - -DCMAKE_C_FLAGS="${CFLAGS:-}" \ - -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ - -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ - -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ - -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ - -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ - -Dgflags_SOURCE=${gflags_SOURCE:-} \ - -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \ - -DgRPC_SOURCE=${gRPC_SOURCE:-} \ - 
-DGTest_SOURCE=${GTest_SOURCE:-} \ - -Dlz4_SOURCE=${lz4_SOURCE:-} \ - -DORC_SOURCE=${ORC_SOURCE:-} \ - -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ - -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ - -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \ - -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \ - -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \ - -Dre2_SOURCE=${re2_SOURCE:-} \ - -DSnappy_SOURCE=${Snappy_SOURCE:-} \ - -DThrift_SOURCE=${Thrift_SOURCE:-} \ - -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ - -Dzstd_SOURCE=${zstd_SOURCE:-} \ - -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ - -G "${CMAKE_GENERATOR:-Ninja}" \ - ${ARROW_CMAKE_ARGS} \ - ${source_dir} +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then + if [ "${UBUNTU}" = "20.04" ]; then + echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" + exit -1 + fi + n_jobs=2 # Emscripten build fails on docker unless this is set really low + source ~/emsdk/emsdk_env.sh + emcmake cmake \ + --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \ + -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ + -DCMAKE_C_FLAGS="${CFLAGS:-}" \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ + -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + ${ARROW_CMAKE_ARGS} \ + ${source_dir} +else + cmake \ + -Dabsl_SOURCE=${absl_SOURCE:-} \ + -DARROW_ACERO=${ARROW_ACERO:-OFF} \ + -DARROW_AZURE=${ARROW_AZURE:-OFF} \ + -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ + -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ + -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ + -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ + -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ + -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ + -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ + 
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ + -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ + -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \ + -DARROW_CSV=${ARROW_CSV:-ON} \ + -DARROW_CUDA=${ARROW_CUDA:-OFF} \ + -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ + -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \ + -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \ + -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \ + -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \ + -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \ + -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \ + -DARROW_DATASET=${ARROW_DATASET:-OFF} \ + -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ + -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ + -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ + -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ + -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \ + -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ + -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \ + -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \ + -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \ + -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ + -DARROW_GCS=${ARROW_GCS:-OFF} \ + -DARROW_HDFS=${ARROW_HDFS:-ON} \ + -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ + -DARROW_JSON=${ARROW_JSON:-ON} \ + -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ + -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ + -DARROW_ORC=${ARROW_ORC:-OFF} \ + -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ + -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ + -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ + -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \ + -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ + -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ + 
-DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ + -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ + -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \ + -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \ + -DARROW_USE_MOLD=${ARROW_USE_MOLD:-OFF} \ + -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \ + -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \ + -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \ + -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \ + -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \ + -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \ + -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ + -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ + -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \ + -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \ + -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ + -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \ + -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ + -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ + -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ + -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ + -DAzure_SOURCE=${Azure_SOURCE:-} \ + -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ + -DBOOST_SOURCE=${BOOST_SOURCE:-} \ + -DBrotli_SOURCE=${Brotli_SOURCE:-} \ + -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \ + -Dc-ares_SOURCE=${cares_SOURCE:-} \ + -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ + -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ + -DCMAKE_C_FLAGS="${CFLAGS:-}" \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ + -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -Dgflags_SOURCE=${gflags_SOURCE:-} \ + -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \ + -DgRPC_SOURCE=${gRPC_SOURCE:-} \ + -DGTest_SOURCE=${GTest_SOURCE:-} \ + -Dlz4_SOURCE=${lz4_SOURCE:-} \ + 
-DORC_SOURCE=${ORC_SOURCE:-} \ + -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ + -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ + -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \ + -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \ + -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \ + -Dre2_SOURCE=${re2_SOURCE:-} \ + -DSnappy_SOURCE=${Snappy_SOURCE:-} \ + -DThrift_SOURCE=${Thrift_SOURCE:-} \ + -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ + -Dzstd_SOURCE=${zstd_SOURCE:-} \ + -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ + -G "${CMAKE_GENERATOR:-Ninja}" \ + ${ARROW_CMAKE_ARGS} \ + ${source_dir} +fi export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$[${n_jobs} + 1]} time cmake --build . --target install diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index f388825fd0a98..2c640f2c1fb6a 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -80,6 +80,10 @@ case "$(uname)" in ;; esac +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then + n_jobs=1 # avoid spurious fails on emscripten due to loading too many big executables +fi + pushd ${build_dir} if [ -z "${PYTHON}" ] && ! 
which python > /dev/null 2>&1; then diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh old mode 100644 new mode 100755 diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 9d99b3b2a79e0..13d1241990c31 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -46,6 +46,32 @@ "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, + { + "name": "features-emscripten", + "hidden": true, + "cacheVariables": { + "ARROW_ACERO": "ON", + "ARROW_BUILD_SHARED": "OFF", + "ARROW_BUILD_STATIC": "ON", + "ARROW_CUDA": "OFF", + "ARROW_DEPENDENCY_SOURCE": "BUNDLED", + "ARROW_DEPENDENCY_USE_SHARED": "OFF", + "ARROW_ENABLE_THREADING": "OFF", + "ARROW_FLIGHT": "OFF", + "ARROW_IPC": "ON", + "ARROW_JEMALLOC": "OFF", + "ARROW_MIMALLOC": "OFF", + "ARROW_ORC": "ON", + "ARROW_RUNTIME_SIMD_LEVEL": "NONE", + "ARROW_S3": "OFF", + "ARROW_SIMD_LEVEL": "NONE", + "ARROW_SUBSTRAIT": "ON", + "ARROW_WITH_BROTLI": "ON", + "ARROW_WITH_OPENTELEMETRY": "OFF", + "ARROW_WITH_SNAPPY": "ON", + "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN" + } + }, { "name": "features-minimal", "hidden": true, @@ -341,6 +367,24 @@ "displayName": "Release build with CUDA integration", "cacheVariables": {} }, + { + "name": "ninja-debug-emscripten", + "inherits": [ + "features-emscripten", + "base-debug" + ], + "displayName": "Debug build which builds an Emscripten library", + "cacheVariables": {} + }, + { + "name": "ninja-release-emscripten", + "inherits": [ + "features-emscripten", + "base-release" + ], + "displayName": "Release build which builds an Emscripten library", + "cacheVariables": {} + }, { "name": "ninja-release-flight", "inherits": [ @@ -447,4 +491,4 @@ } } ] -} +} \ No newline at end of file diff --git a/cpp/build-support/emscripten-test-init.js b/cpp/build-support/emscripten-test-init.js new file mode 100644 index 0000000000000..bbb542a29f021 --- /dev/null +++ b/cpp/build-support/emscripten-test-init.js @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor 
license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +var Module = { +}; + +// make sure tests can access the current parquet test data files +Module.preRun = () => {ENV.PARQUET_TEST_DATA = process.env.PARQUET_TEST_DATA; + ENV.ARROW_TEST_DATA = process.env.ARROW_TEST_DATA; +}; \ No newline at end of file diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 7a45e9cca59de..e7523add27223 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -760,8 +760,8 @@ function(ADD_TEST_CASE REL_TEST_NAME) valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \ --num-callers=500 --leak-check=full --leak-check-heuristics=stdstring \ --error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}") - elseif(WIN32) - add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS}) + elseif(WIN32 OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME} ${ARG_TEST_ARGUMENTS}) else() add_test(${TEST_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 90decb4224ec6..1d709fe98d7fe 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -24,7 +24,9 @@ include(CheckCXXSourceCompiles) 
message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}") if(NOT DEFINED ARROW_CPU_FLAG) - if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + set(ARROW_CPU_FLAG "emscripten") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64") set(ARROW_CPU_FLAG "x86") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64") set(ARROW_CPU_FLAG "aarch64") @@ -312,7 +314,12 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32") + else() + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32") + endif() set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand") @@ -692,17 +699,36 @@ if(NOT MSVC) set(C_DEBUG_FLAGS "") set(CXX_DEBUG_FLAGS "") if(NOT MSVC) - if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O") - string(APPEND C_DEBUG_FLAGS " -O0") - endif() - if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") - string(APPEND CXX_DEBUG_FLAGS " -O0") - endif() - if(ARROW_GGDB_DEBUG) - string(APPEND C_DEBUG_FLAGS " -ggdb") - string(APPEND CXX_DEBUG_FLAGS " -ggdb") - string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb") - string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # with -g it uses DWARF debug info, which is really slow to build + # on emscripten (and uses tons of memory) + string(REPLACE "-g" " " CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + string(REPLACE "-g" " " CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) + 
string(APPEND C_DEBUG_FLAGS " -g2") + string(APPEND CXX_DEBUG_FLAGS " -g2") + string(APPEND C_RELWITHDEBINFO_FLAGS " -g2") + string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2") + # without -O1, emscripten executables are *MASSIVE*. Don't use -O0 + if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O") + string(APPEND C_DEBUG_FLAGS " -O1") + endif() + if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") + string(APPEND CXX_DEBUG_FLAGS " -O1") + endif() + else() + if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O") + string(APPEND C_DEBUG_FLAGS " -O0") + endif() + if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") + string(APPEND CXX_DEBUG_FLAGS " -O0") + endif() + + if(ARROW_GGDB_DEBUG) + string(APPEND C_DEBUG_FLAGS " -ggdb") + string(APPEND CXX_DEBUG_FLAGS " -ggdb") + string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb") + string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb") + endif() endif() endif() @@ -733,3 +759,40 @@ if(MSVC) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}") endif() endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # flags are: + # 1) We force *everything* to build as position independent + # 2) And with support for C++ exceptions + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions") + # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator") + + # flags for creating shared libraries (only used in pyarrow, because + # Emscripten builds libarrow as static) + # flags are: + # 1) Tell it to use JavaScript / WebAssembly 64 bit number support. 
+ # 2) Tell it to build with support for C++ exceptions + # 3) Skip linker flags error which happens with -soname parameter + set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS + "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS + "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + if(ARROW_TESTING) + # flags for building test executables for use in node + if("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") + set(CMAKE_EXE_LINKER_FLAGS + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + ) + else() + set(CMAKE_EXE_LINKER_FLAGS + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + ) + endif() + else() + set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") + endif() +endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index ad7344b09dd4e..4a67eac1d4d59 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -976,6 +976,23 @@ set(EP_COMMON_CMAKE_ARGS -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE}) +# if building with a toolchain file, pass that through +if(CMAKE_TOOLCHAIN_FILE) + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}) +endif() + +# and crosscompiling emulator (for try_run() ) +if(CMAKE_CROSSCOMPILING_EMULATOR) + string(REPLACE ";" ${EP_LIST_SEPARATOR} EP_CMAKE_CROSSCOMPILING_EMULATOR + "${CMAKE_CROSSCOMPILING_EMULATOR}") + list(APPEND EP_COMMON_CMAKE_ARGS + -DCMAKE_CROSSCOMPILING_EMULATOR=${EP_CMAKE_CROSSCOMPILING_EMULATOR}) 
+endif() + +if(CMAKE_PROJECT_INCLUDE) + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_PROJECT_INCLUDE=${CMAKE_PROJECT_INCLUDE}) +endif() + # Enable s/ccache if set by parent. if(CMAKE_C_COMPILER_LAUNCHER AND CMAKE_CXX_COMPILER_LAUNCHER) list(APPEND EP_COMMON_CMAKE_ARGS @@ -1349,6 +1366,14 @@ macro(build_snappy) set(SNAPPY_PATCH_COMMAND) endif() + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # ignore linker flag errors, as Snappy sets + # -Werror -Wall, and Emscripten doesn't support -soname + list(APPEND SNAPPY_CMAKE_ARGS + "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}" + "-Wno-error=linkflags") + endif() + externalproject_add(snappy_ep ${EP_COMMON_OPTIONS} BUILD_IN_SOURCE 1 @@ -1394,6 +1419,7 @@ macro(build_brotli) message(STATUS "Building brotli from source") set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install") set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include") + set(BROTLI_LIB_DIR "${BROTLI_PREFIX}/lib") set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" ) @@ -1405,6 +1431,26 @@ macro(build_brotli) ) set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}") + set(BROTLI_EP_OPTIONS) + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # "cmake install" is disabled for Brotli on Emscripten, so the + # default INSTALL_COMMAND fails. We need to disable the default + # INSTALL_COMMAND. 
+ list(APPEND + BROTLI_EP_OPTIONS + INSTALL_COMMAND + ${CMAKE_COMMAND} + -E + true) + + set(BROTLI_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build) + set(BROTLI_BUILD_LIBS + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + endif() + externalproject_add(brotli_ep ${EP_COMMON_OPTIONS} URL ${BROTLI_SOURCE_URL} @@ -1414,7 +1460,20 @@ macro(build_brotli) "${BROTLI_STATIC_LIBRARY_COMMON}" ${BROTLI_BUILD_BYPRODUCTS} CMAKE_ARGS ${BROTLI_CMAKE_ARGS} - STEP_TARGETS headers_copy) + STEP_TARGETS headers_copy ${BROTLI_EP_OPTIONS}) + + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # Copy the libraries to our install directory manually. + set(BROTLI_BUILD_INCLUDE_DIR + ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli) + add_custom_command(TARGET brotli_ep + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX} + ${BROTLI_LIB_DIR} + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${BROTLI_BUILD_INCLUDE_DIR} ${BROTLI_INCLUDE_DIR}/brotli) + endif() file(MAKE_DIRECTORY "${BROTLI_INCLUDE_DIR}") @@ -1657,6 +1716,9 @@ macro(build_thrift) if(DEFINED BOOST_ROOT) list(APPEND THRIFT_CMAKE_ARGS "-DBOOST_ROOT=${BOOST_ROOT}") endif() + if(DEFINED Boost_INCLUDE_DIR) + list(APPEND THRIFT_CMAKE_ARGS "-DBoost_INCLUDE_DIR=${Boost_INCLUDE_DIR}") + endif() if(DEFINED Boost_NAMESPACE) list(APPEND THRIFT_CMAKE_ARGS "-DBoost_NAMESPACE=${Boost_NAMESPACE}") endif() @@ -1798,6 +1860,36 @@ macro(build_protobuf) add_dependencies(arrow::protobuf::protoc protobuf_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf) + + if(CMAKE_CROSSCOMPILING) + # If we are cross compiling, we need to build protoc 
for the host + # system also, as it is used when building Arrow + # We do this by calling CMake as a child process + # with CXXFLAGS / CFLAGS and CMake flags cleared. + set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") + set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") + + set(PROTOBUF_HOST_CMAKE_ARGS + "-DCMAKE_CXX_FLAGS=" + "-DCMAKE_C_FLAGS=" + "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" + -Dprotobuf_BUILD_TESTS=OFF + -Dprotobuf_DEBUG_POSTFIX=) + + externalproject_add(protobuf_ep_host + ${EP_COMMON_OPTIONS} + CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} + BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" + BUILD_IN_SOURCE 1 + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") + + add_executable(arrow::protobuf::host_protoc IMPORTED) + set_target_properties(arrow::protobuf::host_protoc + PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") + + add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host) + endif() endmacro() if(ARROW_WITH_PROTOBUF) @@ -1862,7 +1954,11 @@ if(ARROW_WITH_PROTOBUF) else() set(ARROW_PROTOBUF_LIBPROTOC protobuf::libprotoc) endif() - if(TARGET arrow::protobuf::protoc) + if(TARGET arrow::protobuf::host_protoc) + # make sure host protoc is used for compiling protobuf files + # during build of e.g. orc + set(ARROW_PROTOBUF_PROTOC arrow::protobuf::host_protoc) + elseif(TARGET arrow::protobuf::protoc) set(ARROW_PROTOBUF_PROTOC arrow::protobuf::protoc) else() if(NOT TARGET protobuf::protoc) @@ -2164,8 +2260,15 @@ function(build_gtest) if(APPLE) string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" " -Wno-ignored-attributes") endif() - set(BUILD_SHARED_LIBS ON) - set(BUILD_STATIC_LIBS OFF) + # If we're building static libs for Emscripten, we need to build *everything* as + # static libs. 
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + set(BUILD_SHARED_LIBS OFF) + set(BUILD_STATIC_LIBS ON) + else() + set(BUILD_SHARED_LIBS ON) + set(BUILD_STATIC_LIBS OFF) + endif() # We need to use "cache" variable to override the default # INSTALL_GTEST option by this value. See also: # https://cmake.org/cmake/help/latest/policy/CMP0077.html @@ -2403,37 +2506,58 @@ endif() macro(build_zlib) message(STATUS "Building ZLIB from source") - set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") - if(MSVC) - if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") - set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib) - else() - set(ZLIB_STATIC_LIB_NAME zlibstatic.lib) + + # ensure zlib is built with -fpic + # and make sure that the build finds the version in Emscripten ports + # - n.b. the actual linking happens because -sUSE_ZLIB=1 is + # set in the compiler variables, but cmake expects + # it to exist at configuration time if we aren't building it as + # bundled. We need to do this for all packages + # not just zlib as some depend on zlib, but we don't rebuild + # if it exists already + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # build zlib using Emscripten ports + if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + execute_process(COMMAND embuilder --pic --force build zlib) endif() + add_library(ZLIB::ZLIB STATIC IMPORTED) + set_property(TARGET ZLIB::ZLIB + PROPERTY IMPORTED_LOCATION + "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a") + list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) else() - set(ZLIB_STATIC_LIB_NAME libz.a) - endif() - set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") - set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}") + set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") + if(MSVC) + if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib) + else() + set(ZLIB_STATIC_LIB_NAME zlibstatic.lib) + endif() + else() + 
set(ZLIB_STATIC_LIB_NAME libz.a) + endif() + set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") + set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}") - externalproject_add(zlib_ep - ${EP_COMMON_OPTIONS} - URL ${ZLIB_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ZLIB_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}" - CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) + externalproject_add(zlib_ep + ${EP_COMMON_OPTIONS} + URL ${ZLIB_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ZLIB_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}" + CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) - file(MAKE_DIRECTORY "${ZLIB_PREFIX}/include") + file(MAKE_DIRECTORY "${ZLIB_PREFIX}/include") - add_library(ZLIB::ZLIB STATIC IMPORTED) - set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) - set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") - set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) - target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") + add_library(ZLIB::ZLIB STATIC IMPORTED) + set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) + set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") + set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) + target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") - add_dependencies(ZLIB::ZLIB zlib_ep) + add_dependencies(ZLIB::ZLIB zlib_ep) + list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) + endif() - list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) set(ZLIB_VENDORED TRUE) endmacro() @@ -4390,6 +4514,10 @@ macro(build_orc) "-DPROTOBUF_LIBRARY=$" "-DPROTOC_LIBRARY=$" "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" + "-DSNAPPY_LIBRARY=$" + "-DLZ4_LIBRARY=$" + "-DLZ4_STATIC_LIBRARY=$" + "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" "-DZSTD_INCLUDE_DIR=$" diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 4ae9e3d6dcbfc..22d6d1fc5ae92 
100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) { arr = std::make_shared(dict_type, indices, MakeArray(invalid_data)); ASSERT_RAISES(Invalid, arr->ValidateFull()); -#if !defined(__APPLE__) && !defined(ARROW_VALGRIND) +#if !defined(__APPLE__) && !defined(ARROW_VALGRIND) && !defined(__EMSCRIPTEN__) // GH-35712: ASSERT_DEATH would make testing slow on macOS. ASSERT_DEATH( { diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 26289a7f787e1..c7dbdef2436c3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1988,6 +1988,11 @@ TYPED_TEST(TestBaseBinaryKernels, ExtractRegexInvalid) { #endif TYPED_TEST(TestStringKernels, Strptime) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Skipping some strptime tests due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20466"; +#endif + std::string input1 = R"(["5/1/2020", null, null, "12/13/1900", null])"; std::string input2 = R"(["5-1-2020", "12/13/1900"])"; std::string input3 = R"(["5/1/2020", "AA/BB/CCCC"])"; @@ -2008,6 +2013,7 @@ TYPED_TEST(TestStringKernels, Strptime) { this->CheckUnary("strptime", input4, unit, output4, &options); options.format = "%m/%d/%Y %%z"; + // emscripten bug https://github.com/emscripten-core/emscripten/issues/20466 this->CheckUnary("strptime", input5, unit, output1, &options); options.error_is_null = false; @@ -2019,6 +2025,11 @@ TYPED_TEST(TestStringKernels, Strptime) { } TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() + << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467"; +#endif + if (!arrow::internal::kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 8dac6525fe2e6..8da8c760ea22b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2143,7 +2143,10 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { #ifdef _WIN32 GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; -#else +#elif defined(__EMSCRIPTEN__) + GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; +#endif + if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; } @@ -2155,10 +2158,12 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { ["01 janvier 1970 00:00:59,123", "18 août 2021 15:11:50,456", null])"; CheckScalarUnary("strftime", timestamp(TimeUnit::MILLI, "UTC"), milliseconds, utf8(), expected, &options); -#endif } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; +#endif auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "nonexistent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds); diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index f90833a88d118..b76c7ebad45db 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -138,6 +138,9 @@ TEST(FileSystemFromUri, LinkedRegisteredFactory) { } TEST(FileSystemFromUri, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif // Since the registrar's definition is in libarrow_filesystem_example.so, // its factory will be registered only after the library is dynamically loaded. 
std::string path; diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 00426f9957b1f..cc3a5187059e9 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -398,8 +398,14 @@ class MemoryMappedFile::MemoryMap ~Region() { if (data_ != nullptr) { +#ifndef __EMSCRIPTEN__ int result = munmap(data(), static_cast(size_)); + // emscripten erroneously reports failures in munmap + // https://github.com/emscripten-core/emscripten/issues/20459 ARROW_CHECK_EQ(result, 0) << "munmap failed"; +#else + munmap(data(), static_cast(size_)); +#endif } } diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index e7e7ba949c9fd..af414891b950e 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -42,6 +42,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" +#include "arrow/util/config.h" #include "arrow/util/future.h" #include "arrow/util/io_util.h" @@ -486,6 +487,10 @@ TEST_F(TestReadableFile, CustomMemoryPool) { } TEST_F(TestReadableFile, ThreadSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + std::string data = "foobar"; { std::ofstream stream; @@ -540,6 +545,9 @@ class TestPipeIO : public ::testing::Test { }; TEST_F(TestPipeIO, TestWrite) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; +#endif std::string data1 = "test", data2 = "data!"; std::shared_ptr file; uint8_t buffer[10]; @@ -570,6 +578,9 @@ TEST_F(TestPipeIO, TestWrite) { } TEST_F(TestPipeIO, ReadableFileFails) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; +#endif // ReadableFile fails on non-seekable fd ASSERT_RAISES(IOError, ReadableFile::Open(pipe_.rfd.fd())); } @@ -1048,6 +1059,10 @@ TEST_F(TestMemoryMappedFile, CastableToFileInterface) { } TEST_F(TestMemoryMappedFile, ThreadSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; 
+#endif + std::string data = "foobar"; std::string path = TempFile("ipc-multithreading-test"); CreateFile(path, static_cast(data.size())); diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index c5075299a3e35..ff7838cc39d72 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1046,6 +1046,9 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { }; TEST_F(RecursionLimits, WriteLimit) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "This crashes the Emscripten runtime."; +#endif int32_t metadata_length = -1; int64_t body_length = -1; std::shared_ptr schema; @@ -1078,6 +1081,10 @@ TEST_F(RecursionLimits, ReadLimit) { // Test fails with a structured exception on Windows + Debug #if !defined(_WIN32) || defined(NDEBUG) TEST_F(RecursionLimits, StressLimit) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "This crashes the Emscripten runtime."; +#endif + auto CheckDepth = [this](int recursion_depth, bool* it_works) { int32_t metadata_length = -1; int64_t body_length = -1; diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc index 2b74313db279b..afb03b67209a6 100644 --- a/cpp/src/arrow/util/async_generator_test.cc +++ b/cpp/src/arrow/util/async_generator_test.cc @@ -399,6 +399,10 @@ TEST(TestAsyncUtil, MapParallelStress) { } TEST(TestAsyncUtil, MapQueuingFailStress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int NTASKS = 10; constexpr int NITEMS = 10; for (bool slow : {true, false}) { @@ -1872,6 +1876,10 @@ TEST(PushGenerator, DanglingProducer) { } TEST(PushGenerator, Stress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + const int NTHREADS = 20; const int NVALUES = 2000; const int NFUTURES = NVALUES + 100; diff --git a/cpp/src/arrow/util/atfork_test.cc b/cpp/src/arrow/util/atfork_test.cc index 
004e28e19514a..750f4d138793b 100644 --- a/cpp/src/arrow/util/atfork_test.cc +++ b/cpp/src/arrow/util/atfork_test.cc @@ -35,6 +35,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/atfork_internal.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" @@ -109,6 +110,10 @@ class TestAtFork : public ::testing::Test { #ifndef _WIN32 TEST_F(TestAtFork, EmptyHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers = std::make_shared(); RegisterAtFork(handlers); @@ -130,6 +135,10 @@ TEST_F(TestAtFork, EmptyHandlers) { } TEST_F(TestAtFork, SingleThread) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); auto handlers2 = std::make_shared(PushBefore(2), PushParentAfter(12), @@ -188,6 +197,10 @@ TEST_F(TestAtFork, SingleThread) { // https://github.com/google/sanitizers/issues/950. 
TEST_F(TestAtFork, MultipleThreads) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + const int kNumThreads = 5; const int kNumIterations = 40; const int kParentAfterAddend = 10000; @@ -245,6 +258,9 @@ TEST_F(TestAtFork, NestedChild) { #ifdef __APPLE__ GTEST_SKIP() << "Nested fork is not supported on macOS"; #endif +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); @@ -286,6 +302,10 @@ TEST_F(TestAtFork, NestedChild) { #ifdef _WIN32 TEST_F(TestAtFork, NoOp) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); diff --git a/cpp/src/arrow/util/cache_test.cc b/cpp/src/arrow/util/cache_test.cc index 6b71baa369b9b..264bfe68ec5d2 100644 --- a/cpp/src/arrow/util/cache_test.cc +++ b/cpp/src/arrow/util/cache_test.cc @@ -26,6 +26,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/cache_internal.h" +#include "arrow/util/config.h" namespace arrow { namespace internal { @@ -255,6 +256,10 @@ TYPED_TEST(TestMemoizeLru, Basics) { this->TestBasics(); } class TestMemoizeLruThreadSafe : public TestMemoizeLru {}; TEST_F(TestMemoizeLruThreadSafe, Threads) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + using V = IntValue; Callable c; diff --git a/cpp/src/arrow/util/cancel_test.cc b/cpp/src/arrow/util/cancel_test.cc index 45f6cde4f5579..713418f15a0cc 100644 --- a/cpp/src/arrow/util/cancel_test.cc +++ b/cpp/src/arrow/util/cancel_test.cc @@ -232,6 +232,10 @@ class SignalCancelTest : public CancelTest { }; TEST_F(SignalCancelTest, Register) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RegisterHandler(); TriggerSignal(); @@ -239,6 +243,10 @@ TEST_F(SignalCancelTest, 
Register) { } TEST_F(SignalCancelTest, RegisterUnregister) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + // The signal stop source was set up but no handler was registered, // so the token shouldn't be signalled. TriggerSignal(); @@ -261,6 +269,10 @@ TEST_F(SignalCancelTest, RegisterUnregister) { #if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \ defined(THREAD_SANITIZER)) TEST_F(SignalCancelTest, ForkSafetyUnregisteredHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RunInChild([&]() { // Child TriggerSignal(); @@ -284,6 +296,10 @@ TEST_F(SignalCancelTest, ForkSafetyUnregisteredHandlers) { } TEST_F(SignalCancelTest, ForkSafetyRegisteredHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RegisterHandler(); RunInChild([&]() { @@ -307,6 +323,10 @@ TEST_F(SignalCancelTest, ForkSafetyRegisteredHandlers) { #endif TEST_F(CancelTest, ThreadedPollSuccess) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 10; std::vector results(kNumThreads); @@ -339,6 +359,10 @@ TEST_F(CancelTest, ThreadedPollSuccess) { } TEST_F(CancelTest, ThreadedPollCancel) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 10; std::vector results(kNumThreads); diff --git a/cpp/src/arrow/util/counting_semaphore_test.cc b/cpp/src/arrow/util/counting_semaphore_test.cc index a5fa9f6bde891..4de11ce852a03 100644 --- a/cpp/src/arrow/util/counting_semaphore_test.cc +++ b/cpp/src/arrow/util/counting_semaphore_test.cc @@ -22,12 +22,17 @@ #include #include "arrow/testing/gtest_util.h" +#include "arrow/util/config.h" #include "gtest/gtest.h" namespace arrow { namespace util { TEST(CountingSemaphore, Basic) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires 
threading support"; +#endif + CountingSemaphore semaphore; std::atomic acquired{false}; std::atomic started{false}; @@ -50,6 +55,10 @@ TEST(CountingSemaphore, Basic) { } TEST(CountingSemaphore, CloseAborts) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + CountingSemaphore semaphore; std::atomic cleanup{false}; std::thread acquirer([&] { @@ -64,6 +73,10 @@ TEST(CountingSemaphore, CloseAborts) { } TEST(CountingSemaphore, Stress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr uint32_t NTHREADS = 10; CountingSemaphore semaphore; std::vector max_allowed_cases = {1, 3}; diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc index 87891e48efa5e..2ed2b69aed524 100644 --- a/cpp/src/arrow/util/future_test.cc +++ b/cpp/src/arrow/util/future_test.cc @@ -415,6 +415,10 @@ TEST(FutureRefTest, HeadRemoved) { } TEST(FutureStressTest, Callback) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + #ifdef ARROW_VALGRIND const int NITERS = 2; #else @@ -471,6 +475,10 @@ TEST(FutureStressTest, Callback) { } TEST(FutureStressTest, TryAddCallback) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + for (unsigned int n = 0; n < 1; n++) { auto fut = Future<>::Make(); std::atomic callbacks_added(0); @@ -527,6 +535,10 @@ TEST(FutureStressTest, TryAddCallback) { } TEST(FutureStressTest, DeleteAfterWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumTasks = 100; for (int i = 0; i < kNumTasks; i++) { { @@ -1543,6 +1555,10 @@ TEST(FnOnceTest, MoveOnlyDataType) { } TEST(FutureTest, MatcherExamples) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + EXPECT_THAT(Future::MakeFinished(Status::Invalid("arbitrary error")), Finishes(Raises(StatusCode::Invalid))); diff 
--git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 5928ebcb88959..d48f9eb97d562 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -95,6 +95,7 @@ #include "arrow/result.h" #include "arrow/util/atfork_internal.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "arrow/util/mutex.h" @@ -1485,6 +1486,7 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, } Status MemoryAdviseWillNeed(const std::vector& regions) { +#ifndef __EMSCRIPTEN__ const auto page_size = static_cast(GetPageSize()); DCHECK_GT(page_size, 0); const size_t page_mask = ~(page_size - 1); @@ -1543,6 +1545,9 @@ Status MemoryAdviseWillNeed(const std::vector& regions) { #else return Status::OK(); #endif +#else + return Status::OK(); +#endif } // @@ -2067,7 +2072,9 @@ Status SendSignal(int signum) { } Status SendSignalToThread(int signum, uint64_t thread_id) { -#ifdef _WIN32 +#ifndef ARROW_ENABLE_THREADING + return Status::NotImplemented("Can't send signal with no threads"); +#elif defined(_WIN32) return Status::NotImplemented("Cannot send signal to specific thread on Windows"); #else // Have to use a C-style cast because pthread_t can be a pointer *or* integer type diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index d0569c799561f..73213bf9ce48a 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -40,6 +40,7 @@ #include "arrow/buffer.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/bit_util.h" +#include "arrow/util/config.h" #include "arrow/util/cpu_info.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" @@ -146,8 +147,8 @@ TEST(MemoryAdviseWillNeed, Basics) { ASSERT_OK(MemoryAdviseWillNeed({{addr1, 0}, {addr2 + 1, 0}})); // Should probably fail - // (but on Windows, MemoryAdviseWillNeed can be a no-op) -#ifndef _WIN32 + // 
(but on Windows or Emscripten, MemoryAdviseWillNeed can be a no-op) +#if !defined(_WIN32) && !defined(__EMSCRIPTEN__) ASSERT_RAISES(IOError, MemoryAdviseWillNeed({{nullptr, std::numeric_limits::max()}})); #endif @@ -368,6 +369,10 @@ TestSelfPipe* TestSelfPipe::instance_; TEST_F(TestSelfPipe, MakeAndShutdown) {} TEST_F(TestSelfPipe, WaitAndSend) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + StartReading(); SleepABit(); AssertPayloadsEventually({}); @@ -380,6 +385,10 @@ TEST_F(TestSelfPipe, WaitAndSend) { } TEST_F(TestSelfPipe, SendAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + self_pipe_->Send(123456789123456789ULL); StartReading(); SleepABit(); @@ -390,6 +399,10 @@ TEST_F(TestSelfPipe, SendAndWait) { } TEST_F(TestSelfPipe, WaitAndShutdown) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + StartReading(); SleepABit(); ASSERT_OK(self_pipe_->Shutdown()); @@ -401,6 +414,9 @@ TEST_F(TestSelfPipe, WaitAndShutdown) { } TEST_F(TestSelfPipe, ShutdownAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif self_pipe_->Send(123456789123456789ULL); ASSERT_OK(self_pipe_->Shutdown()); StartReading(); @@ -413,6 +429,10 @@ TEST_F(TestSelfPipe, ShutdownAndWait) { } TEST_F(TestSelfPipe, WaitAndSendFromSignal) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + signal_received_.store(0); SignalHandlerGuard guard(SIGINT, &HandleSignal); @@ -431,6 +451,10 @@ TEST_F(TestSelfPipe, WaitAndSendFromSignal) { } TEST_F(TestSelfPipe, SendFromSignalAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + signal_received_.store(0); SignalHandlerGuard guard(SIGINT, &HandleSignal); @@ -450,6 +474,10 @@ TEST_F(TestSelfPipe, SendFromSignalAndWait) { #if !(defined(_WIN32) || defined(ARROW_VALGRIND) || 
defined(ADDRESS_SANITIZER) || \ defined(THREAD_SANITIZER)) TEST_F(TestSelfPipe, ForkSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + self_pipe_->Send(123456789123456789ULL); auto child_pid = fork(); @@ -1025,6 +1053,9 @@ TEST_F(TestSendSignal, Generic) { } TEST_F(TestSendSignal, ToThread) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "SendSignalToThread requires threading"; +#endif #ifdef _WIN32 uint64_t dummy_thread_id = 42; ASSERT_RAISES(NotImplemented, SendSignalToThread(SIGINT, dummy_thread_id)); diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 9f82ad45b0740..bbf2a9a93e692 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -24,6 +24,7 @@ #include #endif +#include "arrow/util/config.h" #include "arrow/util/logging.h" namespace arrow { @@ -35,9 +36,12 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { +#ifdef ARROW_ENABLE_THREADING DCHECK(!locked->impl_->mutex_.try_lock()); +#endif locked->impl_->mutex_.unlock(); - }) {} + }) { +} Mutex::Guard Mutex::TryLock() { DCHECK_NE(impl_, nullptr); diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 01d1ffd767fc9..26984e5f7735d 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -214,7 +214,14 @@ TEST(BitUtil, RoundTripIntValues) { void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; +#ifdef __EMSCRIPTEN__ + // don't make this on the stack as it is + // too big for emscripten + std::vector buffer_vec(static_cast(len)); + uint8_t* buffer = buffer_vec.data(); +#else uint8_t buffer[len]; +#endif EXPECT_LE(expected_len, len); RleEncoder encoder(buffer, len, bit_width); @@ -227,7 +234,7 @@ void ValidateRle(const std::vector& values, int bit_width, if (expected_len != -1) { 
EXPECT_EQ(encoded_len, expected_len); } - if (expected_encoding != NULL) { + if (expected_encoding != NULL && encoded_len == expected_len) { EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } @@ -256,7 +263,14 @@ void ValidateRle(const std::vector& values, int bit_width, // the returned values are not all the same bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; +#ifdef __EMSCRIPTEN__ + // don't make this on the stack as it is + // too big for emscripten + std::vector buffer_vec(static_cast(len)); + uint8_t* buffer = buffer_vec.data(); +#else uint8_t buffer[len]; +#endif RleEncoder encoder(buffer, len, bit_width); for (size_t i = 0; i < values.size(); ++i) { bool result = encoder.Put(values[i]); diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc index 30c5e6aae74ba..92d727019aaf5 100644 --- a/cpp/src/arrow/util/value_parsing_test.cc +++ b/cpp/src/arrow/util/value_parsing_test.cc @@ -794,6 +794,11 @@ TEST(TimestampParser, StrptimeZoneOffset) { if (!kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Test temporarily disabled due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20467 "; +#endif + std::string format = "%Y-%d-%m %H:%M:%S%z"; auto parser = TimestampParser::MakeStrptime(format); diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index cf46cb8c6ad70..f6ba03552dcce 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1164,6 +1164,14 @@ tasks: flags: "-e ARROW_CSV=ON -e ARROW_PARQUET=ON" image: ubuntu-cpp-minimal + test-ubuntu-22.04-cpp-emscripten: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 22.04 + image: ubuntu-cpp-emscripten + {% for python_version in ["3.8", "3.9", "3.10", "3.11", "3.12"] %} test-conda-python-{{ python_version }}: ci: github diff --git a/docker-compose.yml b/docker-compose.yml 
index 9b0610fe553b5..46717557bc337 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -157,6 +157,7 @@ x-hierarchy: - ubuntu-csharp - ubuntu-cpp-sanitizer - ubuntu-cpp-thread-sanitizer + - ubuntu-cpp-emscripten - ubuntu-r-sanitizer - ubuntu-r-valgrind - ubuntu-swift @@ -652,6 +653,31 @@ services: ARROW_USE_TSAN: "ON" command: *cpp-command + ubuntu-cpp-emscripten: + # Usage: + # docker-compose build ubuntu-cpp-emscripten + # docker-compose run --rm ubuntu-cpp-emscripten + # Parameters: + # ARCH: amd64, arm64v8, ... + # UBUNTU: 22.04 + image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp + build: + context: . + dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile + cache_from: + - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp + args: + arch: ${ARCH} + clang_tools: ${CLANG_TOOLS} + llvm: ${LLVM} + shm_size: *shm-size + volumes: *ubuntu-volumes + environment: + <<: [*common, *ccache, *sccache, *cpp] + ARROW_EMSCRIPTEN: "ON" + UBUNTU: + command: *cpp-command + fedora-cpp: # Usage: # docker-compose build fedora-cpp diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst new file mode 100644 index 0000000000000..b4c563aae1a3b --- /dev/null +++ b/docs/source/developers/cpp/emscripten.rst @@ -0,0 +1,99 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. 
specific language governing permissions and limitations +.. under the License. + + +.. highlight:: console + +.. _developers-cpp-emscripten: +=============================================== +Cross compiling for WebAssembly with Emscripten +=============================================== + +Prerequisites +------------- +You need CMake and compilers etc. installed as per the normal build instructions. Before building with Emscripten, you also need to install Emscripten and +activate it using the commands below (see https://emscripten.org/docs/getting_started/downloads.html for details). + +.. code:: shell + + git clone https://github.com/emscripten-core/emsdk.git + cd emsdk + # replace <version> with the desired EMSDK version. + # e.g. for Pyodide 0.24, you need EMSDK version 3.1.45 + ./emsdk install <version> + ./emsdk activate <version> + source ./emsdk_env.sh + +If you want to build PyArrow for `Pyodide <https://pyodide.org>`_, you +need ``pyodide-build`` installed via ``pip``, and to be running with the +same version of Python that Pyodide is built for, along with the same +versions of emsdk tools. + +.. code:: shell + + # install Pyodide build tools. + # e.g. for version 0.24 of Pyodide: + pip install pyodide-build==0.24 + +Then build with the ``ninja-release-emscripten`` CMake preset, +like below: + +.. code:: shell + + emcmake cmake --preset "ninja-release-emscripten" + ninja install + +This will install a built static library version of ``libarrow`` into the +Emscripten sysroot cache, meaning you can build things that depend on it +and they will find ``libarrow``. + +e.g. if you want to build for Pyodide, run the commands above, and then +go to ``arrow/python`` and run + +.. code:: shell + + pyodide build + +It should make a wheel targeting the currently enabled version of +Pyodide (i.e. the version corresponding to the currently installed +``pyodide-build``) in the ``dist`` subdirectory.
+ + +Manual Build +------------ + +If you want to manually build for Emscripten, take a look at the +``CMakePresets.json`` file in the ``arrow/cpp`` directory for a list of things +you will need to override. In particular you will need: + +#. Build dependencies set to ``BUNDLED``, so it uses properly cross + compiled build dependencies. + +#. ``CMAKE_TOOLCHAIN_FILE`` set by using ``emcmake cmake`` instead of just ``cmake``. + +#. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` + for builds targeting single-threaded Emscripten environments such as + Pyodide. + +#. ``ARROW_FLIGHT`` and anything else that uses the network probably won't + work. + +#. ``ARROW_JEMALLOC`` and ``ARROW_MIMALLOC`` probably also need to be + ``OFF``. + +#. ``ARROW_BUILD_STATIC`` set to ``ON`` and ``ARROW_BUILD_SHARED`` set to + ``OFF`` is most likely to work. diff --git a/docs/source/developers/cpp/index.rst b/docs/source/developers/cpp/index.rst index 36c9778bea1b0..603e1607dc543 100644 --- a/docs/source/developers/cpp/index.rst +++ b/docs/source/developers/cpp/index.rst @@ -27,5 +27,6 @@ C++ Development building development windows + emscripten conventions fuzzing