Skip to content

Commit

Permalink
Support finding installed arrow libraries from system (#9992)
Browse files Browse the repository at this point in the history
Summary:
The Gluten project needs to pre-build Arrow before building Velox. It is also possible that the
Arrow libraries have already been installed on the system, e.g., via vcpkg. In either case, Velox
does not need to build Arrow from source again.

Pull Request resolved: #9992

Reviewed By: xiaoxmeng

Differential Revision: D59122226

Pulled By: bikramSingh91

fbshipit-source-id: 8a0320cc2a57cec3bf3aa894ccc1e8f8c1fb175b
  • Loading branch information
PHILO-HE authored and facebook-github-bot committed Jun 28, 2024
1 parent fd955bf commit 0d80228
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 53 deletions.
1 change: 1 addition & 0 deletions .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jobs:
Protobuf_SOURCE: BUNDLED # can be removed after #10134 is merged
simdjson_SOURCE: BUNDLED
xsimd_SOURCE: BUNDLED
Arrow_SOURCE: AUTO
CUDA_VERSION: "12.4"
steps:
- uses: actions/checkout@v4
Expand Down
37 changes: 37 additions & 0 deletions CMake/FindArrow.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Find module for a pre-installed static Arrow build.
#
# Searches the default CMake search locations for libarrow.a, libparquet.a,
# libarrow_testing.a and the Arrow headers (arrow/api.h).
#
# Result variable:
#
# Arrow_FOUND - TRUE when libarrow.a, libarrow_testing.a and the Arrow headers
# were all located, FALSE otherwise.
#
# Imported targets (STATIC IMPORTED GLOBAL), defined only when Arrow_FOUND:
#
# arrow, parquet, arrow_testing
#
# Cache variables written by the find_* calls:
#
# ARROW_LIB, PARQUET_LIB, ARROW_TESTING_LIB, ARROW_INCLUDE_PATH

find_library(ARROW_LIB libarrow.a)
find_library(PARQUET_LIB libparquet.a)
find_library(ARROW_TESTING_LIB libarrow_testing.a)
find_path(ARROW_INCLUDE_PATH arrow/api.h)

# A missing libparquet.a intentionally does not make the lookup fail.
# NOTE(review): presumably because system installs may be built with
# ARROW_PARQUET=OFF -- confirm. Missing headers, however, make the imported
# targets unusable, so they are treated like a missing library.
if(NOT ARROW_LIB
   OR NOT ARROW_TESTING_LIB
   OR NOT ARROW_INCLUDE_PATH)
  set(Arrow_FOUND FALSE)
  return()
endif()
set(Arrow_FOUND TRUE)

# The targets are GLOBAL; guard their creation so that running this module a
# second time does not fail with "target already exists".
if(NOT TARGET arrow)
  add_library(arrow STATIC IMPORTED GLOBAL)
endif()
if(NOT TARGET parquet)
  add_library(parquet STATIC IMPORTED GLOBAL)
endif()
if(NOT TARGET arrow_testing)
  add_library(arrow_testing STATIC IMPORTED GLOBAL)
endif()

# All three targets share the same header root.
set_target_properties(
  arrow arrow_testing parquet PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
                                         "${ARROW_INCLUDE_PATH}")
set_target_properties(arrow PROPERTIES IMPORTED_LOCATION "${ARROW_LIB}")
# NOTE(review): PARQUET_LIB may be PARQUET_LIB-NOTFOUND here because it is not
# part of the check above; anything linking `parquet` then has no usable
# library location -- confirm this is acceptable for all consumers.
set_target_properties(parquet PROPERTIES IMPORTED_LOCATION "${PARQUET_LIB}")
set_target_properties(arrow_testing PROPERTIES IMPORTED_LOCATION
                                               "${ARROW_TESTING_LIB}")
103 changes: 56 additions & 47 deletions third_party/cmake_modules/FindThrift.cmake → CMake/FindThrift.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# - Find Thrift (a cross platform RPC lib/tool)
# * Find Thrift (a cross platform RPC lib/tool)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Thrift_ROOT - When set, this path is inspected instead of standard library
# locations as the root of the Thrift installation.
# The environment variable THRIFT_HOME overrides this variable.
# Thrift_ROOT - When set, this path is inspected instead of standard library
# locations as the root of the Thrift installation. The environment variable
# THRIFT_HOME overrides this variable.
#
# This module defines
# Thrift_FOUND, whether Thrift is found or not
# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not
# This module defines Thrift_FOUND, whether Thrift is found or not
# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not
#
# thrift::thrift, a library target to use Thrift
# thrift::compiler, a executable target to use Thrift compiler
# thrift::thrift, a library target to use Thrift thrift::compiler, a executable
# target to use Thrift compiler

function(EXTRACT_THRIFT_VERSION)
if(THRIFT_INCLUDE_DIR)
file(READ "${THRIFT_INCLUDE_DIR}/thrift/config.h" THRIFT_CONFIG_H_CONTENT)
string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" THRIFT_VERSION_DEFINITION
"${THRIFT_CONFIG_H_CONTENT}")
string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\""
THRIFT_VERSION_DEFINITION "${THRIFT_CONFIG_H_CONTENT}")
string(REGEX MATCH "[0-9.]+" Thrift_VERSION "${THRIFT_VERSION_DEFINITION}")
set(Thrift_VERSION
"${Thrift_VERSION}"
Expand Down Expand Up @@ -66,14 +65,16 @@ set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}")
if(ARROW_THRIFT_USE_SHARED)
set(THRIFT_LIB_NAMES thrift)
if(CMAKE_IMPORT_LIBRARY_SUFFIX)
list(APPEND
THRIFT_LIB_NAMES
"${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
list(
APPEND
THRIFT_LIB_NAMES
"${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
)
endif()
list(APPEND
THRIFT_LIB_NAMES
"${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}"
list(
APPEND
THRIFT_LIB_NAMES
"${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}"
)
else()
set(THRIFT_LIB_NAMES
Expand All @@ -82,40 +83,47 @@ else()
endif()

if(Thrift_ROOT)
find_library(THRIFT_LIB
NAMES ${THRIFT_LIB_NAMES}
PATHS ${Thrift_ROOT}
PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")
find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h
PATHS ${Thrift_ROOT}
PATH_SUFFIXES "include")
find_program(THRIFT_COMPILER thrift
PATHS ${Thrift_ROOT}
PATH_SUFFIXES "bin")
find_library(
THRIFT_LIB
NAMES ${THRIFT_LIB_NAMES}
PATHS ${Thrift_ROOT}
PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")
find_path(
THRIFT_INCLUDE_DIR thrift/Thrift.h
PATHS ${Thrift_ROOT}
PATH_SUFFIXES "include")
find_program(
THRIFT_COMPILER thrift
PATHS ${Thrift_ROOT}
PATH_SUFFIXES "bin")
extract_thrift_version()
else()
# THRIFT-4760: The pkgconfig files are currently only installed when using autotools.
# Starting with 0.13, they are also installed for the CMake-based installations of Thrift.
# THRIFT-4760: The pkgconfig files are currently only installed when using
# autotools. Starting with 0.13, they are also installed for the CMake-based
# installations of Thrift.
find_package(PkgConfig QUIET)
pkg_check_modules(THRIFT_PC thrift)
if(THRIFT_PC_FOUND)
set(THRIFT_INCLUDE_DIR "${THRIFT_PC_INCLUDEDIR}")

list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}")

find_library(THRIFT_LIB
NAMES ${THRIFT_LIB_NAMES}
PATHS ${THRIFT_PC_LIBRARY_DIRS}
NO_DEFAULT_PATH)
find_program(THRIFT_COMPILER thrift
HINTS ${THRIFT_PC_PREFIX}
NO_DEFAULT_PATH
PATH_SUFFIXES "bin")
find_library(
THRIFT_LIB
NAMES ${THRIFT_LIB_NAMES}
PATHS ${THRIFT_PC_LIBRARY_DIRS}
NO_DEFAULT_PATH)
find_program(
THRIFT_COMPILER thrift
HINTS ${THRIFT_PC_PREFIX}
NO_DEFAULT_PATH
PATH_SUFFIXES "bin")
set(Thrift_VERSION ${THRIFT_PC_VERSION})
else()
find_library(THRIFT_LIB
NAMES ${THRIFT_LIB_NAMES}
PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")
find_library(
THRIFT_LIB
NAMES ${THRIFT_LIB_NAMES}
PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")
find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include")
find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin")
extract_thrift_version()
Expand All @@ -140,14 +148,15 @@ if(Thrift_FOUND)
else()
add_library(thrift::thrift STATIC IMPORTED)
endif()
set_target_properties(thrift::thrift
PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${THRIFT_INCLUDE_DIR}")
set_target_properties(
thrift::thrift
PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}" INTERFACE_INCLUDE_DIRECTORIES
"${THRIFT_INCLUDE_DIR}")
if(WIN32 AND NOT MSVC_TOOLCHAIN)
# We don't need this for Visual C++ because Thrift uses
# "#pragma comment(lib, "Ws2_32.lib")" in
# thrift/windows/config.h for Visual C++.
set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32")
# We don't need this for Visual C++ because Thrift uses "#pragma
# comment(lib, "Ws2_32.lib")" in thrift/windows/config.h for Visual C++.
set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES
"ws2_32")
endif()

if(Thrift_COMPILER_FOUND)
Expand Down
16 changes: 16 additions & 0 deletions CMake/resolve_dependency_modules/arrow.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Dependency module for Arrow: adds the `arrow` directory that sits next to
# this file to the build.
#
# include_guard(GLOBAL) makes repeated includes of this file a no-op, so the
# subdirectory is only added once per CMake run.
include_guard(GLOBAL)

# Quoted so the path is always passed as a single argument.
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/arrow")
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}
"${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")

include(ExternalProject)
project(Arrow)

if(VELOX_ENABLE_ARROW)
find_package(Thrift)
Expand All @@ -24,6 +20,7 @@ if(VELOX_ENABLE_ARROW)
else()
set(THRIFT_SOURCE "BUNDLED")
endif()

set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep")
set(ARROW_CMAKE_ARGS
-DARROW_PARQUET=OFF
Expand Down Expand Up @@ -74,6 +71,7 @@ if(VELOX_ENABLE_ARROW)
CMAKE_ARGS ${ARROW_CMAKE_ARGS}
BUILD_BYPRODUCTS ${ARROW_LIBDIR}/libarrow.a ${ARROW_LIBDIR}/libparquet.a
${ARROW_LIBDIR}/libarrow_testing.a ${THRIFT_LIB})

add_library(arrow STATIC IMPORTED GLOBAL)
add_library(arrow_testing STATIC IMPORTED GLOBAL)
add_library(parquet STATIC IMPORTED GLOBAL)
Expand Down
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -568,5 +568,9 @@ if("${TREAT_WARNINGS_AS_ERRORS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()

add_subdirectory(third_party)
if(VELOX_ENABLE_ARROW)
set_source(Arrow)
resolve_dependency(Arrow)
endif()

add_subdirectory(velox)
24 changes: 24 additions & 0 deletions scripts/setup-centos9.sh
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,29 @@ function install_duckdb {
fi
}

# Apache Arrow release downloaded by install_arrow.
ARROW_VERSION=15.0.0

# Fetches the Apache Arrow ${ARROW_VERSION} source tarball into ./arrow via
# wget_and_untar, changes into its cpp/ directory and hands the configuration
# flags below to cmake_install (static libraries, Release build type,
# /usr/local install prefix, Parquet off, testing library on, bundled Thrift).
# Note: the working directory is left at arrow/cpp when the function returns.
function install_arrow {
  local arrow_tarball_url="https://archive.apache.org/dist/arrow/arrow-${ARROW_VERSION}/apache-arrow-${ARROW_VERSION}.tar.gz"
  local -a arrow_cmake_flags=(
    -DARROW_PARQUET=OFF
    -DARROW_WITH_THRIFT=ON
    -DARROW_WITH_LZ4=ON
    -DARROW_WITH_SNAPPY=ON
    -DARROW_WITH_ZLIB=ON
    -DARROW_WITH_ZSTD=ON
    -DARROW_JEMALLOC=OFF
    -DARROW_SIMD_LEVEL=NONE
    -DARROW_RUNTIME_SIMD_LEVEL=NONE
    -DARROW_WITH_UTF8PROC=OFF
    -DARROW_TESTING=ON
    -DCMAKE_INSTALL_PREFIX=/usr/local
    -DCMAKE_BUILD_TYPE=Release
    -DARROW_BUILD_STATIC=ON
    -DThrift_SOURCE=BUNDLED
  )

  wget_and_untar "${arrow_tarball_url}" arrow
  cd arrow/cpp
  cmake_install "${arrow_cmake_flags[@]}"
}

function install_cuda {
# See https://developer.nvidia.com/cuda-downloads
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
Expand All @@ -209,6 +232,7 @@ function install_velox_deps {
run_and_time install_mvfst
run_and_time install_fbthrift
run_and_time install_duckdb
run_and_time install_arrow
}

(return 2> /dev/null) && return # If script was sourced, don't run commands.
Expand Down
24 changes: 24 additions & 0 deletions scripts/setup-ubuntu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,29 @@ function install_duckdb {
fi
}

# Apache Arrow release downloaded by install_arrow.
ARROW_VERSION=15.0.0

# Fetches the Apache Arrow ${ARROW_VERSION} source tarball into ./arrow via
# wget_and_untar, changes into its cpp/ directory and hands the configuration
# flags below to cmake_install (static libraries, Release build type,
# /usr/local install prefix, Parquet off, testing library on, bundled Thrift).
# Note: the working directory is left at arrow/cpp when the function returns.
function install_arrow {
  local arrow_tarball_url="https://archive.apache.org/dist/arrow/arrow-${ARROW_VERSION}/apache-arrow-${ARROW_VERSION}.tar.gz"
  local -a arrow_cmake_flags=(
    -DARROW_PARQUET=OFF
    -DARROW_WITH_THRIFT=ON
    -DARROW_WITH_LZ4=ON
    -DARROW_WITH_SNAPPY=ON
    -DARROW_WITH_ZLIB=ON
    -DARROW_WITH_ZSTD=ON
    -DARROW_JEMALLOC=OFF
    -DARROW_SIMD_LEVEL=NONE
    -DARROW_RUNTIME_SIMD_LEVEL=NONE
    -DARROW_WITH_UTF8PROC=OFF
    -DARROW_TESTING=ON
    -DCMAKE_INSTALL_PREFIX=/usr/local
    -DCMAKE_BUILD_TYPE=Release
    -DARROW_BUILD_STATIC=ON
    -DThrift_SOURCE=BUNDLED
  )

  wget_and_untar "${arrow_tarball_url}" arrow
  cd arrow/cpp
  cmake_install "${arrow_cmake_flags[@]}"
}

function install_cuda {
# See https://developer.nvidia.com/cuda-downloads
if ! dpkg -l cuda-keyring 1>/dev/null; then
Expand All @@ -179,6 +202,7 @@ function install_velox_deps {
run_and_time install_fbthrift
run_and_time install_conda
run_and_time install_duckdb
run_and_time install_arrow
}

function install_apt_deps {
Expand Down

0 comments on commit 0d80228

Please sign in to comment.