diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 1e9336e09fba8..79ce100271885 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -32,12 +32,13 @@ env:
jobs:
complete:
- name: AMD64 Debian 12 Complete Documentation
+ name: AMD64 Ubuntu 24.04 Complete Documentation
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 150
env:
JDK: 21
+      UBUNTU: "24.04"  # quoted: a bare 24.04 is a YAML float, not a version string
steps:
- name: Checkout Arrow
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
@@ -50,8 +51,8 @@ jobs:
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
with:
path: .docker
- key: debian-docs-${{ hashFiles('cpp/**') }}
- restore-keys: debian-docs-
+ key: ubuntu-docs-${{ hashFiles('cpp/**') }}
+ restore-keys: ubuntu-docs-
- name: Setup Python
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
@@ -63,7 +64,7 @@ jobs:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
JDK: 21
- run: archery docker run debian-docs
+ run: archery docker run ubuntu-docs
- name: Docker Push
if: >-
success() &&
@@ -74,4 +75,4 @@ jobs:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
continue-on-error: true
- run: archery docker push debian-docs
+ run: archery docker push ubuntu-docs
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 1ac1927dd66d2..afd24af279961 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -62,6 +62,7 @@ jobs:
maven: [3.9.6]
image: [java]
env:
+ JDK: 22
TEST_JDK: ${{ matrix.jdk }}
MAVEN: ${{ matrix.maven }}
steps:
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 1cab25be4df2c..8160e7f69ba32 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -51,7 +51,7 @@ env:
jobs:
docker:
- name: AMD64 manylinux2014 Java JNI
+ name: AMD64 manylinux Java JNI
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 240
@@ -68,8 +68,8 @@ jobs:
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
with:
path: .docker
- key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }}
- restore-keys: java-jni-manylinux-2014-
+ key: java-jni-manylinux-2-28-${{ hashFiles('cpp/**', 'java/**') }}
+ restore-keys: java-jni-manylinux-2-28-
- name: Setup Python
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
@@ -80,7 +80,7 @@ jobs:
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
- run: archery docker run java-jni-manylinux-2014
+ run: archery docker run java-jni-manylinux-2-28
- name: Docker Push
if: >-
success() &&
@@ -91,7 +91,7 @@ jobs:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
continue-on-error: true
- run: archery docker push java-jni-manylinux-2014
+ run: archery docker push java-jni-manylinux-2-28
docker_integration_python:
name: AMD64 Conda Java C Data Interface Integration
diff --git a/ci/docker/conda-python-jpype.dockerfile b/ci/docker/conda-python-jpype.dockerfile
index d9b43afdaec9e..3603ba2b7dd97 100644
--- a/ci/docker/conda-python-jpype.dockerfile
+++ b/ci/docker/conda-python-jpype.dockerfile
@@ -20,7 +20,7 @@ ARG arch=amd64
ARG python=3.8
FROM ${repo}:${arch}-conda-python-${python}
-ARG jdk=11
+ARG jdk=21
ARG maven=3.8.7
RUN mamba install -q -y \
maven=${maven} \
diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux.dockerfile
similarity index 99%
rename from ci/docker/java-jni-manylinux-201x.dockerfile
rename to ci/docker/java-jni-manylinux.dockerfile
index 8b73c73c1d240..97f681d4adb00 100644
--- a/ci/docker/java-jni-manylinux-201x.dockerfile
+++ b/ci/docker/java-jni-manylinux.dockerfile
@@ -33,7 +33,7 @@ RUN vcpkg install \
--x-feature=s3
# Install Java
-ARG java=1.8.0
+ARG java=21
ARG maven=3.9.3
RUN yum install -y java-$java-openjdk-devel && \
yum clean all && \
diff --git a/ci/docker/java.dockerfile b/ci/docker/java.dockerfile
new file mode 100644
index 0000000000000..2535abce2741a
--- /dev/null
+++ b/ci/docker/java.dockerfile
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM maven:3.9.6-eclipse-temurin-22
+
+COPY --from=maven:3.9.6-eclipse-temurin-8 /opt/java/openjdk /opt/java/openjdk8
+COPY --from=maven:3.9.6-eclipse-temurin-11 /opt/java/openjdk /opt/java/openjdk11
+COPY --from=maven:3.9.6-eclipse-temurin-17 /opt/java/openjdk /opt/java/openjdk17
+COPY --from=maven:3.9.6-eclipse-temurin-21 /opt/java/openjdk /opt/java/openjdk21
+
+# One home per JDK; ci/maven-toolchains.xml reads these as ${env.JAVA<N>_HOME}.
+ENV JAVA8_HOME=/opt/java/openjdk8
+ENV JAVA11_HOME=/opt/java/openjdk11
+ENV JAVA17_HOME=/opt/java/openjdk17
+ENV JAVA21_HOME=/opt/java/openjdk21
+ENV JAVA22_HOME=/opt/java/openjdk
+
+RUN find "$JAVA8_HOME/lib" "$JAVA11_HOME/lib" "$JAVA17_HOME/lib" "$JAVA21_HOME/lib" "$JAVA22_HOME/lib" -name '*.so' -exec dirname '{}' ';' | sort -u > /etc/ld.so.conf.d/docker-openjdk.conf && \
+    ldconfig
+
+COPY ci/maven-toolchains.xml /usr/share/maven/conf/toolchains.xml
+
+CMD ["mvn"]
diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index 1c916840e071b..5bd4e2c8d0214 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -23,21 +23,18 @@ ARG jdk=8
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
-# See R install instructions at https://cloud.r-project.org/bin/linux/
+# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu
RUN apt-get update -y && \
apt-get install -y \
apt-transport-https \
+ software-properties-common \
dirmngr \
gpg \
lsb-release && \
- gpg --keyserver keyserver.ubuntu.com \
- --recv-key 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 && \
- gpg --export 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 | \
- gpg --no-default-keyring \
- --keyring /usr/share/keyrings/cran.gpg \
- --import - && \
- echo "deb [signed-by=/usr/share/keyrings/cran.gpg] https://cloud.r-project.org/bin/linux/$(lsb_release -is | tr 'A-Z' 'a-z') $(lsb_release -cs)-cran40/" | \
- tee /etc/apt/sources.list.d/cran.list && \
+ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \
+ tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \
+ # NOTE: Only R >= 4.0 is available in this repo
+ add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \
if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
sed -i \
-e 's/main$/main contrib non-free non-free-firmware/g' \
@@ -47,8 +44,7 @@ RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
autoconf-archive \
automake \
- chromium \
- chromium-sandbox \
+ chromium-browser \
curl \
doxygen \
gi-docgen \
diff --git a/ci/maven-toolchains.xml b/ci/maven-toolchains.xml
new file mode 100644
index 0000000000000..f2eafb0ba9409
--- /dev/null
+++ b/ci/maven-toolchains.xml
@@ -0,0 +1,64 @@
+
+
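+<toolchains>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>1.8</version>
+      <vendor>openjdk</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>${env.JAVA8_HOME}</jdkHome>
+    </configuration>
+  </toolchain>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>11</version>
+      <vendor>openjdk</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>${env.JAVA11_HOME}</jdkHome>
+    </configuration>
+  </toolchain>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>17</version>
+      <vendor>openjdk</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>${env.JAVA17_HOME}</jdkHome>
+    </configuration>
+  </toolchain>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>21</version>
+      <vendor>openjdk</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>${env.JAVA21_HOME}</jdkHome>
+    </configuration>
+  </toolchain>
+  <toolchain>
+    <type>jdk</type>
+    <provides>
+      <version>22</version>
+      <vendor>openjdk</vendor>
+    </provides>
+    <configuration>
+      <jdkHome>${env.JAVA22_HOME}</jdkHome>
+    </configuration>
+  </toolchain>
+</toolchains>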
diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh
index 6f3769751af42..af5c3037b4f0a 100755
--- a/ci/scripts/java_jni_manylinux_build.sh
+++ b/ci/scripts/java_jni_manylinux_build.sh
@@ -35,9 +35,14 @@ echo "=== Clear output directories and leftovers ==="
rm -rf ${build_dir}
echo "=== Building Arrow C++ libraries ==="
-devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" "gcc-toolset-*-gcc" --queryformat %{VERSION} | \
grep -o "^[0-9]*")
-devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+if [ -d "/opt/rh/devtoolset-${devtoolset_version}" ]; then
+ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+else
+ devtoolset_include_cpp="/opt/rh/gcc-toolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+fi
+
: ${ARROW_ACERO:=ON}
export ARROW_ACERO
: ${ARROW_BUILD_TESTS:=ON}
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml
index acfd45e39beb3..bbfa4c551452b 100644
--- a/dev/tasks/java-jars/github.yml
+++ b/dev/tasks/java-jars/github.yml
@@ -57,7 +57,7 @@ jobs:
archery docker run \
-e ARROW_JAVA_BUILD=OFF \
-e ARROW_JAVA_TEST=OFF \
- java-jni-manylinux-2014
+ java-jni-manylinux-2-28
- name: Compress into single artifact to keep directory structure
run: tar -cvzf arrow-shared-libs-linux-{{ arch }}.tar.gz arrow/java-dist/
- name: Upload artifacts
@@ -69,7 +69,7 @@ jobs:
{{ macros.github_login_dockerhub()|indent }}
- name: Push Docker image
shell: bash
- run: archery docker push java-jni-manylinux-2014
+ run: archery docker push java-jni-manylinux-2-28
{% endif %}
build-cpp-macos:
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 61df283960ccf..7e14d126495a1 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -65,7 +65,7 @@ groups:
- r-binary-packages
- ubuntu-*
- wheel-*
- - test-debian-*-docs
+ - test-ubuntu-*-docs
{############################# Testing tasks #################################}
@@ -1446,15 +1446,16 @@ tasks:
{% endfor %}
# be sure to update binary-task.rb when upgrading ubuntu
- test-debian-12-docs:
+ test-ubuntu-24.04-docs:
ci: github
template: docs/github.linux.yml
params:
env:
- JDK: 17
+ JDK: 21
+ UBUNTU: 24.04
pr_number: Unset
flags: "-v $PWD/build/:/build/"
- image: debian-docs
+ image: ubuntu-docs
publish: false
artifacts:
- docs.tar.gz
@@ -1583,8 +1584,9 @@ tasks:
template: docs/github.linux.yml
params:
env:
- JDK: 17
+ JDK: 21
+ UBUNTU: 24.04
pr_number: Unset
flags: "-v $PWD/build/:/build/"
- image: debian-docs
+ image: ubuntu-docs
publish: true
diff --git a/docker-compose.yml b/docker-compose.yml
index 625bea7e7f00c..feb6fe7a30078 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -131,8 +131,7 @@ x-hierarchy:
- debian-cpp:
- debian-c-glib:
- debian-ruby
- - debian-python:
- - debian-docs
+ - debian-python
- debian-go:
- debian-go-cgo
- debian-go-cgo-python
@@ -146,7 +145,8 @@ x-hierarchy:
- ubuntu-c-glib:
- ubuntu-ruby
- ubuntu-lint
- - ubuntu-python
+ - ubuntu-python:
+ - ubuntu-docs
- ubuntu-python-sdist-test
- ubuntu-r
- ubuntu-r-only-r
@@ -168,9 +168,9 @@ x-hierarchy:
# helper services
- impala
- postgres
- - python-wheel-manylinux-2014:
- - java-jni-manylinux-2014
- - python-wheel-manylinux-2-28
+ - python-wheel-manylinux-2014
+ - python-wheel-manylinux-2-28:
+ - java-jni-manylinux-2-28
- python-wheel-manylinux-test-imports
- python-wheel-manylinux-test-unittests
- python-wheel-windows-vs2019
@@ -1164,22 +1164,22 @@ services:
target: "C:/arrow"
command: arrow\\ci\\scripts\\python_wheel_windows_test.bat
- java-jni-manylinux-2014:
- image: ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG}
+ java-jni-manylinux-2-28:
+ image: ${REPO}:${ARCH}-java-jni-manylinux-2-28-vcpkg-${VCPKG}
build:
args:
- base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
- java: 1.8.0
+ base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2-28-vcpkg-${VCPKG}
+ java: 21
context: .
- dockerfile: ci/docker/java-jni-manylinux-201x.dockerfile
+ dockerfile: ci/docker/java-jni-manylinux.dockerfile
cache_from:
- - ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG}
+ - ${REPO}:${ARCH}-java-jni-manylinux-2-28-vcpkg-${VCPKG}
environment:
<<: [*common, *ccache]
volumes:
- .:/arrow:delegated
- ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
- - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
+ - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux-2-28-ccache:/ccache:delegated
command:
["pip install -e /arrow/dev/archery && \
/arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \
@@ -1712,10 +1712,12 @@ services:
# Usage:
# docker-compose build java
# docker-compose run java
- # Parameters:
- # MAVEN: 3.9.5
- # JDK: 8, 11, 17, 21
- image: ghcr.io/laurentgo/multi-jdk-maven-image:main
+ image: ${REPO}:${ARCH}-java
+ build:
+ context: .
+ dockerfile: ci/docker/java.dockerfile
+ cache_from:
+ - ${REPO}:${ARCH}-java
shm_size: *shm-size
volumes: &java-volumes
- .:/arrow:delegated
@@ -1742,7 +1744,7 @@ services:
repo: ${REPO}
arch: ${ARCH}
# Use a newer JDK as it seems to improve stability
- jdk: 17
+ jdk: 21
maven: ${MAVEN}
node: ${NODE}
go: ${GO}
@@ -1761,24 +1763,24 @@ services:
################################ Docs #######################################
- debian-docs:
+ ubuntu-docs:
# Usage:
- # docker-compose build debian-cpp
- # docker-compose build debian-python
- # docker-compose build debian-docs
- # docker-compose run --rm debian-docs
- image: ${REPO}:${ARCH}-debian-${DEBIAN}-docs
+ # docker-compose build ubuntu-cpp
+ # docker-compose build ubuntu-python
+ # docker-compose build ubuntu-docs
+ # docker-compose run --rm ubuntu-docs
+ image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs
build:
context: .
dockerfile: ci/docker/linux-apt-docs.dockerfile
cache_from:
- - ${REPO}:${ARCH}-debian-${DEBIAN}-docs
+ - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs
args:
r: ${R}
jdk: ${JDK}
maven: ${MAVEN}
node: ${NODE}
- base: ${REPO}:${ARCH}-debian-${DEBIAN}-python-3
+ base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3
# This is for Chromium used by Mermaid. Chromium uses namespace
# isolation for security by default.
cap_add:
@@ -1797,7 +1799,7 @@ services:
BUILD_DOCS_JS: "ON"
BUILD_DOCS_PYTHON: "ON"
BUILD_DOCS_R: "ON"
- volumes: *debian-volumes
+ volumes: *ubuntu-volumes
command: >
/bin/bash -c "
sudo mkdir -p /build /ccache &&
diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst
index 82053e901186c..df6ae21b6a639 100644
--- a/docs/source/developers/java/building.rst
+++ b/docs/source/developers/java/building.rst
@@ -32,8 +32,12 @@ Arrow Java uses the `Maven `_ build system.
Building requires:
+* JDK 21+
+* Maven 3.6.3+
+
+Testing requires:
+
* JDK 8+
-* Maven 3+
.. note::
CI will test all supported JDK LTS versions, plus the latest non-LTS version.
@@ -321,6 +325,63 @@ Building Java JNI Modules
-Darrow.c.jni.dist.dir=/java-dist/lib/ \
-Parrow-jni clean install
+Testing
+=======
+
+By default, Maven will use the same Java version to build Arrow and run the tests.
+In order to run the test suite against a specific version of the Java runtime, Maven
+toolchains need to be configured beforehand, and then a test-specific property needs to
+be set.
+
+Configuring Maven toolchains
+----------------------------
+
+To be able to use a JDK for testing, it needs to be registered first in the Maven ``toolchains.xml``
+configuration file, usually located under ``${HOME}/.m2``, with the following snippet added to it:
+
+ .. code-block::
+
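+        <?xml version="1.0" encoding="UTF-8"?>
+        <toolchains>
+
+          [...]
+
+          <toolchain>
+            <type>jdk</type>
+            <provides>
+              <version>21</version>
+              <vendor>temurin</vendor>
+            </provides>
+            <configuration>
+              <jdkHome>path/to/jdk/home</jdkHome>
+            </configuration>
+          </toolchain>
+
+          [...]
+
+        </toolchains>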
+
+Testing with a specific JDK
+---------------------------
+
+To run Arrow tests with a specific JDK version, use the ``arrow.test.jdk-version`` property.
+
+- To run Arrow tests with JDK 1.8, use the following snippet:
+
+ .. code-block::
+
+ $ cd arrow/java
+ $ mvn -Darrow.test.jdk-version=1.8 clean verify
+
+
+- To run Arrow tests with JDK 17, use the following snippet:
+
+ .. code-block::
+
+ $ cd arrow/java
+ $ mvn -Darrow.test.jdk-version=17 clean verify
+
+
IDE Configuration
=================
diff --git a/docs/source/python/getstarted.rst b/docs/source/python/getstarted.rst
index 42e415c40b835..b58c05d8fb93c 100644
--- a/docs/source/python/getstarted.rst
+++ b/docs/source/python/getstarted.rst
@@ -138,6 +138,7 @@ Loading back the partitioned dataset will detect the chunks
and will lazily load chunks of data only when iterating over them
.. ipython:: python
+ :okwarning:
import datetime
diff --git a/java/bom/pom.xml b/java/bom/pom.xml
index 793337ef1751e..b2943f8dc15f9 100644
--- a/java/bom/pom.xml
+++ b/java/bom/pom.xml
@@ -36,6 +36,7 @@ under the License.
+ 21
1.8
1.8
8
@@ -51,8 +52,6 @@ under the License.
is addressed
-->
3.2.2
- 3.6.3
- 3.5.0
diff --git a/java/c/src/test/python/integration_tests.py b/java/c/src/test/python/integration_tests.py
index ab2ee1742f366..b48ea27112c5d 100644
--- a/java/c/src/test/python/integration_tests.py
+++ b/java/c/src/test/python/integration_tests.py
@@ -52,13 +52,15 @@ def setup_jvm():
# This will be the default behaviour in jpype 0.8+
kwargs['convertStrings'] = False
- # For debugging purpose please uncomment the following, and include *jvm_args, before **kwargs
- # in startJVM function call
- # jvm_args = [
+ jvm_args = [
+ "--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED"
+ ]
+ # For debugging purpose please uncomment the following
+ # jvm_args += [
# "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
# ]
- jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.class.path=" + jar_path, **kwargs)
+ jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.class.path=" + jar_path, *jvm_args, **kwargs)
class Bridge:
diff --git a/java/maven/pom.xml b/java/maven/pom.xml
index c804ce82de79e..2e80128de7e84 100644
--- a/java/maven/pom.xml
+++ b/java/maven/pom.xml
@@ -43,6 +43,7 @@ under the License.
true
+ 21
1.8
1.8
8
diff --git a/java/pom.xml b/java/pom.xml
index 9227f9f2a81b9..200b3ded139cb 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -111,6 +111,7 @@ under the License.
--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED
+ 21
1.8
1.8
8