diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1e9336e09fba8..79ce100271885 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,12 +32,13 @@ env: jobs: complete: - name: AMD64 Debian 12 Complete Documentation + name: AMD64 Ubuntu 24.04 Complete Documentation runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 150 env: JDK: 21 + UBUNTU: 24.04 steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -50,8 +51,8 @@ jobs: uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: .docker - key: debian-docs-${{ hashFiles('cpp/**') }} - restore-keys: debian-docs- + key: ubuntu-docs-${{ hashFiles('cpp/**') }} + restore-keys: ubuntu-docs- - name: Setup Python uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: @@ -63,7 +64,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} JDK: 21 - run: archery docker run debian-docs + run: archery docker run ubuntu-docs - name: Docker Push if: >- success() && @@ -74,4 +75,4 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} continue-on-error: true - run: archery docker push debian-docs + run: archery docker push ubuntu-docs diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 1ac1927dd66d2..afd24af279961 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -62,6 +62,7 @@ jobs: maven: [3.9.6] image: [java] env: + JDK: 22 TEST_JDK: ${{ matrix.jdk }} MAVEN: ${{ matrix.maven }} steps: diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 1cab25be4df2c..8160e7f69ba32 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -51,7 +51,7 @@ env: jobs: docker: - name: AMD64 manylinux2014 Java JNI + name: AMD64 
manylinux Java JNI runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 240 @@ -68,8 +68,8 @@ jobs: uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: .docker - key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} - restore-keys: java-jni-manylinux-2014- + key: java-jni-manylinux-2-28-${{ hashFiles('cpp/**', 'java/**') }} + restore-keys: java-jni-manylinux-2-28- - name: Setup Python uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: @@ -80,7 +80,7 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run java-jni-manylinux-2014 + run: archery docker run java-jni-manylinux-2-28 - name: Docker Push if: >- success() && @@ -91,7 +91,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} continue-on-error: true - run: archery docker push java-jni-manylinux-2014 + run: archery docker push java-jni-manylinux-2-28 docker_integration_python: name: AMD64 Conda Java C Data Interface Integration diff --git a/ci/docker/conda-python-jpype.dockerfile b/ci/docker/conda-python-jpype.dockerfile index d9b43afdaec9e..3603ba2b7dd97 100644 --- a/ci/docker/conda-python-jpype.dockerfile +++ b/ci/docker/conda-python-jpype.dockerfile @@ -20,7 +20,7 @@ ARG arch=amd64 ARG python=3.8 FROM ${repo}:${arch}-conda-python-${python} -ARG jdk=11 +ARG jdk=21 ARG maven=3.8.7 RUN mamba install -q -y \ maven=${maven} \ diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux.dockerfile similarity index 99% rename from ci/docker/java-jni-manylinux-201x.dockerfile rename to ci/docker/java-jni-manylinux.dockerfile index 8b73c73c1d240..97f681d4adb00 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux.dockerfile @@ -33,7 +33,7 @@ RUN vcpkg install \ --x-feature=s3 # 
Install Java -ARG java=1.8.0 +ARG java=21 ARG maven=3.9.3 RUN yum install -y java-$java-openjdk-devel && \ yum clean all && \ diff --git a/ci/docker/java.dockerfile b/ci/docker/java.dockerfile new file mode 100644 index 0000000000000..2535abce2741a --- /dev/null +++ b/ci/docker/java.dockerfile @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM maven:3.9.6-eclipse-temurin-22 + +COPY --from=maven:3.9.6-eclipse-temurin-8 /opt/java/openjdk /opt/java/openjdk8 +COPY --from=maven:3.9.6-eclipse-temurin-11 /opt/java/openjdk /opt/java/openjdk11 +COPY --from=maven:3.9.6-eclipse-temurin-17 /opt/java/openjdk /opt/java/openjdk17 +COPY --from=maven:3.9.6-eclipse-temurin-21 /opt/java/openjdk /opt/java/openjdk21 + +# Export each JDK home; the RUN step below and ci/maven-toolchains.xml reference these variables +ENV JAVA8_HOME=/opt/java/openjdk8 +ENV JAVA11_HOME=/opt/java/openjdk11 +ENV JAVA17_HOME=/opt/java/openjdk17 +ENV JAVA21_HOME=/opt/java/openjdk21 +ENV JAVA22_HOME=/opt/java/openjdk +# Register every directory holding a JDK shared library (*.so) with the dynamic linker +RUN find "$JAVA8_HOME/lib" "$JAVA11_HOME/lib" "$JAVA17_HOME/lib" "$JAVA21_HOME/lib" "$JAVA22_HOME/lib" -name '*.so' -exec dirname '{}' ';' | sort -u > /etc/ld.so.conf.d/docker-openjdk.conf; \ + ldconfig; +# Ship the CI toolchains file with the image's Maven configuration +COPY ci/maven-toolchains.xml /usr/share/maven/conf/toolchains.xml + +CMD ["mvn"] diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 1c916840e071b..5bd4e2c8d0214 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -23,21 +23,18 @@ ARG jdk=8 ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium -# See R install instructions at https://cloud.r-project.org/bin/linux/ +# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu RUN apt-get update -y && \ apt-get install -y \ apt-transport-https \ + software-properties-common \ dirmngr \ gpg \ lsb-release && \ - gpg --keyserver keyserver.ubuntu.com \ - --recv-key 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 && \ - gpg --export 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 | \ - gpg --no-default-keyring \ - --keyring /usr/share/keyrings/cran.gpg \ - --import - && \ - echo "deb [signed-by=/usr/share/keyrings/cran.gpg] https://cloud.r-project.org/bin/linux/$(lsb_release -is | tr 'A-Z' 'a-z') $(lsb_release -cs)-cran40/" | \ - tee /etc/apt/sources.list.d/cran.list && \ + wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \ + tee -a
/etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ + # NOTE: Only R >= 4.0 is available in this repo + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \ if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ sed -i \ -e 's/main$/main contrib non-free non-free-firmware/g' \ @@ -47,8 +44,7 @@ RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ autoconf-archive \ automake \ - chromium \ - chromium-sandbox \ + chromium-browser \ curl \ doxygen \ gi-docgen \ diff --git a/ci/maven-toolchains.xml b/ci/maven-toolchains.xml new file mode 100644 index 0000000000000..f2eafb0ba9409 --- /dev/null +++ b/ci/maven-toolchains.xml @@ -0,0 +1,64 @@ + + + + + jdk + + 1.8 + openjdk + + + ${env.JAVA8_HOME} + + + + jdk + + 11 + openjdk + + + ${env.JAVA11_HOME} + + + + jdk + + 17 + openjdk + + + ${env.JAVA17_HOME} + + + + jdk + + 21 + openjdk + + + ${env.JAVA21_HOME} + + + + jdk + + 22 + openjdk + + + ${env.JAVA22_HOME} + + + diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 6f3769751af42..af5c3037b4f0a 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -35,9 +35,14 @@ echo "=== Clear output directories and leftovers ===" rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" -devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ +devtoolset_version=$(rpm -qa "devtoolset-*-gcc" "gcc-toolset-*-gcc" --queryformat %{VERSION} | \ grep -o "^[0-9]*") -devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" +if [ -d "/opt/rh/devtoolset-${devtoolset_version}" ]; then + devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" +else + devtoolset_include_cpp="/opt/rh/gcc-toolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" +fi + : ${ARROW_ACERO:=ON} export ARROW_ACERO : 
${ARROW_BUILD_TESTS:=ON} diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index acfd45e39beb3..bbfa4c551452b 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -57,7 +57,7 @@ jobs: archery docker run \ -e ARROW_JAVA_BUILD=OFF \ -e ARROW_JAVA_TEST=OFF \ - java-jni-manylinux-2014 + java-jni-manylinux-2-28 - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux-{{ arch }}.tar.gz arrow/java-dist/ - name: Upload artifacts @@ -69,7 +69,7 @@ jobs: {{ macros.github_login_dockerhub()|indent }} - name: Push Docker image shell: bash - run: archery docker push java-jni-manylinux-2014 + run: archery docker push java-jni-manylinux-2-28 {% endif %} build-cpp-macos: diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 61df283960ccf..7e14d126495a1 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -65,7 +65,7 @@ groups: - r-binary-packages - ubuntu-* - wheel-* - - test-debian-*-docs + - test-ubuntu-*-docs {############################# Testing tasks #################################} @@ -1446,15 +1446,16 @@ tasks: {% endfor %} # be sure to update binary-task.rb when upgrading ubuntu - test-debian-12-docs: + test-ubuntu-24.04-docs: ci: github template: docs/github.linux.yml params: env: - JDK: 17 + JDK: 21 + UBUNTU: 24.04 pr_number: Unset flags: "-v $PWD/build/:/build/" - image: debian-docs + image: ubuntu-docs publish: false artifacts: - docs.tar.gz @@ -1583,8 +1584,9 @@ tasks: template: docs/github.linux.yml params: env: - JDK: 17 + JDK: 21 + UBUNTU: 24.04 pr_number: Unset flags: "-v $PWD/build/:/build/" - image: debian-docs + image: ubuntu-docs publish: true diff --git a/docker-compose.yml b/docker-compose.yml index 625bea7e7f00c..feb6fe7a30078 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -131,8 +131,7 @@ x-hierarchy: - debian-cpp: - debian-c-glib: - debian-ruby - - debian-python: - - debian-docs + - debian-python - debian-go: - 
debian-go-cgo - debian-go-cgo-python @@ -146,7 +145,8 @@ x-hierarchy: - ubuntu-c-glib: - ubuntu-ruby - ubuntu-lint - - ubuntu-python + - ubuntu-python: + - ubuntu-docs - ubuntu-python-sdist-test - ubuntu-r - ubuntu-r-only-r @@ -168,9 +168,9 @@ x-hierarchy: # helper services - impala - postgres - - python-wheel-manylinux-2014: - - java-jni-manylinux-2014 - - python-wheel-manylinux-2-28 + - python-wheel-manylinux-2014 + - python-wheel-manylinux-2-28: + - java-jni-manylinux-2-28 - python-wheel-manylinux-test-imports - python-wheel-manylinux-test-unittests - python-wheel-windows-vs2019 @@ -1164,22 +1164,22 @@ services: target: "C:/arrow" command: arrow\\ci\\scripts\\python_wheel_windows_test.bat - java-jni-manylinux-2014: - image: ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG} + java-jni-manylinux-2-28: + image: ${REPO}:${ARCH}-java-jni-manylinux-2-28-vcpkg-${VCPKG} build: args: - base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG} - java: 1.8.0 + base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2-28-vcpkg-${VCPKG} + java: 21 context: . 
- dockerfile: ci/docker/java-jni-manylinux-201x.dockerfile + dockerfile: ci/docker/java-jni-manylinux.dockerfile cache_from: - - ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG} + - ${REPO}:${ARCH}-java-jni-manylinux-2-28-vcpkg-${VCPKG} environment: <<: [*common, *ccache] volumes: - .:/arrow:delegated - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated + - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux-2-28-ccache:/ccache:delegated command: ["pip install -e /arrow/dev/archery && \ /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \ @@ -1712,10 +1712,12 @@ services: # Usage: # docker-compose build java # docker-compose run java - # Parameters: - # MAVEN: 3.9.5 - # JDK: 8, 11, 17, 21 - image: ghcr.io/laurentgo/multi-jdk-maven-image:main + image: ${REPO}:${ARCH}-java + build: + context: . + dockerfile: ci/docker/java.dockerfile + cache_from: + - ${REPO}:${ARCH}-java shm_size: *shm-size volumes: &java-volumes - .:/arrow:delegated @@ -1742,7 +1744,7 @@ services: repo: ${REPO} arch: ${ARCH} # Use a newer JDK as it seems to improve stability - jdk: 17 + jdk: 21 maven: ${MAVEN} node: ${NODE} go: ${GO} @@ -1761,24 +1763,24 @@ services: ################################ Docs ####################################### - debian-docs: + ubuntu-docs: # Usage: - # docker-compose build debian-cpp - # docker-compose build debian-python - # docker-compose build debian-docs - # docker-compose run --rm debian-docs - image: ${REPO}:${ARCH}-debian-${DEBIAN}-docs + # docker-compose build ubuntu-cpp + # docker-compose build ubuntu-python + # docker-compose build ubuntu-docs + # docker-compose run --rm ubuntu-docs + image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs build: context: . 
dockerfile: ci/docker/linux-apt-docs.dockerfile cache_from: - - ${REPO}:${ARCH}-debian-${DEBIAN}-docs + - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs args: r: ${R} jdk: ${JDK} maven: ${MAVEN} node: ${NODE} - base: ${REPO}:${ARCH}-debian-${DEBIAN}-python-3 + base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 # This is for Chromium used by Mermaid. Chromium uses namespace # isolation for security by default. cap_add: @@ -1797,7 +1799,7 @@ services: BUILD_DOCS_JS: "ON" BUILD_DOCS_PYTHON: "ON" BUILD_DOCS_R: "ON" - volumes: *debian-volumes + volumes: *ubuntu-volumes command: > /bin/bash -c " sudo mkdir -p /build /ccache && diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index 82053e901186c..df6ae21b6a639 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -32,8 +32,12 @@ Arrow Java uses the `Maven <https://maven.apache.org/>`_ build system. Building requires: +* JDK 21+ +* Maven 3.6.3+ + +Testing requires: + * JDK 8+ -* Maven 3+ .. note:: CI will test all supported JDK LTS versions, plus the latest non-LTS version. @@ -321,6 +325,63 @@ Building Java JNI Modules -Darrow.c.jni.dist.dir=/java-dist/lib/ \ -Parrow-jni clean install +Testing +======= + +By default, Maven will use the same Java version to build Arrow and run the tests. +In order to run the test suite against a specific version of the Java runtime, Maven +toolchains need to be configured beforehand, and then a test-specific property needs to +be set. + +Configuring Maven toolchains +---------------------------- + +To be able to use a JDK for testing, it needs to be registered first in the Maven ``toolchains.xml`` +configuration file, usually located under ``${HOME}/.m2``, with the following snippet added to it: + + .. code-block:: + + + + + [...] + + + jdk + + 21 + temurin + + + path/to/jdk/home + + + + [...] 
+ + + +Testing with a specific JDK +--------------------------- + +To run Arrow tests with a specific JDK version, use the ``arrow.test.jdk-version`` property. + +- To run Arrow tests with JDK 1.8, use the following snippet: + + .. code-block:: + + $ cd arrow/java + $ mvn -Darrow.test.jdk-version=1.8 clean verify + + +- To run Arrow tests with JDK 17, use the following snippet: + + .. code-block:: + + $ cd arrow/java + $ mvn -Darrow.test.jdk-version=17 clean verify + + IDE Configuration ================= diff --git a/docs/source/python/getstarted.rst b/docs/source/python/getstarted.rst index 42e415c40b835..b58c05d8fb93c 100644 --- a/docs/source/python/getstarted.rst +++ b/docs/source/python/getstarted.rst @@ -138,6 +138,7 @@ Loading back the partitioned dataset will detect the chunks and will lazily load chunks of data only when iterating over them .. ipython:: python + :okwarning: import datetime diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 793337ef1751e..b2943f8dc15f9 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -36,6 +36,7 @@ under the License. + 21 1.8 1.8 8 @@ -51,8 +52,6 @@ under the License. 
is addressed --> 3.2.2 - 3.6.3 - 3.5.0 diff --git a/java/c/src/test/python/integration_tests.py b/java/c/src/test/python/integration_tests.py index ab2ee1742f366..b48ea27112c5d 100644 --- a/java/c/src/test/python/integration_tests.py +++ b/java/c/src/test/python/integration_tests.py @@ -52,13 +52,15 @@ def setup_jvm(): # This will be the default behaviour in jpype 0.8+ kwargs['convertStrings'] = False - # For debugging purpose please uncomment the following, and include *jvm_args, before **kwargs - # in startJVM function call - # jvm_args = [ + jvm_args = [ + "--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED" + ] + # For debugging purpose please uncomment the following + # jvm_args += [ # "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005" # ] - jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.class.path=" + jar_path, **kwargs) + jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.class.path=" + jar_path, *jvm_args, **kwargs) class Bridge: diff --git a/java/maven/pom.xml b/java/maven/pom.xml index c804ce82de79e..2e80128de7e84 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -43,6 +43,7 @@ under the License. true + 21 1.8 1.8 8 diff --git a/java/pom.xml b/java/pom.xml index 9227f9f2a81b9..200b3ded139cb 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -111,6 +111,7 @@ under the License. --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + 21 1.8 1.8 8