Skip to content

GH-32566: [C++] Connect parquet to the new scan node #38705

GH-32566: [C++] Connect parquet to the new scan node

GH-32566: [C++] Connect parquet to the new scan node #38705

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: R
on:
push:
paths:
- ".github/workflows/r.yml"
- "ci/scripts/r_*.sh"
- "ci/scripts/cpp_*.sh"
- "ci/scripts/PKGBUILD"
- "ci/etc/rprofile"
- "ci/docker/**"
- "cpp/**"
- "r/**"
pull_request:
paths:
- ".github/workflows/r.yml"
- "ci/scripts/r_*.sh"
- "ci/scripts/cpp_*.sh"
- "ci/scripts/PKGBUILD"
- "ci/etc/rprofile"
- "ci/docker/**"
- "cpp/**"
- "r/**"
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
permissions:
contents: read
env:
DOCKER_VOLUME_PREFIX: ".docker/"
jobs:
ubuntu:
name: AMD64 Ubuntu ${{ matrix.ubuntu }} R ${{ matrix.r }} Force-Tests ${{ matrix.force-tests }}
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 75
strategy:
fail-fast: false
matrix:
r: ["4.2"]
ubuntu: [20.04]
force-tests: ["true"]
env:
R: ${{ matrix.r }}
UBUNTU: ${{ matrix.ubuntu }}
steps:
- name: Checkout Arrow
uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
- name: Cache Docker Volumes
uses: actions/cache@v3
with:
path: .docker
# As this key is identical on both matrix builds only one will be able to successfully cache,
# this is fine as there are no differences in the build
key: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-${{ github.run_id }}
restore-keys: |
ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: 3.8
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
run: |
sudo sysctl -w kernel.core_pattern="core.%e.%p"
ulimit -c unlimited
# Setting a non-default and non-probable Marquesas French Polynesia time
# it has both with a .45 offset and very very few people who live there.
archery docker run -e TZ=MART -e ARROW_R_FORCE_TESTS=${{ matrix.force-tests }} ubuntu-r
- name: Dump install logs
run: cat r/check/arrow.Rcheck/00install.out
if: always()
- name: Dump test logs
run: cat r/check/arrow.Rcheck/tests/testthat.Rout*
if: always()
- name: Save the test output
if: always()
uses: actions/upload-artifact@v3
with:
name: test-output
path: r/check/arrow.Rcheck/tests/testthat.Rout*
- name: Docker Push
if: >-
success() &&
github.event_name == 'push' &&
github.repository == 'apache/arrow' &&
github.ref_name == 'main'
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
continue-on-error: true
run: archery docker push ubuntu-r
bundled:
name: "${{ matrix.config.org }}/${{ matrix.config.image }}:${{ matrix.config.tag }}"
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
config:
- { org: "rhub", image: "debian-gcc-devel", tag: "latest", devtoolset: "" }
env:
R_ORG: ${{ matrix.config.org }}
R_IMAGE: ${{ matrix.config.image }}
R_TAG: ${{ matrix.config.tag }}
DEVTOOLSET_VERSION: ${{ matrix.config.devtoolset }}
steps:
- name: Checkout Arrow
uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: 3.8
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
run: |
sudo sysctl -w kernel.core_pattern="core.%e.%p"
ulimit -c unlimited
# Don't set a TZ here to test that case. These builds will have the following warning in them:
# System has not been booted with systemd as init system (PID 1). Can't operate.
# Failed to connect to bus: Host is down
archery docker run -e TZ="" r
- name: Dump install logs
run: cat r/check/arrow.Rcheck/00install.out
if: always()
- name: Dump test logs
run: cat r/check/arrow.Rcheck/tests/testthat.Rout*
if: always()
- name: Save the test output
if: always()
uses: actions/upload-artifact@v3
with:
name: test-output
path: r/check/arrow.Rcheck/tests/testthat.Rout*
- name: Docker Push
if: >-
success() &&
github.event_name == 'push' &&
github.repository == 'apache/arrow' &&
github.ref_name == 'main'
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
continue-on-error: true
run: archery docker push r
windows-cpp:
name: AMD64 Windows C++ RTools ${{ matrix.config.rtools }} ${{ matrix.config.arch }}
runs-on: windows-2019
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
strategy:
fail-fast: false
matrix:
config:
- { rtools: 40, arch: 'ucrt64' }
steps:
- run: git config --global core.autocrlf false
- name: Checkout Arrow
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Setup ccache
shell: bash
run: |
ci/scripts/ccache_setup.sh
echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> $GITHUB_ENV
- name: Cache ccache
uses: actions/cache@v3
with:
path: ccache
key: r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-${{ github.run_id }}
restore-keys: |
r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch }}-
- uses: r-lib/actions/setup-r@v2
with:
r-version: "4.1"
rtools-version: 40
Ncpus: 2
- name: Build Arrow C++
shell: bash
env:
MINGW_ARCH: ${{ matrix.config.arch }}
run: ci/scripts/r_windows_build.sh
- name: Rename libarrow.zip
# So that they're unique when multiple are downloaded in the next step
shell: bash
run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
- uses: actions/upload-artifact@v3
with:
name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
windows-r:
needs: [windows-cpp]
name: AMD64 Windows R ${{ matrix.config.rversion }} RTools ${{ matrix.config.rtools }}
runs-on: windows-2019
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 75
strategy:
fail-fast: false
matrix:
config:
- { rtools: 42, rversion: "4.2" }
- { rtools: 42, rversion: "devel" }
env:
ARROW_R_CXXFLAGS: "-Werror"
_R_CHECK_TESTS_NLINES_: 0
steps:
- run: git config --global core.autocrlf false
- name: Checkout Arrow
uses: actions/checkout@v3
with:
fetch-depth: 0
- run: mkdir r/windows
- name: Download artifacts
if: ${{ matrix.config.rtools == 42 }}
uses: actions/download-artifact@v3
with:
name: libarrow-rtools40-ucrt64.zip
path: r/windows
- name: Unzip and rezip libarrows
shell: bash
run: |
cd r/windows
ls *.zip | xargs -n 1 unzip -uo
rm -rf *.zip
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.rversion }}
rtools-version: ${{ matrix.config.rtools }}
Ncpus: 2
- uses: r-lib/actions/setup-r-dependencies@v2
env:
GITHUB_PAT: "${{ github.token }}"
with:
# For some arcane reason caching does not work on the windows runners
# most likely due to https://github.com/actions/cache/issues/815
cache: false
working-directory: 'r'
extra-packages: |
any::rcmdcheck
- name: Install MinIO
shell: bash
run: |
mkdir -p "$HOME/.local/bin"
curl \
--output "$HOME/.local/bin/minio.exe" \
https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
chmod +x "$HOME/.local/bin/minio.exe"
echo "$HOME/.local/bin" >> $GITHUB_PATH
# TODO(ARROW-17149): figure out why the GCS tests are hanging on Windows
# - name: Install Google Cloud Storage Testbench
# shell: bash
# run: ci/scripts/install_gcs_testbench.sh default
- name: Check
shell: Rscript {0}
run: |
# Because we do R CMD build and r/windows is in .Rbuildignore,
# assemble the libarrow.zip file and pass it as an env var
setwd("r/windows")
zip("libarrow.zip", ".")
setwd("..")
Sys.setenv(
RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "r", "windows", "libarrow.zip"),
MAKEFLAGS = paste0("-j", parallel::detectCores()),
ARROW_R_DEV = TRUE,
"_R_CHECK_FORCE_SUGGESTS_" = FALSE
)
rcmdcheck::rcmdcheck(".",
build_args = '--no-build-vignettes',
args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'),
error_on = 'warning',
check_dir = 'check',
timeout = 3600
)
- name: Run lintr
if: ${{ matrix.config.rversion == '4.2' }}
env:
NOT_CRAN: "true"
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
shell: Rscript {0}
working-directory: r
run: |
Sys.setenv(
RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "r", "windows", "libarrow.zip"),
MAKEFLAGS = paste0("-j", parallel::detectCores()),
ARROW_R_DEV = TRUE,
"_R_CHECK_FORCE_SUGGESTS_" = FALSE
)
# we use pak for package installation since it is faster, safer and more convenient
pak::local_install()
pak::pak("lintr")
lintr::expect_lint_free()
- name: Dump install logs
shell: cmd
run: cat r/check/arrow.Rcheck/00install.out
if: always()
- name: Dump test logs
shell: bash
run: find r/check -name 'testthat.Rout*' -exec cat '{}' \; || true
if: always()