Merge branch 'apache:master' into SPARK-44910
gbloisi-openaire authored Sep 16, 2023
2 parents bb1b17d + 804f741 · commit f7eef65
Showing 751 changed files with 26,593 additions and 9,939 deletions.
1 change: 1 addition & 0 deletions .asf.yaml
@@ -36,3 +36,4 @@ notifications:
 pullrequests: [email protected]
 issues: [email protected]
 commits: [email protected]
+jira_options: link label
4 changes: 1 addition & 3 deletions .github/labeler.yml
@@ -42,12 +42,11 @@ INFRA:
 - ".asf.yaml"
 - ".gitattributes"
 - ".gitignore"
-- "dev/github_jira_sync.py"
 - "dev/merge_spark_pr.py"
 - "dev/run-tests-jenkins*"
 BUILD:
 # Can be supported when a stable release with correct all/any is released
-#- any: ['dev/**/*', '!dev/github_jira_sync.py', '!dev/merge_spark_pr.py', '!dev/.rat-excludes']
+#- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/.rat-excludes']
 - "dev/**/*"
 - "build/**/*"
 - "project/**/*"
@@ -58,7 +57,6 @@ BUILD:
 - "scalastyle-config.xml"
 # These can be added in the above `any` clause (and the /dev/**/* glob removed) when
 # `any`/`all` support is released
-# - "!dev/github_jira_sync.py"
 # - "!dev/merge_spark_pr.py"
 # - "!dev/run-tests-jenkins*"
 # - "!dev/.rat-excludes"
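The comments above sketch the intended end state. Once a labeler release with working `any`/`all` matching is available, the BUILD rule could collapse to roughly the following — a hedged sketch assembled from the commented-out globs above; the exact syntax is an assumption, not a tested config:

BUILD:
  - any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/run-tests-jenkins*', '!dev/.rat-excludes']
  - "build/**/*"
  - "project/**/*"

The intent, per the comments, is that a PR touching dev/ would get the BUILD label unless it only touches the excluded scripts, removing the need for the broad standalone "dev/**/*" glob.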
44 changes: 28 additions & 16 deletions .github/workflows/build_and_test.yml
@@ -86,7 +86,7 @@ jobs:
 sparkr=`./dev/is-changed.py -m sparkr`
 tpcds=`./dev/is-changed.py -m sql`
 docker=`./dev/is-changed.py -m docker-integration-tests`
-# 'build', 'scala-213', and 'java-11-17' are always true for now.
+# 'build', 'scala-213', and 'java-other-versions' are always true for now.
 # It does not save significant time and most of PRs trigger the build.
 precondition="
 {
@@ -96,7 +96,7 @@
 \"tpcds-1g\": \"$tpcds\",
 \"docker-integration-tests\": \"$docker\",
 \"scala-213\": \"true\",
-\"java-11-17\": \"true\",
+\"java-other-versions\": \"true\",
 \"lint\" : \"true\",
 \"k8s-integration-tests\" : \"true\",
 \"breaking-changes-buf\" : \"true\",
@@ -127,6 +127,7 @@ jobs:
 needs: precondition
 if: fromJson(needs.precondition.outputs.required).build == 'true'
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 strategy:
 fail-fast: false
 matrix:
@@ -255,14 +256,14 @@ jobs:
 # We should install one Python that is higher than 3+ for SQL and Yarn because:
 # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
 # - Yarn has a Python specific test too, for example, YarnClusterSuite.
-if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
 with:
 python-version: 3.8
 architecture: x64
 - name: Install Python packages (Python 3.8)
-if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
 run: |
-python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.56.0' 'protobuf==3.20.3'
+python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3'
 python3.8 -m pip list
 # Run the tests.
 - name: Run tests
@@ -282,7 +283,7 @@ jobs:
 name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
 path: "**/target/test-reports/*.xml"
 - name: Upload unit tests log files
-if: failure()
+if: ${{ !success() }}
 uses: actions/upload-artifact@v3
 with:
 name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
@@ -342,6 +343,7 @@ jobs:
 if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark == 'true'
 name: "Build modules: ${{ matrix.modules }}"
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 container:
 image: ${{ needs.precondition.outputs.image_url }}
 strategy:
@@ -365,9 +367,11 @@
 - >-
 pyspark-connect
 - >-
-pyspark-pandas-connect
+pyspark-pandas-connect-part0
 - >-
-pyspark-pandas-slow-connect
+pyspark-pandas-connect-part1
+- >-
+pyspark-pandas-connect-part2
 env:
 MODULES_TO_TEST: ${{ matrix.modules }}
 HADOOP_PROFILE: ${{ inputs.hadoop }}
@@ -421,7 +425,7 @@ jobs:
 run: |
 if [[ "$MODULES_TO_TEST" != *"pyspark-ml"* ]] && [[ "$BRANCH" != "branch-3.5" ]]; then
 # uninstall libraries dedicated for ML testing
-python3.9 -m pip uninstall -y torch torchvision torcheval torchtnt tensorboard mlflow
+python3.9 -m pip uninstall -y torch torchvision torcheval torchtnt tensorboard mlflow deepspeed
 fi
 if [ -f ./dev/free_disk_space_container ]; then
 ./dev/free_disk_space_container
@@ -466,7 +470,7 @@ jobs:
 name: test-results-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
 path: "**/target/test-reports/*.xml"
 - name: Upload unit tests log files
-if: failure()
+if: ${{ !success() }}
 uses: actions/upload-artifact@v3
 with:
 name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
@@ -478,6 +482,7 @@
 if: (!cancelled()) && fromJson(needs.precondition.outputs.required).sparkr == 'true'
 name: "Build modules: sparkr"
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 container:
 image: ${{ needs.precondition.outputs.image_url }}
 env:
@@ -586,6 +591,7 @@ jobs:
 if: (!cancelled()) && fromJson(needs.precondition.outputs.required).lint == 'true'
 name: Linters, licenses, dependencies and documentation generation
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 env:
 LC_ALL: C.UTF-8
 LANG: C.UTF-8
@@ -687,7 +693,7 @@ jobs:
 if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4'
 run: |
 # See more in "Installation" https://docs.buf.build/installation#tarball
-curl -LO https://github.com/bufbuild/buf/releases/download/v1.24.0/buf-Linux-x86_64.tar.gz
+curl -LO https://github.com/bufbuild/buf/releases/download/v1.26.1/buf-Linux-x86_64.tar.gz
 mkdir -p $HOME/buf
 tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
 rm buf-Linux-x86_64.tar.gz
@@ -755,17 +761,19 @@ jobs:
 path: site.tar.bz2
 retention-days: 1

-java-11-17:
+java-other-versions:
 needs: precondition
-if: fromJson(needs.precondition.outputs.required).java-11-17 == 'true'
+if: fromJson(needs.precondition.outputs.required).java-other-versions == 'true'
 name: Java ${{ matrix.java }} build with Maven
 strategy:
 fail-fast: false
 matrix:
 java:
 - 11
 - 17
+- 21-ea
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 steps:
 - name: Checkout Spark repository
 uses: actions/checkout@v3
@@ -816,6 +824,7 @@ jobs:
 if: fromJson(needs.precondition.outputs.required).scala-213 == 'true'
 name: Scala 2.13 build with SBT
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 steps:
 - name: Checkout Spark repository
 uses: actions/checkout@v3
@@ -864,6 +873,7 @@
 name: Run TPC-DS queries with SF=1
 # Pin to 'Ubuntu 20.04' due to 'databricks/tpcds-kit' compilation
 runs-on: ubuntu-20.04
+timeout-minutes: 300
 env:
 SPARK_LOCAL_IP: localhost
 steps:
@@ -951,7 +961,7 @@ jobs:
 name: test-results-tpcds--8-${{ inputs.hadoop }}-hive2.3
 path: "**/target/test-reports/*.xml"
 - name: Upload unit tests log files
-if: failure()
+if: ${{ !success() }}
 uses: actions/upload-artifact@v3
 with:
 name: unit-tests-log-tpcds--8-${{ inputs.hadoop }}-hive2.3
@@ -962,6 +972,7 @@
 if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true'
 name: Run Docker integration tests
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 env:
 HADOOP_PROFILE: ${{ inputs.hadoop }}
 HIVE_PROFILE: hive2.3
@@ -1017,7 +1028,7 @@ jobs:
 name: test-results-docker-integration--8-${{ inputs.hadoop }}-hive2.3
 path: "**/target/test-reports/*.xml"
 - name: Upload unit tests log files
-if: failure()
+if: ${{ !success() }}
 uses: actions/upload-artifact@v3
 with:
 name: unit-tests-log-docker-integration--8-${{ inputs.hadoop }}-hive2.3
@@ -1028,6 +1039,7 @@
 if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true'
 name: Run Spark on Kubernetes Integration test
 runs-on: ubuntu-22.04
+timeout-minutes: 300
 steps:
 - name: Checkout Spark repository
 uses: actions/checkout@v3
@@ -1091,7 +1103,7 @@
 eval $(minikube docker-env)
 build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
 - name: Upload Spark on K8S integration tests log files
-if: failure()
+if: ${{ !success() }}
 uses: actions/upload-artifact@v3
 with:
 name: spark-on-kubernetes-it-log
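Two changes recur throughout this file: log-upload steps switch from `if: failure()` to `if: ${{ !success() }}`, and every job gains a `timeout-minutes: 300` cap. In GitHub Actions, `failure()` is true only when a previous step has failed, while `!success()` is also true when the run was cancelled, so log artifacts get captured in more cases — including jobs killed by the new timeout. A minimal self-contained sketch (the job name, script, and paths are hypothetical, not part of this diff):

jobs:
  demo:
    runs-on: ubuntu-22.04
    timeout-minutes: 300          # mirrors the cap this commit adds to each job
    steps:
      - name: Run tests
        run: ./run-tests.sh       # hypothetical test entry point
      - name: Upload logs
        if: ${{ !success() }}     # fires on failure and on cancellation
        uses: actions/upload-artifact@v3
        with:
          name: demo-logs
          path: "**/*.log"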
2 changes: 1 addition & 1 deletion .github/workflows/build_java21.yml
@@ -42,7 +42,7 @@ jobs:
 jobs: >-
 {
 "build": "true",
-"pyspark": "false",
+"pyspark": "true",
 "sparkr": "true",
 "tpcds-1g": "true",
 "docker-integration-tests": "true"
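This flips the scheduled Java 21 build to also run the PySpark test jobs. The JSON blob is an input to the reusable build_and_test.yml workflow, whose jobs gate themselves on the corresponding flag via `fromJson`, as seen earlier in this diff. A condensed sketch of that handshake (job names abbreviated; treat the exact wiring as an assumption):

# caller side (build_java21.yml): forward the flags to the reusable workflow
jobs:
  run-build:
    uses: ./.github/workflows/build_and_test.yml
    with:
      jobs: >-
        { "build": "true", "pyspark": "true" }

# callee side (build_and_test.yml): each job checks its own flag
pyspark:
  needs: precondition
  if: fromJson(needs.precondition.outputs.required).pyspark == 'true'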
1 change: 1 addition & 0 deletions .gitignore
@@ -73,6 +73,7 @@ python/.eggs/
 python/coverage.xml
 python/deps
 python/docs/_site/
+python/docs/source/development/errors.rst
 python/docs/source/reference/**/api/
 python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst
 python/test_coverage/coverage_data
6 changes: 5 additions & 1 deletion LICENSE
@@ -229,7 +229,6 @@ BSD 3-Clause
 python/lib/py4j-*-src.zip
 python/pyspark/cloudpickle/*.py
 python/pyspark/join.py
-core/src/main/resources/org/apache/spark/ui/static/d3.min.js
 
 The CSS style for the navigation sidebar of the documentation was originally
 submitted by Óscar Nájera for the scikit-learn project. The scikit-learn project
@@ -248,6 +247,11 @@ docs/js/vendor/anchor.min.js
 docs/js/vendor/jquery*
 docs/js/vendor/modernizer*
 
+ISC License
+-----------
+
+core/src/main/resources/org/apache/spark/ui/static/d3.min.js
+
 
 Creative Commons CC0 1.0 Universal Public Domain Dedication
 -----------------------------------------------------------
6 changes: 5 additions & 1 deletion LICENSE-binary
@@ -461,7 +461,6 @@ org.jdom:jdom2
 python/lib/py4j-*-src.zip
 python/pyspark/cloudpickle.py
 python/pyspark/join.py
-core/src/main/resources/org/apache/spark/ui/static/d3.min.js
 
 The CSS style for the navigation sidebar of the documentation was originally
 submitted by Óscar Nájera for the scikit-learn project. The scikit-learn project
@@ -498,6 +497,11 @@ docs/js/vendor/anchor.min.js
 docs/js/vendor/jquery*
 docs/js/vendor/modernizer*
 
+ISC License
+-----------
+
+core/src/main/resources/org/apache/spark/ui/static/d3.min.js
+
 
 Common Development and Distribution License (CDDL) 1.0
 ------------------------------------------------------
2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
@@ -10,7 +10,7 @@ Authors@R:
 License: Apache License (== 2.0)
 URL: https://www.apache.org https://spark.apache.org
 BugReports: https://spark.apache.org/contributing.html
-SystemRequirements: Java (>= 8, < 18)
+SystemRequirements: Java (>= 8, < 22)
 Depends:
 R (>= 3.5),
 methods
2 changes: 1 addition & 1 deletion R/pkg/tests/fulltests/test_sparkSQL.R
@@ -4199,7 +4199,7 @@ test_that("catalog APIs, listTables, getTable, listColumns, listFunctions, funct
 
 # recoverPartitions does not work with temporary view
 expect_error(recoverPartitions("cars"),
-"[UNSUPPORTED_TEMP_VIEW_OPERATION.WITH_SUGGESTION]*`cars`*")
+"[UNSUPPORTED_VIEW_OPERATION.WITH_SUGGESTION]*`cars`*")
 expect_error(refreshTable("cars"), NA)
 expect_error(refreshByPath("/"), NA)
 