diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
deleted file mode 100644
index 517cef80b7624..0000000000000
--- a/.github/workflows/build_and_test.yml
+++ /dev/null
@@ -1,715 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-name: Build and test
-
-on:
- push:
- branches:
- - '**'
-
-jobs:
- # Build: build Spark and run the tests for specified modules.
- build:
- name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
- # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
- runs-on: ubuntu-20.04
- strategy:
- fail-fast: false
- matrix:
- java:
- - 8
- hadoop:
- - hadoop3.2
- hive:
- - hive2.3
- # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
- # Kinesis tests depend on the external Amazon Kinesis service.
- # Note that the modules below are from sparktestsupport/modules.py.
- modules:
- - >-
- core, unsafe, kvstore, avro,
- network-common, network-shuffle, repl, launcher,
- examples, sketch, graphx
- - >-
- catalyst, hive-thriftserver
- - >-
- streaming, sql-kafka-0-10, streaming-kafka-0-10,
- mllib-local, mllib,
- yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
- # Here, we split the Hive and SQL tests into the slow ones and the rest.
- included-tags: [""]
- excluded-tags: [""]
- comment: [""]
- include:
- # Hive tests
- - modules: hive
- java: 8
- hadoop: hadoop3.2
- hive: hive2.3
- included-tags: org.apache.spark.tags.SlowHiveTest
- comment: "- slow tests"
- - modules: hive
- java: 8
- hadoop: hadoop3.2
- hive: hive2.3
- excluded-tags: org.apache.spark.tags.SlowHiveTest
- comment: "- other tests"
- # SQL tests
- - modules: sql
- java: 8
- hadoop: hadoop3.2
- hive: hive2.3
- included-tags: org.apache.spark.tags.ExtendedSQLTest
- comment: "- slow tests"
- - modules: sql
- java: 8
- hadoop: hadoop3.2
- hive: hive2.3
- excluded-tags: org.apache.spark.tags.ExtendedSQLTest
- comment: "- other tests"
- env:
- MODULES_TO_TEST: ${{ matrix.modules }}
- EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
- INCLUDED_TAGS: ${{ matrix.included-tags }}
- HADOOP_PROFILE: ${{ matrix.hadoop }}
- HIVE_PROFILE: ${{ matrix.hive }}
- GITHUB_PREV_SHA: ${{ github.event.before }}
- SPARK_LOCAL_IP: localhost
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- # In order to fetch changed files
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
- - name: Install Java ${{ matrix.java }}
- uses: actions/setup-java@v1
- with:
- java-version: ${{ matrix.java }}
- - name: Install Python 3.8
- uses: actions/setup-python@v2
- # We should install a Python 3+ interpreter for SQL and Yarn because:
- # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
- # - Yarn has a Python specific test too, for example, YarnClusterSuite.
- if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
- with:
- python-version: 3.8
- architecture: x64
- - name: Install Python packages (Python 3.8)
- if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
- run: |
- python3.8 -m pip install numpy 'pyarrow<5.0.0' pandas scipy xmlrunner
- python3.8 -m pip list
- # Run the tests.
- - name: Run tests
- run: |
- # Hive "other tests" test needs larger metaspace size based on experiment.
- if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
- export SERIAL_SBT_TESTS=1
- ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
- - name: Upload test results to report
- if: always()
- uses: actions/upload-artifact@v2
- with:
- name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
- path: "**/target/test-reports/*.xml"
- - name: Upload unit tests log files
- if: failure()
- uses: actions/upload-artifact@v2
- with:
- name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
- path: "**/target/unit-tests.log"
-
- pyspark:
- name: "Build modules: ${{ matrix.modules }}"
- runs-on: ubuntu-20.04
- container:
- image: dongjoon/apache-spark-github-action-image:20210730
- strategy:
- fail-fast: false
- matrix:
- modules:
- - >-
- pyspark-sql, pyspark-mllib, pyspark-resource
- - >-
- pyspark-core, pyspark-streaming, pyspark-ml
- - >-
- pyspark-pandas
- - >-
- pyspark-pandas-slow
- env:
- MODULES_TO_TEST: ${{ matrix.modules }}
- HADOOP_PROFILE: hadoop3.2
- HIVE_PROFILE: hive2.3
- GITHUB_PREV_SHA: ${{ github.event.before }}
- SPARK_LOCAL_IP: localhost
- SKIP_UNIDOC: true
- SKIP_MIMA: true
- METASPACE_SIZE: 1g
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- # In order to fetch changed files
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- pyspark-coursier-
- - name: Install Python 3.6
- uses: actions/setup-python@v2
- with:
- python-version: 3.6
- architecture: x64
- # This step takes much less time (~30s) than for other Python versions, so it is not
- # baked into the Docker image being used. There is also a technical issue with installing
- # Python 3.6 on Ubuntu 20.04. See also SPARK-33162.
- - name: Install Python packages (Python 3.6)
- run: |
- python3.6 -m pip install numpy 'pyarrow<4.0.0' pandas scipy xmlrunner 'plotly>=4.8'
- python3.6 -m pip list
- - name: List Python packages (Python 3.9)
- run: |
- python3.9 -m pip list
- - name: Install Conda for pip packaging test
- run: |
- curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
- bash miniconda.sh -b -p $HOME/miniconda
- # Run the tests.
- - name: Run tests
- run: |
- export PATH=$PATH:$HOME/miniconda/bin
- ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
- - name: Upload test results to report
- if: always()
- uses: actions/upload-artifact@v2
- with:
- name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
- path: "**/target/test-reports/*.xml"
- - name: Upload unit tests log files
- if: failure()
- uses: actions/upload-artifact@v2
- with:
- name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
- path: "**/target/unit-tests.log"
-
- sparkr:
- name: "Build modules: sparkr"
- runs-on: ubuntu-20.04
- container:
- image: dongjoon/apache-spark-github-action-image:20210602
- env:
- HADOOP_PROFILE: hadoop3.2
- HIVE_PROFILE: hive2.3
- GITHUB_PREV_SHA: ${{ github.event.before }}
- SPARK_LOCAL_IP: localhost
- SKIP_MIMA: true
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- # In order to fetch changed files
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- sparkr-coursier-
- - name: Run tests
- run: |
- # The following are also used by `r-lib/actions/setup-r` to avoid
- # R issues in the Docker environment
- export TZ=UTC
- export _R_CHECK_SYSTEM_CLOCK_=FALSE
- ./dev/run-tests --parallelism 1 --modules sparkr
- - name: Upload test results to report
- if: always()
- uses: actions/upload-artifact@v2
- with:
- name: test-results-sparkr--8-hadoop3.2-hive2.3
- path: "**/target/test-reports/*.xml"
-
- # Static analysis, and documentation build
- lint:
- name: Linters, licenses, dependencies and documentation generation
- runs-on: ubuntu-20.04
- env:
- LC_ALL: C.UTF-8
- LANG: C.UTF-8
- PYSPARK_DRIVER_PYTHON: python3.9
- container:
- image: dongjoon/apache-spark-github-action-image:20210602
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- docs-coursier-
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: docs-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- docs-maven-
- - name: Install Python linter dependencies
- run: |
- # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
- # See also https://github.com/sphinx-doc/sphinx/issues/7551.
- # Jinja2 3.0.0+ causes errors when building with Sphinx.
- # See also https://issues.apache.org/jira/browse/SPARK-35375.
- python3.9 -m pip install flake8 pydata_sphinx_theme 'mypy==0.910' numpydoc 'jinja2<3.0.0' 'black==21.5b2'
- - name: Install R linter dependencies and SparkR
- run: |
- apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
- Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
- Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')"
- ./R/install-dev.sh
- - name: Install JavaScript linter dependencies
- run: |
- apt update
- apt-get install -y nodejs npm
- - name: Install dependencies for documentation generation
- run: |
- # pandoc is also required to generate the PySpark API docs in nbsphinx.
- apt-get install -y libcurl4-openssl-dev pandoc
- # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
- # See also https://github.com/sphinx-doc/sphinx/issues/7551.
- # Jinja2 3.0.0+ causes errors when building with Sphinx.
- # See also https://issues.apache.org/jira/browse/SPARK-35375.
- python3.9 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0'
- python3.9 -m pip install sphinx_plotly_directive 'pyarrow<5.0.0' pandas 'plotly>=4.8'
- apt-get update -y
- apt-get install -y ruby ruby-dev
- Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- gem install bundler
- cd docs
- bundle install
- - name: Scala linter
- run: ./dev/lint-scala
- - name: Java linter
- run: ./dev/lint-java
- - name: Python linter
- run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
- - name: R linter
- run: ./dev/lint-r
- - name: JS linter
- run: ./dev/lint-js
- - name: License test
- run: ./dev/check-license
- - name: Dependencies test
- run: ./dev/test-dependencies.sh
- - name: Run documentation build
- run: |
- cd docs
- bundle exec jekyll build
-
- java-11-17:
- name: Java ${{ matrix.java }} build with Maven
- strategy:
- fail-fast: false
- matrix:
- java:
- - 11
- - 17
- runs-on: ubuntu-20.04
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- java${{ matrix.java }}-maven-
- - name: Install Java ${{ matrix.java }}
- uses: actions/setup-java@v1
- with:
- java-version: ${{ matrix.java }}
- - name: Build with Maven
- run: |
- export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
- export MAVEN_CLI_OPTS="--no-transfer-progress"
- export JAVA_VERSION=${{ matrix.java }}
- # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install
- rm -rf ~/.m2/repository/org/apache/spark
-
- scala-213:
- name: Scala 2.13 build with SBT
- runs-on: ubuntu-20.04
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- scala-213-coursier-
- - name: Install Java 8
- uses: actions/setup-java@v1
- with:
- java-version: 8
- - name: Build with SBT
- run: |
- ./dev/change-scala-version.sh 2.13
- ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile
-
- hadoop-2:
- name: Hadoop 2 build with SBT
- runs-on: ubuntu-20.04
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- hadoop-2-coursier-
- - name: Install Java 8
- uses: actions/setup-java@v1
- with:
- java-version: 8
- - name: Build with SBT
- run: |
- ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
-
- tpcds-1g:
- name: Run TPC-DS queries with SF=1
- runs-on: ubuntu-20.04
- env:
- SPARK_LOCAL_IP: localhost
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- tpcds-coursier-
- - name: Install Java 8
- uses: actions/setup-java@v1
- with:
- java-version: 8
- - name: Cache TPC-DS generated data
- id: cache-tpcds-sf-1
- uses: actions/cache@v2
- with:
- path: ./tpcds-sf-1
- key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- - name: Checkout tpcds-kit repository
- if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- uses: actions/checkout@v2
- with:
- repository: databricks/tpcds-kit
- ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
- path: ./tpcds-kit
- - name: Build tpcds-kit
- if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- run: cd tpcds-kit/tools && make OS=LINUX
- - name: Generate TPC-DS (SF=1) table data
- if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
- - name: Run TPC-DS queries
- run: |
- SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
- - name: Upload test results to report
- if: always()
- uses: actions/upload-artifact@v2
- with:
- name: test-results-tpcds--8-hadoop3.2-hive2.3
- path: "**/target/test-reports/*.xml"
- - name: Upload unit tests log files
- if: failure()
- uses: actions/upload-artifact@v2
- with:
- name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
- path: "**/target/unit-tests.log"
-
- docker-integration-tests:
- name: Run docker integration tests
- runs-on: ubuntu-20.04
- env:
- HADOOP_PROFILE: hadoop3.2
- HIVE_PROFILE: hive2.3
- GITHUB_PREV_SHA: ${{ github.event.before }}
- SPARK_LOCAL_IP: localhost
- ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe
- SKIP_MIMA: true
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: apache/spark
- ref: branch-3.2
- - name: Sync the current branch with the latest in Apache Spark
- if: github.repository != 'apache/spark'
- run: |
- echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
- git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- - name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
- with:
- path: |
- build/apache-maven-*
- build/scala-*
- build/*.jar
- ~/.sbt
- key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
- restore-keys: |
- build-
- - name: Cache Coursier local repository
- uses: actions/cache@v2
- with:
- path: ~/.cache/coursier
- key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
- restore-keys: |
- docker-integration-coursier-
- - name: Install Java 8
- uses: actions/setup-java@v1
- with:
- java-version: 8
- - name: Cache Oracle docker-images repository
- id: cache-oracle-docker-images
- uses: actions/cache@v2
- with:
- path: ./oracle/docker-images
- # The key should contain the commit hash of the Oracle docker-images revision to be checked out.
- key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1
- - name: Checkout Oracle docker-images repository
- uses: actions/checkout@v2
- with:
- fetch-depth: 0
- repository: oracle/docker-images
- ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1
- path: ./oracle/docker-images
- - name: Install Oracle Docker image
- run: |
- cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles
- ./buildContainerImage.sh -v 18.4.0 -x
- - name: Run tests
- run: |
- ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
- - name: Upload test results to report
- if: always()
- uses: actions/upload-artifact@v2
- with:
- name: test-results-docker-integration--8-hadoop3.2-hive2.3
- path: "**/target/test-reports/*.xml"
- - name: Upload unit tests log files
- if: failure()
- uses: actions/upload-artifact@v2
- with:
- name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3
- path: "**/target/unit-tests.log"
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
deleted file mode 100644
index 98855f4668b45..0000000000000
--- a/.github/workflows/labeler.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-# Intentionally has a general name, because the test status check created
-# in GitHub Actions currently picks any associated workflow at random.
-# So, the name was changed to make sense in that context too.
-# See also https://github.community/t/specify-check-suite-when-creating-a-checkrun/118380/10
-
-name: "On pull requests"
-on: pull_request_target
-
-jobs:
- label:
- name: Label pull requests
- runs-on: ubuntu-latest
- steps:
- # In order to get back the negated matches like in the old config,
- # we need the actions/labeler concepts of `all` and `any`, which match
- # all of the given constraints / glob patterns for either `all`
- # files or `any` file in the change set.
- #
- # GitHub issue which requests a timeline for a release with any/all support:
- # - https://github.com/actions/labeler/issues/111
- # That issue also references the comment noting that any/all are only
- # supported on the main branch (previously called master):
- # - https://github.com/actions/labeler/issues/73#issuecomment-639034278
- #
- # However, these are not in a published release and the current `main` branch
- # has some issues upon testing.
- - uses: actions/labeler@2.2.0
- with:
- repo-token: "${{ secrets.GITHUB_TOKEN }}"
- sync-labels: true
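For reference, the `any`/`all` semantics discussed in the comments above would allow negated globs roughly as follows (a hypothetical configuration for a labeler release with any/all support; the label names and patterns are illustrative only):

    SQL:
      - any: ["**/sql/**/*", "!python/**/*"]
    CORE:
      - all: ["core/**/*"]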
diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml
deleted file mode 100644
index ae71491efd205..0000000000000
--- a/.github/workflows/publish_snapshot.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-name: Publish Snapshot
-
-on:
- schedule:
- - cron: '0 0 * * *'
-
-jobs:
- publish-snapshot:
- if: github.repository == 'apache/spark'
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- branch:
- - master
- - branch-3.1
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@master
- with:
- ref: ${{ matrix.branch }}
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: snapshot-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- snapshot-maven-
- - name: Install Java 8
- uses: actions/setup-java@v1
- with:
- java-version: 8
- - name: Publish snapshot
- env:
- ASF_USERNAME: ${{ secrets.NEXUS_USER }}
- ASF_PASSWORD: ${{ secrets.NEXUS_PW }}
- GPG_KEY: "not_used"
- GPG_PASSPHRASE: "not_used"
- GIT_REF: ${{ matrix.branch }}
- run: ./dev/create-release/release-build.sh publish-snapshot
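Reproducing the publish step outside CI amounts to exporting the same variables the job sets and invoking the release script (a sketch; the credentials are placeholders):

    export ASF_USERNAME=<nexus-username>
    export ASF_PASSWORD=<nexus-password>
    export GPG_KEY=not_used GPG_PASSPHRASE=not_used GIT_REF=master
    ./dev/create-release/release-build.sh publish-snapshot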
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
deleted file mode 100644
index f26100db5612e..0000000000000
--- a/.github/workflows/stale.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-name: Close stale PRs
-
-on:
- schedule:
- - cron: "0 0 * * *"
-
-jobs:
- stale:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/stale@v1.1.0
- with:
- repo-token: ${{ secrets.GITHUB_TOKEN }}
- stale-pr-message: >
- We're closing this PR because it hasn't been updated in a while.
- This isn't a judgement on the merit of the PR in any way. It's just
- a way of keeping the PR queue manageable.
-
- If you'd like to revive this PR, please reopen it and ask a
- committer to remove the Stale tag!
- days-before-stale: 100
- # Setting this to 0 is the same as setting it to 1.
- # See: https://github.com/actions/stale/issues/28
- days-before-close: 0
diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml
deleted file mode 100644
index 327708993d679..0000000000000
--- a/.github/workflows/test_report.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-name: Report test results
-on:
- workflow_run:
- workflows: ["Build and test"]
- types:
- - completed
-
-jobs:
- test_report:
- runs-on: ubuntu-latest
- steps:
- - name: Download test results to report
- uses: dawidd6/action-download-artifact@v2
- with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
- workflow: ${{ github.event.workflow_run.workflow_id }}
- commit: ${{ github.event.workflow_run.head_commit.id }}
- workflow_conclusion: completed
- - name: Publish test report
- uses: scacap/action-surefire-report@v1
- with:
- check_name: Report test results
- github_token: ${{ secrets.GITHUB_TOKEN }}
- report_paths: "**/target/test-reports/*.xml"
- commit: ${{ github.event.workflow_run.head_commit.id }}
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a85201168af6a..aab79c293fc3f 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
@@ -137,7 +137,7 @@
<version>${project.version}</version>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml
index 009d4b92f406c..7db5b432aaf50 100644
--- a/assembly/src/main/assembly/assembly.xml
+++ b/assembly/src/main/assembly/assembly.xml
@@ -83,6 +83,8 @@
<useProjectArtifact>false</useProjectArtifact>
<exclude>org.apache.hadoop:*:jar</exclude>
+ <exclude>io.hops:*:jar</exclude>
+ <exclude>io.hops.metadata:*:jar</exclude>
<exclude>org.apache.spark:*:jar</exclude>
<exclude>org.apache.zookeeper:*:jar</exclude>
<exclude>org.apache.avro:*:jar</exclude>
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 11cf0cb9fabed..b17610f71d525 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 9957a778733ce..4abe4a2b56ee8 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index b3ea2877d8ced..b88baa516d03b 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 8fb7d4eeb6a14..28f2782c78ab8 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -64,14 +64,12 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
- <version>${hadoop.version}</version>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
- <version>${hadoop.version}</version>
<groupId>com.google.guava</groupId>
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 7e4c6c3607476..f3a4af3a91376 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index bdf992c58cea2..06b6f091d58d2 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
index e2db52ba0de60..8fbc4316d5a12 100644
--- a/common/unsafe/pom.xml
+++ b/common/unsafe/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/core/pom.xml b/core/pom.xml
index 592fc99efac70..e7cb443866a1e 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
@@ -66,14 +66,14 @@
<artifactId>xbean-asm9-shaded</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
- <version>${hadoop.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
- <version>${hadoop.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<groupId>org.apache.spark</groupId>
@@ -415,7 +415,7 @@
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-minikdc</artifactId>
<scope>test</scope>
@@ -467,13 +467,13 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 571059be6fd0e..c2dfa47a3757d 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -181,7 +181,13 @@ echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DI
echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
-cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
+# Fabio: copy jars from the spark-assembly-*-dist directory, which
+# contains the distribution prepared by the maven-assembly-plugin.
+# The maven-assembly-plugin has rules to remove the hadoop/hops dependencies
+# from the final distribution.
+# You need to run with the -Pbigtop-dist profile for this to work.
+cp "$SPARK_HOME"/assembly/target/spark-assembly_"$SCALA_VERSION"-"$VERSION"-dist/lib/* "$DISTDIR/jars/"
+cp "$SPARK_HOME"/assembly/target/spark-assembly_"$SCALA_VERSION"-"$VERSION"-dist/*.jar "$DISTDIR/jars/"
# Only create the yarn directory if the yarn artifacts were built.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
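Because the jars are now copied from the spark-assembly_*-dist directory, the distribution has to be built with the bigtop-dist profile active so that the maven-assembly-plugin actually produces that directory. A sketch of the invocation (the profiles other than -Pbigtop-dist are illustrative):

    ./dev/make-distribution.sh --name hops --tgz -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver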
diff --git a/examples/pom.xml b/examples/pom.xml
index d2cde2b7dddb7..b58d4dbf21ac4 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
diff --git a/external/avro/pom.xml b/external/avro/pom.xml
index 7482d4f00fcb3..926e3b7f7573c 100644
--- a/external/avro/pom.xml
+++ b/external/avro/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml
index 0e937a935ce74..5b01c0de5c713 100644
--- a/external/docker-integration-tests/pom.xml
+++ b/external/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -108,7 +108,7 @@
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-minikdc</artifactId>
<scope>test</scope>
diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml
index 1ece2aa136a7d..680f350af194d 100644
--- a/external/kafka-0-10-assembly/pom.xml
+++ b/external/kafka-0-10-assembly/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -70,13 +70,12 @@
<scope>provided</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<version>${hadoop.version}</version>
diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml
index 942e23e668026..e5a11c149efa2 100644
--- a/external/kafka-0-10-sql/pom.xml
+++ b/external/kafka-0-10-sql/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -121,7 +121,7 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-minikdc</artifactId>
diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml
index fe4015963da95..6c3d184878025 100644
--- a/external/kafka-0-10-token-provider/pom.xml
+++ b/external/kafka-0-10-token-provider/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -65,7 +65,7 @@
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<scope>${hadoop.deps.scope}</scope>
diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml
index 88eeb2da15f77..4a9ea643ad14d 100644
--- a/external/kafka-0-10/pom.xml
+++ b/external/kafka-0-10/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml
index 1970e3a7177b9..226ee94edca62 100644
--- a/external/kinesis-asl-assembly/pom.xml
+++ b/external/kinesis-asl-assembly/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -90,13 +90,12 @@
<scope>provided</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<version>${hadoop.version}</version>
diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml
index cb2b46714d3be..20c21d9f49dfc 100644
--- a/external/kinesis-asl/pom.xml
+++ b/external/kinesis-asl/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml
index c272632dca969..d7000ab214958 100644
--- a/external/spark-ganglia-lgpl/pom.xml
+++ b/external/spark-ganglia-lgpl/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/graphx/pom.xml b/graphx/pom.xml
index de2aa81ab6fac..2bbb97aadc368 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml
index b6149f4a2f52f..bbe3681f52b8f 100644
--- a/hadoop-cloud/pom.xml
+++ b/hadoop-cloud/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
@@ -57,13 +57,13 @@
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<version>${hadoop.version}</version>
@@ -72,13 +72,13 @@
intra-jackson-module version problems.
-->
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
@@ -113,13 +113,13 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-openstack</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
@@ -175,13 +175,13 @@
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-azure</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
@@ -262,13 +262,13 @@
but it still needs some selective exclusion across versions, especially 3.0.x.
-->
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-cloud-storage</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
diff --git a/launcher/pom.xml b/launcher/pom.xml
index abf13e9c7fef2..3ff52785675f8 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
@@ -80,15 +80,13 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
- <version>${hadoop.version}</version>
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
- <version>${hadoop.version}</version>
<scope>test</scope>
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 3d7033ca52821..d638f82b16b6d 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 1f75cb601ce36..d0cc3872dc80c 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
diff --git a/pom.xml b/pom.xml
index 183d2588402b1..37086e98bbe16 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<packaging>pom</packaging>
<name>Spark Project Parent POM</name>
<url>http://spark.apache.org/</url>
@@ -120,18 +120,19 @@
<sbt.project.name>spark</sbt.project.name>
<slf4j.version>1.7.30</slf4j.version>
<log4j.version>1.2.17</log4j.version>
- <hadoop.version>3.3.1</hadoop.version>
+ <hadoop.version>3.2.0.2</hadoop.version>
+ <hadoop.group>io.hops</hadoop.group>
<protobuf.version>2.5.0</protobuf.version>
<yarn.version>${hadoop.version}</yarn.version>
<zookeeper.version>3.6.2</zookeeper.version>
<curator.version>2.13.0</curator.version>
- <hive.group>org.apache.hive</hive.group>
+ <hive.group>io.hops.hive</hive.group>
<hive.classifier>core</hive.classifier>
- <hive.version>2.3.9</hive.version>
- <hive23.version>2.3.9</hive23.version>
+ <hive.version>3.0.0.8-SNAPSHOT</hive.version>
+ <hive23.version>3.0.0.8-SNAPSHOT</hive23.version>
- <hive.version.short>2.3</hive.version.short>
+ <hive.version.short>3.0</hive.version.short>
<kafka.version>2.8.0</kafka.version>
@@ -238,12 +239,14 @@
-->
<hadoop.deps.scope>compile</hadoop.deps.scope>
<hive.deps.scope>compile</hive.deps.scope>
- <hive.storage.version>2.7.2</hive.storage.version>
+ <hive.storage.version>2.6.1.2</hive.storage.version>
<hive.storage.scope>compile</hive.storage.scope>
<hive.common.scope>compile</hive.common.scope>
<hive.llap.scope>compile</hive.llap.scope>
<hive.serde.scope>compile</hive.serde.scope>
<hive.shims.scope>compile</hive.shims.scope>
+ <hive.jdbc.scope>compile</hive.jdbc.scope>
+ <hive.service.scope>compile</hive.service.scope>
<orc.deps.scope>compile</orc.deps.scope>
<parquet.deps.scope>compile</parquet.deps.scope>
<parquet.test.deps.scope>test</parquet.test.deps.scope>
@@ -314,6 +317,17 @@
<enabled>false</enabled>
+ <repository>
+ <id>Hops</id>
+ <name>Hops Repository</name>
+ <url>https://archiva.hops.works/repository/Hops/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ </snapshots>
+ </repository>
@@ -1109,26 +1123,26 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-minicluster.artifact}</artifactId>
<version>${yarn.version}</version>
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
@@ -1210,7 +1224,7 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-minikdc</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
@@ -1300,7 +1314,7 @@
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
<scope>${hadoop.deps.scope}</scope>
@@ -1344,7 +1358,7 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
<scope>${hadoop.deps.scope}</scope>
@@ -1384,7 +1398,7 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
<version>${yarn.version}</version>
<classifier>tests</classifier>
@@ -1427,7 +1441,7 @@
<artifactId>*</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
@@ -1439,27 +1453,27 @@
fail, see also SPARK-33104.
-->
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<version>${yarn.version}</version>
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-common</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-common</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-api</artifactId>
@@ -1522,7 +1536,7 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
<scope>${hadoop.deps.scope}</scope>
@@ -1738,11 +1752,11 @@
<artifactId>ant</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-auth</artifactId>
@@ -1792,7 +1806,7 @@
- <groupId>org.apache.hive</groupId>
+ <groupId>io.hops.hive</groupId>
<artifactId>hive-storage-api</artifactId>
@@ -1848,11 +1862,6 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
- <exclusion>
- <groupId>org.apache.calcite</groupId>
- <artifactId>calcite-core</artifactId>
- </exclusion>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
@@ -1951,7 +1960,7 @@
<groupId>${hive.group}</groupId>
- <artifactId>hive-jdbc</artifactId>
+ <artifactId>hops-jdbc</artifactId>
<version>${hive.version}</version>
@@ -2055,6 +2064,10 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
+ <exclusion>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
@@ -2149,7 +2162,19 @@
<groupId>${hive.group}</groupId>
<artifactId>hive-service-rpc</artifactId>
- <version>3.1.2</version>
+ <version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-service</artifactId>
+ <version>${hive.version}</version>
<groupId>*</groupId>
@@ -2157,6 +2182,7 @@
+
<groupId>net.sf.jpam</groupId>
<artifactId>jpam</artifactId>
@@ -2184,7 +2210,7 @@
<artifactId>guava</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
@@ -2219,6 +2245,10 @@
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
@@ -2232,42 +2262,50 @@
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-llap-common</artifactId>
<version>${hive23.version}</version>
<scope>${hive.deps.scope}</scope>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-common</artifactId>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-serde</artifactId>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-llap-client</artifactId>
<version>${hive23.version}</version>
<scope>test</scope>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-common</artifactId>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-serde</artifactId>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-llap-common</artifactId>
@@ -2286,6 +2324,10 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
@@ -2300,15 +2342,15 @@
<artifactId>jaxb-api</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-hdfs</artifactId>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-storage-api</artifactId>
@@ -2320,11 +2362,11 @@
<scope>${orc.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-common</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
@@ -2332,7 +2374,7 @@
<artifactId>orc-core</artifactId>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-storage-api</artifactId>
@@ -2477,7 +2519,7 @@
<version>2.9.1</version>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-storage-api</artifactId>
<version>${hive.storage.version}</version>
<scope>${hive.storage.scope}</scope>
@@ -3549,4 +3591,12 @@
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>Hops</id>
+ <name>Hops Repo</name>
+ <url>https://archiva.hops.works/repository/Hops/</url>
+ </pluginRepository>
+ </pluginRepositories>
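With the Hops repository and coordinates in place, artifact resolution can be sanity-checked by fetching one of the relocated dependencies directly (a sketch; assumes network access to archiva.hops.works):

    ./build/mvn -q dependency:get \
      -Dartifact=io.hops:hadoop-client-api:3.2.0.2 \
      -DremoteRepositories=https://archiva.hops.works/repository/Hops/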
diff --git a/repl/pom.xml b/repl/pom.xml
index 60eaff7e0ed28..2e512ad0fd2e3 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index d56d7ec9437fc..9544d74ed6c3a 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../../pom.xml</relativePath>
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index 3f2efccef73e9..da5fb7ea3b998 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../../pom.xml</relativePath>
diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml
index 010b0493a94d8..91a665f1429c5 100644
--- a/resource-managers/mesos/pom.xml
+++ b/resource-managers/mesos/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index 356b51d5d89f2..89beb4b30d48d 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -37,23 +37,23 @@
<id>hadoop-2.7</id>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-api</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-common</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-client</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
@@ -63,7 +63,7 @@
See the parent pom.xml for more details.
-->
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<scope>test</scope>
@@ -76,13 +76,13 @@
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<version>${hadoop.version}</version>
<scope>${hadoop.deps.scope}</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-minicluster.artifact}</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
@@ -128,9 +128,9 @@
<scope>test</scope>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-api.artifact}</artifactId>
- <version>${hadoop.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 8f942665ef54d..d9c78cc1e1d4d 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index e23a6c250f596..8dc3388e14c25 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -105,7 +105,7 @@
<artifactId>orc-mapreduce</artifactId>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-storage-api</artifactId>
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 4269842d22f36..6d07e22a1afbf 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -77,7 +77,7 @@
<groupId>${hive.group}</groupId>
- <artifactId>hive-jdbc</artifactId>
+ <artifactId>hops-jdbc</artifactId>
<groupId>${hive.group}</groupId>
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 51ee1fc49c4ae..24d4a194f7de6 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -22,7 +22,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../../pom.xml</relativePath>
@@ -121,15 +121,30 @@
<scope>${hive.shims.scope}</scope>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-llap-common</artifactId>
<scope>${hive.llap.scope}</scope>
- <groupId>org.apache.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-llap-client</artifactId>
<scope>${hive.llap.scope}</scope>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hops-jdbc</artifactId>
+ <scope>${hive.jdbc.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-service-rpc</artifactId>
+ <scope>${hive.service.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-service</artifactId>
+ <scope>${hive.service.scope}</scope>
+ </dependency>
<groupId>org.apache.avro</groupId>
@@ -169,7 +184,7 @@
<artifactId>datanucleus-core</artifactId>
- <groupId>org.apache.hadoop</groupId>
+ <groupId>${hadoop.group}</groupId>
<artifactId>${hadoop-client-runtime.artifact}</artifactId>
<scope>${hadoop.deps.scope}</scope>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 4703f24022f6a..4e6183f4511ce 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -813,9 +813,10 @@ private[hive] class HiveClientImpl(
// Since HIVE-18238 (Hive 3.0.0), the Driver.close function's return type changed
// and the CommandProcessorFactory.clean function was removed.
driver.getClass.getMethod("close").invoke(driver)
- if (version != hive.v3_0 && version != hive.v3_1) {
- CommandProcessorFactory.clean(conf)
- }
+ // Fabio: Commented out to avoid a compilation issue with Hive 3.
+ // if (version != hive.v3_0 && version != hive.v3_1) {
+ // CommandProcessorFactory.clean(conf)
+ // }
}
// Hive query needs to start SessionState.
@@ -976,12 +977,14 @@ private[hive] class HiveClientImpl(
val t = table.getTableName
logDebug(s"Deleting table $t")
try {
- client.getIndexes("default", t, 255).asScala.foreach { index =>
- shim.dropIndex(client, "default", t, index.getIndexName)
- }
- if (!table.isIndexTable) {
- client.dropTable("default", t)
- }
+ // Fabio: Index tables no longer exist in Hive 3.
+ // client.getIndexes("default", t, 255).asScala.foreach { index =>
+ // shim.dropIndex(client, "default", t, index.getIndexName)
+ // }
+ // if (!table.isIndexTable) {
+ // client.dropTable("default", t)
+ // }
+ client.dropTable("default", t)
} catch {
case _: NoSuchMethodError =>
// HIVE-18448 Hive 3.0 remove index APIs
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 9aa6a09fd57af..153029665af24 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -90,7 +90,8 @@ private[hive] object IsolatedClientLoader extends Logging {
}
def hiveVersion(version: String): HiveVersion = {
- VersionUtils.majorMinorPatchVersion(version).flatMap {
+ // Fabio: Strip the last component of the version string, since it is the Hopsworks-specific digit.
+ VersionUtils.majorMinorPatchVersion(version.substring(0, version.lastIndexOf("."))).flatMap {
case (12, _, _) | (0, 12, _) => Some(hive.v12)
case (13, _, _) | (0, 13, _) => Some(hive.v13)
case (14, _, _) | (0, 14, _) => Some(hive.v14)
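To illustrate the added trimming: Hops Hive reports a four-component version such as 3.0.0.8-SNAPSHOT, which the three-part matcher cannot parse directly. A small sketch (the version string is an example, and it assumes the configured version always carries the extra Hops component):

    val hopsVersion = "3.0.0.8-SNAPSHOT"
    // Drop everything from the last '.' onwards, leaving the upstream-style version.
    val trimmed = hopsVersion.substring(0, hopsVersion.lastIndexOf("."))  // "3.0.0"
    // VersionUtils.majorMinorPatchVersion(trimmed) can then match the (3, 0, _) case, i.e. hive.v3_0.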
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala
index b6b3cac4130a0..58e7f31a9872a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala
@@ -85,7 +85,7 @@ class HiveFileFormat(fileSinkConf: FileSinkDesc)
// Add table properties from storage handler to hadoopConf, so any custom storage
// handler settings can be set to hadoopConf
HiveTableUtil.configureJobPropertiesForStorageHandler(tableDesc, conf, false)
- Utilities.copyTableJobPropertiesToConf(tableDesc, conf)
+ Utilities.copyTableJobPropertiesToConf(tableDesc, new JobConf(conf))
// Avoid referencing the outer object.
val fileSinkConfSer = fileSinkConf
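The wrapping above suggests that the Hive 3 variant of Utilities.copyTableJobPropertiesToConf expects a JobConf rather than a plain Configuration (an assumption inferred from this change). JobConf is a Configuration subclass whose copy constructor carries over the existing entries, so prior settings are preserved; note, though, that properties the helper sets afterwards land on the wrapper rather than on conf itself:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.mapred.JobConf

    val conf = new Configuration()
    conf.set("example.key", "example.value")  // illustrative entry
    val jobConf = new JobConf(conf)           // copies the entries from conf
    assert(jobConf.get("example.key") == "example.value")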
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 895ccf40666ff..116fb8d28b6e5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
diff --git a/tools/pom.xml b/tools/pom.xml
index 8dd92927df154..d698da8fab21b 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
- <version>3.2.1</version>
+ <version>3.2.1.0</version>
<relativePath>../pom.xml</relativePath>
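After the version bump, the effective project version can be verified from the repository root (a sketch):

    ./build/mvn -q help:evaluate -Dexpression=project.version -DforceStdout
    # expected output: 3.2.1.0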