diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml deleted file mode 100644 index 517cef80b7624..0000000000000 --- a/.github/workflows/build_and_test.yml +++ /dev/null @@ -1,715 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: Build and test - -on: - push: - branches: - - '**' - -jobs: - # Build: build Spark and run the tests for specified modules. - build: - name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" - # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04. - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - java: - - 8 - hadoop: - - hadoop3.2 - hive: - - hive2.3 - # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. - # Kinesis tests depends on external Amazon kinesis service. - # Note that the modules below are from sparktestsupport/modules.py. 
- modules: - - >- - core, unsafe, kvstore, avro, - network-common, network-shuffle, repl, launcher, - examples, sketch, graphx - - >- - catalyst, hive-thriftserver - - >- - streaming, sql-kafka-0-10, streaming-kafka-0-10, - mllib-local, mllib, - yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - # Here, we split Hive and SQL tests into some of slow ones and the rest of them. - included-tags: [""] - excluded-tags: [""] - comment: [""] - include: - # Hive tests - - modules: hive - java: 8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowHiveTest - comment: "- slow tests" - - modules: hive - java: 8 - hadoop: hadoop3.2 - hive: hive2.3 - excluded-tags: org.apache.spark.tags.SlowHiveTest - comment: "- other tests" - # SQL tests - - modules: sql - java: 8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- slow tests" - - modules: sql - java: 8 - hadoop: hadoop3.2 - hive: hive2.3 - excluded-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- other tests" - env: - MODULES_TO_TEST: ${{ matrix.modules }} - EXCLUDED_TAGS: ${{ matrix.excluded-tags }} - INCLUDED_TAGS: ${{ matrix.included-tags }} - HADOOP_PROFILE: ${{ matrix.hadoop }} - HIVE_PROFILE: ${{ matrix.hive }} - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c 
user.email='sparktestacc@gmail.com' commit -m "Merged commit" - # Cache local repositories. Note that GitHub Actions cache has a 2G limit. - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.java }} - - name: Install Python 3.8 - uses: actions/setup-python@v2 - # We should install one Python that is higher then 3+ for SQL and Yarn because: - # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. - # - Yarn has a Python specific test too, for example, YarnClusterSuite. - if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) - with: - python-version: 3.8 - architecture: x64 - - name: Install Python packages (Python 3.8) - if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) - run: | - python3.8 -m pip install numpy 'pyarrow<5.0.0' pandas scipy xmlrunner - python3.8 -m pip list - # Run the tests. - - name: Run tests - run: | - # Hive "other tests" test needs larger metaspace size based on experiment. 
- if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi - export SERIAL_SBT_TESTS=1 - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 - with: - name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/unit-tests.log" - - pyspark: - name: "Build modules: ${{ matrix.modules }}" - runs-on: ubuntu-20.04 - container: - image: dongjoon/apache-spark-github-action-image:20210730 - strategy: - fail-fast: false - matrix: - modules: - - >- - pyspark-sql, pyspark-mllib, pyspark-resource - - >- - pyspark-core, pyspark-streaming, pyspark-ml - - >- - pyspark-pandas - - >- - pyspark-pandas-slow - env: - MODULES_TO_TEST: ${{ matrix.modules }} - HADOOP_PROFILE: hadoop3.2 - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - SKIP_UNIDOC: true - SKIP_MIMA: true - METASPACE_SIZE: 1g - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git 
-c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - # Cache local repositories. Note that GitHub Actions cache has a 2G limit. - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - pyspark-coursier- - - name: Install Python 3.6 - uses: actions/setup-python@v2 - with: - python-version: 3.6 - architecture: x64 - # This step takes much less time (~30s) than other Python versions so it is not included - # in the Docker image being used. There is also a technical issue to install Python 3.6 on - # Ubuntu 20.04. See also SPARK-33162. - - name: Install Python packages (Python 3.6) - run: | - python3.6 -m pip install numpy 'pyarrow<4.0.0' pandas scipy xmlrunner 'plotly>=4.8' - python3.6 -m pip list - - name: List Python packages (Python 3.9) - run: | - python3.9 -m pip list - - name: Install Conda for pip packaging test - run: | - curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - # Run the tests. 
- - name: Run tests - run: | - export PATH=$PATH:$HOME/miniconda/bin - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 - with: - name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3 - path: "**/target/unit-tests.log" - - sparkr: - name: "Build modules: sparkr" - runs-on: ubuntu-20.04 - container: - image: dongjoon/apache-spark-github-action-image:20210602 - env: - HADOOP_PROFILE: hadoop3.2 - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - SKIP_MIMA: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
- - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - sparkr-coursier- - - name: Run tests - run: | - # The followings are also used by `r-lib/actions/setup-r` to avoid - # R issues at docker environment - export TZ=UTC - export _R_CHECK_SYSTEM_CLOCK_=FALSE - ./dev/run-tests --parallelism 1 --modules sparkr - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-sparkr--8-hadoop3.2-hive2.3 - path: "**/target/test-reports/*.xml" - - # Static analysis, and documentation build - lint: - name: Linters, licenses, dependencies and documentation generation - runs-on: ubuntu-20.04 - env: - LC_ALL: C.UTF-8 - LANG: C.UTF-8 - PYSPARK_DRIVER_PYTHON: python3.9 - container: - image: dongjoon/apache-spark-github-action-image:20210602 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
- - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - docs-coursier- - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: docs-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - docs-maven- - - name: Install Python linter dependencies - run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - # Jinja2 3.0.0+ causes error when building with Sphinx. - # See also https://issues.apache.org/jira/browse/SPARK-35375. - python3.9 -m pip install flake8 pydata_sphinx_theme 'mypy==0.910' numpydoc 'jinja2<3.0.0' 'black==21.5b2' - - name: Install R linter dependencies and SparkR - run: | - apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev - Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" - Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" - ./R/install-dev.sh - - name: Instll JavaScript linter dependencies - run: | - apt update - apt-get install -y nodejs npm - - name: Install dependencies for documentation generation - run: | - # pandoc is required to generate PySpark APIs as well in nbsphinx. - apt-get install -y libcurl4-openssl-dev pandoc - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - # Jinja2 3.0.0+ causes error when building with Sphinx. 
- # See also https://issues.apache.org/jira/browse/SPARK-35375. - python3.9 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' - python3.9 -m pip install sphinx_plotly_directive 'pyarrow<5.0.0' pandas 'plotly>=4.8' - apt-get update -y - apt-get install -y ruby ruby-dev - Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" - gem install bundler - cd docs - bundle install - - name: Scala linter - run: ./dev/lint-scala - - name: Java linter - run: ./dev/lint-java - - name: Python linter - run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python - - name: R linter - run: ./dev/lint-r - - name: JS linter - run: ./dev/lint-js - - name: License test - run: ./dev/check-license - - name: Dependencies test - run: ./dev/test-dependencies.sh - - name: Run documentation build - run: | - cd docs - bundle exec jekyll build - - java-11-17: - name: Java ${{ matrix.java }} build with Maven - strategy: - fail-fast: false - matrix: - java: - - 11 - - 17 - runs-on: ubuntu-20.04 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - 
build- - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java${{ matrix.java }}-maven- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.java }} - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - export JAVA_VERSION=${{ matrix.java }} - # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install - rm -rf ~/.m2/repository/org/apache/spark - - scala-213: - name: Scala 2.13 build with SBT - runs-on: ubuntu-20.04 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: scala-213-coursier-${{ 
hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - scala-213-coursier- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Build with SBT - run: | - ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile - - hadoop-2: - name: Hadoop 2 build with SBT - runs-on: ubuntu-20.04 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - hadoop-2-coursier- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Build with SBT - run: | - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile - - tpcds-1g: - name: Run TPC-DS queries with SF=1 - runs-on: ubuntu-20.04 - env: - SPARK_LOCAL_IP: localhost - 
steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - tpcds-coursier- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Cache TPC-DS generated data - id: cache-tpcds-sf-1 - uses: actions/cache@v2 - with: - path: ./tpcds-sf-1 - key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }} - - name: Checkout tpcds-kit repository - if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true' - uses: actions/checkout@v2 - with: - repository: databricks/tpcds-kit - ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069 - path: ./tpcds-kit - - name: Build tpcds-kit - if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true' - run: cd tpcds-kit/tools && make OS=LINUX - - name: Generate TPC-DS (SF=1) table data - if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true' - run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir 
`pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite" - - name: Run TPC-DS queries - run: | - SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite" - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-tpcds--8-hadoop3.2-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 - with: - name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3 - path: "**/target/unit-tests.log" - - docker-integration-tests: - name: Run docker integration tests - runs-on: ubuntu-20.04 - env: - HADOOP_PROFILE: hadoop3.2 - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe - SKIP_MIMA: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: branch-3.2 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: 
docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - docker-integration-coursier- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Cache Oracle docker-images repository - id: cache-oracle-docker-images - uses: actions/cache@v2 - with: - path: ./oracle/docker-images - # key should contains the commit hash of the Oracle docker images to be checkout. - key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 - - name: Checkout Oracle docker-images repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: oracle/docker-images - ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 - path: ./oracle/docker-images - - name: Install Oracle Docker image - run: | - cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles - ./buildContainerImage.sh -v 18.4.0 -x - - name: Run tests - run: | - ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-docker-integration--8-hadoop3.2-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 - with: - name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3 - path: "**/target/unit-tests.log" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index 98855f4668b45..0000000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,50 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Intentionally has a general name. -# because the test status check created in GitHub Actions -# currently randomly picks any associated workflow. -# So, the name was changed to make sense in that context too. -# See also https://github.community/t/specify-check-suite-when-creating-a-checkrun/118380/10 - -name: "On pull requests" -on: pull_request_target - -jobs: - label: - name: Label pull requests - runs-on: ubuntu-latest - steps: - # In order to get back the negated matches like in the old config, - # we need the actinons/labeler concept of `all` and `any` which matches - # all of the given constraints / glob patterns for either `all` - # files or `any` file in the change set. - # - # Github issue which requests a timeline for a release with any/all support: - # - https://github.com/actions/labeler/issues/111 - # This issue also references the issue that mentioned that any/all are only - # supported on main branch (previously called master): - # - https://github.com/actions/labeler/issues/73#issuecomment-639034278 - # - # However, these are not in a published release and the current `main` branch - # has some issues upon testing. 
- - uses: actions/labeler@2.2.0 - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" - sync-labels: true diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml deleted file mode 100644 index ae71491efd205..0000000000000 --- a/.github/workflows/publish_snapshot.yml +++ /dev/null @@ -1,59 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -name: Publish Snapshot - -on: - schedule: - - cron: '0 0 * * *' - -jobs: - publish-snapshot: - if: github.repository == 'apache/spark' - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - branch: - - master - - branch-3.1 - steps: - - name: Checkout Spark repository - uses: actions/checkout@master - with: - ref: ${{ matrix.branch }} - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: snapshot-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - snapshot-maven- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Publish snapshot - env: - ASF_USERNAME: ${{ secrets.NEXUS_USER }} - ASF_PASSWORD: ${{ secrets.NEXUS_PW }} - GPG_KEY: "not_used" - GPG_PASSPHRASE: "not_used" - GIT_REF: ${{ matrix.branch }} - run: ./dev/create-release/release-build.sh publish-snapshot diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index f26100db5612e..0000000000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -name: Close stale PRs - -on: - schedule: - - cron: "0 0 * * *" - -jobs: - stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v1.1.0 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - stale-pr-message: > - We're closing this PR because it hasn't been updated in a while. - This isn't a judgement on the merit of the PR in any way. It's just - a way of keeping the PR queue manageable. - - If you'd like to revive this PR, please reopen it and ask a - committer to remove the Stale tag! - days-before-stale: 100 - # Setting this to 0 is the same as setting it to 1. - # See: https://github.com/actions/stale/issues/28 - days-before-close: 0 diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml deleted file mode 100644 index 327708993d679..0000000000000 --- a/.github/workflows/test_report.yml +++ /dev/null @@ -1,44 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -name: Report test results -on: - workflow_run: - workflows: ["Build and test"] - types: - - completed - -jobs: - test_report: - runs-on: ubuntu-latest - steps: - - name: Download test results to report - uses: dawidd6/action-download-artifact@v2 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - workflow: ${{ github.event.workflow_run.workflow_id }} - commit: ${{ github.event.workflow_run.head_commit.id }} - workflow_conclusion: completed - - name: Publish test report - uses: scacap/action-surefire-report@v1 - with: - check_name: Report test results - github_token: ${{ secrets.GITHUB_TOKEN }} - report_paths: "**/target/test-reports/*.xml" - commit: ${{ github.event.workflow_run.head_commit.id }} diff --git a/assembly/pom.xml b/assembly/pom.xml index a85201168af6a..aab79c293fc3f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml @@ -137,7 +137,7 @@ ${project.version} - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-web-proxy diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml index 009d4b92f406c..7db5b432aaf50 100644 --- a/assembly/src/main/assembly/assembly.xml +++ b/assembly/src/main/assembly/assembly.xml @@ -83,6 +83,8 @@ false org.apache.hadoop:*:jar + io.hops:*:jar + io.hops.metadata:*:jar org.apache.spark:*:jar org.apache.zookeeper:*:jar org.apache.avro:*:jar diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 11cf0cb9fabed..b17610f71d525 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9957a778733ce..4abe4a2b56ee8 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml 
b/common/network-shuffle/pom.xml index b3ea2877d8ced..b88baa516d03b 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 8fb7d4eeb6a14..28f2782c78ab8 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -64,14 +64,12 @@ - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} - ${hadoop.version} - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} - ${hadoop.version} com.google.guava diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 7e4c6c3607476..f3a4af3a91376 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index bdf992c58cea2..06b6f091d58d2 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e2db52ba0de60..8fbc4316d5a12 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 592fc99efac70..e7cb443866a1e 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml @@ -66,14 +66,14 @@ xbean-asm9-shaded - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} - ${hadoop.version} + ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} - ${hadoop.version} + ${hadoop.deps.scope} org.apache.spark @@ -415,7 +415,7 @@ test - org.apache.hadoop + ${hadoop.group} hadoop-minikdc 
test @@ -467,13 +467,13 @@ - org.apache.hadoop + ${hadoop.group} hadoop-aws ${hadoop.version} test - org.apache.hadoop + ${hadoop.group} hadoop-common diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 571059be6fd0e..c2dfa47a3757d 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -181,7 +181,13 @@ echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DI echo "Build flags: $@" >> "$DISTDIR/RELEASE" # Copy jars -cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" +# Fabio: copy jars from the spark-assembly_*-dist directory which +# contains the distribution prepared by the maven-assembly-plugin +# The maven-assembly-plugin has rules to remove the hadoop/hops dependencies +# from the final distribution +# You need to run the -Pbigtop-dist profile for this to work +cp "$SPARK_HOME"/assembly/target/spark-assembly_"$SCALA_VERSION"-"$VERSION"-dist/lib/* "$DISTDIR/jars/" +cp "$SPARK_HOME"/assembly/target/spark-assembly_"$SCALA_VERSION"-"$VERSION"-dist/*.jar "$DISTDIR/jars/" # Only create the yarn directory if the yarn artifacts were built.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then diff --git a/examples/pom.xml b/examples/pom.xml index d2cde2b7dddb7..b58d4dbf21ac4 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 7482d4f00fcb3..926e3b7f7573c 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 0e937a935ce74..5b01c0de5c713 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -108,7 +108,7 @@ test - org.apache.hadoop + ${hadoop.group} hadoop-minikdc test diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 1ece2aa136a7d..680f350af194d 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -70,13 +70,12 @@ provided - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} - ${hadoop.version} provided - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.version} diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 942e23e668026..e5a11c149efa2 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -121,7 +121,7 @@ - org.apache.hadoop + ${hadoop.group} hadoop-minikdc diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index fe4015963da95..6c3d184878025 100644 --- 
a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -65,7 +65,7 @@ test - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.deps.scope} diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 88eeb2da15f77..4a9ea643ad14d 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 1970e3a7177b9..226ee94edca62 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -90,13 +90,12 @@ provided - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} - ${hadoop.version} provided - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.version} diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index cb2b46714d3be..20c21d9f49dfc 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index c272632dca969..d7000ab214958 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index de2aa81ab6fac..2bbb97aadc368 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index b6149f4a2f52f..bbe3681f52b8f 100644 --- a/hadoop-cloud/pom.xml +++ 
b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml @@ -57,13 +57,13 @@ test - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} ${hadoop.version} provided - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.version} @@ -72,13 +72,13 @@ intra-jackson-module version problems. --> - org.apache.hadoop + ${hadoop.group} hadoop-aws ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-common @@ -113,13 +113,13 @@ - org.apache.hadoop + ${hadoop.group} hadoop-openstack ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-common @@ -175,13 +175,13 @@ ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-azure ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-common @@ -262,13 +262,13 @@ but it still needs some selective exclusion across versions, especially 3.0.x. --> - org.apache.hadoop + ${hadoop.group} hadoop-cloud-storage ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-common diff --git a/launcher/pom.xml b/launcher/pom.xml index abf13e9c7fef2..3ff52785675f8 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml @@ -80,15 +80,13 @@ - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} - ${hadoop.version} test - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} - ${hadoop.version} test diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 3d7033ca52821..d638f82b16b6d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 1f75cb601ce36..d0cc3872dc80c 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml diff 
--git a/pom.xml b/pom.xml index 183d2588402b1..37086e98bbe16 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 pom Spark Project Parent POM http://spark.apache.org/ @@ -120,18 +120,19 @@ spark 1.7.30 1.2.17 - 3.3.1 + 3.2.0.2 + io.hops 2.5.0 ${hadoop.version} 3.6.2 2.13.0 - org.apache.hive + io.hops.hive core - 2.3.9 - 2.3.9 + 3.0.0.8-SNAPSHOT + 3.0.0.8-SNAPSHOT - 2.3 + 3.0 2.8.0 @@ -238,12 +239,14 @@ --> compile compile - 2.7.2 + 2.6.1.2 compile compile compile compile compile + compile + compile compile compile test @@ -314,6 +317,17 @@ false + + Hops + Hops Repository + https://archiva.hops.works/repository/Hops/ + + true + + + true + + @@ -1109,26 +1123,26 @@ - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} ${hadoop-client-minicluster.artifact} ${yarn.version} test - org.apache.hadoop + ${hadoop.group} hadoop-client ${hadoop.version} ${hadoop.deps.scope} @@ -1210,7 +1224,7 @@ - org.apache.hadoop + ${hadoop.group} hadoop-minikdc ${hadoop.version} test @@ -1300,7 +1314,7 @@ ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-yarn-api ${yarn.version} ${hadoop.deps.scope} @@ -1344,7 +1358,7 @@ - org.apache.hadoop + ${hadoop.group} hadoop-yarn-common ${yarn.version} ${hadoop.deps.scope} @@ -1384,7 +1398,7 @@ - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-tests ${yarn.version} tests @@ -1427,7 +1441,7 @@ * - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-resourcemanager @@ -1439,27 +1453,27 @@ fail, see also SPARK-33104. 
--> - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-resourcemanager ${yarn.version} test - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-web-proxy ${yarn.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-common - org.apache.hadoop + ${hadoop.group} hadoop-yarn-common - org.apache.hadoop + ${hadoop.group} hadoop-yarn-api @@ -1522,7 +1536,7 @@ - org.apache.hadoop + ${hadoop.group} hadoop-yarn-client ${yarn.version} ${hadoop.deps.scope} @@ -1738,11 +1752,11 @@ ant - org.apache.hadoop + ${hadoop.group} hadoop-common - org.apache.hadoop + ${hadoop.group} hadoop-auth @@ -1792,7 +1806,7 @@ - org.apache.hive + io.hops.hive hive-storage-api @@ -1848,11 +1862,6 @@ org.apache.avro avro-mapred - - - org.apache.calcite - calcite-core - org.apache.calcite calcite-avatica @@ -1951,7 +1960,7 @@ ${hive.group} - hive-jdbc + hops-jdbc ${hive.version} @@ -2055,6 +2064,10 @@ org.slf4j slf4j-log4j12 + + org.apache.logging.log4j + * + @@ -2149,7 +2162,19 @@ ${hive.group} hive-service-rpc - 3.1.2 + ${hive.version} + + + * + * + + + + + + ${hive.group} + hive-service + ${hive.version} * @@ -2157,6 +2182,7 @@ + net.sf.jpam jpam @@ -2184,7 +2210,7 @@ guava - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-resourcemanager @@ -2219,6 +2245,10 @@ org.codehaus.groovy groovy-all + + com.sun.jersey + * + @@ -2232,42 +2262,50 @@ - org.apache.hive + ${hive.group} hive-llap-common ${hive23.version} ${hive.deps.scope} - org.apache.hive + ${hive.group} hive-common - org.apache.hive + ${hive.group} hive-serde org.slf4j slf4j-api + + log4j + log4j + + + com.sun.jersey + * + - org.apache.hive + ${hive.group} hive-llap-client ${hive23.version} test - org.apache.hive + ${hive.group} hive-common - org.apache.hive + ${hive.group} hive-serde - org.apache.hive + ${hive.group} hive-llap-common @@ -2286,6 +2324,10 @@ org.slf4j slf4j-api + + log4j + log4j + @@ -2300,15 +2342,15 @@ jaxb-api - org.apache.hadoop + ${hadoop.group} hadoop-common - 
org.apache.hadoop + ${hadoop.group} hadoop-hdfs - org.apache.hive + ${hive.group} hive-storage-api @@ -2320,11 +2362,11 @@ ${orc.deps.scope} - org.apache.hadoop + ${hadoop.group} hadoop-common - org.apache.hadoop + ${hadoop.group} hadoop-mapreduce-client-core @@ -2332,7 +2374,7 @@ orc-core - org.apache.hive + ${hive.group} hive-storage-api @@ -2477,7 +2519,7 @@ 2.9.1 - org.apache.hive + ${hive.group} hive-storage-api ${hive.storage.version} ${hive.storage.scope} @@ -3549,4 +3591,12 @@ + + + + Hops + Hops Repo + https://archiva.hops.works/repository/Hops/ + + diff --git a/repl/pom.xml b/repl/pom.xml index 60eaff7e0ed28..2e512ad0fd2e3 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d56d7ec9437fc..9544d74ed6c3a 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 3f2efccef73e9..da5fb7ea3b998 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 010b0493a94d8..91a665f1429c5 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 356b51d5d89f2..89beb4b30d48d 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 
3.2.1 + 3.2.1.0 ../../pom.xml @@ -37,23 +37,23 @@ hadoop-2.7 - org.apache.hadoop + ${hadoop.group} hadoop-yarn-api - org.apache.hadoop + ${hadoop.group} hadoop-yarn-common - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-web-proxy - org.apache.hadoop + ${hadoop.group} hadoop-yarn-client - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-tests tests test @@ -63,7 +63,7 @@ See the parent pom.xml for more details. --> - org.apache.hadoop + ${hadoop.group} hadoop-yarn-server-resourcemanager test @@ -76,13 +76,13 @@ - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.version} ${hadoop.deps.scope} - org.apache.hadoop + ${hadoop.group} ${hadoop-client-minicluster.artifact} ${hadoop.version} test @@ -128,9 +128,9 @@ test - org.apache.hadoop + ${hadoop.group} ${hadoop-client-api.artifact} - ${hadoop.version} + ${hadoop.deps.scope} diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 8f942665ef54d..d9c78cc1e1d4d 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index e23a6c250f596..8dc3388e14c25 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -105,7 +105,7 @@ orc-mapreduce - org.apache.hive + ${hive.group} hive-storage-api diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 4269842d22f36..6d07e22a1afbf 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -77,7 +77,7 @@ ${hive.group} - hive-jdbc + hops-jdbc ${hive.group} diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 51ee1fc49c4ae..24d4a194f7de6 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../../pom.xml @@ -121,15 
+121,30 @@ ${hive.shims.scope} - org.apache.hive + ${hive.group} hive-llap-common ${hive.llap.scope} - org.apache.hive + ${hive.group} hive-llap-client ${hive.llap.scope} + + ${hive.group} + hops-jdbc + ${hive.jdbc.scope} + + + ${hive.group} + hive-service-rpc + ${hive.service.scope} + + + ${hive.group} + hive-service + ${hive.service.scope} + org.apache.avro @@ -169,7 +184,7 @@ datanucleus-core - org.apache.hadoop + ${hadoop.group} ${hadoop-client-runtime.artifact} ${hadoop.deps.scope} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 4703f24022f6a..4e6183f4511ce 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -813,9 +813,10 @@ private[hive] class HiveClientImpl( // Since HIVE-18238(Hive 3.0.0), the Driver.close function's return type changed // and the CommandProcessorFactory.clean function removed. driver.getClass.getMethod("close").invoke(driver) - if (version != hive.v3_0 && version != hive.v3_1) { - CommandProcessorFactory.clean(conf) - } + // Fabio: Comment this to avoid compilation issue with Hive3 + // if (version != hive.v3_0 && version != hive.v3_1) { + // CommandProcessorFactory.clean(conf) + // } } // Hive query needs to start SessionState. 
@@ -976,12 +977,14 @@ private[hive] class HiveClientImpl( val t = table.getTableName logDebug(s"Deleting table $t") try { - client.getIndexes("default", t, 255).asScala.foreach { index => - shim.dropIndex(client, "default", t, index.getIndexName) - } - if (!table.isIndexTable) { - client.dropTable("default", t) - } + // Fabio: Index tables don't exist anymore in Hive3 + // client.getIndexes("default", t, 255).asScala.foreach { index => + // shim.dropIndex(client, "default", t, index.getIndexName) + // } + // if (!table.isIndexTable) { + // client.dropTable("default", t) + // } + client.dropTable("default", t) } catch { case _: NoSuchMethodError => // HIVE-18448 Hive 3.0 remove index APIs diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 9aa6a09fd57af..153029665af24 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -90,7 +90,8 @@ private[hive] object IsolatedClientLoader extends Logging { } def hiveVersion(version: String): HiveVersion = { - VersionUtils.majorMinorPatchVersion(version).flatMap { + // Fabio: Remove the last digit of the version string as it's the Hopsworks specific version + VersionUtils.majorMinorPatchVersion(version.substring(0, version.lastIndexOf("."))).flatMap { case (12, _, _) | (0, 12, _) => Some(hive.v12) case (13, _, _) | (0, 13, _) => Some(hive.v13) case (14, _, _) | (0, 14, _) => Some(hive.v14) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala index b6b3cac4130a0..58e7f31a9872a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala +++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala @@ -85,7 +85,7 @@ class HiveFileFormat(fileSinkConf: FileSinkDesc) // Add table properties from storage handler to hadoopConf, so any custom storage // handler settings can be set to hadoopConf HiveTableUtil.configureJobPropertiesForStorageHandler(tableDesc, conf, false) - Utilities.copyTableJobPropertiesToConf(tableDesc, conf) + Utilities.copyTableJobPropertiesToConf(tableDesc, new JobConf(conf)) // Avoid referencing the outer object. val fileSinkConfSer = fileSinkConf diff --git a/streaming/pom.xml b/streaming/pom.xml index 895ccf40666ff..116fb8d28b6e5 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 8dd92927df154..d698da8fab21b 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.2.1 + 3.2.1.0 ../pom.xml