diff --git a/Makefile b/Makefile
index ed39b18c23aba..ce9d65b4c8d83 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 dist:
 	bin/dist.sh
 
-docker:
+docker: dist
 	bin/docker.sh
 
 test:
diff --git a/bin/dist.sh b/bin/dist.sh
index bde98305d9939..c478293f8222f 100755
--- a/bin/dist.sh
+++ b/bin/dist.sh
@@ -3,7 +3,7 @@
 # Builds a spark distribution.
 #
 # Assumes: Spark source directory exists at "../../spark".
-# Output: ../../spark/spark-XYZ.tgz
+# Output: build/spark/spark-XYZ.tgz
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
diff --git a/bin/docker.sh b/bin/docker.sh
index 8a4fc705cefa4..f580a52185db7 100755
--- a/bin/docker.sh
+++ b/bin/docker.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Builds and pushes a Spark docker image
+# Builds and pushes a Spark docker image.
 #
 # ENV vars:
 #   DOCKER_IMAGE - <image>:<tag>
@@ -11,19 +11,12 @@ set -x -e -o pipefail
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 SPARK_BUILD_DIR="${DIR}/.."
 
-function fetch_spark() {
-    rm -rf build/dist
-    mkdir -p build/dist
-    curl -o "build/dist/${DIST_TGZ}" "${SPARK_DIST_URI}"
-    tar xvf "build/dist/${DIST_TGZ}" -C build/dist
-}
-
 function create_docker_context {
-    fetch_spark
+    tar xvf build/dist/spark-*.tgz -C build/dist
     rm -rf build/docker
     mkdir -p build/docker/dist
-    cp -r "build/dist/${DIST}/." build/docker/dist
+    cp -r build/dist/spark-*/. build/docker/dist
     cp -r conf/* build/docker/dist/conf
     cp -r docker/* build/docker
 }
 
@@ -40,13 +33,6 @@ function push_docker {
 
 [[ -n "${DOCKER_IMAGE}" ]] || (echo "DOCKER_IMAGE is a required env var." 1>&2; exit 1)
 
-if [ -z "${SPARK_DIST_URI}" ]; then
-    SPARK_DIST_URI=$(jq -r ".default_spark_dist.uri" manifest.json)
-fi
-
-DIST_TGZ=$(basename "${SPARK_DIST_URI}")
-DIST="${DIST_TGZ%.*}"
-
 pushd "${SPARK_BUILD_DIR}"
 create_docker_context
 build_docker
diff --git a/bin/jenkins.sh b/bin/jenkins.sh
index d9b2f43059a63..a05bd7d91fbd1 100644
--- a/bin/jenkins.sh
+++ b/bin/jenkins.sh
@@ -6,6 +6,7 @@ set -o pipefail
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 SPARK_DIR="${DIR}/../../spark"
 SPARK_BUILD_DIR="${DIR}/../../spark-build"
+DIST_DIR="${SPARK_BUILD_DIR}/build/dist"
 
 function default_hadoop_version {
     jq -r ".default_spark_dist.hadoop_version" "${SPARK_BUILD_DIR}/manifest.json"
@@ -15,21 +16,25 @@ function default_spark_dist {
     jq -r ".default_spark_dist.uri" "${SPARK_BUILD_DIR}/manifest.json"
 }
 
+# Writes a Spark distribution to ${SPARK_BUILD_DIR}/build/dist/spark-*.tgz
 function make_distribution {
     local HADOOP_VERSION=${HADOOP_VERSION:-$(default_hadoop_version)}
 
-    pushd "${SPARK_DIR}"
-    rm -rf spark-*.tgz
+    rm -rf "${DIST_DIR}"
+    mkdir -p "${DIST_DIR}"
 
-    if [[ -n "${SPARK_DIST_URI}" ]]; then
-        wget "${SPARK_DIST_URI}"
-    elif [[ -n "${DEV}" ]]; then
+    if [[ "${DIST}" == "dev" ]]; then
         make_dev_distribution
-    else
+    elif [[ "${DIST}" == "prod" ]]; then
         make_prod_distribution
+    else
+        make_manifest_distribution
     fi
+}
 
-    popd
+function make_manifest_distribution {
+    SPARK_DIST_URI=$(default_spark_dist)
+    (cd "${DIST_DIR}" && wget "${SPARK_DIST_URI}")
 }
 
 # Adapted from spark/dev/make-distribution.sh.
@@ -39,6 +44,7 @@ function make_distribution {
 
 function make_dev_distribution {
     pushd "${SPARK_DIR}"
     rm -rf spark-*.tgz
+    ./build/sbt -Pmesos "-Phadoop-${HADOOP_VERSION}" -Phive -Phive-thriftserver package
 
     # jars
@@ -68,11 +74,15 @@ function make_dev_distribution {
     cp -r "${SPARK_DIR}/python" /tmp/spark-SNAPSHOT
 
     (cd /tmp && tar czf spark-SNAPSHOT.tgz spark-SNAPSHOT)
-    cp /tmp/spark-SNAPSHOT.tgz "${SPARK_DIR}"
+    mkdir -p "${DIST_DIR}"
+    cp /tmp/spark-SNAPSHOT.tgz "${DIST_DIR}"
 
     popd
 }
 
 function make_prod_distribution {
+    pushd "${SPARK_DIR}"
+    rm -rf spark-*.tgz
+
     if [ -f make-distribution.sh ]; then
         # Spark <2.0
         ./make-distribution.sh --tgz "-Phadoop-${HADOOP_VERSION}" -Phive -Phive-thriftserver -DskipTests
@@ -85,6 +95,11 @@ function make_prod_distribution {
         fi
         ./dev/make-distribution.sh --tgz "${MESOS_PROFILE}" "-Phadoop-${HADOOP_VERSION}" -Psparkr -Phive -Phive-thriftserver -DskipTests
     fi
+
+    mkdir -p "${DIST_DIR}"
+    cp spark-*.tgz "${DIST_DIR}"
+
+    popd
 }
 
 # rename spark/spark-*.tgz to spark/spark-<version>.tgz
@@ -121,7 +136,7 @@ function set_hadoop_versions {
 }
 
 function build_and_test() {
-    make dist
+    DIST=prod make dist
     SPARK_DIST=$(cd ${SPARK_DIR} && ls spark-*.tgz)
     S3_URL="s3://${S3_BUCKET}/${S3_PREFIX}/spark/${GIT_COMMIT}/"
     upload_to_s3
diff --git a/dispatcher/bin/universe.sh b/dispatcher/bin/universe.sh
index c8f235ff1d91e..2c1cd589c852b 100755
--- a/dispatcher/bin/universe.sh
+++ b/dispatcher/bin/universe.sh
@@ -1,17 +1,16 @@
 #!/usr/bin/env bash
 
-# Builds a universe for this spark package
+# Builds a universe for this spark package, and uploads it to S3.
 #
 # Manifest config:
 #   cli_version - version label to use for CLI package
 #   spark_uri - where fetch spark distribution from (or SPARK_DIST_URI if provided)
 #
 # ENV vars:
-#   DEV (optional) - if "true", spark will be built from source rather than
-#                    using the distribution specified in manifest.json.
+#   DIST (optional) - if "dev", spark will be built from source rather than
+#                     using the distribution specified in manifest.json.
-#   DOCKER_IMAGE (optional) - "<image>:<tag>", falls back to mesosphere/spark-dev:COMMIT)
+#   DOCKER_IMAGE (optional) - "<image>:<tag>", falls back to mesosphere/spark-dev:COMMIT)
#   COMMONS_TOOLS_DIR (optional) - path to dcos-commons/tools/, or empty to fetch latest release tgz
 SPARK_DIR="${DIR}/../../../spark"
 
-# set CLI_VERSION, SPARK_DIST_URI, and DOCKER_IMAGE:
+# set CLI_VERSION, DOCKER_IMAGE:
 configure_env() {
-    if [ -z "${SPARK_DIST_URI}" ]; then
-        if [[ "${DEV}" = "true" ]]; then
-            (cd "${SPARK_BUILD_DIR}" && make dist)
-            SPARK_DIST_URI="file://${SPARK_DIR}/spark-SNAPSHOT.tgz"
-        else
-            SPARK_DIST_URI=$(default_spark_dist)
-            SPARK_DIST_URI="${SPARK_DIST_URI%\"}"
-            SPARK_DIST_URI="${SPARK_DIST_URI#\"}"
-        fi
-        echo "Using Spark dist URI: ${SPARK_DIST_URI}"
-    fi
-
     CLI_VERSION=$(jq ".cli_version" "${SPARK_BUILD_DIR}/manifest.json")
     CLI_VERSION="${CLI_VERSION%\"}"
     CLI_VERSION="${CLI_VERSION#\"}"
@@ -73,22 +60,13 @@ make_cli() {
 }
 
 make_docker() {
-    echo "###"
-    echo "# Using docker image: $DOCKER_IMAGE"
-    echo "###"
-
-    (cd "${SPARK_BUILD_DIR}" &&
-        DOCKER_IMAGE=${DOCKER_IMAGE} \
-        SPARK_DIST_URI="${SPARK_DIST_URI}" \
-        make docker)
+    (cd "${SPARK_BUILD_DIR}" && DOCKER_IMAGE=${DOCKER_IMAGE} make docker)
 }
 
 upload_cli_and_stub_universe() {
     # Build/upload package using custom template parameters: TEMPLATE_X_Y_Z => {{x-y-z}}
-    # ARTIFACT_DIR="https://${S3_BUCKET}.s3.amazonaws.com/${S3_PREFIX}" \
-    # S3_DIR_PATH=${S3_PREFIX:-} \
     TEMPLATE_CLI_VERSION=${CLI_VERSION} \
-    TEMPLATE_SPARK_DIST_URI=${SPARK_DIST_URI} \
+    TEMPLATE_SPARK_DIST_URI=$(default_spark_dist) \
     TEMPLATE_DOCKER_IMAGE=${DOCKER_IMAGE} \
     ${COMMONS_TOOLS_DIR}/ci_upload.py \
     spark \