[SPARK-444] Add dist dependency to 'make docker' (apache#134)
* Add dist dependency to 'make docker'

* update comment

* copy the correct spark dist directory

* set TEMPLATE_SPARK_DIST_URI
mgummelt authored Apr 21, 2017 · 1 parent cfab475 · commit 58faa6a
Showing 5 changed files with 35 additions and 56 deletions.

Makefile (2 changes: 1 addition & 1 deletion)

@@ -1,7 +1,7 @@
 dist:
 	bin/dist.sh
 
-docker:
+docker: dist
 	bin/docker.sh
 
 test:
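
With `dist` as a prerequisite, one invocation now drives both steps. A sketch of the resulting flow (the DOCKER_IMAGE value is illustrative; the variable itself is required by bin/docker.sh, per its diff below):

    # `make docker` now runs the dist target first:
    DOCKER_IMAGE=example/spark:test make docker
    #   1. dist target   -> bin/dist.sh builds a spark-*.tgz into the build tree
    #   2. docker target -> bin/docker.sh stages build/dist/spark-*.tgz into the image context
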
bin/dist.sh (2 changes: 1 addition & 1 deletion)

@@ -3,7 +3,7 @@
 # Builds a spark distribution.
 #
 # Assumes: Spark source directory exists at "../../spark".
-# Output: ../../spark/spark-XYZ.tgz
+# Output: build/spark/spark-XYZ.tgz
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
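
The header comment now points at the new output location inside the spark-build tree rather than the Spark source checkout. Roughly (with "spark-XYZ.tgz" standing in for the real versioned name, as in the comment itself):

    # old: tarball left next to the Spark sources
    #   ../../spark/spark-XYZ.tgz
    # new: tarball kept under the build tree; the docker build, in turn,
    # consumes build/dist/spark-*.tgz (see bin/docker.sh below)
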
bin/docker.sh (20 changes: 3 additions & 17 deletions)

@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Builds and pushes a Spark docker image
+# Builds and pushes a Spark docker image.
 #
 # ENV vars:
 #   DOCKER_IMAGE - <image>:<version>
@@ -11,19 +11,12 @@ set -x -e -o pipefail
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 SPARK_BUILD_DIR="${DIR}/.."
 
-function fetch_spark() {
-    rm -rf build/dist
-    mkdir -p build/dist
-    curl -o "build/dist/${DIST_TGZ}" "${SPARK_DIST_URI}"
-    tar xvf "build/dist/${DIST_TGZ}" -C build/dist
-}
-
 function create_docker_context {
-    fetch_spark
+    tar xvf build/dist/spark-*.tgz -C build/dist
 
     rm -rf build/docker
     mkdir -p build/docker/dist
-    cp -r "build/dist/${DIST}/." build/docker/dist
+    cp -r build/dist/spark-*/. build/docker/dist
     cp -r conf/* build/docker/dist/conf
     cp -r docker/* build/docker
 }
@@ -40,13 +33,6 @@ function push_docker {
 
 [[ -n "${DOCKER_IMAGE}" ]] || (echo "DOCKER_IMAGE is a required env var." 1>&2; exit 1)
 
-if [ -z "${SPARK_DIST_URI}" ]; then
-    SPARK_DIST_URI=$(jq -r ".default_spark_dist.uri" manifest.json)
-fi
-
-DIST_TGZ=$(basename "${SPARK_DIST_URI}")
-DIST="${DIST_TGZ%.*}"
-
 pushd "${SPARK_BUILD_DIR}"
 create_docker_context
 build_docker
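
create_docker_context no longer downloads Spark itself; it assumes `make dist` already left a tarball under build/dist/. A sketch of the layout it expects and produces (file names hypothetical):

    build/dist/spark-2.1.0-bin-2.7.tgz   # input, written by the dist target
    build/dist/spark-2.1.0-bin-2.7/      # unpacked by tar xvf
    build/docker/dist/                   # staging: unpacked dist plus conf/*
    build/docker/                        # image context: docker/* copied on top
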
bin/jenkins.sh (33 changes: 24 additions & 9 deletions)

@@ -6,6 +6,7 @@ set -o pipefail
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 SPARK_DIR="${DIR}/../../spark"
 SPARK_BUILD_DIR="${DIR}/../../spark-build"
+DIST_DIR="${SPARK_BUILD_DIR}/build/dist"
 
 function default_hadoop_version {
     jq -r ".default_spark_dist.hadoop_version" "${SPARK_BUILD_DIR}/manifest.json"
@@ -15,21 +16,25 @@ function default_spark_dist {
     jq -r ".default_spark_dist.uri" "${SPARK_BUILD_DIR}/manifest.json"
 }
 
+# Writes a Spark distribution to ${SPARK_BUILD_DIR}/build/dist/spark-*.tgz
 function make_distribution {
-    local HADOOP_VERSION=${HADOOP_VERSION:-$(default_hadoop_version)}
-    pushd "${SPARK_DIR}"
-
-    rm -rf spark-*.tgz
+    rm -rf "${DIST_DIR}"
+    mkdir -p "${DIST_DIR}"
 
-    if [[ -n "${SPARK_DIST_URI}" ]]; then
-        wget "${SPARK_DIST_URI}"
-    elif [[ -n "${DEV}" ]]; then
+    if [[ "${DIST}" == "dev" ]]; then
         make_dev_distribution
-    else
+    elif [[ "${DIST}" == "prod" ]]; then
         make_prod_distribution
+    else
+        make_manifest_distribution
     fi
+}
 
-    popd
+function make_manifest_distribution {
+    SPARK_DIST_URI=$(default_spark_dist)
+    (cd "${DIST_DIR}" && wget "${SPARK_DIST_URI}")
 }
 
 # Adapted from spark/dev/make-distribution.sh.
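
make_manifest_distribution and the default_* helpers read everything they need from manifest.json via jq. A minimal sketch of the shape involved (values hypothetical; only the key paths come from the jq calls in this file):

    {
      "cli_version": "0.5.17",
      "default_spark_dist": {
        "hadoop_version": "2.7",
        "uri": "https://example.com/spark/spark-2.1.0-bin-2.7.tgz"
      }
    }
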
@@ -39,6 +44,7 @@ function make_distribution {
 function make_dev_distribution {
     pushd "${SPARK_DIR}"
+    rm -rf spark-*.tgz
 
     ./build/sbt -Pmesos "-Phadoop-${HADOOP_VERSION}" -Phive -Phive-thriftserver package
 
     # jars
@@ -68,11 +74,15 @@ function make_dev_distribution
     cp -r "${SPARK_DIR}/python" /tmp/spark-SNAPSHOT
 
     (cd /tmp && tar czf spark-SNAPSHOT.tgz spark-SNAPSHOT)
-    cp /tmp/spark-SNAPSHOT.tgz "${SPARK_DIR}"
+    mkdir -p "${DIST_DIR}"
+    cp /tmp/spark-SNAPSHOT.tgz "${DIST_DIR}"
     popd
 }
 
 function make_prod_distribution {
+    pushd "${SPARK_DIR}"
+    rm -rf spark-*.tgz
+
     if [ -f make-distribution.sh ]; then
         # Spark <2.0
         ./make-distribution.sh --tgz "-Phadoop-${HADOOP_VERSION}" -Phive -Phive-thriftserver -DskipTests
@@ -85,6 +95,11 @@ function make_prod_distribution {
         fi
         ./dev/make-distribution.sh --tgz "${MESOS_PROFILE}" "-Phadoop-${HADOOP_VERSION}" -Psparkr -Phive -Phive-thriftserver -DskipTests
     fi
+
+    mkdir -p "${DIST_DIR}"
+    cp spark-*.tgz "${DIST_DIR}"
+
+    popd
 }
 
 # rename spark/spark-*.tgz to spark/spark-<TAG>.tgz
@@ -121,7 +136,7 @@ function set_hadoop_versions {
 }
 
 function build_and_test() {
-    make dist
+    DIST=prod make dist
     SPARK_DIST=$(cd ${SPARK_DIR} && ls spark-*.tgz)
     S3_URL="s3://${S3_BUCKET}/${S3_PREFIX}/spark/${GIT_COMMIT}/" upload_to_s3
 
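
The old SPARK_DIST_URI/DEV switches collapse into a single DIST variable. How the new dispatch selects a flavor (a sketch; only `DIST=prod make dist` appears verbatim in this diff):

    # DIST=dev  -> make_dev_distribution      (sbt build of the ../../spark checkout)
    # DIST=prod -> make_prod_distribution     (make-distribution.sh --tgz)
    # (unset)   -> make_manifest_distribution (wget the manifest's default URI)

    # CI pins the prod flavor explicitly, as build_and_test above now does:
    DIST=prod make dist
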
dispatcher/bin/universe.sh (34 changes: 6 additions & 28 deletions)

@@ -1,17 +1,16 @@
 #!/usr/bin/env bash
 
-# Builds a universe for this spark package
+# Builds a universe for this spark package, and uploads it to S3.
 #
 # Manifest config:
 #   cli_version - version label to use for CLI package
-#   spark_uri - where fetch spark distribution from (or SPARK_DIST_URI if provided)
 #
 # ENV vars:
-#   DEV (optional) - if "true", spark will be built from source rather than
-#                    using the distribution specified in manifest.json.
+#   DIST (optional) - if "dev", spark will be built from source rather than
+#                     using the distribution specified in manifest.json.
 #   DOCKER_IMAGE (optional) - "<image>:<version>", falls back to mesosphere/spark-dev:COMMIT)
 #   COMMONS_TOOLS_DIR (optional) - path to dcos-commons/tools/, or empty to fetch latest release tgz
-#   SPARK_DIST_URI (optional) - URI of spark distribution to use.
 #   ghprbActualCommit / GIT_COMMIT (optional) - COMMIT value to use for DOCKER_IMAGE, if DOCKER_IMAGE isn't specified
 
 set -e -x -o pipefail
@@ -21,20 +20,8 @@ DISPATCHER_DIR="${DIR}/.."
 SPARK_BUILD_DIR="${DIR}/../.."
 SPARK_DIR="${DIR}/../../../spark"
 
-# set CLI_VERSION, SPARK_DIST_URI, and DOCKER_IMAGE:
+# set CLI_VERSION, DOCKER_IMAGE:
 configure_env() {
-    if [ -z "${SPARK_DIST_URI}" ]; then
-        if [[ "${DEV}" = "true" ]]; then
-            (cd "${SPARK_BUILD_DIR}" && make dist)
-            SPARK_DIST_URI="file://${SPARK_DIR}/spark-SNAPSHOT.tgz"
-        else
-            SPARK_DIST_URI=$(default_spark_dist)
-            SPARK_DIST_URI="${SPARK_DIST_URI%\"}"
-            SPARK_DIST_URI="${SPARK_DIST_URI#\"}"
-        fi
-        echo "Using Spark dist URI: ${SPARK_DIST_URI}"
-    fi
-
     CLI_VERSION=$(jq ".cli_version" "${SPARK_BUILD_DIR}/manifest.json")
     CLI_VERSION="${CLI_VERSION%\"}"
     CLI_VERSION="${CLI_VERSION#\"}"
@@ -73,22 +60,13 @@ make_cli() {
 }
 
 make_docker() {
-    echo "###"
-    echo "# Using docker image: $DOCKER_IMAGE"
-    echo "###"
-
-    (cd "${SPARK_BUILD_DIR}" &&
-        DOCKER_IMAGE=${DOCKER_IMAGE} \
-        SPARK_DIST_URI="${SPARK_DIST_URI}" \
-        make docker)
+    (cd "${SPARK_BUILD_DIR}" && DOCKER_IMAGE=${DOCKER_IMAGE} make docker)
 }
 
 upload_cli_and_stub_universe() {
     # Build/upload package using custom template parameters: TEMPLATE_X_Y_Z => {{x-y-z}}
     # ARTIFACT_DIR="https://${S3_BUCKET}.s3.amazonaws.com/${S3_PREFIX}" \
    # S3_DIR_PATH=${S3_PREFIX:-} \
     TEMPLATE_CLI_VERSION=${CLI_VERSION} \
-    TEMPLATE_SPARK_DIST_URI=${SPARK_DIST_URI} \
+    TEMPLATE_SPARK_DIST_URI=$(default_spark_dist) \
     TEMPLATE_DOCKER_IMAGE=${DOCKER_IMAGE} \
     ${COMMONS_TOOLS_DIR}/ci_upload.py \
         spark \
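
upload_cli_and_stub_universe keeps the TEMPLATE_X_Y_Z => {{x-y-z}} convention noted in its comment, so the exported variables map onto template slots like so (the left-hand names are from the diff; the slot names follow mechanically from the stated convention):

    TEMPLATE_CLI_VERSION    -> {{cli-version}}
    TEMPLATE_SPARK_DIST_URI -> {{spark-dist-uri}}
    TEMPLATE_DOCKER_IMAGE   -> {{docker-image}}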