Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into SPARK-3278-weight…
Browse files Browse the repository at this point in the history
…edLabeledPoint
  • Loading branch information
zapletal-martin committed Jan 10, 2015
2 parents cab5a46 + 545dfcb commit 8cefd18
Show file tree
Hide file tree
Showing 255 changed files with 6,635 additions and 3,215 deletions.
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
*.pyc
.idea/
.idea_modules/
sbt/*.jar
build/*.jar
.settings
.cache
cache
.generated-mima*
/build/
work/
out/
.DS_Store
third_party/libmesos.so
third_party/libmesos.dylib
build/apache-maven*
build/zinc*
build/scala*
conf/java-opts
conf/*.sh
conf/*.cmd
Expand Down
20 changes: 20 additions & 0 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -354,5 +354,25 @@
</dependency>
</dependencies>
</profile>

<!-- Profiles that disable inclusion of certain dependencies. -->
<profile>
<id>hadoop-provided</id>
<properties>
<hadoop.deps.scope>provided</hadoop.deps.scope>
</properties>
</profile>
<profile>
<id>hive-provided</id>
<properties>
<hive.deps.scope>provided</hive.deps.scope>
</properties>
</profile>
<profile>
<id>parquet-provided</id>
<properties>
<parquet.deps.scope>provided</parquet.deps.scope>
</properties>
</profile>
</profiles>
</project>
15 changes: 0 additions & 15 deletions bagel/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,6 @@
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
Expand All @@ -58,11 +49,5 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
4 changes: 2 additions & 2 deletions bagel/src/test/resources/log4j.properties
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
# limitations under the License.
#

# Set everything to be logged to the file bagel/target/unit-tests.log
# Set everything to be logged to the file target/unit-tests.log
log4j.rootCategory=INFO, file
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
log4j.appender.file.append=true
log4j.appender.file.file=target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
Expand Down
7 changes: 7 additions & 0 deletions bin/compute-classpath.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
:no_yarn_conf_dir

rem To allow for distributions to append needed libraries to the classpath (e.g. when
rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
rem append it to the final classpath.
rem NOTE: the variable must be referenced as %SPARK_DIST_CLASSPATH% (no "$"); with a
rem leading "$" cmd looks up a different, undefined variable and the test never passes.
if not "x%SPARK_DIST_CLASSPATH%"=="x" (
  set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
)

rem A bit of a hack to allow calling this script within run2.cmd without seeing output
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit

Expand Down
7 changes: 7 additions & 0 deletions bin/compute-classpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,11 @@ if [ -n "$YARN_CONF_DIR" ]; then
CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
fi

# To allow for distributions to append needed libraries to the classpath (e.g. when
# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
# append it to the final classpath.
if [ -n "$SPARK_DIST_CLASSPATH" ]; then
CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH"
fi

echo "$CLASSPATH"
10 changes: 9 additions & 1 deletion bin/spark-submit
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,19 @@ while (($#)); do
export SPARK_SUBMIT_CLASSPATH=$2
elif [ "$1" = "--driver-java-options" ]; then
export SPARK_SUBMIT_OPTS=$2
elif [ "$1" = "--master" ]; then
export MASTER=$2
fi
shift
done

DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
if [ -z "$SPARK_CONF_DIR" ]; then
export SPARK_CONF_DIR="$SPARK_HOME/conf"
fi
DEFAULT_PROPERTIES_FILE="$SPARK_CONF_DIR/spark-defaults.conf"
if [ "$MASTER" == "yarn-cluster" ]; then
SPARK_SUBMIT_DEPLOY_MODE=cluster
fi
export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}

Expand Down
12 changes: 11 additions & 1 deletion bin/spark-submit2.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ set ORIG_ARGS=%*

rem Reset the values of all variables used
set SPARK_SUBMIT_DEPLOY_MODE=client
set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf

rem Fall back to the conf directory under SPARK_HOME when SPARK_CONF_DIR is not set.
rem "if defined" takes the bare variable NAME; wrapping it in %...% would test the
rem variable's value as a name and overwrite a user-provided SPARK_CONF_DIR.
if not defined SPARK_CONF_DIR (
  set SPARK_CONF_DIR=%SPARK_HOME%\conf
)
set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
set SPARK_SUBMIT_DRIVER_MEMORY=
set SPARK_SUBMIT_LIBRARY_PATH=
set SPARK_SUBMIT_CLASSPATH=
Expand All @@ -45,11 +49,17 @@ if [%1] == [] goto continue
set SPARK_SUBMIT_CLASSPATH=%2
) else if [%1] == [--driver-java-options] (
set SPARK_SUBMIT_OPTS=%2
) else if [%1] == [--master] (
set MASTER=%2
)
shift
goto loop
:continue

if [%MASTER%] == [yarn-cluster] (
set SPARK_SUBMIT_DEPLOY_MODE=cluster
)

rem For client mode, the driver will be launched in the same JVM that launches
rem SparkSubmit, so we may need to read the properties file for any extra class
rem paths, library paths, java options and memory early on. Otherwise, it will
Expand Down
149 changes: 149 additions & 0 deletions build/mvn
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Resolve the absolute path of the directory that contains this script; the
# installer functions below download and unpack their tools beneath it
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory so it can be restored once the installers,
# which change directory while unpacking, have finished
_CALLING_DIR="$(pwd)"

# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
# already been installed.
#
# Globals:
#   _DIR            (read) directory the tarball is downloaded to and unpacked in
#   AMPLAB_JENKINS  (read) when non-empty, download tools run in quiet mode
# Arguments:
#   $1 - URL of the directory hosting the tarball
#   $2 - Tarball name
#   $3 - Checkable binary, relative to _DIR (optional; when empty the
#        application is always (re)installed)
# Side effects:
#   Changes the working directory to _DIR when a tarball is unpacked.
#   Exits the shell with status 2 when the tarball cannot be obtained.
install_app() {
  local remote_tarball="$1/$2"
  local local_tarball="${_DIR}/$2"
  local binary="${_DIR}/$3"

  # setup `curl` and `wget` silent options if we're running on Jenkins
  local curl_opts=""
  local wget_opts=""
  if [ -n "$AMPLAB_JENKINS" ]; then
    curl_opts="-s"
    wget_opts="--quiet"
  else
    curl_opts="--progress-bar"
    wget_opts="--progress=bar:force"
  fi

  if [ -z "$3" ] || [ ! -f "$binary" ]; then
    # Only download when the tarball is not already present. A failed download
    # must not leave an empty or truncated file behind: it would hide the
    # failure from the fallback and from the final existence check below.
    if [ ! -f "${local_tarball}" ] && command -v curl >/dev/null 2>&1; then
      echo "exec: curl ${curl_opts} ${remote_tarball}"
      # --fail turns HTTP errors into a non-zero exit status instead of
      # saving the server's error page as the tarball
      if ! curl ${curl_opts} --fail -o "${local_tarball}" "${remote_tarball}"; then
        rm -f "${local_tarball}"
      fi
    fi
    # if the file still doesn't exist, lets try `wget` and cross our fingers
    if [ ! -f "${local_tarball}" ] && command -v wget >/dev/null 2>&1; then
      echo "exec: wget ${wget_opts} ${remote_tarball}"
      if ! wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"; then
        rm -f "${local_tarball}"
      fi
    fi
    # if both were unsuccessful, exit
    if [ ! -f "${local_tarball}" ]; then
      echo -n "ERROR: Cannot download $2 with cURL or wget; " >&2
      echo "please install manually and try again." >&2
      exit 2
    fi
    # NOTE: the directory change is intentionally not confined to a subshell;
    # the caller restores the original working directory afterwards
    cd "${_DIR}" && tar -xzf "$2"
    rm -rf "$local_tarball"
  fi
}

# Fetch Maven 3.2.3 into the build/ folder (unless its launcher is already
# there) and record the launcher's location.
#
# Globals:
#   _DIR     (read)    directory the distribution is unpacked in
#   MVN_BIN  (written) absolute path of the `mvn` launcher
install_mvn() {
  local mirror_url="http://apache.claz.org/maven/maven-3/3.2.3/binaries"
  local tarball="apache-maven-3.2.3-bin.tar.gz"
  local launcher="apache-maven-3.2.3/bin/mvn"

  install_app "${mirror_url}" "${tarball}" "${launcher}"
  MVN_BIN="${_DIR}/${launcher}"
}

# Install zinc under the build/ folder
#
# Globals:
#   _DIR               (read)    directory zinc is unpacked in
#   ZINC_INSTALL_FLAG  (written) set to 1 when zinc was not yet installed, so
#                                the caller knows the server must be (re)started
#   ZINC_BIN           (written) absolute path of the `zinc` launcher
install_zinc() {
  local zinc_path="zinc-0.3.5.3/bin/zinc"
  # The check must be anchored at _DIR: a bare relative path would be resolved
  # against whatever directory the script was called from and report zinc as
  # "not installed" on every run made from outside build/.
  if [ ! -f "${_DIR}/${zinc_path}" ]; then
    ZINC_INSTALL_FLAG=1
  fi
  install_app \
    "http://downloads.typesafe.com/zinc/0.3.5.3" \
    "zinc-0.3.5.3.tgz" \
    "${zinc_path}"
  ZINC_BIN="${_DIR}/${zinc_path}"
}

# Determine the Scala version from the root pom.xml file, set the Scala URL,
# and, with that, download the specific version of Scala necessary under
# the build/ folder
#
# Globals:
#   _DIR            (read)    build/ directory; the root pom.xml is expected
#                             one level above it
#   SCALA_COMPILER  (written) absolute path of scala-compiler.jar
#   SCALA_LIBRARY   (written) absolute path of scala-library.jar
install_scala() {
  # determine the Scala version used in Spark: the text between '>' and '<'
  # on the first pom.xml line that mentions "scala.version"
  local scala_version
  scala_version="$(grep "scala.version" "${_DIR}/../pom.xml" | \
    head -1 | cut -f2 -d'>' | cut -f1 -d'<')"
  local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"

  install_app \
    "http://downloads.typesafe.com/scala/${scala_version}" \
    "scala-${scala_version}.tgz" \
    "scala-${scala_version}/bin/scala"

  # Resolve the lib/ directory once; every expansion is quoted so a checkout
  # path containing spaces is handled correctly
  local scala_lib
  scala_lib="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)"
  SCALA_COMPILER="${scala_lib}/scala-compiler.jar"
  SCALA_LIBRARY="${scala_lib}/scala-library.jar"
}

# Determines if a given application is already installed. If not, will attempt
# to install
#
# Sets the global variable <APP>_BIN (application name in uppercase, e.g.
# MVN_BIN) to the resolved location of the application, or to the empty string
# when it is not found; in the latter case `install_<app>` is called and is
# expected to set the variable itself.
## Arg1 - application name
## Arg2 - Alternate path to local install under build/ dir
check_and_install_app() {
  # name of the variable to publish the location in, e.g. "mvn" -> "MVN_BIN"
  local app_bin
  app_bin="$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]')_BIN"

  # look the application up once and reuse the result
  local app_path
  app_path="$(command -v "$1" 2>/dev/null)"

  # assign through `printf -v` rather than `eval`: the resolved path is data
  # and must never be re-parsed as shell code (e.g. when it contains spaces)
  printf -v "${app_bin}" '%s' "${app_path}"

  if [ -z "${app_path}" ]; then
    "install_$1"
  fi
}

# Setup healthy defaults for the Zinc port if none were provided from
# the environment
ZINC_PORT=${ZINC_PORT:-"3030"}

# Check and install all applications necessary to build Spark
check_and_install_app "mvn"

# Install the proper version of Scala and Zinc for the build
install_zinc
install_scala

# Reset the current working directory; running the build from the wrong
# directory would be worse than not running it at all
cd "${_CALLING_DIR}" || exit 1

# Now that zinc is ensured to be installed, check its status and, if its
# not running or just installed, start it. All paths are quoted so that a
# checkout location containing spaces works.
if [ -n "${ZINC_INSTALL_FLAG}" ] || [ -z "$("${ZINC_BIN}" -status)" ]; then
  "${ZINC_BIN}" -shutdown
  "${ZINC_BIN}" -start -port "${ZINC_PORT}" \
    -scala-compiler "${SCALA_COMPILER}" \
    -scala-library "${SCALA_LIBRARY}" &>/dev/null
fi

# Set any `mvn` options if not already present
export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}

# Last, call the `mvn` command as usual, forwarding every argument untouched
"${MVN_BIN}" "$@"
Loading

0 comments on commit 8cefd18

Please sign in to comment.