Skip to content

Commit

Permalink
[SPARK-4281][Build] Package Yarn shuffle service into its own jar
Browse files Browse the repository at this point in the history
This is another addendum to #3082, which added the Yarn shuffle service to run inside the NM. This PR makes the feature much more usable by packaging enough dependencies into the jar to run the service inside an NM. After these changes, the user can run `./make-distribution.sh` and find a `spark-network-yarn*.jar` in their `lib` directory. The equivalent change is done in SBT by making the `network-yarn` module an assembly project.

Author: Andrew Or <[email protected]>

Closes #3147 from andrewor14/yarn-shuffle-build and squashes the following commits:

bda58d0 [Andrew Or] Fix line too long
81e9705 [Andrew Or] Merge branch 'master' of github.com:apache/spark into yarn-shuffle-build
fb7f398 [Andrew Or] Rename jar to spark-{VERSION}-yarn-shuffle.jar
65db822 [Andrew Or] Actually mark slf4j as provided
abcefd1 [Andrew Or] Do the same for SBT
c653028 [Andrew Or] Package network-yarn and its dependencies
  • Loading branch information
Andrew Or committed Nov 12, 2014
1 parent 6e3c5a2 commit aa43a8d
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 10 deletions.
1 change: 1 addition & 0 deletions make-distribution.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DI
# Copy jars
cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$FWDIR"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
cp "$FWDIR"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/"

# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
Expand Down
5 changes: 3 additions & 2 deletions network/common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</dependency>

<!-- Provided dependencies -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>

<!-- Provided dependencies -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
5 changes: 3 additions & 2 deletions network/shuffle/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@
<artifactId>spark-network-common_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>

<!-- Provided dependencies -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>

<!-- Provided dependencies -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
33 changes: 33 additions & 0 deletions network/yarn/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,38 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-${project.version}-yarn-shuffle.jar</outputFile>
<artifactSet>
<includes>
<include>*:*</include>
</includes>
</artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
19 changes: 13 additions & 6 deletions project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ object BuildCommons {
"streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter",
"streaming-zeromq").map(ProjectRef(buildLocation, _))

val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, networkYarn, java8Tests,
sparkGangliaLgpl, sparkKinesisAsl) = Seq("yarn", "yarn-stable", "yarn-alpha", "network-yarn",
val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests,
sparkGangliaLgpl, sparkKinesisAsl) = Seq("yarn", "yarn-stable", "yarn-alpha",
"java8-tests", "ganglia-lgpl", "kinesis-asl").map(ProjectRef(buildLocation, _))

val assemblyProjects@Seq(assembly, examples) = Seq("assembly", "examples")
.map(ProjectRef(buildLocation, _))
val assemblyProjects@Seq(assembly, examples, networkYarn) =
Seq("assembly", "examples", "network-yarn").map(ProjectRef(buildLocation, _))

val tools = ProjectRef(buildLocation, "tools")
// Root project.
Expand Down Expand Up @@ -289,8 +289,15 @@ object Assembly {

lazy val settings = assemblySettings ++ Seq(
test in assembly := {},
jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" +
Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" },
jarName in assembly <<= (version, moduleName) map { (v, mName) =>
if (mName.contains("network-yarn")) {
// This must match the same name used in maven (see network/yarn/pom.xml)
"spark-" + v + "-yarn-shuffle.jar"
} else {
mName + "-" + v + "-hadoop" +
Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar"
}
},
mergeStrategy in assembly := {
case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
Expand Down

0 comments on commit aa43a8d

Please sign in to comment.