diff --git a/assembly/pom.xml b/assembly/pom.xml index f1f8b0d3682e2..53059eaa34faf 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -92,6 +92,27 @@ true + + + org.apache.maven.plugins + maven-antrun-plugin + + + package + + run + + + + + + + + + + + + org.apache.maven.plugins @@ -196,6 +217,19 @@ maven-assembly-plugin 2.4 + dist package @@ -208,7 +242,7 @@ - + diff --git a/make-distribution.sh b/make-distribution.sh index c9a26d78239b2..738a9c4d69601 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -228,7 +228,6 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" -zip -r "$DISTDIR"/python/lib/pyspark.zip "$SPARK_HOME"/python/lib/pyspark cp -r "$SPARK_HOME/sbin" "$DISTDIR" cp -r "$SPARK_HOME/ec2" "$DISTDIR" diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 09b4976d10c26..3bd70dc0f6af1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -361,12 +361,20 @@ object PySparkAssembly { // to be included in the assembly. We can't just add "python/" to the assembly's resource dir // list since that will copy unneeded / unwanted files. resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File => + val src = new File(BuildCommons.sparkHome, "python/pyspark") + + val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") + IO.delete(zipFile) + def entries(f: File):List[File] = + f :: (if (f.isDirectory) IO.listFiles(f).toList.flatMap(entries(_)) else Nil) + IO.zip(entries(src).map( + d => (d, d.getAbsolutePath.substring(src.getParent.length +1))), + zipFile) + val dst = new File(outDir, "pyspark") if (!dst.isDirectory()) { require(dst.mkdirs()) } - - val src = new File(BuildCommons.sparkHome, "python/pyspark") copy(src, dst) } )