[SPARK-18652] Include the data in pyspark package.
lins05 committed Nov 30, 2016
1 parent c24076d commit 43019db
Showing 2 changed files with 10 additions and 0 deletions.
1 change: 1 addition & 0 deletions python/MANIFEST.in
@@ -17,6 +17,7 @@
global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
graft deps/bin
+recursive-include deps/data *
recursive-include deps/examples *.py
recursive-include lib *.zip
include README.md
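
The one-line MANIFEST.in addition pulls everything under deps/data into the source distribution. As a minimal sketch (not part of the commit), the packaged data files could be checked after building an sdist with python setup.py sdist; the dist/ location and version pattern below are assumptions:

import glob
import tarfile

# Pick the most recent pyspark sdist tarball in dist/ (hypothetical location).
sdist = sorted(glob.glob("dist/pyspark-*.tar.gz"))[-1]
with tarfile.open(sdist) as tar:
    data_files = [m.name for m in tar.getmembers() if "/deps/data/" in m.name]
print("%d data files packaged, e.g. %s" % (len(data_files), data_files[:3]))
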
9 changes: 9 additions & 0 deletions python/setup.py
@@ -69,9 +69,11 @@

EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
+DATA_PATH = os.path.join(SPARK_HOME, "data")
SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
JARS_TARGET = os.path.join(TEMP_PATH, "jars")
EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
+DATA_TARGET = os.path.join(TEMP_PATH, "data")


# Check and see if we are under the spark path in which case we need to build the symlink farm.
@@ -114,11 +116,13 @@ def _supports_symlinks():
os.symlink(JARS_PATH, JARS_TARGET)
os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
+os.symlink(DATA_PATH, DATA_TARGET)
else:
# For windows fall back to the slower copytree
copytree(JARS_PATH, JARS_TARGET)
copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
+copytree(DATA_PATH, DATA_TARGET)
else:
# If we are not inside of SPARK_HOME verify we have the required symlink farm
if not os.path.exists(JARS_TARGET):
@@ -161,18 +165,21 @@ def _supports_symlinks():
'pyspark.jars',
'pyspark.python.pyspark',
'pyspark.python.lib',
+'pyspark.data',
'pyspark.examples.src.main.python'],
include_package_data=True,
package_dir={
'pyspark.jars': 'deps/jars',
'pyspark.bin': 'deps/bin',
'pyspark.python.lib': 'lib',
+'pyspark.data': 'deps/data',
'pyspark.examples.src.main.python': 'deps/examples',
},
package_data={
'pyspark.jars': ['*.jar'],
'pyspark.bin': ['*'],
'pyspark.python.lib': ['*.zip'],
+'pyspark.data': ['*'],
'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
scripts=scripts,
license='http://www.apache.org/licenses/LICENSE-2.0',
@@ -202,8 +209,10 @@ def _supports_symlinks():
os.remove(os.path.join(TEMP_PATH, "jars"))
os.remove(os.path.join(TEMP_PATH, "bin"))
os.remove(os.path.join(TEMP_PATH, "examples"))
os.remove(os.path.join(TEMP_PATH, "data"))
else:
rmtree(os.path.join(TEMP_PATH, "jars"))
rmtree(os.path.join(TEMP_PATH, "bin"))
rmtree(os.path.join(TEMP_PATH, "examples"))
rmtree(os.path.join(TEMP_PATH, "data"))
os.rmdir(TEMP_PATH)
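
Because package_dir maps pyspark.data to deps/data, the data files install as a data directory inside the pyspark package itself. A rough sketch (not from the commit) of locating them from a pip-installed pyspark; the directory layout is inferred from the mapping above:

import os
import pyspark

# The pyspark.data package lands as a "data" directory next to pyspark's modules.
data_dir = os.path.join(os.path.dirname(pyspark.__file__), "data")
if os.path.isdir(data_dir):
    print("bundled data files: %s" % os.listdir(data_dir)[:5])
else:
    print("no bundled data directory; pyspark was probably not installed from pip")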
