[SPARK-18652][PYTHON] Include the example data and third-party licenses in pyspark package. #16082

Closed
wants to merge 6 commits
2 changes: 2 additions & 0 deletions python/MANIFEST.in
@@ -17,6 +17,8 @@
global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
graft deps/bin
recursive-include deps/data *.data *.txt
recursive-include deps/licenses *.txt
recursive-include deps/examples *.py
recursive-include lib *.zip
include README.md
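
Note: these MANIFEST.in rules primarily govern what goes into the source distribution. A quick, non-authoritative way to confirm that the new data and license files actually land in the sdist is to list the archive after running python setup.py sdist; the tarball path below is an assumption about the build output, not something taken from this PR.

# Sketch: list data/ and licenses/ entries in a built pyspark sdist.
# Assumes `python setup.py sdist` has produced dist/pyspark-<version>.tar.gz.
import glob
import tarfile

sdist = sorted(glob.glob("dist/pyspark-*.tar.gz"))[-1]  # newest sdist in dist/
with tarfile.open(sdist) as tar:
    for name in tar.getnames():
        if "/deps/data/" in name or "/deps/licenses/" in name:
            print(name)
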
20 changes: 19 additions & 1 deletion python/setup.py
@@ -69,10 +69,14 @@

EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
DATA_PATH = os.path.join(SPARK_HOME, "data")
LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")

SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
JARS_TARGET = os.path.join(TEMP_PATH, "jars")
EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")

DATA_TARGET = os.path.join(TEMP_PATH, "data")
LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")

# Check and see if we are under the spark path in which case we need to build the symlink farm.
# This is important because we only want to build the symlink farm while under Spark otherwise we
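
The hunk below extends both branches of the existing _supports_symlinks() check. The helper itself is not part of this diff; a capability probe of this kind typically just tests for os.symlink, sketched here as an assumption rather than Spark's exact implementation:

import os

def _supports_symlinks():
    # Sketch only: assume symlinks are usable when os.symlink exists.
    # Spark's real helper in setup.py may differ in detail.
    return getattr(os, "symlink", None) is not None
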
@@ -114,11 +118,15 @@ def _supports_symlinks():
            os.symlink(JARS_PATH, JARS_TARGET)
            os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
            os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
            os.symlink(DATA_PATH, DATA_TARGET)
            os.symlink(LICENSES_PATH, LICENSES_TARGET)
        else:
            # For windows fall back to the slower copytree
            copytree(JARS_PATH, JARS_TARGET)
            copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
            copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
            copytree(DATA_PATH, DATA_TARGET)
            copytree(LICENSES_PATH, LICENSES_TARGET)
    else:
        # If we are not inside of SPARK_HOME verify we have the required symlink farm
        if not os.path.exists(JARS_TARGET):
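
The additions above repeat the established pattern: symlink each directory when the platform allows it, otherwise fall back to a full copy. Purely as an illustration (this helper is hypothetical and not part of the PR), the pattern could be expressed once:

import os
from shutil import copytree

def _link_or_copy(src, dst):
    # Hypothetical helper illustrating the pattern above: prefer a symlink,
    # fall back to copying the whole tree (e.g. on Windows).
    if getattr(os, "symlink", None) is not None:
        os.symlink(src, dst)
    else:
        copytree(src, dst)

# Usage sketch with the constants defined earlier in setup.py:
# _link_or_copy(DATA_PATH, DATA_TARGET)
# _link_or_copy(LICENSES_PATH, LICENSES_TARGET)
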
@@ -161,18 +169,24 @@ def _supports_symlinks():
                  'pyspark.jars',
                  'pyspark.python.pyspark',
                  'pyspark.python.lib',
                  'pyspark.data',
                  'pyspark.licenses',
                  'pyspark.examples.src.main.python'],
        include_package_data=True,
        package_dir={
            'pyspark.jars': 'deps/jars',
            'pyspark.bin': 'deps/bin',
            'pyspark.python.lib': 'lib',
            'pyspark.data': 'deps/data',
            'pyspark.licenses': 'deps/licenses',
            'pyspark.examples.src.main.python': 'deps/examples',
        },
        package_data={
            'pyspark.jars': ['*.jar'],
            'pyspark.bin': ['*'],
            'pyspark.python.lib': ['*.zip'],
            'pyspark.data': ['*.txt', '*.data'],
            'pyspark.licenses': ['*.txt'],
            'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
        scripts=scripts,
        license='http://www.apache.org/licenses/LICENSE-2.0',
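
Registering pyspark.data and pyspark.licenses as packages backed by deps/data and deps/licenses means the files should install under pyspark/data and pyspark/licenses inside the installed package. A minimal sketch of locating them at runtime, assuming that layout:

import os
import pyspark

# Resolve the installed package root, then the bundled directories.
pkg_root = os.path.dirname(pyspark.__file__)
data_dir = os.path.join(pkg_root, "data")          # example/sample data files
licenses_dir = os.path.join(pkg_root, "licenses")  # third-party license texts

print(sorted(os.listdir(licenses_dir))[:5])
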
@@ -202,8 +216,12 @@ def _supports_symlinks():
            os.remove(os.path.join(TEMP_PATH, "jars"))
            os.remove(os.path.join(TEMP_PATH, "bin"))
            os.remove(os.path.join(TEMP_PATH, "examples"))
            os.remove(os.path.join(TEMP_PATH, "data"))
            os.remove(os.path.join(TEMP_PATH, "licenses"))
        else:
            rmtree(os.path.join(TEMP_PATH, "jars"))
            rmtree(os.path.join(TEMP_PATH, "bin"))
            rmtree(os.path.join(TEMP_PATH, "examples"))
            rmtree(os.path.join(TEMP_PATH, "data"))
            rmtree(os.path.join(TEMP_PATH, "licenses"))
        os.rmdir(TEMP_PATH)
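
The cleanup mirrors the construction step: entries created as symlinks are deleted with os.remove, while copied trees need rmtree. The same rule, written as a hypothetical standalone helper that keys off the path type instead of the _supports_symlinks() check setup.py uses:

import os
from shutil import rmtree

def _remove_link_or_tree(path):
    # Hypothetical cleanup helper: a symlink is a single filesystem entry,
    # so os.remove suffices; a copied directory must be removed recursively.
    if os.path.islink(path):
        os.remove(path)
    elif os.path.isdir(path):
        rmtree(path)
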