diff --git a/python/MANIFEST.in b/python/MANIFEST.in index bbcce1baa439d..40f1fb2f1ee7e 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -17,6 +17,8 @@ global-exclude *.py[cod] __pycache__ .DS_Store recursive-include deps/jars *.jar graft deps/bin +recursive-include deps/data *.data *.txt +recursive-include deps/licenses *.txt recursive-include deps/examples *.py recursive-include lib *.zip include README.md diff --git a/python/setup.py b/python/setup.py index 625aea04073f5..bc2eb4ce9dbd0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -69,10 +69,14 @@ EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python") SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin") +DATA_PATH = os.path.join(SPARK_HOME, "data") +LICENSES_PATH = os.path.join(SPARK_HOME, "licenses") + SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin") JARS_TARGET = os.path.join(TEMP_PATH, "jars") EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples") - +DATA_TARGET = os.path.join(TEMP_PATH, "data") +LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses") # Check and see if we are under the spark path in which case we need to build the symlink farm. # This is important because we only want to build the symlink farm while under Spark otherwise we @@ -114,11 +118,15 @@ def _supports_symlinks(): os.symlink(JARS_PATH, JARS_TARGET) os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET) os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET) + os.symlink(DATA_PATH, DATA_TARGET) + os.symlink(LICENSES_PATH, LICENSES_TARGET) else: # For windows fall back to the slower copytree copytree(JARS_PATH, JARS_TARGET) copytree(SCRIPTS_PATH, SCRIPTS_TARGET) copytree(EXAMPLES_PATH, EXAMPLES_TARGET) + copytree(DATA_PATH, DATA_TARGET) + copytree(LICENSES_PATH, LICENSES_TARGET) else: # If we are not inside of SPARK_HOME verify we have the required symlink farm if not os.path.exists(JARS_TARGET): @@ -161,18 +169,24 @@ def _supports_symlinks(): 'pyspark.jars', 'pyspark.python.pyspark', 'pyspark.python.lib', + 'pyspark.data', + 'pyspark.licenses', 'pyspark.examples.src.main.python'], include_package_data=True, package_dir={ 'pyspark.jars': 'deps/jars', 'pyspark.bin': 'deps/bin', 'pyspark.python.lib': 'lib', + 'pyspark.data': 'deps/data', + 'pyspark.licenses': 'deps/licenses', 'pyspark.examples.src.main.python': 'deps/examples', }, package_data={ 'pyspark.jars': ['*.jar'], 'pyspark.bin': ['*'], 'pyspark.python.lib': ['*.zip'], + 'pyspark.data': ['*.txt', '*.data'], + 'pyspark.licenses': ['*.txt'], 'pyspark.examples.src.main.python': ['*.py', '*/*.py']}, scripts=scripts, license='http://www.apache.org/licenses/LICENSE-2.0', @@ -202,8 +216,12 @@ def _supports_symlinks(): os.remove(os.path.join(TEMP_PATH, "jars")) os.remove(os.path.join(TEMP_PATH, "bin")) os.remove(os.path.join(TEMP_PATH, "examples")) + os.remove(os.path.join(TEMP_PATH, "data")) + os.remove(os.path.join(TEMP_PATH, "licenses")) else: rmtree(os.path.join(TEMP_PATH, "jars")) rmtree(os.path.join(TEMP_PATH, "bin")) rmtree(os.path.join(TEMP_PATH, "examples")) + rmtree(os.path.join(TEMP_PATH, "data")) + rmtree(os.path.join(TEMP_PATH, "licenses")) os.rmdir(TEMP_PATH)