apache · xinrong-meng · Apr 27, 2021 · Apr 27, 2021 · Apr 29, 2021 · Apr 29, 2021
diff --git a/dev/requirements.txt b/dev/requirements.txt
@@ -6,3 +6,13 @@ pydata_sphinx_theme
 ipython
 nbsphinx
 numpydoc
+
+# Required dependencies for pandas-on-spark.
+pandas>=0.23.2
+pyarrow>=0.10
+numpy>=1.14,<1.20.0
+
+# Optional dependencies for pandas-on-spark.
+mlflow>=1.0
+plotly>=4.8
+matplotlib>=3.0.0,<3.3.0
diff --git a/python/setup.py b/python/setup.py
@@ -220,6 +220,13 @@ def run(self):
                   'pyspark.bin',
                   'pyspark.sbin',
                   'pyspark.jars',
+                  'pyspark.pandas',
+                  'pyspark.pandas.indexes',
+                  'pyspark.pandas.missing',
+                  'pyspark.pandas.plot',
+                  'pyspark.pandas.spark',
+                  'pyspark.pandas.typedef',
+                  'pyspark.pandas.usage_logging',
                   'pyspark.python.pyspark',
                   'pyspark.python.lib',
                   'pyspark.data',
@@ -250,14 +257,22 @@ def run(self):
         license='http://www.apache.org/licenses/LICENSE-2.0',
         # Don't forget to update python/docs/source/getting_started/install.rst
         # if you're updating the versions or dependencies.
-        install_requires=['py4j==0.10.9.2'],
+        install_requires=[
+            'py4j==0.10.9.2',
+            'pandas>=0.23.2',
+            'pyarrow>=0.10',
+            'numpy>=1.14,<1.20.0',
+        ],
         extras_require={
             'ml': ['numpy>=1.7'],
             'mllib': ['numpy>=1.7'],
             'sql': [
                 'pandas>=%s' % _minimum_pandas_version,
                 'pyarrow>=%s' % _minimum_pyarrow_version,
-            ]
+            ],
+            'pandas.mlflow': ['mlflow>=1.0'],
+            'pandas.plotly': ['plotly>=4.8'],
+            'pandas.matplotlib': ['matplotlib>=3.0.0,<3.3.0'],
         },
         python_requires='>=3.6',
         classifiers=[