Summary of this patch (preamble text; ignored by `git apply` / `patch`):
  * Imports modin.pandas DataFrame/Series and unwrap_partitions alongside the
    MODIN_INSTALLED version check.
    NOTE(review): presumably this lets an incompatible pandas/modin pairing be
    detected at import time; the enclosing block is outside the hunk context,
    so confirm against the target file.
  * Extends the _assert_modin_installed() error message to mention a possible
    conflict with the installed pandas version.
  * Rewrites the function-local modin imports as parenthesised multi-line
    imports tagged `# noqa: F811`.
NOTE(review): leading indentation and the spacing before inline comments were
reconstructed (the patch had been collapsed onto two lines); blank context
lines were placed so that every hunk matches its header line counts. Verify
whitespace against the target file before applying.

diff --git a/xgboost_ray/data_sources/modin.py b/xgboost_ray/data_sources/modin.py
index 1f2f1f57..78e229c7 100644
--- a/xgboost_ray/data_sources/modin.py
+++ b/xgboost_ray/data_sources/modin.py
@@ -16,6 +16,13 @@
     import modin  # noqa: F401
     from modin.config.envvars import Engine
     from packaging.version import Version
+    from modin.pandas import (  # noqa: F401
+        DataFrame as ModinDataFrame,  # noqa: F401
+        Series as ModinSeries  # noqa: F401
+    )
+    from modin.distributed.dataframe.pandas import (  # noqa: F401
+        unwrap_partitions  # noqa: F401
+    )
     MODIN_INSTALLED = Version(modin.__version__) >= Version("0.9.0")
 
     # Check if importing the Ray engine leads to errors
@@ -29,8 +36,11 @@ def _assert_modin_installed():
     if not MODIN_INSTALLED:
         raise RuntimeError(
             "Tried to use Modin as a data source, but modin is not "
-            "installed. This function shouldn't have been called. "
-            "\nFIX THIS by installing modin: `pip install modin`. "
+            "installed or it conflicts with the pandas version. "
+            "This function shouldn't have been called. "
+            "\nFIX THIS by installing modin: `pip install modin` "
+            "and making sure that the installed pandas version is "
+            "supported by modin."
             "\nPlease also raise an issue on our GitHub: "
             "https://github.com/ray-project/xgboost_ray as this part of "
             "the code should not have been reached.")
@@ -53,8 +63,11 @@ def is_data_type(data: Any,
                      filetype: Optional[RayFileType] = None) -> bool:
         if not MODIN_INSTALLED:
             return False
-        from modin.pandas import DataFrame as ModinDataFrame, \
-            Series as ModinSeries
+        # Has to be imported again.
+        from modin.pandas import (  # noqa: F811
+            DataFrame as ModinDataFrame,  # noqa: F811
+            Series as ModinSeries  # noqa: F811
+        )
 
         return isinstance(data, (ModinDataFrame, ModinSeries))
 
@@ -87,8 +100,11 @@ def load_data(
     @staticmethod
     def convert_to_series(data: Any) -> pd.Series:
         _assert_modin_installed()
-        from modin.pandas import DataFrame as ModinDataFrame, \
-            Series as ModinSeries
+        # Has to be imported again.
+        from modin.pandas import (  # noqa: F811
+            DataFrame as ModinDataFrame,  # noqa: F811
+            Series as ModinSeries  # noqa: F811
+        )
 
         if isinstance(data, ModinDataFrame):
             return pd.Series(data._to_pandas().squeeze())
@@ -104,7 +120,10 @@ def get_actor_shards(
             Tuple[Any, Optional[Dict[int, Any]]]:
         _assert_modin_installed()
 
-        from modin.distributed.dataframe.pandas import unwrap_partitions
+        # Has to be imported again.
+        from modin.distributed.dataframe.pandas import (  # noqa: F811
+            unwrap_partitions  # noqa: F811
+        )
 
         actor_rank_ips = get_actor_rank_ips(actors)
 