From 91fc628f12f08feea7aa385d1aee9a5bd5479868 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 01:30:52 +0300 Subject: [PATCH 01/27] REFACTOR-#2059: Implement vars in single package Other parts of Modin not adapted yet Signed-off-by: Vasilij Litvinov --- modin/config/__init__.py | 15 +++ modin/config/__main__.py | 16 +++ modin/config/envvars.py | 152 ++++++++++++++++++++++ modin/config/pubsub.py | 125 ++++++++++++++++++ modin/config/test/__init__.py | 12 ++ modin/{ => config}/test/test_publisher.py | 0 6 files changed, 320 insertions(+) create mode 100644 modin/config/__init__.py create mode 100644 modin/config/__main__.py create mode 100644 modin/config/envvars.py create mode 100644 modin/config/pubsub.py create mode 100644 modin/config/test/__init__.py rename modin/{ => config}/test/test_publisher.py (100%) diff --git a/modin/config/__init__.py b/modin/config/__init__.py new file mode 100644 index 00000000000..33ce8a085e3 --- /dev/null +++ b/modin/config/__init__.py @@ -0,0 +1,15 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +from .pubsub import Publisher +from .envvars import * diff --git a/modin/config/__main__.py b/modin/config/__main__.py new file mode 100644 index 00000000000..07c56415918 --- /dev/null +++ b/modin/config/__main__.py @@ -0,0 +1,16 @@ +from . import * + + +def get_help(): + for obj in globals().values(): + if ( + isinstance(obj, type) + and issubclass(obj, Publisher) + and obj is not EnvironmentVariable + and obj is not Publisher + ): + print(f"{obj._get_help()}\nvalue={obj.value}") + + +if __name__ == "__main__": + get_help() diff --git a/modin/config/envvars.py b/modin/config/envvars.py new file mode 100644 index 00000000000..4274ba4186f --- /dev/null +++ b/modin/config/envvars.py @@ -0,0 +1,152 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import os +from textwrap import dedent +import warnings + +from .pubsub import Publisher, _TYPE_HELP + + +class EnvironmentVariable(Publisher, type=str): + """ + Base class for environment variables-based configuration + """ + + def __init_subclass__(cls, varname: str, **kw): + cls.varname = varname + if not kw.get("type"): + kw["type"] = EnvironmentVariable.type + super().__init_subclass__(**kw) + + @classmethod + def _get_raw_from_config(cls) -> str: + return os.environ[cls.varname] + + @classmethod + def _get_help(cls) -> str: + help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide a {_TYPE_HELP[cls.type]}" + if cls.choices: + help += f" (valid examples are: {', '.join(cls.choices)})" + return help + + +class Engine(EnvironmentVariable, varname="MODIN_ENGINE"): + """ + Distribution engine to run queries by + """ + + choices = ("Ray", "Dask", "Python") + + +class Backend(EnvironmentVariable, varname="MODIN_BACKEND"): + """ + Engine running on a single node of distribution + """ + + choices = ("Pandas", "OmniSci", "Pyarrow") + + +class IsDebug(EnvironmentVariable, varname="MODIN_DEBUG", type=bool): + """ + Forces Modin engine to be "Python" unless specified by $MODIN_ENGINE + """ + + +class IsExperimental(EnvironmentVariable, varname="MODIN_EXPERIMENTAL", type=bool): + """ + Turns on experimental features + """ + + +class IsRayCluster(EnvironmentVariable, varname="MODIN_RAY_CLUSTER", type=bool): + """ + True if Modin is running on pre-initialized Ray cluster + """ + + +class RayRedisAddress(EnvironmentVariable, varname="MODIN_REDIS_ADDRESS"): + """ + What Redis address to connect to when running in Ray cluster + """ + + +class CpuCount(EnvironmentVariable, varname="MODIN_CPUS", type=int): + """ + How may CPU cores to utilize across the whole distribution + """ + + +class Memory(EnvironmentVariable, varname="MODIN_MEMORY", type=int): + """ + How much memory give to each Ray worker (in bytes) + """ + + +class RayPlasmaDir(EnvironmentVariable, varname="MODIN_ON_RAY_PLASMA_DIR"): + """ + Path to Plasma storage for Ray + """ + + +class IsOutOfCore(EnvironmentVariable, varname="MODIN_OUT_OF_CORE", type=bool): + pass + + +class SocksProxy(EnvironmentVariable, varname="MODIN_SOCKS_PROXY"): + """ + SOCKS proxy address if it is needed for SSH to work + """ + + +class DoLogRpyc(EnvironmentVariable, varname="MODIN_LOG_RPYC", type=bool): + """ + Whether to gather RPyC logs (applicable for remote context) + """ + + +class DoTraceRpyc(EnvironmentVariable, varname="MODIN_TRACE_RPYC", type=bool): + """ + Whether to trace RPyC calls (applicable for remote context) + """ + + +class OmnisciFragmentSize( + EnvironmentVariable, varname="MODIN_OMNISCI_FRAGMENT_SIZE", type=int +): + """ + How big a fragment in OmniSci should be when creating a table (in rows) + """ + + +class DoUseCalcite(EnvironmentVariable, varname="MODIN_USE_CALCITE", type=bool): + """ + Whether to use Calcite for OmniSci queries execution + """ + + +class TestDatasetSize(EnvironmentVariable, varname="MODIN_TEST_DATASET_SIZE"): + """ + Dataset size for running some tests + """ + + choices = ("small", "normal", "big") + +def _check_vars(): + valid_names = {obj.varname for obj in globals().values() if obj is not EnvironmentVariable and isinstance(obj, type) and issubclass(obj, EnvironmentVariable)} + found_names = {name for name in os.environ.keys() if name.startswith('MODIN_')} + unknown = found_names - valid_names + if unknown: + warnings.warn(f"Found unknown environment variables, please check their spelling: {', '.join(sorted(unknown))}") + +_check_vars() diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py new file mode 100644 index 00000000000..99047d3a8b7 --- /dev/null +++ b/modin/config/pubsub.py @@ -0,0 +1,125 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import collections +import typing + + +class Caster(typing.NamedTuple): + decode: typing.Callable[[str], object] + normalize: typing.Callable[[object], object] = lambda x: x + encode: typing.Callable[[object], str] = str + + +_CASTERS = { + str: Caster( + decode=lambda value: value.strip().title(), + normalize=lambda value: value.strip().title(), + ), + bool: Caster( + decode=lambda value: value.strip().lower() in {"true", "yes", "1"}, + normalize=bool, + ), + int: Caster(decode=lambda value: int(value.strip()), normalize=int), +} + +_TYPE_HELP = { + str: "string", + bool: "boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", + int: "integer value", +} + + +class _ValueMeta(type): + """ + Metaclass is needed to make classmethod property + """ + + @property + def value(cls): + if cls._value is None: + # get the value from env + try: + raw = cls._get_raw_from_config() + except KeyError: + cls._value = cls.default + else: + cls._value = _CASTERS[cls.type].decode(raw) + return cls._value + + @value.setter + def value(cls, value): + cls._check_callbacks(cls._put_nocallback(value)) + + +class Publisher(object, metaclass=_ValueMeta): + """ + Base class describing interface for configuration entities + """ + + choices: typing.Sequence[str] = None + type = str + default = None + + @classmethod + def _get_raw_from_config(cls) -> str: + """ + The method that really reads the value from config storage, + be it some config file or environment variable or whatever. + + Raises KeyError if value is absent. + """ + raise NotImplementedError() + + @classmethod + def _get_help(cls) -> str: + """ + Generate user-presentable help for the option + """ + raise NotImplementedError() + + def __init_subclass__(cls, type=None, default=None, **kw): + assert type in _CASTERS, f"Unsupported variable type: {type}" + cls.type = type + cls._value = None + cls._subs = [] + cls._once = collections.defaultdict(list) + super().__init_subclass__(**kw) + + @classmethod + def subscribe(cls, callback): + cls._subs.append(callback) + callback(cls) + + @classmethod + def once(cls, onvalue, callback): + onvalue = _CASTERS[cls.type].normalize(onvalue) + if onvalue == cls.value: + callback(cls) + else: + cls._once[onvalue].append(callback) + + @classmethod + def _put_nocallback(cls, value): + value = _CASTERS[cls.type].normalize(value.title) + oldvalue, cls.value = cls.value, value + return oldvalue + + @classmethod + def _check_callbacks(cls, oldvalue): + if oldvalue == cls.value: + return + for callback in cls._subs: + callback(cls) + for callback in cls._once.pop(cls.value, ()): + callback(cls) diff --git a/modin/config/test/__init__.py b/modin/config/test/__init__.py new file mode 100644 index 00000000000..cae6413e559 --- /dev/null +++ b/modin/config/test/__init__.py @@ -0,0 +1,12 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. diff --git a/modin/test/test_publisher.py b/modin/config/test/test_publisher.py similarity index 100% rename from modin/test/test_publisher.py rename to modin/config/test/test_publisher.py From 30b75ec737cb470ffbfce8302bf81fcfbd8222c8 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 11:06:16 +0300 Subject: [PATCH 02/27] REFACTOR-#2059: Simplify passing varname Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 85 ++++++++++++++++++++++++++++------------- modin/config/pubsub.py | 2 +- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 4274ba4186f..e6535b1b379 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -23,11 +23,7 @@ class EnvironmentVariable(Publisher, type=str): Base class for environment variables-based configuration """ - def __init_subclass__(cls, varname: str, **kw): - cls.varname = varname - if not kw.get("type"): - kw["type"] = EnvironmentVariable.type - super().__init_subclass__(**kw) + varname: str = None @classmethod def _get_raw_from_config(cls) -> str: @@ -41,112 +37,147 @@ def _get_help(cls) -> str: return help -class Engine(EnvironmentVariable, varname="MODIN_ENGINE"): +class Engine(EnvironmentVariable, type=str): """ Distribution engine to run queries by """ + varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python") -class Backend(EnvironmentVariable, varname="MODIN_BACKEND"): +class Backend(EnvironmentVariable, type=str): """ Engine running on a single node of distribution """ + varname = "MODIN_BACKEND" choices = ("Pandas", "OmniSci", "Pyarrow") -class IsDebug(EnvironmentVariable, varname="MODIN_DEBUG", type=bool): +class IsDebug(EnvironmentVariable, type=bool): """ Forces Modin engine to be "Python" unless specified by $MODIN_ENGINE """ + varname = "MODIN_DEBUG" -class IsExperimental(EnvironmentVariable, varname="MODIN_EXPERIMENTAL", type=bool): + +class IsExperimental(EnvironmentVariable, type=bool): """ Turns on experimental features """ + varname = "MODIN_EXPERIMENTAL" + -class IsRayCluster(EnvironmentVariable, varname="MODIN_RAY_CLUSTER", type=bool): +class IsRayCluster(EnvironmentVariable, type=bool): """ True if Modin is running on pre-initialized Ray cluster """ + varname = "MODIN_RAY_CLUSTER" -class RayRedisAddress(EnvironmentVariable, varname="MODIN_REDIS_ADDRESS"): + +class RayRedisAddress(EnvironmentVariable, type=str): """ What Redis address to connect to when running in Ray cluster """ + varname = "MODIN_REDIS_ADDRESS" + -class CpuCount(EnvironmentVariable, varname="MODIN_CPUS", type=int): +class CpuCount(EnvironmentVariable, type=int): """ How may CPU cores to utilize across the whole distribution """ + varname = "MODIN_CPUS" -class Memory(EnvironmentVariable, varname="MODIN_MEMORY", type=int): + +class Memory(EnvironmentVariable, type=int): """ How much memory give to each Ray worker (in bytes) """ + varname = "MODIN_MEMORY" + -class RayPlasmaDir(EnvironmentVariable, varname="MODIN_ON_RAY_PLASMA_DIR"): +class RayPlasmaDir(EnvironmentVariable, type=str): """ Path to Plasma storage for Ray """ + varname = "MODIN_ON_RAY_PLASMA_DIR" + -class IsOutOfCore(EnvironmentVariable, varname="MODIN_OUT_OF_CORE", type=bool): - pass +class IsOutOfCore(EnvironmentVariable, type=bool): + varname = "MODIN_OUT_OF_CORE" -class SocksProxy(EnvironmentVariable, varname="MODIN_SOCKS_PROXY"): +class SocksProxy(EnvironmentVariable, type=str): """ SOCKS proxy address if it is needed for SSH to work """ + varname = "MODIN_SOCKS_PROXY" -class DoLogRpyc(EnvironmentVariable, varname="MODIN_LOG_RPYC", type=bool): + +class DoLogRpyc(EnvironmentVariable, type=bool): """ Whether to gather RPyC logs (applicable for remote context) """ + varname = "MODIN_LOG_RPYC" + -class DoTraceRpyc(EnvironmentVariable, varname="MODIN_TRACE_RPYC", type=bool): +class DoTraceRpyc(EnvironmentVariable, type=bool): """ Whether to trace RPyC calls (applicable for remote context) """ + varname = "MODIN_TRACE_RPYC" -class OmnisciFragmentSize( - EnvironmentVariable, varname="MODIN_OMNISCI_FRAGMENT_SIZE", type=int -): + +class OmnisciFragmentSize(EnvironmentVariable, type=int): """ How big a fragment in OmniSci should be when creating a table (in rows) """ + varname = "MODIN_OMNISCI_FRAGMENT_SIZE" + -class DoUseCalcite(EnvironmentVariable, varname="MODIN_USE_CALCITE", type=bool): +class DoUseCalcite(EnvironmentVariable, type=bool): """ Whether to use Calcite for OmniSci queries execution """ + varname = "MODIN_USE_CALCITE" -class TestDatasetSize(EnvironmentVariable, varname="MODIN_TEST_DATASET_SIZE"): + +class TestDatasetSize(EnvironmentVariable, type=str): """ Dataset size for running some tests """ + varname = "MODIN_TEST_DATASET_SIZE" choices = ("small", "normal", "big") + def _check_vars(): - valid_names = {obj.varname for obj in globals().values() if obj is not EnvironmentVariable and isinstance(obj, type) and issubclass(obj, EnvironmentVariable)} - found_names = {name for name in os.environ.keys() if name.startswith('MODIN_')} + valid_names = { + obj.varname + for obj in globals().values() + if obj is not EnvironmentVariable + and isinstance(obj, type) + and issubclass(obj, EnvironmentVariable) + } + found_names = {name for name in os.environ.keys() if name.startswith("MODIN_")} unknown = found_names - valid_names if unknown: - warnings.warn(f"Found unknown environment variables, please check their spelling: {', '.join(sorted(unknown))}") + warnings.warn( + f"Found unknown environment variables, please check their spelling: {', '.join(sorted(unknown))}" + ) + _check_vars() diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index 99047d3a8b7..f394ca3eae4 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -88,7 +88,7 @@ def _get_help(cls) -> str: """ raise NotImplementedError() - def __init_subclass__(cls, type=None, default=None, **kw): + def __init_subclass__(cls, type=None, **kw): assert type in _CASTERS, f"Unsupported variable type: {type}" cls.type = type cls._value = None From c238647c3089ab60e78bbc6877f36ceb5aa4d4ce Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 11:32:16 +0300 Subject: [PATCH 03/27] REFACTOR-#2059: Simplify getter for parameter Signed-off-by: Vasilij Litvinov --- modin/config/__main__.py | 2 +- modin/config/envvars.py | 6 +-- modin/config/pubsub.py | 86 +++++++++++++++++++--------------------- 3 files changed, 45 insertions(+), 49 deletions(-) diff --git a/modin/config/__main__.py b/modin/config/__main__.py index 07c56415918..999762da70f 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -9,7 +9,7 @@ def get_help(): and obj is not EnvironmentVariable and obj is not Publisher ): - print(f"{obj._get_help()}\nvalue={obj.value}") + print(f"{obj.get_help()}\nvalue={obj.get()}") if __name__ == "__main__": diff --git a/modin/config/envvars.py b/modin/config/envvars.py index e6535b1b379..946f0005689 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -15,7 +15,7 @@ from textwrap import dedent import warnings -from .pubsub import Publisher, _TYPE_HELP +from .pubsub import Publisher, _TYPE_PARAMS class EnvironmentVariable(Publisher, type=str): @@ -30,8 +30,8 @@ def _get_raw_from_config(cls) -> str: return os.environ[cls.varname] @classmethod - def _get_help(cls) -> str: - help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide a {_TYPE_HELP[cls.type]}" + def get_help(cls) -> str: + help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide a {_TYPE_PARAMS[cls.type].help}" if cls.choices: help += f" (valid examples are: {', '.join(cls.choices)})" return help diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index f394ca3eae4..fdafec3fc85 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -15,54 +15,34 @@ import typing -class Caster(typing.NamedTuple): +class TypeDescriptor(typing.NamedTuple): decode: typing.Callable[[str], object] - normalize: typing.Callable[[object], object] = lambda x: x - encode: typing.Callable[[object], str] = str + normalize: typing.Callable[[object], object] + help: str -_CASTERS = { - str: Caster( +_TYPE_PARAMS = { + str: TypeDescriptor( decode=lambda value: value.strip().title(), normalize=lambda value: value.strip().title(), + help="string", ), - bool: Caster( + bool: TypeDescriptor( decode=lambda value: value.strip().lower() in {"true", "yes", "1"}, normalize=bool, + help="boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", + ), + int: TypeDescriptor( + decode=lambda value: int(value.strip()), normalize=int, help="integer value" ), - int: Caster(decode=lambda value: int(value.strip()), normalize=int), -} - -_TYPE_HELP = { - str: "string", - bool: "boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", - int: "integer value", } - -class _ValueMeta(type): - """ - Metaclass is needed to make classmethod property - """ - - @property - def value(cls): - if cls._value is None: - # get the value from env - try: - raw = cls._get_raw_from_config() - except KeyError: - cls._value = cls.default - else: - cls._value = _CASTERS[cls.type].decode(raw) - return cls._value - - @value.setter - def value(cls, value): - cls._check_callbacks(cls._put_nocallback(value)) +# special marker to distinguish unset value from None value +# as someone may want to use None as a real value for a parameter +_UNSET = object() -class Publisher(object, metaclass=_ValueMeta): +class Publisher(object): """ Base class describing interface for configuration entities """ @@ -82,16 +62,16 @@ def _get_raw_from_config(cls) -> str: raise NotImplementedError() @classmethod - def _get_help(cls) -> str: + def get_help(cls) -> str: """ Generate user-presentable help for the option """ raise NotImplementedError() - def __init_subclass__(cls, type=None, **kw): - assert type in _CASTERS, f"Unsupported variable type: {type}" + def __init_subclass__(cls, type, **kw): + assert type in _TYPE_PARAMS, f"Unsupported variable type: {type}" cls.type = type - cls._value = None + cls._value = _UNSET cls._subs = [] cls._once = collections.defaultdict(list) super().__init_subclass__(**kw) @@ -101,25 +81,41 @@ def subscribe(cls, callback): cls._subs.append(callback) callback(cls) + @classmethod + def get(cls): + if cls._value is _UNSET: + # get the value from env + try: + raw = cls._get_raw_from_config() + except KeyError: + cls._value = cls.default + else: + cls._value = _TYPE_PARAMS[cls.type].decode(raw) + return cls._value + + @classmethod + def put(cls, value): + cls._check_callbacks(cls._put_nocallback(value)) + @classmethod def once(cls, onvalue, callback): - onvalue = _CASTERS[cls.type].normalize(onvalue) - if onvalue == cls.value: + onvalue = _TYPE_PARAMS[cls.type].normalize(onvalue) + if onvalue == cls.get(): callback(cls) else: cls._once[onvalue].append(callback) @classmethod def _put_nocallback(cls, value): - value = _CASTERS[cls.type].normalize(value.title) - oldvalue, cls.value = cls.value, value + value = _TYPE_PARAMS[cls.type].normalize(value.title) + oldvalue, cls._value = cls.get(), value return oldvalue @classmethod def _check_callbacks(cls, oldvalue): - if oldvalue == cls.value: + if oldvalue == cls.get(): return for callback in cls._subs: callback(cls) - for callback in cls._once.pop(cls.value, ()): + for callback in cls._once.pop(cls.get(), ()): callback(cls) From 0ae78dab146e5a23d3b85c0ccedd48170b0f3150 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 11:33:21 +0300 Subject: [PATCH 04/27] REFACTOR-#2059: Rename Publisher -> Parameter for clarity Signed-off-by: Vasilij Litvinov --- modin/config/__init__.py | 2 +- modin/config/__main__.py | 4 ++-- modin/config/envvars.py | 4 ++-- modin/config/pubsub.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modin/config/__init__.py b/modin/config/__init__.py index 33ce8a085e3..0f14acc21e9 100644 --- a/modin/config/__init__.py +++ b/modin/config/__init__.py @@ -11,5 +11,5 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -from .pubsub import Publisher +from .pubsub import Parameter from .envvars import * diff --git a/modin/config/__main__.py b/modin/config/__main__.py index 999762da70f..a512af895a2 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -5,9 +5,9 @@ def get_help(): for obj in globals().values(): if ( isinstance(obj, type) - and issubclass(obj, Publisher) + and issubclass(obj, Parameter) and obj is not EnvironmentVariable - and obj is not Publisher + and obj is not Parameter ): print(f"{obj.get_help()}\nvalue={obj.get()}") diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 946f0005689..9006719ccf4 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -15,10 +15,10 @@ from textwrap import dedent import warnings -from .pubsub import Publisher, _TYPE_PARAMS +from .pubsub import Parameter, _TYPE_PARAMS -class EnvironmentVariable(Publisher, type=str): +class EnvironmentVariable(Parameter, type=str): """ Base class for environment variables-based configuration """ diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index fdafec3fc85..5c1752774bc 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -42,7 +42,7 @@ class TypeDescriptor(typing.NamedTuple): _UNSET = object() -class Publisher(object): +class Parameter(object): """ Base class describing interface for configuration entities """ From cd3610e6a4ec533062d762e712d30b604597bb67 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 11:36:35 +0300 Subject: [PATCH 05/27] REFACTOR-#2059: Improve single vs plural in warning Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 9006719ccf4..bb1c34b893f 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -176,7 +176,9 @@ def _check_vars(): unknown = found_names - valid_names if unknown: warnings.warn( - f"Found unknown environment variables, please check their spelling: {', '.join(sorted(unknown))}" + f"Found unknown environment variable{'s' if len(unknown) > 1 else ''}," + f" please check {'their' if len(unknown) > 1 else 'its'} spelling: " + + ", ".join(sorted(unknown)) ) From 0247ad35aad9dadc4219160abd8f8d8080831f7f Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 11:43:24 +0300 Subject: [PATCH 06/27] REFACTOR-#2059: Fix defaults for MODIN_ENGINE and MODIN_BACKEND Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 51 ++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index bb1c34b893f..d64c2b7483d 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -14,6 +14,7 @@ import os from textwrap import dedent import warnings +from packaging import version from .pubsub import Parameter, _TYPE_PARAMS @@ -37,6 +38,14 @@ def get_help(cls) -> str: return help +class IsDebug(EnvironmentVariable, type=bool): + """ + Forces Modin engine to be "Python" unless specified by $MODIN_ENGINE + """ + + varname = "MODIN_DEBUG" + + class Engine(EnvironmentVariable, type=str): """ Distribution engine to run queries by @@ -45,6 +54,39 @@ class Engine(EnvironmentVariable, type=str): varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python") + def __compute_default(): + if IsDebug.get(): + return "Python" + try: + import ray + + except ImportError: + pass + else: + if version.parse(ray.__version__) != version.parse("0.8.7"): + raise ImportError( + "Please `pip install modin[ray]` to install compatible Ray version." + ) + return "Ray" + try: + import dask + import distributed + + except ImportError: + raise ImportError( + "Please `pip install modin[ray]` or `modin[dask]` to install an engine" + ) + if version.parse(dask.__version__) < version.parse("2.1.0") or version.parse( + distributed.__version__ + ) < version.parse("2.3.2"): + raise ImportError( + "Please `pip install modin[dask]` to install compatible Dask version." + ) + return "Dask" + + default = __compute_default() + del __compute_default + class Backend(EnvironmentVariable, type=str): """ @@ -52,17 +94,10 @@ class Backend(EnvironmentVariable, type=str): """ varname = "MODIN_BACKEND" + default = "Pandas" choices = ("Pandas", "OmniSci", "Pyarrow") -class IsDebug(EnvironmentVariable, type=bool): - """ - Forces Modin engine to be "Python" unless specified by $MODIN_ENGINE - """ - - varname = "MODIN_DEBUG" - - class IsExperimental(EnvironmentVariable, type=bool): """ Turns on experimental features From efb007e2146ac0f7545fad04360708ac38de8e93 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 19:02:10 +0300 Subject: [PATCH 07/27] REFACTOR-#2059: Make computed default be on demand Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 6 ++---- modin/config/pubsub.py | 6 +++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index d64c2b7483d..53922729344 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -54,7 +54,8 @@ class Engine(EnvironmentVariable, type=str): varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python") - def __compute_default(): + @classmethod + def _get_default(cls): if IsDebug.get(): return "Python" try: @@ -84,9 +85,6 @@ def __compute_default(): ) return "Dask" - default = __compute_default() - del __compute_default - class Backend(EnvironmentVariable, type=str): """ diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index 5c1752774bc..e2a5618c36d 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -81,6 +81,10 @@ def subscribe(cls, callback): cls._subs.append(callback) callback(cls) + @classmethod + def _get_default(cls): + return cls.default + @classmethod def get(cls): if cls._value is _UNSET: @@ -88,7 +92,7 @@ def get(cls): try: raw = cls._get_raw_from_config() except KeyError: - cls._value = cls.default + cls._value = cls._get_default() else: cls._value = _TYPE_PARAMS[cls.type].decode(raw) return cls._value From 425ea3fca6e54f9a38f0e6092e005c683c1f9fca Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 19:13:01 +0300 Subject: [PATCH 08/27] REFACTOR-#2059: Adapt moved test Signed-off-by: Vasilij Litvinov --- modin/config/pubsub.py | 2 +- .../{test_publisher.py => test_parameter.py} | 43 +++++++++++-------- 2 files changed, 27 insertions(+), 18 deletions(-) rename modin/config/test/{test_publisher.py => test_parameter.py} (59%) diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index e2a5618c36d..a0d72adfb34 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -111,7 +111,7 @@ def once(cls, onvalue, callback): @classmethod def _put_nocallback(cls, value): - value = _TYPE_PARAMS[cls.type].normalize(value.title) + value = _TYPE_PARAMS[cls.type].normalize(value) oldvalue, cls._value = cls.get(), value return oldvalue diff --git a/modin/config/test/test_publisher.py b/modin/config/test/test_parameter.py similarity index 59% rename from modin/config/test/test_publisher.py rename to modin/config/test/test_parameter.py index c7999fad7b7..5977f41e291 100644 --- a/modin/config/test/test_publisher.py +++ b/modin/config/test/test_parameter.py @@ -12,42 +12,51 @@ # governing permissions and limitations under the License. from collections import defaultdict +import pytest -from modin import Publisher +from modin.config.pubsub import Parameter -def test_equals(): - pub = Publisher("name", "value1") - assert pub.get() == "Value1" +@pytest.fixture +def prefilled_parameter(): + class Prefilled(Parameter, type=str): + @classmethod + def _get_raw_from_config(cls): + return "init" - pub.put("value2") - assert pub.get() == "Value2" + return Prefilled -def test_triggers(): +def test_equals(prefilled_parameter): + assert prefilled_parameter.get() == "Init" + + prefilled_parameter.put("value2") + assert prefilled_parameter.get() == "Value2" + + +def test_triggers(prefilled_parameter): results = defaultdict(int) callbacks = [] def make_callback(name, res=results): - def callback(p: Publisher): + def callback(p: Parameter): res[name] += 1 # keep reference to callbacks so they won't be removed by GC callbacks.append(callback) return callback - pub = Publisher("name", "init") - pub.once("init", make_callback("init")) + prefilled_parameter.once("init", make_callback("init")) assert results["init"] == 1 - pub.once("never", make_callback("never")) - pub.once("once", make_callback("once")) - pub.subscribe(make_callback("subscribe")) + prefilled_parameter.once("never", make_callback("never")) + prefilled_parameter.once("once", make_callback("once")) + prefilled_parameter.subscribe(make_callback("subscribe")) - pub.put("multi") - pub.put("once") - pub.put("multi") - pub.put("once") + prefilled_parameter.put("multi") + prefilled_parameter.put("once") + prefilled_parameter.put("multi") + prefilled_parameter.put("once") expected = [("init", 1), ("never", 0), ("once", 1), ("subscribe", 5)] for name, val in expected: From f5092c037e28479c5220c574705dc7ba6a9976bd Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 20:02:46 +0300 Subject: [PATCH 09/27] REFACTOR-#2059: Add parameter value validation Signed-off-by: Vasilij Litvinov --- modin/config/__main__.py | 2 +- modin/config/pubsub.py | 17 ++++++++++++++- modin/config/test/test_parameter.py | 34 +++++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/modin/config/__main__.py b/modin/config/__main__.py index a512af895a2..dd1fbfed6f2 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -9,7 +9,7 @@ def get_help(): and obj is not EnvironmentVariable and obj is not Parameter ): - print(f"{obj.get_help()}\nvalue={obj.get()}") + print(f"{obj.get_help()}\nCurrent value={obj.get()}") if __name__ == "__main__": diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index a0d72adfb34..c966f8464d6 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -18,6 +18,7 @@ class TypeDescriptor(typing.NamedTuple): decode: typing.Callable[[str], object] normalize: typing.Callable[[object], object] + verify: typing.Callable[[object], bool] help: str @@ -25,15 +26,25 @@ class TypeDescriptor(typing.NamedTuple): str: TypeDescriptor( decode=lambda value: value.strip().title(), normalize=lambda value: value.strip().title(), + verify=lambda value: True, help="string", ), bool: TypeDescriptor( decode=lambda value: value.strip().lower() in {"true", "yes", "1"}, normalize=bool, + verify=lambda value: isinstance(value, bool) + or ( + isinstance(value, str) + and value.strip().lower() in {"true", "yes", "1", "false", "no", "0"} + ), help="boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", ), int: TypeDescriptor( - decode=lambda value: int(value.strip()), normalize=int, help="integer value" + decode=lambda value: int(value.strip()), + normalize=int, + verify=lambda value: isinstance(value, int) + or (isinstance(value, str) and value.strip().isdigit()), + help="integer value", ), } @@ -94,6 +105,8 @@ def get(cls): except KeyError: cls._value = cls._get_default() else: + if not _TYPE_PARAMS[cls.type].verify(raw): + raise ValueError(f"Unsupported raw value: {raw}") cls._value = _TYPE_PARAMS[cls.type].decode(raw) return cls._value @@ -111,6 +124,8 @@ def once(cls, onvalue, callback): @classmethod def _put_nocallback(cls, value): + if not _TYPE_PARAMS[cls.type].verify(value): + raise ValueError(f"Unsupported value: {value}") value = _TYPE_PARAMS[cls.type].normalize(value) oldvalue, cls._value = cls.get(), value return oldvalue diff --git a/modin/config/test/test_parameter.py b/modin/config/test/test_parameter.py index 5977f41e291..77502ec64be 100644 --- a/modin/config/test/test_parameter.py +++ b/modin/config/test/test_parameter.py @@ -17,16 +17,20 @@ from modin.config.pubsub import Parameter -@pytest.fixture -def prefilled_parameter(): - class Prefilled(Parameter, type=str): +def make_prefilled(vartype, varinit): + class Prefilled(Parameter, type=vartype): @classmethod def _get_raw_from_config(cls): - return "init" + return varinit return Prefilled +@pytest.fixture +def prefilled_parameter(): + return make_prefilled(str, "init") + + def test_equals(prefilled_parameter): assert prefilled_parameter.get() == "Init" @@ -61,3 +65,25 @@ def callback(p: Parameter): expected = [("init", 1), ("never", 0), ("once", 1), ("subscribe", 5)] for name, val in expected: assert results[name] == val, "{} has wrong count".format(name) + + +@pytest.mark.parametrize( + "parameter,good,bad", + [ + (make_prefilled(bool, "false"), {"1": True, False: False}, ["nope", 2]), + (make_prefilled(int, "10"), {" 15\t": 15, 25: 25}, ["-10", 1.0, "foo"]), + ], +) +def test_validation(parameter, good, bad): + for inval, outval in good.items(): + parameter.put(inval) + assert parameter.get() == outval + for inval in bad: + with pytest.raises(ValueError): + parameter.put(inval) + +@pytest.mark.parametrize("vartype", [bool, int]) +def test_init_validation(vartype): + parameter = make_prefilled(vartype, "bad value") + with pytest.raises(ValueError): + parameter.get() From 227bc26456ad26b4f8e0e6f53d76b360c44f381e Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 20:14:32 +0300 Subject: [PATCH 10/27] REFACTOR-#2059: Add tests for envvars Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 2 +- modin/config/test/test_envvars.py | 62 +++++++++++++++++++++++++++++ modin/config/test/test_parameter.py | 1 + 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 modin/config/test/test_envvars.py diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 53922729344..404e284836c 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -34,7 +34,7 @@ def _get_raw_from_config(cls) -> str: def get_help(cls) -> str: help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide a {_TYPE_PARAMS[cls.type].help}" if cls.choices: - help += f" (valid examples are: {', '.join(cls.choices)})" + help += f" (valid examples are: {', '.join(str(c) for c in cls.choices)})" return help diff --git a/modin/config/test/test_envvars.py b/modin/config/test/test_envvars.py new file mode 100644 index 00000000000..bf4ce04a9aa --- /dev/null +++ b/modin/config/test/test_envvars.py @@ -0,0 +1,62 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import os +import pytest + +from modin.config.envvars import EnvironmentVariable, _check_vars + + +@pytest.fixture +def make_unknown_env(): + varname = "MODIN_UNKNOWN" + os.environ[varname] = "foo" + yield varname + del os.environ[varname] + + +@pytest.fixture +def make_custom_envvar(): + class CustomVar(EnvironmentVariable, type=str): + """ custom var """ + + default = 10 + varname = "MODIN_CUSTOM" + choices = (1, 5, 10) + + return CustomVar + + +@pytest.fixture +def set_custom_envvar(make_custom_envvar): + os.environ[make_custom_envvar.varname] = " custom " + yield "Custom" + del os.environ[make_custom_envvar.varname] + + +def test_unknown(make_unknown_env): + with pytest.warns(UserWarning, match=f"Found unknown .*{make_unknown_env}.*"): + _check_vars() + + +def test_custom_default(make_custom_envvar): + assert make_custom_envvar.get() == 10 + + +def test_custom_set(make_custom_envvar, set_custom_envvar): + assert make_custom_envvar.get() == set_custom_envvar + + +def test_custom_help(make_custom_envvar): + assert "MODIN_CUSTOM" in make_custom_envvar.get_help() + assert "custom var" in make_custom_envvar.get_help() diff --git a/modin/config/test/test_parameter.py b/modin/config/test/test_parameter.py index 77502ec64be..f033892021e 100644 --- a/modin/config/test/test_parameter.py +++ b/modin/config/test/test_parameter.py @@ -82,6 +82,7 @@ def test_validation(parameter, good, bad): with pytest.raises(ValueError): parameter.put(inval) + @pytest.mark.parametrize("vartype", [bool, int]) def test_init_validation(vartype): parameter = make_prefilled(vartype, "bad value") From f757eec97b42fbd380f964c4f8018dba60f2a1ed Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 20:24:49 +0300 Subject: [PATCH 11/27] REFACTOR-#2059: Reduce "import *" effect Signed-off-by: Vasilij Litvinov --- modin/config/__main__.py | 4 ++-- modin/config/envvars.py | 6 ++++++ modin/config/pubsub.py | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/modin/config/__main__.py b/modin/config/__main__.py index dd1fbfed6f2..db9f6fb4880 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -1,7 +1,7 @@ from . import * -def get_help(): +def print_config_help(): for obj in globals().values(): if ( isinstance(obj, type) @@ -13,4 +13,4 @@ def get_help(): if __name__ == "__main__": - get_help() + print_config_help() diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 404e284836c..1809b20d463 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -216,3 +216,9 @@ def _check_vars(): _check_vars() + +__all__ = [ + name + for name, obj in globals().items() + if isinstance(obj, type) and issubclass(obj, EnvironmentVariable) +] diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index c966f8464d6..6d253302e83 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -138,3 +138,6 @@ def _check_callbacks(cls, oldvalue): callback(cls) for callback in cls._once.pop(cls.get(), ()): callback(cls) + + +__all__ = ["Parameter"] From 86127ded9c9871b51e1003e0b20f8aa28a84cbbe Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 20:33:58 +0300 Subject: [PATCH 12/27] REFACTOR-#2059: Beautify help a bit Signed-off-by: Vasilij Litvinov --- modin/__init__.py | 103 +----------------- modin/config/__main__.py | 5 +- modin/config/envvars.py | 10 +- modin/config/pubsub.py | 6 +- modin/data_management/factories/dispatcher.py | 23 ++-- modin/data_management/factories/factories.py | 4 +- .../factories/test/test_dispatcher.py | 15 +-- modin/engines/base/io/file_reader.py | 4 +- modin/experimental/cloud/meta_magic.py | 6 +- .../omnisci_on_ray/test/test_dataframe.py | 8 +- modin/experimental/pandas/numpy_wrap.py | 6 +- modin/experimental/pandas/test/test_io_exp.py | 8 +- modin/pandas/__init__.py | 16 ++- modin/pandas/test/test_api.py | 3 - modin/pandas/test/test_io.py | 8 +- modin/utils.py | 3 +- 16 files changed, 61 insertions(+), 167 deletions(-) diff --git a/modin/__init__.py b/modin/__init__.py index 9bed5a61dca..fe32a0e331b 100644 --- a/modin/__init__.py +++ b/modin/__init__.py @@ -35,95 +35,6 @@ def custom_formatwarning(msg, category, *args, **kwargs): ) -def get_execution_engine(): - # In the future, when there are multiple engines and different ways of - # backing the DataFrame, there will have to be some changed logic here to - # decide these things. In the meantime, we will use the currently supported - # execution engine + backing (Pandas + Ray). - if "MODIN_ENGINE" in os.environ: - # .title allows variants like ray, RAY, Ray - return os.environ["MODIN_ENGINE"].title() - else: - if "MODIN_DEBUG" in os.environ: - return "Python" - else: - try: - import ray - - except ImportError: - pass - else: - if version.parse(ray.__version__) < version.parse("1.0.0"): - raise ImportError( - "Please `pip install modin[ray]` to install compatible Ray version." - ) - return "Ray" - try: - import dask - import distributed - - except ImportError: - raise ImportError( - "Please `pip install modin[ray]` or `modin[dask]` to install an engine" - ) - else: - if version.parse(dask.__version__) < version.parse( - "2.1.0" - ) or version.parse(distributed.__version__) < version.parse("2.3.2"): - raise ImportError( - "Please `pip install modin[dask]` to install compatible Dask version." - ) - return "Dask" - - -def get_partition_format(): - # See note above about engine + backing. - return os.environ.get("MODIN_BACKEND", "Pandas").title() - - -class Publisher(object): - def __init__(self, name, value): - self.name = name - self.__value = value.title() - self.__subs = [] - self.__once = collections.defaultdict(list) - - def subscribe(self, callback): - self.__subs.append(callback) - callback(self) - - def once(self, onvalue, callback): - onvalue = onvalue.title() - if onvalue == self.__value: - callback(self) - else: - self.__once[onvalue].append(callback) - - def get(self): - return self.__value - - def _put_nocallback(self, value): - value = value.title() # normalize the value - oldvalue, self.__value = self.__value, value - return oldvalue - - def _check_callbacks(self, oldvalue): - if oldvalue == self.__value: - return - for callback in self.__subs: - callback(self) - once = self.__once.pop(self.__value, ()) - for callback in once: - callback(self) - - def put(self, value): - self._check_callbacks(self._put_nocallback(value)) - - -execution_engine = Publisher(name="execution_engine", value=get_execution_engine()) -partition_format = Publisher(name="partition_format", value=get_partition_format()) - - def set_backends(engine=None, partition=None): """ Method to set the _pair_ of execution engine and partition format simultaneously. @@ -132,24 +43,22 @@ def set_backends(engine=None, partition=None): The method returns pair of old values, so it is easy to return back. """ + from .config import Engine, Backend + old_engine, old_partition = None, None # defer callbacks until both entities are set if engine is not None: - old_engine = execution_engine._put_nocallback(engine) + old_engine = Engine._put_nocallback(engine) if partition is not None: - old_partition = partition_format._put_nocallback(partition) + old_partition = Backend._put_nocallback(partition) # execute callbacks if something was changed if old_engine is not None: - execution_engine._check_callbacks(old_engine) + Engine._check_callbacks(old_engine) if old_partition is not None: - partition_format._check_callbacks(old_partition) + Backend._check_callbacks(old_partition) return old_engine, old_partition -# We don't want these used outside of this file. -del get_execution_engine -del get_partition_format - __version__ = get_versions()["version"] del get_versions diff --git a/modin/config/__main__.py b/modin/config/__main__.py index db9f6fb4880..d5894869aea 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -2,14 +2,15 @@ def print_config_help(): - for obj in globals().values(): + for objname in sorted(globals()): + obj = globals()[objname] if ( isinstance(obj, type) and issubclass(obj, Parameter) and obj is not EnvironmentVariable and obj is not Parameter ): - print(f"{obj.get_help()}\nCurrent value={obj.get()}") + print(f"{obj.get_help()}\n\tCurrent value: {obj.get()}") if __name__ == "__main__": diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 1809b20d463..8596fa1fac8 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -32,7 +32,7 @@ def _get_raw_from_config(cls) -> str: @classmethod def get_help(cls) -> str: - help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide a {_TYPE_PARAMS[cls.type].help}" + help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide {_TYPE_PARAMS[cls.type].help}" if cls.choices: help += f" (valid examples are: {', '.join(str(c) for c in cls.choices)})" return help @@ -64,7 +64,7 @@ def _get_default(cls): except ImportError: pass else: - if version.parse(ray.__version__) != version.parse("0.8.7"): + if version.parse(ray.__version__) < version.parse("1.0.0"): raise ImportError( "Please `pip install modin[ray]` to install compatible Ray version." ) @@ -216,9 +216,3 @@ def _check_vars(): _check_vars() - -__all__ = [ - name - for name, obj in globals().items() - if isinstance(obj, type) and issubclass(obj, EnvironmentVariable) -] diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index 6d253302e83..fe1379a0a8b 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -27,7 +27,7 @@ class TypeDescriptor(typing.NamedTuple): decode=lambda value: value.strip().title(), normalize=lambda value: value.strip().title(), verify=lambda value: True, - help="string", + help="a string", ), bool: TypeDescriptor( decode=lambda value: value.strip().lower() in {"true", "yes", "1"}, @@ -37,14 +37,14 @@ class TypeDescriptor(typing.NamedTuple): isinstance(value, str) and value.strip().lower() in {"true", "yes", "1", "false", "no", "0"} ), - help="boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", + help="a boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", ), int: TypeDescriptor( decode=lambda value: int(value.strip()), normalize=int, verify=lambda value: isinstance(value, int) or (isinstance(value, str) and value.strip().isdigit()), - help="integer value", + help="an integer value", ), } diff --git a/modin/data_management/factories/dispatcher.py b/modin/data_management/factories/dispatcher.py index 7a9c130ab60..93771fde33e 100644 --- a/modin/data_management/factories/dispatcher.py +++ b/modin/data_management/factories/dispatcher.py @@ -13,8 +13,9 @@ import os -from modin import execution_engine, partition_format +from modin.config import Engine, Backend, IsExperimental from modin.data_management.factories import factories +from modin.utils import get_current_backend class FactoryNotFoundError(AttributeError): @@ -62,17 +63,11 @@ def get_engine(cls) -> factories.BaseFactory: @classmethod def _update_engine(cls, _): - if os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True": - factory_fmt, experimental = "Experimental{}On{}Factory", True - else: - factory_fmt, experimental = "{}On{}Factory", False - factory_name = factory_fmt.format( - partition_format.get(), execution_engine.get() - ) + factory_name = get_current_backend() + "Factory" try: cls.__engine = getattr(factories, factory_name) except AttributeError: - if not experimental: + if not IsExperimental.get(): # allow missing factories in experimenal mode only if hasattr(factories, "Experimental" + factory_name): msg = ( @@ -83,11 +78,9 @@ def _update_engine(cls, _): msg = ( "Cannot find a factory for partition '{}' and execution engine '{}'. " "Potential reason might be incorrect environment variable value for " - "MODIN_BACKEND or MODIN_ENGINE" + f"{Backend.varname} or {Engine.varname}" ) - raise FactoryNotFoundError( - msg.format(partition_format.get(), execution_engine.get()) - ) + raise FactoryNotFoundError(msg.format(Backend.get(), Engine.get())) cls.__engine = StubFactory.set_failing_name(factory_name) else: cls.__engine.prepare() @@ -181,5 +174,5 @@ def to_pickle(cls, *args, **kwargs): return cls.__engine._to_pickle(*args, **kwargs) -execution_engine.subscribe(EngineDispatcher._update_engine) -partition_format.subscribe(EngineDispatcher._update_engine) +Engine.subscribe(EngineDispatcher._update_engine) +Backend.subscribe(EngineDispatcher._update_engine) diff --git a/modin/data_management/factories/factories.py b/modin/data_management/factories/factories.py index 2f475cc4e23..d0420bbc559 100644 --- a/modin/data_management/factories/factories.py +++ b/modin/data_management/factories/factories.py @@ -15,7 +15,7 @@ import typing import re -from modin import execution_engine +from modin.config import Engine from modin.engines.base.io import BaseIO import pandas @@ -191,7 +191,7 @@ def prepare(cls): class ExperimentalBaseFactory(BaseFactory): @classmethod def _read_sql(cls, **kwargs): - if execution_engine.get() != "Ray": + if Engine.get() != "Ray": if "partition_column" in kwargs: if kwargs["partition_column"] is not None: warnings.warn( diff --git a/modin/data_management/factories/test/test_dispatcher.py b/modin/data_management/factories/test/test_dispatcher.py index 8fe4e0559b1..4f423c1f98b 100644 --- a/modin/data_management/factories/test/test_dispatcher.py +++ b/modin/data_management/factories/test/test_dispatcher.py @@ -13,7 +13,8 @@ import pytest -from modin import execution_engine, partition_format, set_backends +from modin.config import Engine +from modin import set_backends from modin.data_management.factories.dispatcher import ( EngineDispatcher, @@ -78,21 +79,21 @@ def test_default_engine(): def test_engine_switch(): - execution_engine.put("Test") + Engine.put("Test") assert EngineDispatcher.get_engine() == PandasOnTestFactory assert EngineDispatcher.get_engine().io_cls == "Foo" - execution_engine.put("Python") # revert engine to default + Engine.put("Python") # revert engine to default - partition_format.put("Test") + Engine.put("Test") assert EngineDispatcher.get_engine() == TestOnPythonFactory assert EngineDispatcher.get_engine().io_cls == "Bar" - partition_format.put("Pandas") # revert engine to default + Engine.put("Pandas") # revert engine to default def test_engine_wrong_factory(): with pytest.raises(FactoryNotFoundError): - execution_engine.put("BadEngine") - execution_engine.put("Python") # revert engine to default + Engine.put("BadEngine") + Engine.put("Python") # revert engine to default def test_set_backends(): diff --git a/modin/engines/base/io/file_reader.py b/modin/engines/base/io/file_reader.py index 5f53af6bac8..8a8ea6bd1ef 100644 --- a/modin/engines/base/io/file_reader.py +++ b/modin/engines/base/io/file_reader.py @@ -13,7 +13,7 @@ import os import re -from modin import partition_format +from modin.config import Backend S3_ADDRESS_REGEX = re.compile("[sS]3://(.*?)/(.*)") NOT_IMPLEMENTED_MESSAGE = "Implement in children classes!" @@ -29,7 +29,7 @@ def read(cls, *args, **kwargs): query_compiler = cls._read(*args, **kwargs) # TODO (devin-petersohn): Make this section more general for non-pandas kernel # implementations. - if partition_format.get().lower() != "pandas": + if Backend.get() != "Pandas": raise NotImplementedError("FIXME") import pandas diff --git a/modin/experimental/cloud/meta_magic.py b/modin/experimental/cloud/meta_magic.py index 8683ca8b2f5..4149c03f30c 100644 --- a/modin/experimental/cloud/meta_magic.py +++ b/modin/experimental/cloud/meta_magic.py @@ -15,7 +15,7 @@ import inspect import types -from modin import execution_engine +from modin.config import Engine from modin.data_management.factories import REMOTE_ENGINES # the attributes that must be alwasy taken from a local part of dual-nature class, @@ -154,11 +154,11 @@ def __new__(cls, *a, **kw): _KNOWN_DUALS[local_cls] = result def update_class(_): - if execution_engine.get() in REMOTE_ENGINES: + if Engine.get() in REMOTE_ENGINES: from . import rpyc_proxy result.__real_cls__ = getattr(rpyc_proxy, rpyc_wrapper_name)(result) else: result.__real_cls__ = result - execution_engine.subscribe(update_class) + Engine.subscribe(update_class) diff --git a/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py b/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py index 3fca2092b7f..3c1c3ac3bb9 100644 --- a/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py +++ b/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py @@ -16,9 +16,11 @@ import numpy as np import pytest -os.environ["MODIN_EXPERIMENTAL"] = "True" -os.environ["MODIN_ENGINE"] = "ray" -os.environ["MODIN_BACKEND"] = "omnisci" +from modin.config import IsExperimental, Engine, Backend + +IsExperimental.put(True) +Engine.put("ray") +Backend.put("omnisci") import modin.pandas as pd from modin.pandas.test.utils import ( diff --git a/modin/experimental/pandas/numpy_wrap.py b/modin/experimental/pandas/numpy_wrap.py index 1c4b1fc21bf..d6a0d25e111 100644 --- a/modin/experimental/pandas/numpy_wrap.py +++ b/modin/experimental/pandas/numpy_wrap.py @@ -26,7 +26,7 @@ else: import types import copyreg - from modin import execution_engine + from modin.config import Engine from modin.data_management.factories import REMOTE_ENGINES import modin import pandas @@ -62,7 +62,7 @@ class InterceptedNumpy(types.ModuleType): def __init__(self): self.__own_attrs__ = set(type(self).__dict__.keys()) - execution_engine.subscribe(self.__update_engine) + Engine.subscribe(self.__update_engine) def __swap_numpy(self, other_numpy=None): self.__current_numpy, self.__prev_numpy = ( @@ -79,7 +79,7 @@ def __swap_numpy(self, other_numpy=None): self.__has_to_warn = False def __update_engine(self, _): - if execution_engine.get() in REMOTE_ENGINES: + if Engine.get() in REMOTE_ENGINES: from modin.experimental.cloud import get_connection self.__swap_numpy(get_connection().modules["numpy"]) diff --git a/modin/experimental/pandas/test/test_io_exp.py b/modin/experimental/pandas/test/test_io_exp.py index 78be142a6b1..7c84ca0073a 100644 --- a/modin/experimental/pandas/test/test_io_exp.py +++ b/modin/experimental/pandas/test/test_io_exp.py @@ -11,10 +11,10 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os import pandas import pytest import modin.experimental.pandas as pd +from modin.config import Engine from modin.pandas.test.test_io import ( # noqa: F401 df_equals, make_sql_connection, @@ -22,11 +22,11 @@ @pytest.mark.skipif( - os.environ.get("MODIN_ENGINE", "Ray").title() == "Dask", + Engine.get() == "Dask", reason="Dask does not have experimental API", ) def test_from_sql_distributed(make_sql_connection): # noqa: F811 - if os.environ.get("MODIN_ENGINE", "") == "Ray": + if Engine.get() == "Ray": filename = "test_from_sql_distributed.db" table = "test_from_sql_distributed" conn = make_sql_connection(filename, table) @@ -45,7 +45,7 @@ def test_from_sql_distributed(make_sql_connection): # noqa: F811 @pytest.mark.skipif( - os.environ.get("MODIN_ENGINE", "Ray").title() == "Dask", + Engine.get() == "Dask", reason="Dask does not have experimental API", ) def test_from_sql_defaults(make_sql_connection): # noqa: F811 diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py index d93d7a25ef6..bde417abedf 100644 --- a/modin/pandas/__init__.py +++ b/modin/pandas/__init__.py @@ -87,7 +87,7 @@ import os import multiprocessing -from .. import execution_engine, partition_format, Publisher +from modin.config import Engine, Backend, Parameter # Set this so that Pandas doesn't try to multithread by itself os.environ["OMP_NUM_THREADS"] = "1" @@ -102,8 +102,8 @@ } # engines that don't require initialization, useful for unit tests -def _update_engine(publisher: Publisher): - global DEFAULT_NPARTITIONS, dask_client, num_cpus, partition_format +def _update_engine(publisher: Parameter): + global DEFAULT_NPARTITIONS, dask_client, num_cpus if publisher.get() == "Ray": import ray @@ -111,7 +111,7 @@ def _update_engine(publisher: Publisher): # With OmniSci backend there is only a single worker per node # and we allow it to work on all cores. - if partition_format.get() == "Omnisci": + if Backend.get() == "Omnisci": os.environ["MODIN_CPUS"] = "1" os.environ["OMP_NUM_THREADS"] = str(multiprocessing.cpu_count()) if _is_first_update.get("Ray", True): @@ -157,14 +157,12 @@ def init_remote_ray(partition): override_redis_password=ray_constants.REDIS_DEFAULT_PASSWORD, ) - init_remote_ray(partition_format.get()) + init_remote_ray(Backend.get()) # import EngineDispatcher here to initialize IO class # so it doesn't skew read_csv() timings later on import modin.data_management.factories.dispatcher # noqa: F401 else: - get_connection().modules["modin"].set_backends( - "Ray", partition_format.get() - ) + get_connection().modules["modin"].set_backends("Ray", Backend.get()) num_cpus = remote_ray.cluster_resources()["CPU"] elif publisher.get() == "Cloudpython": @@ -179,7 +177,7 @@ def init_remote_ray(partition): DEFAULT_NPARTITIONS = max(4, int(num_cpus)) -execution_engine.subscribe(_update_engine) +Engine.subscribe(_update_engine) from .. import __version__ from .concat import concat diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py index 13763ab895a..319ae2bf505 100644 --- a/modin/pandas/test/test_api.py +++ b/modin/pandas/test/test_api.py @@ -56,9 +56,6 @@ def test_top_level_api_equality(): "general", "datetimes", "reshape", - "execution_engine", - "partition_format", - "Publisher", "types", "sys", "initialize_ray", diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py index dcb463bae0c..c70bb075a1f 100644 --- a/modin/pandas/test/test_io.py +++ b/modin/pandas/test/test_io.py @@ -35,9 +35,9 @@ eval_general, ) -from modin import execution_engine +from modin.config import Engine, Backend -if os.environ.get("MODIN_BACKEND", "Pandas").lower() == "pandas": +if Backend.get() == "Pandas": import modin.pandas as pd else: import modin.experimental.pandas as pd @@ -1033,9 +1033,7 @@ def test_from_csv_with_usecols(usecols): df_equals(modin_df, pandas_df) -@pytest.mark.skipif( - execution_engine.get().lower() == "python", reason="Using pandas implementation" -) +@pytest.mark.skipif(Engine.get() == "Python", reason="Using pandas implementation") def test_from_csv_s3(make_csv_file): dataset_url = "s3://noaa-ghcn-pds/csv/1788.csv" pandas_df = pandas.read_csv(dataset_url) diff --git a/modin/utils.py b/modin/utils.py index f2a2afdd37f..cd8971eed72 100644 --- a/modin/utils.py +++ b/modin/utils.py @@ -13,6 +13,7 @@ import pandas import modin +from modin.config import Engine, Backend, IsExperimental def _inherit_docstrings(parent, excluded=[]): @@ -117,4 +118,4 @@ def wrapper(*args, **kwargs): def get_current_backend(): - return f"{modin.partition_format.get()}On{modin.execution_engine.get()}" + return f"{'Experimental' if IsExperimental.get() else ''}{Backend.get()}On{Engine.get()}" From c53031894aaa33860323c2892955c59c2c29a008 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 00:43:52 +0300 Subject: [PATCH 13/27] REFACTOR-#2059: Introduce abstract parameter concept Signed-off-by: Vasilij Litvinov --- modin/config/__init__.py | 4 ++-- modin/config/__main__.py | 10 +++------- modin/config/envvars.py | 6 +++--- modin/config/pubsub.py | 4 +++- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/modin/config/__init__.py b/modin/config/__init__.py index 0f14acc21e9..2ee9ebe5631 100644 --- a/modin/config/__init__.py +++ b/modin/config/__init__.py @@ -11,5 +11,5 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -from .pubsub import Parameter -from .envvars import * +from .pubsub import Parameter # noqa: F401 +from .envvars import * # noqa: F403, F401 diff --git a/modin/config/__main__.py b/modin/config/__main__.py index d5894869aea..b30a3a6f21c 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -1,15 +1,11 @@ -from . import * +from . import * # noqa: F403, F401 +from .pubsub import Parameter def print_config_help(): for objname in sorted(globals()): obj = globals()[objname] - if ( - isinstance(obj, type) - and issubclass(obj, Parameter) - and obj is not EnvironmentVariable - and obj is not Parameter - ): + if isinstance(obj, type) and issubclass(obj, Parameter) and not obj.is_abstract: print(f"{obj.get_help()}\n\tCurrent value: {obj.get()}") diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 8596fa1fac8..6458b1f48d7 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -19,7 +19,7 @@ from .pubsub import Parameter, _TYPE_PARAMS -class EnvironmentVariable(Parameter, type=str): +class EnvironmentVariable(Parameter, type=str, abstract=True): """ Base class for environment variables-based configuration """ @@ -201,9 +201,9 @@ def _check_vars(): valid_names = { obj.varname for obj in globals().values() - if obj is not EnvironmentVariable - and isinstance(obj, type) + if isinstance(obj, type) and issubclass(obj, EnvironmentVariable) + and not obj.is_abstract } found_names = {name for name in os.environ.keys() if name.startswith("MODIN_")} unknown = found_names - valid_names diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index fe1379a0a8b..77aa1cc19b1 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -61,6 +61,7 @@ class Parameter(object): choices: typing.Sequence[str] = None type = str default = None + is_abstract = True @classmethod def _get_raw_from_config(cls) -> str: @@ -79,9 +80,10 @@ def get_help(cls) -> str: """ raise NotImplementedError() - def __init_subclass__(cls, type, **kw): + def __init_subclass__(cls, type, abstract=False, **kw): assert type in _TYPE_PARAMS, f"Unsupported variable type: {type}" cls.type = type + cls.is_abstract = abstract cls._value = _UNSET cls._subs = [] cls._once = collections.defaultdict(list) From 195c0f337e1c9e566096ac8d5096b23a1eeb6751 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 00:44:05 +0300 Subject: [PATCH 14/27] REFACTOR-#2059: Fix flake8 issues Signed-off-by: Vasilij Litvinov --- modin/__init__.py | 3 --- modin/data_management/factories/dispatcher.py | 2 -- modin/utils.py | 1 - 3 files changed, 6 deletions(-) diff --git a/modin/__init__.py b/modin/__init__.py index fe32a0e331b..8bd23fd47b0 100644 --- a/modin/__init__.py +++ b/modin/__init__.py @@ -11,10 +11,7 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os import warnings -from packaging import version -import collections from ._version import get_versions diff --git a/modin/data_management/factories/dispatcher.py b/modin/data_management/factories/dispatcher.py index 93771fde33e..c54c4a49148 100644 --- a/modin/data_management/factories/dispatcher.py +++ b/modin/data_management/factories/dispatcher.py @@ -11,8 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os - from modin.config import Engine, Backend, IsExperimental from modin.data_management.factories import factories from modin.utils import get_current_backend diff --git a/modin/utils.py b/modin/utils.py index cd8971eed72..6cad48f492c 100644 --- a/modin/utils.py +++ b/modin/utils.py @@ -12,7 +12,6 @@ # governing permissions and limitations under the License. import pandas -import modin from modin.config import Engine, Backend, IsExperimental From d13ffcd04c3f72c5aec7a2f7455bf128b25c365e Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 00:56:43 +0300 Subject: [PATCH 15/27] REFACTOR-#2059: Migrate MODIN_EXPERIMENTAL Signed-off-by: Vasilij Litvinov --- modin/conftest.py | 11 +++++------ modin/experimental/pandas/__init__.py | 4 ++-- modin/experimental/pandas/io_exp.py | 6 ++---- modin/pandas/base.py | 4 ++-- modin/pandas/dataframe.py | 4 ++-- modin/pandas/groupby.py | 4 ++-- modin/pandas/series.py | 4 ++-- 7 files changed, 17 insertions(+), 20 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index b1cdf37696c..63257a122e1 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -12,7 +12,8 @@ # governing permissions and limitations under the License. import pytest -import os + +from modin.config import IsExperimental def pytest_addoption(parser): @@ -53,9 +54,9 @@ def __exit__(self, *a, **kw): def set_experimental_env(mode): - import os + from modin.config import IsExperimental - os.environ["MODIN_EXPERIMENTAL"] = "True" if mode == "experimental" else "False" + IsExperimental.put(mode == "experimental") @pytest.fixture(scope="session", autouse=True) @@ -67,9 +68,7 @@ def simulate_cloud(request): if mode not in ("normal", "experimental"): raise ValueError(f"Unsupported --simulate-cloud mode: {mode}") - assert ( - os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True" - ), "Simulated cloud must be started in experimental mode" + assert IsExperimental.get(), "Simulated cloud must be started in experimental mode" from modin.experimental.cloud import create_cluster, get_connection import pandas._testing diff --git a/modin/experimental/pandas/__init__.py b/modin/experimental/pandas/__init__.py index ef624f132e2..baf604d4e8b 100644 --- a/modin/experimental/pandas/__init__.py +++ b/modin/experimental/pandas/__init__.py @@ -11,9 +11,9 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os +from modin.config import IsExperimental -os.environ["MODIN_EXPERIMENTAL"] = "True" +IsExperimental.put(True) # import numpy_wrap as early as possible to intercept all "import numpy" statements # in the user code diff --git a/modin/experimental/pandas/io_exp.py b/modin/experimental/pandas/io_exp.py index 3c24d06f1d6..ccf73460235 100644 --- a/modin/experimental/pandas/io_exp.py +++ b/modin/experimental/pandas/io_exp.py @@ -12,10 +12,10 @@ # governing permissions and limitations under the License. import inspect -import os from . import DataFrame from modin.data_management.factories.dispatcher import EngineDispatcher +from modin.config import IsExperimental def read_sql( @@ -63,8 +63,6 @@ def read_sql( Returns: Pandas Dataframe """ - assert ( - os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True" - ), "This only works in experimental mode" + assert IsExperimental.get(), "This only works in experimental mode" _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) return DataFrame(query_compiler=EngineDispatcher.read_sql(**kwargs)) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 02b7ea32800..7820bc9def7 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -11,7 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os import numpy as np from numpy import nan import pandas @@ -41,6 +40,7 @@ from modin.utils import try_cast_to_pandas from modin.error_message import ErrorMessage from modin.pandas.utils import is_scalar +from modin.config import IsExperimental # Similar to pandas, sentinel value to use as kwarg in place of None when None has # special meaning and needs to be distinguished from a user explicitly passing None. @@ -3498,7 +3498,7 @@ def default_handler(*args, **kwargs): return object.__getattribute__(self, item) -if os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True": +if IsExperimental.get(): from modin.experimental.cloud.meta_magic import make_wrapped_class make_wrapped_class(BasePandasDataset, "make_base_dataset_wrapper") diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index 6a53e48a782..148bf8bbbcd 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -29,12 +29,12 @@ import functools import numpy as np import sys -import os from typing import Optional, Sequence, Tuple, Union, Mapping import warnings from modin.error_message import ErrorMessage from modin.utils import _inherit_docstrings, to_pandas, hashable +from modin.config import IsExperimental from .utils import ( from_pandas, from_non_pandas, @@ -3410,7 +3410,7 @@ def _to_pandas(self): return self._query_compiler.to_pandas() -if os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True": +if IsExperimental.get(): from modin.experimental.cloud.meta_magic import make_wrapped_class make_wrapped_class(DataFrame, "make_dataframe_wrapper") diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 4bc55dc620b..e4007980010 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -11,7 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os import pandas import pandas.core.groupby from pandas.core.dtypes.common import is_list_like @@ -19,6 +18,7 @@ from modin.error_message import ErrorMessage from modin.utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas +from modin.config import IsExperimental from .series import Series @@ -771,7 +771,7 @@ def groupby_on_multiple_columns(df, *args, **kwargs): return self._df._default_to_pandas(groupby_on_multiple_columns, *args, **kwargs) -if os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True": +if IsExperimental.get(): from modin.experimental.cloud.meta_magic import make_wrapped_class make_wrapped_class(DataFrameGroupBy, "make_dataframe_groupby_wrapper") diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 452171d4b37..f424077def3 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -11,7 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import os import numpy as np import pandas from pandas.core.common import apply_if_callable, is_bool_indexer @@ -27,6 +26,7 @@ import warnings from modin.utils import _inherit_docstrings, to_pandas +from modin.config import IsExperimental from .base import BasePandasDataset, _ATTRS_NO_LOOKUP from .iterator import PartitionIterator from .utils import from_pandas, is_scalar @@ -2308,7 +2308,7 @@ def _to_pandas(self): return series -if os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True": +if IsExperimental.get(): from modin.experimental.cloud.meta_magic import make_wrapped_class make_wrapped_class(Series, "make_series_wrapper") From a09fca30d4ba246a20cff4bef9979be17a7b6cd8 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 01:03:38 +0300 Subject: [PATCH 16/27] REFACTOR-#2059: Migrate MODIN_RAY_CLUSTER and MODIN_REDIS_ADDRESS Signed-off-by: Vasilij Litvinov --- modin/engines/ray/utils.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/modin/engines/ray/utils.py b/modin/engines/ray/utils.py index dba34abbebc..e013d2a8f5b 100644 --- a/modin/engines/ray/utils.py +++ b/modin/engines/ray/utils.py @@ -17,6 +17,8 @@ import sys import multiprocessing +from modin.config import IsRayCluster, RayRedisAddress + def handle_ray_task_error(e): for s in e.traceback_str.split("\n")[::-1]: @@ -89,17 +91,11 @@ def initialize_ray( if threading.current_thread().name == "MainThread" or override_is_cluster: import secrets - cluster = ( - "True" - if override_is_cluster - else os.environ.get("MODIN_RAY_CLUSTER", "").title() - ) - redis_address = override_redis_address or os.environ.get( - "MODIN_REDIS_ADDRESS", None - ) + cluster = override_is_cluster or IsRayCluster.get() + redis_address = override_redis_address or RayRedisAddress.get() redis_password = override_redis_password or secrets.token_hex(16) - if cluster == "True": + if cluster: # We only start ray in a cluster setting for the head node. ray.init( address=redis_address or "auto", @@ -108,7 +104,7 @@ def initialize_ray( _redis_password=redis_password, logging_level=100, ) - elif cluster == "": + else: num_cpus = os.environ.get("MODIN_CPUS", None) or multiprocessing.cpu_count() object_store_memory = os.environ.get("MODIN_MEMORY", None) plasma_directory = os.environ.get("MODIN_ON_RAY_PLASMA_DIR", None) @@ -148,11 +144,6 @@ def initialize_ray( _memory=object_store_memory, _lru_evict=True, ) - else: - raise ValueError( - '"MODIN_RAY_CLUSTER" env variable not correctly set! \ - Did you mean `os.environ["MODIN_RAY_CLUSTER"] = "True"`?' - ) _move_stdlib_ahead_of_site_packages() ray.worker.global_worker.run_function_on_all_workers( From c7481cf6dee2adf3d4f7f8f786cea7911e236606 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 01:10:33 +0300 Subject: [PATCH 17/27] REFACTOR-#2059: Migrate MODIN_CPUS Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 6 ++++++ modin/engines/ray/utils.py | 6 ++---- modin/pandas/__init__.py | 9 +++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 6458b1f48d7..c0f78ca0a31 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -127,6 +127,12 @@ class CpuCount(EnvironmentVariable, type=int): varname = "MODIN_CPUS" + @classmethod + def _get_default(cls): + import multiprocessing + + return multiprocessing.cpu_count() + class Memory(EnvironmentVariable, type=int): """ diff --git a/modin/engines/ray/utils.py b/modin/engines/ray/utils.py index e013d2a8f5b..dcb309af967 100644 --- a/modin/engines/ray/utils.py +++ b/modin/engines/ray/utils.py @@ -15,9 +15,8 @@ import threading import os import sys -import multiprocessing -from modin.config import IsRayCluster, RayRedisAddress +from modin.config import IsRayCluster, RayRedisAddress, CpuCount def handle_ray_task_error(e): @@ -105,7 +104,6 @@ def initialize_ray( logging_level=100, ) else: - num_cpus = os.environ.get("MODIN_CPUS", None) or multiprocessing.cpu_count() object_store_memory = os.environ.get("MODIN_MEMORY", None) plasma_directory = os.environ.get("MODIN_ON_RAY_PLASMA_DIR", None) if os.environ.get("MODIN_OUT_OF_CORE", "False").title() == "True": @@ -133,7 +131,7 @@ def initialize_ray( else: object_store_memory = int(object_store_memory) ray.init( - num_cpus=int(num_cpus), + num_cpus=CpuCount.get(), include_dashboard=False, ignore_reinit_error=True, _plasma_directory=plasma_directory, diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py index bde417abedf..4c7e1dfcfb4 100644 --- a/modin/pandas/__init__.py +++ b/modin/pandas/__init__.py @@ -87,7 +87,7 @@ import os import multiprocessing -from modin.config import Engine, Backend, Parameter +from modin.config import Engine, Backend, Parameter, CpuCount # Set this so that Pandas doesn't try to multithread by itself os.environ["OMP_NUM_THREADS"] = "1" @@ -112,7 +112,7 @@ def _update_engine(publisher: Parameter): # With OmniSci backend there is only a single worker per node # and we allow it to work on all cores. if Backend.get() == "Omnisci": - os.environ["MODIN_CPUS"] = "1" + CpuCount.put(1) os.environ["OMP_NUM_THREADS"] = str(multiprocessing.cpu_count()) if _is_first_update.get("Ray", True): initialize_ray() @@ -132,10 +132,7 @@ def _update_engine(publisher: Parameter): except ValueError: from distributed import Client - num_cpus = ( - os.environ.get("MODIN_CPUS", None) or multiprocessing.cpu_count() - ) - dask_client = Client(n_workers=int(num_cpus)) + dask_client = Client(n_workers=CpuCount.get()) elif publisher.get() == "Cloudray": from modin.experimental.cloud import get_connection From 902502ad2d13adde9165b1d3fc08ed5237a2ad7d Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 01:12:58 +0300 Subject: [PATCH 18/27] REFACTOR-#2059: Migrate remaining Ray params Signed-off-by: Vasilij Litvinov --- modin/engines/ray/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/modin/engines/ray/utils.py b/modin/engines/ray/utils.py index dcb309af967..af86e63f57f 100644 --- a/modin/engines/ray/utils.py +++ b/modin/engines/ray/utils.py @@ -16,7 +16,14 @@ import os import sys -from modin.config import IsRayCluster, RayRedisAddress, CpuCount +from modin.config import ( + IsRayCluster, + RayRedisAddress, + CpuCount, + Memory, + RayPlasmaDir, + IsOutOfCore, +) def handle_ray_task_error(e): @@ -104,10 +111,9 @@ def initialize_ray( logging_level=100, ) else: - object_store_memory = os.environ.get("MODIN_MEMORY", None) - plasma_directory = os.environ.get("MODIN_ON_RAY_PLASMA_DIR", None) - if os.environ.get("MODIN_OUT_OF_CORE", "False").title() == "True": - + object_store_memory = Memory.get() + plasma_directory = RayPlasmaDir.get() + if IsOutOfCore.get(): if plasma_directory is None: from tempfile import gettempdir From 6707746258f1e2988964dc0ca943329033222ae5 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 01:16:48 +0300 Subject: [PATCH 19/27] REFACTOR-#2059: Migrate SOCKS proxy and RPyC tracing and logging Signed-off-by: Vasilij Litvinov --- modin/experimental/cloud/base.py | 4 +++- modin/experimental/cloud/connection.py | 8 ++------ modin/experimental/cloud/local_cluster.py | 5 +++-- modin/experimental/cloud/rpyc_proxy.py | 7 ++----- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/modin/experimental/cloud/base.py b/modin/experimental/cloud/base.py index 8439fecb5b8..a2f96690b70 100644 --- a/modin/experimental/cloud/base.py +++ b/modin/experimental/cloud/base.py @@ -15,6 +15,8 @@ import os import sys +from modin.config import SocksProxy + class ClusterError(Exception): """ @@ -64,7 +66,7 @@ def _which(prog): def _get_ssh_proxy_command(): - socks_proxy = os.environ.get("MODIN_SOCKS_PROXY", None) + socks_proxy = SocksProxy.get() if socks_proxy is None: return None if _which("nc"): diff --git a/modin/experimental/cloud/connection.py b/modin/experimental/cloud/connection.py index 462648621fa..2cccae868c4 100644 --- a/modin/experimental/cloud/connection.py +++ b/modin/experimental/cloud/connection.py @@ -13,13 +13,13 @@ import subprocess import signal -import os import random import time import tempfile import sys from .base import ClusterError, ConnectionDetails, _get_ssh_proxy_command +from modin.config import DoLogRpyc RPYC_REQUEST_TIMEOUT = 2400 @@ -40,11 +40,7 @@ def __wait_noexc(proc: subprocess.Popen, timeout: float): def __init__( self, details: ConnectionDetails, main_python: str, wrap_cmd=None, log_rpyc=None ): - self.log_rpyc = ( - log_rpyc - if log_rpyc is not None - else os.environ.get("MODIN_LOG_RPYC", "").title() == "True" - ) + self.log_rpyc = log_rpyc if log_rpyc is not None else DoLogRpyc.get() self.proc = None self.wrap_cmd = wrap_cmd or subprocess.list2cmdline diff --git a/modin/experimental/cloud/local_cluster.py b/modin/experimental/cloud/local_cluster.py index d023d9daa7c..378ce5f2b63 100644 --- a/modin/experimental/cloud/local_cluster.py +++ b/modin/experimental/cloud/local_cluster.py @@ -18,12 +18,13 @@ from .base import ConnectionDetails from .cluster import BaseCluster from .connection import Connection -from .rpyc_proxy import WrappingConnection, WrappingService, _TRACE_RPYC +from .rpyc_proxy import WrappingConnection, WrappingService from .tracing.tracing_connection import TracingWrappingConnection +from modin.config import DoTraceRpyc class LocalWrappingConnection( - TracingWrappingConnection if _TRACE_RPYC else WrappingConnection + TracingWrappingConnection if DoTraceRpyc.get() else WrappingConnection ): def _init_deliver(self): def ensure_modin(modin_init): diff --git a/modin/experimental/cloud/rpyc_proxy.py b/modin/experimental/cloud/rpyc_proxy.py index 12050c77a16..2dbf6837d38 100644 --- a/modin/experimental/cloud/rpyc_proxy.py +++ b/modin/experimental/cloud/rpyc_proxy.py @@ -13,7 +13,6 @@ import types import collections -import os import rpyc import cloudpickle as pickle @@ -23,6 +22,7 @@ from . import get_connection from .meta_magic import _LOCAL_ATTRS, RemoteMeta, _KNOWN_DUALS +from modin.config import DoTraceRpyc def _batch_loads(items): @@ -38,9 +38,6 @@ def _pickled_array(obj): return pickle.dumps(obj.__array__()) -_TRACE_RPYC = os.environ.get("MODIN_TRACE_RPYC", "").title() == "True" - - class WrappingConnection(rpyc.Connection): def __init__(self, *a, **kw): super().__init__(*a, **kw) @@ -279,7 +276,7 @@ def _init_deliver(self): class WrappingService(rpyc.ClassicService): - if _TRACE_RPYC: + if DoTraceRpyc.get(): from .tracing.tracing_connection import TracingWrappingConnection as _protocol else: _protocol = WrappingConnection From 3590b58ace87f8c44747a223413717a78d6c9d50 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 01:21:33 +0300 Subject: [PATCH 20/27] REFACTOR-#2059: Migrate remaining vars Signed-off-by: Vasilij Litvinov --- .../engines/omnisci_on_ray/frame/omnisci_worker.py | 4 +++- .../engines/omnisci_on_ray/frame/partition_manager.py | 9 +++------ modin/pandas/test/utils.py | 6 ++---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/modin/experimental/engines/omnisci_on_ray/frame/omnisci_worker.py b/modin/experimental/engines/omnisci_on_ray/frame/omnisci_worker.py index 68db69874bb..ad32f1f1503 100644 --- a/modin/experimental/engines/omnisci_on_ray/frame/omnisci_worker.py +++ b/modin/experimental/engines/omnisci_on_ray/frame/omnisci_worker.py @@ -24,6 +24,8 @@ sys.setdlopenflags(prev) +from modin.config import OmnisciFragmentSize + class OmnisciServer: _server = None @@ -105,7 +107,7 @@ def put_arrow_to_omnisci(cls, table, name=None): if need_cast: table = table.cast(new_schema) - fragment_size = os.environ.get("MODIN_OMNISCI_FRAGMENT_SIZE", None) + fragment_size = OmnisciFragmentSize.get() if fragment_size is None: cpu_count = os.cpu_count() if cpu_count is not None: diff --git a/modin/experimental/engines/omnisci_on_ray/frame/partition_manager.py b/modin/experimental/engines/omnisci_on_ray/frame/partition_manager.py index e88ccb4ab7e..0b4ba472e61 100644 --- a/modin/experimental/engines/omnisci_on_ray/frame/partition_manager.py +++ b/modin/experimental/engines/omnisci_on_ray/frame/partition_manager.py @@ -22,10 +22,10 @@ from .omnisci_worker import OmnisciServer from .calcite_builder import CalciteBuilder from .calcite_serializer import CalciteSerializer +from modin.config import DoUseCalcite import pyarrow import pandas -import os class OmnisciOnRayFrameManager(RayFrameManager): @@ -88,10 +88,7 @@ def run_exec_plan(cls, plan, index_cols, dtypes, columns): cmd_prefix = "execute relalg " - use_calcite_env = os.environ.get("MODIN_USE_CALCITE") - use_calcite = use_calcite_env is not None and use_calcite_env.lower() == "true" - - if use_calcite: + if DoUseCalcite.get(): cmd_prefix = "execute calcite " curs = omniSession.executeRA(cmd_prefix + calcite_json) @@ -102,7 +99,7 @@ def run_exec_plan(cls, plan, index_cols, dtypes, columns): res = np.empty((1, 1), dtype=np.dtype(object)) # workaround for https://github.com/modin-project/modin/issues/1851 - if use_calcite: + if DoUseCalcite.get(): at = at.rename_columns(["F_" + str(c) for c in columns]) res[0][0] = cls._partition_class.put_arrow(at) diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py index c5143731394..0590554b797 100644 --- a/modin/pandas/test/utils.py +++ b/modin/pandas/test/utils.py @@ -22,13 +22,11 @@ ) import modin.pandas as pd from modin.utils import to_pandas +from modin.config import TestDatasetSize from io import BytesIO -import os random_state = np.random.RandomState(seed=42) -DATASET_SIZE = os.environ.get("MODIN_TEST_DATASET_SIZE", "normal").lower() - DATASET_SIZE_DICT = { "small": (2 ** 2, 2 ** 3), "normal": (2 ** 6, 2 ** 8), @@ -36,7 +34,7 @@ } # Size of test dataframes -NCOLS, NROWS = DATASET_SIZE_DICT.get(DATASET_SIZE, DATASET_SIZE_DICT["normal"]) +NCOLS, NROWS = DATASET_SIZE_DICT.get(TestDatasetSize.get(), DATASET_SIZE_DICT["normal"]) # Range for values for test data RAND_LOW = 0 From e85ab96b504ca22b9c89f09f2ab0acd1dd13839a Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 01:59:39 +0300 Subject: [PATCH 21/27] REFACTOR-#2059: Adapt CI to tests change Signed-off-by: Vasilij Litvinov --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f49433881d..8e364e80bdc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -166,7 +166,9 @@ jobs: conda list - name: Internals tests shell: bash -l {0} - run: python -m pytest modin/test/test_publisher.py modin/data_management/factories/test/test_dispatcher.py modin/experimental/cloud/test/test_cloud.py + run: python -m pytest modin/data_management/factories/test/test_dispatcher.py modin/experimental/cloud/test/test_cloud.py + - shell: bash -l {0} + run: python -m pytest modin/config/test test-defaults: needs: [lint-commit, lint-flake8, lint-black, test-api, test-headers] From 04a42f7e5f19da4278a7428bd951bd9ef4286570 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 02:04:43 +0300 Subject: [PATCH 22/27] REFACTOR-#2059: Fix linting and API testing issues Signed-off-by: Vasilij Litvinov --- modin/config/__main__.py | 15 ++++++++++++++- modin/pandas/__init__.py | 5 +++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modin/config/__main__.py b/modin/config/__main__.py index b30a3a6f21c..9a2f4f7f39f 100644 --- a/modin/config/__main__.py +++ b/modin/config/__main__.py @@ -1,3 +1,16 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + from . import * # noqa: F403, F401 from .pubsub import Parameter @@ -6,7 +19,7 @@ def print_config_help(): for objname in sorted(globals()): obj = globals()[objname] if isinstance(obj, type) and issubclass(obj, Parameter) and not obj.is_abstract: - print(f"{obj.get_help()}\n\tCurrent value: {obj.get()}") + print(f"{obj.get_help()}\n\tCurrent value: {obj.get()}") # noqa: T001 if __name__ == "__main__": diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py index 4c7e1dfcfb4..68b14933318 100644 --- a/modin/pandas/__init__.py +++ b/modin/pandas/__init__.py @@ -87,7 +87,7 @@ import os import multiprocessing -from modin.config import Engine, Backend, Parameter, CpuCount +from modin.config import Engine, Parameter # Set this so that Pandas doesn't try to multithread by itself os.environ["OMP_NUM_THREADS"] = "1" @@ -104,6 +104,7 @@ def _update_engine(publisher: Parameter): global DEFAULT_NPARTITIONS, dask_client, num_cpus + from modin.config import Backend, CpuCount if publisher.get() == "Ray": import ray @@ -330,4 +331,4 @@ def init_remote_ray(partition): "DEFAULT_NPARTITIONS", ] -del pandas +del pandas, Engine, Parameter From b7d522e4df1a61bf96ebe196bf3599c10cb5d6a9 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 02:30:30 +0300 Subject: [PATCH 23/27] REFACTOR-#2059: Fix test_dispatcher Signed-off-by: Vasilij Litvinov --- modin/data_management/factories/test/test_dispatcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modin/data_management/factories/test/test_dispatcher.py b/modin/data_management/factories/test/test_dispatcher.py index 4f423c1f98b..09d31b3f85a 100644 --- a/modin/data_management/factories/test/test_dispatcher.py +++ b/modin/data_management/factories/test/test_dispatcher.py @@ -13,7 +13,7 @@ import pytest -from modin.config import Engine +from modin.config import Engine, Backend from modin import set_backends from modin.data_management.factories.dispatcher import ( @@ -84,10 +84,10 @@ def test_engine_switch(): assert EngineDispatcher.get_engine().io_cls == "Foo" Engine.put("Python") # revert engine to default - Engine.put("Test") + Backend.put("Test") assert EngineDispatcher.get_engine() == TestOnPythonFactory assert EngineDispatcher.get_engine().io_cls == "Bar" - Engine.put("Pandas") # revert engine to default + Backend.put("Pandas") # revert engine to default def test_engine_wrong_factory(): From 2650c7ef895193af644016dd7dbb7baac1605ab5 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 09:42:19 +0300 Subject: [PATCH 24/27] REFACTOR-#2059: Mark test_from_sql_distributed as xfail, see #2194 Signed-off-by: Vasilij Litvinov --- modin/experimental/pandas/test/test_io_exp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modin/experimental/pandas/test/test_io_exp.py b/modin/experimental/pandas/test/test_io_exp.py index 7c84ca0073a..41f49d1a24e 100644 --- a/modin/experimental/pandas/test/test_io_exp.py +++ b/modin/experimental/pandas/test/test_io_exp.py @@ -27,6 +27,7 @@ ) def test_from_sql_distributed(make_sql_connection): # noqa: F811 if Engine.get() == "Ray": + pytest.xfail("Distributed read_sql is broken, see GH#2194") filename = "test_from_sql_distributed.db" table = "test_from_sql_distributed" conn = make_sql_connection(filename, table) From d941efab7daaa560bdbcfea365b51689ffb54f88 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Fri, 2 Oct 2020 13:15:29 +0300 Subject: [PATCH 25/27] REFACTOR-#2059: Add missing docstrings Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index c0f78ca0a31..167fe746af4 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -151,6 +151,10 @@ class RayPlasmaDir(EnvironmentVariable, type=str): class IsOutOfCore(EnvironmentVariable, type=bool): + """ + Changes primary location of the DataFrame to disk, allowing one to exceed total system memory + """ + varname = "MODIN_OUT_OF_CORE" @@ -204,6 +208,10 @@ class TestDatasetSize(EnvironmentVariable, type=str): def _check_vars(): + """ + Look out for any environment variables that start with "MODIN_" prefix + that are unknown - they might be a typo, so warn a user + """ valid_names = { obj.varname for obj in globals().values() From 433b797f74d8dccda6347c411cb5419d7e247979 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Mon, 5 Oct 2020 12:57:43 +0300 Subject: [PATCH 26/27] REFACTOR-#2059: Skip coverage for modin/config/tests Signed-off-by: Vasilij Litvinov --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 2baceb1be42..631adef140c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,7 @@ omit = modin/pandas/index/* # Skip tests modin/test/* + modin/config/test/* modin/pandas/test/* modin/data_management/test/* modin/data_management/factories/test/* From 7c0dc905eef873bc2d58cd2b1538d8d8d07223ee Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Mon, 12 Oct 2020 15:52:18 +0300 Subject: [PATCH 27/27] REFACTOR-#2059: Add catching env access bypassing config Signed-off-by: Vasilij Litvinov --- .github/workflows/ci.yml | 2 + modin/conftest.py | 64 +++++++++++++++++++++++++++++++ modin/test/test_envvar_catcher.py | 32 ++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 modin/test/test_envvar_catcher.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8e364e80bdc..9b959ef28ab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -169,6 +169,8 @@ jobs: run: python -m pytest modin/data_management/factories/test/test_dispatcher.py modin/experimental/cloud/test/test_cloud.py - shell: bash -l {0} run: python -m pytest modin/config/test + - shell: bash -l {0} + run: python -m pytest modin/test/test_envvar_catcher.py test-defaults: needs: [lint-commit, lint-flake8, lint-black, test-api, test-headers] diff --git a/modin/conftest.py b/modin/conftest.py index 63257a122e1..86c0ed252f0 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -11,8 +11,12 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. +import os +import sys import pytest +import modin +import modin.config from modin.config import IsExperimental @@ -82,3 +86,63 @@ def simulate_cloud(request): (cyx_testing, "assert_almost_equal"), ): yield + + +@pytest.fixture(scope="session", autouse=True) +def enforce_config(): + """ + A fixture that ensures that all checks for MODIN_* variables + are done using modin.config to prevent leakage + """ + orig_env = os.environ + modin_start = os.path.dirname(modin.__file__) + modin_exclude = [os.path.dirname(modin.config.__file__)] + + class PatchedEnv: + @staticmethod + def __check_var(name): + if name.upper().startswith("MODIN_"): + frame = sys._getframe() + try: + # get the path to module where caller of caller is defined; + # caller of this function is inside PatchedEnv, and we're + # interested in whomever called a method on PatchedEnv + caller_file = frame.f_back.f_back.f_code.co_filename + finally: + del frame + pkg_name = os.path.dirname(caller_file) + if pkg_name.startswith(modin_start): + assert any( + pkg_name.startswith(excl) for excl in modin_exclude + ), "Do not access MODIN_ environment variable bypassing modin.config" + + def __getitem__(self, name): + self.__check_var(name) + return orig_env[name] + + def __setitem__(self, name, value): + self.__check_var(name) + orig_env[name] = value + + def __delitem__(self, name): + self.__check_var(name) + del orig_env[name] + + def pop(self, name): + self.__check_var(name) + return orig_env.pop(name) + + def get(self, name, defvalue=None): + self.__check_var(name) + return orig_env.get(name, defvalue) + + def __contains__(self, name): + self.__check_var(name) + return name in orig_env + + def __getattr__(self, name): + return getattr(orig_env, name) + + os.environ = PatchedEnv() + yield + os.environ = orig_env diff --git a/modin/test/test_envvar_catcher.py b/modin/test/test_envvar_catcher.py new file mode 100644 index 00000000000..fd187bf1f4e --- /dev/null +++ b/modin/test/test_envvar_catcher.py @@ -0,0 +1,32 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import os +import pytest + + +@pytest.fixture +def nameset(): + name = "hey_i_am_an_env_var" + os.environ[name] = "i am a value" + yield name + del os.environ[name] + + +def test_envvar_catcher(nameset): + with pytest.raises(AssertionError): + os.environ.get("Modin_FOO", "bar") + with pytest.raises(AssertionError): + "modin_qux" not in os.environ + assert "yay_random_name" not in os.environ + assert os.environ[nameset]