From 30b75ec737cb470ffbfce8302bf81fcfbd8222c8 Mon Sep 17 00:00:00 2001 From: Vasilij Litvinov Date: Thu, 1 Oct 2020 11:06:16 +0300 Subject: [PATCH] REFACTOR-#2059: Simplify passing varname Signed-off-by: Vasilij Litvinov --- modin/config/envvars.py | 85 ++++++++++++++++++++++++++++------------- modin/config/pubsub.py | 2 +- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 4274ba4186f..e6535b1b379 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -23,11 +23,7 @@ class EnvironmentVariable(Publisher, type=str): Base class for environment variables-based configuration """ - def __init_subclass__(cls, varname: str, **kw): - cls.varname = varname - if not kw.get("type"): - kw["type"] = EnvironmentVariable.type - super().__init_subclass__(**kw) + varname: str = None @classmethod def _get_raw_from_config(cls) -> str: @@ -41,112 +37,147 @@ def _get_help(cls) -> str: return help -class Engine(EnvironmentVariable, varname="MODIN_ENGINE"): +class Engine(EnvironmentVariable, type=str): """ Distribution engine to run queries by """ + varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python") -class Backend(EnvironmentVariable, varname="MODIN_BACKEND"): +class Backend(EnvironmentVariable, type=str): """ Engine running on a single node of distribution """ + varname = "MODIN_BACKEND" choices = ("Pandas", "OmniSci", "Pyarrow") -class IsDebug(EnvironmentVariable, varname="MODIN_DEBUG", type=bool): +class IsDebug(EnvironmentVariable, type=bool): """ Forces Modin engine to be "Python" unless specified by $MODIN_ENGINE """ + varname = "MODIN_DEBUG" -class IsExperimental(EnvironmentVariable, varname="MODIN_EXPERIMENTAL", type=bool): + +class IsExperimental(EnvironmentVariable, type=bool): """ Turns on experimental features """ + varname = "MODIN_EXPERIMENTAL" + -class IsRayCluster(EnvironmentVariable, varname="MODIN_RAY_CLUSTER", type=bool): +class IsRayCluster(EnvironmentVariable, type=bool): """ True if Modin is running on pre-initialized Ray cluster """ + varname = "MODIN_RAY_CLUSTER" -class RayRedisAddress(EnvironmentVariable, varname="MODIN_REDIS_ADDRESS"): + +class RayRedisAddress(EnvironmentVariable, type=str): """ What Redis address to connect to when running in Ray cluster """ + varname = "MODIN_REDIS_ADDRESS" + -class CpuCount(EnvironmentVariable, varname="MODIN_CPUS", type=int): +class CpuCount(EnvironmentVariable, type=int): """ How may CPU cores to utilize across the whole distribution """ + varname = "MODIN_CPUS" -class Memory(EnvironmentVariable, varname="MODIN_MEMORY", type=int): + +class Memory(EnvironmentVariable, type=int): """ How much memory give to each Ray worker (in bytes) """ + varname = "MODIN_MEMORY" + -class RayPlasmaDir(EnvironmentVariable, varname="MODIN_ON_RAY_PLASMA_DIR"): +class RayPlasmaDir(EnvironmentVariable, type=str): """ Path to Plasma storage for Ray """ + varname = "MODIN_ON_RAY_PLASMA_DIR" + -class IsOutOfCore(EnvironmentVariable, varname="MODIN_OUT_OF_CORE", type=bool): - pass +class IsOutOfCore(EnvironmentVariable, type=bool): + varname = "MODIN_OUT_OF_CORE" -class SocksProxy(EnvironmentVariable, varname="MODIN_SOCKS_PROXY"): +class SocksProxy(EnvironmentVariable, type=str): """ SOCKS proxy address if it is needed for SSH to work """ + varname = "MODIN_SOCKS_PROXY" -class DoLogRpyc(EnvironmentVariable, varname="MODIN_LOG_RPYC", type=bool): + +class DoLogRpyc(EnvironmentVariable, type=bool): """ Whether to gather RPyC logs (applicable for remote context) """ + varname = "MODIN_LOG_RPYC" + -class DoTraceRpyc(EnvironmentVariable, varname="MODIN_TRACE_RPYC", type=bool): +class DoTraceRpyc(EnvironmentVariable, type=bool): """ Whether to trace RPyC calls (applicable for remote context) """ + varname = "MODIN_TRACE_RPYC" -class OmnisciFragmentSize( - EnvironmentVariable, varname="MODIN_OMNISCI_FRAGMENT_SIZE", type=int -): + +class OmnisciFragmentSize(EnvironmentVariable, type=int): """ How big a fragment in OmniSci should be when creating a table (in rows) """ + varname = "MODIN_OMNISCI_FRAGMENT_SIZE" + -class DoUseCalcite(EnvironmentVariable, varname="MODIN_USE_CALCITE", type=bool): +class DoUseCalcite(EnvironmentVariable, type=bool): """ Whether to use Calcite for OmniSci queries execution """ + varname = "MODIN_USE_CALCITE" -class TestDatasetSize(EnvironmentVariable, varname="MODIN_TEST_DATASET_SIZE"): + +class TestDatasetSize(EnvironmentVariable, type=str): """ Dataset size for running some tests """ + varname = "MODIN_TEST_DATASET_SIZE" choices = ("small", "normal", "big") + def _check_vars(): - valid_names = {obj.varname for obj in globals().values() if obj is not EnvironmentVariable and isinstance(obj, type) and issubclass(obj, EnvironmentVariable)} - found_names = {name for name in os.environ.keys() if name.startswith('MODIN_')} + valid_names = { + obj.varname + for obj in globals().values() + if obj is not EnvironmentVariable + and isinstance(obj, type) + and issubclass(obj, EnvironmentVariable) + } + found_names = {name for name in os.environ.keys() if name.startswith("MODIN_")} unknown = found_names - valid_names if unknown: - warnings.warn(f"Found unknown environment variables, please check their spelling: {', '.join(sorted(unknown))}") + warnings.warn( + f"Found unknown environment variables, please check their spelling: {', '.join(sorted(unknown))}" + ) + _check_vars() diff --git a/modin/config/pubsub.py b/modin/config/pubsub.py index 99047d3a8b7..f394ca3eae4 100644 --- a/modin/config/pubsub.py +++ b/modin/config/pubsub.py @@ -88,7 +88,7 @@ def _get_help(cls) -> str: """ raise NotImplementedError() - def __init_subclass__(cls, type=None, default=None, **kw): + def __init_subclass__(cls, type=None, **kw): assert type in _CASTERS, f"Unsupported variable type: {type}" cls.type = type cls._value = None