From bc05a1cd096af61b27197914789f6ab61d492f5e Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:40:55 +0200 Subject: [PATCH] Merge configuration object from multiple files (instead of one single file) (#2448) Co-authored-by: Bouwe Andela --- doc/api/esmvalcore.config.rst | 60 ++- doc/contributing.rst | 2 +- doc/develop/fixing_data.rst | 23 +- doc/quickstart/configure.rst | 444 ++++++++++-------- doc/quickstart/find_data.rst | 79 ++-- doc/quickstart/install.rst | 8 +- doc/quickstart/output.rst | 37 +- doc/quickstart/run.rst | 23 +- doc/recipe/overview.rst | 15 +- doc/recipe/preprocessor.rst | 11 +- esmvalcore/_main.py | 254 ++++++---- esmvalcore/_recipe/recipe.py | 20 +- esmvalcore/cmor/_fixes/icon/_base_fixes.py | 5 +- esmvalcore/config/__init__.py | 11 +- esmvalcore/config/_config.py | 5 +- esmvalcore/config/_config_object.py | 296 ++++++++++-- esmvalcore/config/_config_validators.py | 69 ++- .../configurations/defaults}/config-user.yml | 10 +- .../configurations/defaults/more_options.yml | 9 + esmvalcore/local.py | 4 +- tests/conftest.py | 24 + tests/integration/conftest.py | 13 - tests/integration/test_deprecated_config.py | 14 +- tests/integration/test_diagnostic_run.py | 72 ++- tests/integration/test_main.py | 175 ++++--- .../experimental/test_run_recipe.py | 9 +- tests/unit/config/test_config.py | 47 +- tests/unit/config/test_config_object.py | 267 +++++++++-- tests/unit/config/test_config_validator.py | 9 + tests/unit/config/test_esgf_pyclient.py | 2 +- tests/unit/conftest.py | 14 - tests/unit/main/test_esmvaltool.py | 72 ++- tests/unit/test_dataset.py | 4 +- 33 files changed, 1414 insertions(+), 693 deletions(-) rename esmvalcore/{ => config/configurations/defaults}/config-user.yml (96%) create mode 100644 esmvalcore/config/configurations/defaults/more_options.yml create mode 100644 tests/conftest.py delete mode 100644 tests/unit/conftest.py diff --git a/doc/api/esmvalcore.config.rst 
b/doc/api/esmvalcore.config.rst index 659d574509..9b01587263 100644 --- a/doc/api/esmvalcore.config.rst +++ b/doc/api/esmvalcore.config.rst @@ -1,13 +1,15 @@ +.. _api_configuration: + Configuration ============= This section describes the :py:class:`~esmvalcore.config` module. -Config -****** +CFG +*** -Configuration of ESMValCore/Tool is done via the :py:class:`~esmvalcore.config.Config` object. -The global configuration can be imported from the :py:mod:`esmvalcore.config` module as :py:data:`~esmvalcore.config.CFG`: +Configuration of ESMValCore/Tool is done via the :py:data:`~esmvalcore.config.CFG` +object: .. code-block:: python @@ -16,7 +18,6 @@ The global configuration can be imported from the :py:mod:`esmvalcore.config` mo Config({'auxiliary_data_dir': PosixPath('/home/user/auxiliary_data'), 'compress_netcdf': False, 'config_developer_file': None, - 'config_file': PosixPath('/home/user/.esmvaltool/config-user.yml'), 'drs': {'CMIP5': 'default', 'CMIP6': 'default'}, 'exit_on_warning': False, 'log_level': 'info', @@ -30,9 +31,10 @@ The global configuration can be imported from the :py:mod:`esmvalcore.config` mo 'default': '~/default_inputpath'}, 'save_intermediary_cubes': False) -The parameters for the user configuration file are listed :ref:`here `. +All configuration parameters are listed :ref:`here `. -:py:data:`~esmvalcore.config.CFG` is essentially a python dictionary with a few extra functions, similar to :py:data:`matplotlib.rcParams`. +:py:data:`~esmvalcore.config.CFG` is essentially a python dictionary with a few +extra functions, similar to :py:data:`matplotlib.rcParams`. This means that values can be updated like this: .. code-block:: python @@ -41,8 +43,10 @@ This means that values can be updated like this: >>> CFG['output_dir'] PosixPath('/home/user/esmvaltool_output') -Notice that :py:data:`~esmvalcore.config.CFG` automatically converts the path to an instance of ``pathlib.Path`` and expands the home directory. 
-All values entered into the config are validated to prevent mistakes, for example, it will warn you if you make a typo in the key: +Notice that :py:data:`~esmvalcore.config.CFG` automatically converts the path +to an instance of :class:`pathlib.Path` and expands the home directory. +All values entered into the config are validated to prevent mistakes, for +example, it will warn you if you make a typo in the key: .. code-block:: python @@ -56,7 +60,8 @@ Or, if the value entered cannot be converted to the expected type: >>> CFG['max_parallel_tasks'] = '🐜' InvalidConfigParameter: Key `max_parallel_tasks`: Could not convert '🐜' to int -:py:class:`~esmvalcore.config.Config` is also flexible, so it tries to correct the type of your input if possible: +:py:data:`~esmvalcore.config.CFG` is also flexible, so it tries to correct the +type of your input if possible: .. code-block:: python @@ -64,35 +69,44 @@ Or, if the value entered cannot be converted to the expected type: >>> type(CFG['max_parallel_tasks']) int -By default, the config is loaded from the default location (``/home/user/.esmvaltool/config-user.yml``). -If it does not exist, it falls back to the default values. -to load a different file: +By default, the configuration is loaded from YAML files in the user's home +directory at ``~/.config/esmvaltool``. +If set, this can be overwritten with the ``ESMVALTOOL_CONFIG_DIR`` environment +variable. +Defaults for options that are not specified explicitly are listed :ref:`here +`. +To reload the current configuration object according to these rules, use: .. code-block:: python - >>> CFG.load_from_file('~/my-config.yml') + >>> CFG.reload() -Or to reload the current config: +To load the configuration object from custom directories, use: .. code-block:: python - >>> CFG.reload() + >>> dirs = ['my/default/config', 'my/custom/config'] + >>> CFG.load_from_dirs(dirs) Session ******* Recipes and diagnostics will be run in their own directories. 
-This behaviour can be controlled via the :py:data:`~esmvalcore.config.Session` object. -A :py:data:`~esmvalcore.config.Session` can be initiated from the global :py:class:`~esmvalcore.config.Config`. +This behavior can be controlled via the :py:data:`~esmvalcore.config.Session` +object. +A :py:data:`~esmvalcore.config.Session` must always be initiated from the +global :py:data:`~esmvalcore.config.CFG` object: .. code-block:: python >>> session = CFG.start_session(name='my_session') A :py:data:`~esmvalcore.config.Session` is very similar to the config. -It is also a dictionary, and copies all the keys from the :py:class:`~esmvalcore.config.Config`. -At this moment, ``session`` is essentially a copy of :py:data:`~esmvalcore.config.CFG`: +It is also a dictionary, and copies all the keys from the +:py:data:`~esmvalcore.config.CFG` object. +At this moment, ``session`` is essentially a copy of +:py:data:`~esmvalcore.config.CFG`: .. code-block:: python @@ -102,7 +116,8 @@ At this moment, ``session`` is essentially a copy of :py:data:`~esmvalcore.confi >>> print(session == CFG) # False False -A :py:data:`~esmvalcore.config.Session` also knows about the directories where the data will stored. +A :py:data:`~esmvalcore.config.Session` also knows about the directories where +the data will be stored. The session name is used to prefix the directories. .. code-block:: python @@ -118,7 +133,8 @@ The session name is used to prefix the directories. >>> session.plot_dir /home/user/my_output_dir/my_session_20201203_155821/plots -Unlike the global configuration, of which only one can exist, multiple sessions can be initiated from :py:class:`~esmvalcore.config.Config`. +Unlike the global configuration, of which only one can exist, multiple sessions +can be initiated from :py:data:`~esmvalcore.config.CFG`. 
API reference diff --git a/doc/contributing.rst b/doc/contributing.rst index ee47974e90..a21a005e72 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -571,7 +571,7 @@ users. When making changes, e.g. to the :ref:`recipe format `, the :ref:`diagnostic script interface `, the public -:ref:`Python API `, or the :ref:`configuration file format `, +:ref:`Python API `, or the :ref:`configuration format `, keep in mind that this may affect many users. To keep the tool user friendly, try to avoid making changes that are not backward compatible, i.e. changes that require users to change their existing diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst index 174be1815d..68b6e27221 100644 --- a/doc/develop/fixing_data.rst +++ b/doc/develop/fixing_data.rst @@ -329,9 +329,9 @@ severity. From highest to lowest: Users can have control about which levels of issues are interpreted as errors, and therefore make the checker fail or warnings or debug messages. -For this purpose there is an optional command line option `--check-level` -that can take a number of values, listed below from the lowest level of -strictness to the highest: +For this purpose there is an optional :ref:`configuration option +` ``check_level`` that can take a number of values, listed +below from the lowest level of strictness to the highest: - ``ignore``: all issues, regardless of severity, will be reported as warnings. Checker will never fail. Use this at your own risk. 
@@ -375,8 +375,8 @@ To allow ESMValCore to locate the data files, use the following steps: - If you want to use the ``native6`` project (recommended for datasets whose input files can be easily moved to the usual ``native6`` directory - structure given by the ``rootpath`` in your :ref:`user configuration - file`; this is usually the case for native reanalysis/observational + structure given by the :ref:`configuration option ` + ``rootpath``; this is usually the case for native reanalysis/observational datasets): The entry ``native6`` of ``config-developer.yml`` should be complemented @@ -399,8 +399,8 @@ To allow ESMValCore to locate the data files, use the following steps: To find your native data (e.g., called ``MYDATA``) that is for example located in ``{rootpath}/MYDATA/amip/run1/42-0/atm/run1_1979.nc`` - (``{rootpath}`` is ESMValTool's ``rootpath`` for the project ``native6`` - defined in your :ref:`user configuration file`), use the following dataset + (``{rootpath}`` is ESMValTool's ``rootpath`` :ref:`configuration option + ` for the project ``native6``), use the following dataset entry in your recipe .. code-block:: yaml @@ -408,8 +408,8 @@ To allow ESMValCore to locate the data files, use the following steps: datasets: - {project: native6, dataset: MYDATA, exp: amip, simulation: run1, version: 42-0, type: atm} - and make sure to use the following DRS for the project ``native6`` in your - :ref:`user configuration file`: + and make sure to use the following :ref:`configuration option + ` ``drs``: .. 
code-block:: yaml @@ -437,9 +437,8 @@ To allow ESMValCore to locate the data files, use the following steps: To find your ICON data that is for example located in files like ``{rootpath}/amip/amip_atm_2d_ml_20000101T000000Z.nc`` (``{rootpath}`` is - ESMValTool ``rootpath`` for the project ``ICON`` defined in your - :ref:`user configuration file`), use the following dataset entry in your - recipe: + ESMValCore's :ref:`configuration option ` ``rootpath`` for + the project ``ICON``), use the following dataset entry in your recipe: .. code-block:: yaml diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst index 37e6efd230..c65fdbd1c5 100644 --- a/doc/quickstart/configure.rst +++ b/doc/quickstart/configure.rst @@ -1,203 +1,273 @@ .. _config: -******************* -Configuration files -******************* +************* +Configuration +************* + +.. _config_overview: Overview ======== -There are several configuration files in ESMValCore: +Similar to `Dask `__, +ESMValCore provides one single configuration object that consists of a single +nested dictionary for its configuration. -* ``config-user.yml``: sets a number of user-specific options like desired - graphical output format, root paths to data, etc.; -* ``config-developer.yml``: sets a number of standardized file-naming and paths - to data formatting; +.. note:: -and one configuration file which is distributed with ESMValTool: + In v2.12.0, a redesign process of ESMValTool/Core's configuration started. + Its main aim is to simplify the configuration by moving from many different + configuration files for individual components to one configuration object + that consists of a single nested dictionary (similar to `Dask's configuration + `__). + This change will not be implemented in one large pull request but rather in a + step-by-step procedure. + Thus, the configuration might appear inconsistent until this redesign is + finished. 
+ A detailed plan for this new configuration is outlined in :issue:`2371`. -* ``config-references.yml``: stores information on diagnostic and recipe authors and - scientific journals references; -.. _user configuration file: +.. _config_for_cli: -User configuration file -======================= +Specify configuration for ``esmvaltool`` command line tool +========================================================== +When running recipes via the :ref:`command line `, configuration +options can be specified via YAML files and command line arguments. -The ``config-user.yml`` configuration file contains all the global level -information needed by ESMValCore. It can be reused as many times the user needs -to before changing any of the options stored in it. This file is essentially -the gateway between the user and the machine-specific instructions to -``esmvaltool``. By default, esmvaltool looks for it in the home directory, -inside the ``.esmvaltool`` folder. -Users can get a copy of this file with default values by running - -.. code-block:: bash +.. _config_yaml_files: - esmvaltool config get-config-user --path=${TARGET_FOLDER} +YAML files +---------- -If the option ``--path`` is omitted, the file will be created in -``${HOME}/.esmvaltool`` +:ref:`Configuration options ` can be specified via YAML files +(i.e., ``*.yaml`` and ``*.yml``). -The following shows the default settings from the ``config-user.yml`` file -with explanations in a commented line above each option. If only certain values -are allowed for an option, these are listed after ``---``. The option in square -brackets is the default value, i.e., the one that is used if this option is -omitted in the file. +A file could look like this (for example, located at +``~/.config/esmvaltool/config.yml``): .. code-block:: yaml - # Destination directory where all output will be written - # Includes log files and performance stats. 
output_dir: ~/esmvaltool_output + search_esgf: when_missing + download_dir: ~/downloaded_data + +These files can live in any of the following locations: + +1. The directory specified via the ``--config_dir`` command line argument. - # Auxiliary data directory - # Used by some recipes to look for additional datasets. - auxiliary_data_dir: ~/auxiliary_data - - # Automatic data download from ESGF --- [never]/when_missing/always - # Use automatic download of missing CMIP3, CMIP5, CMIP6, CORDEX, and obs4MIPs - # data from ESGF. ``never`` disables this feature, which is useful if you are - # working on a computer without an internet connection, or if you have limited - # disk space. ``when_missing`` enables the automatic download for files that - # are not available locally. ``always`` will always check ESGF for the latest - # version of a file, and will only use local files if they correspond to that - # latest version. - search_esgf: never - - # Directory for storing downloaded climate data - # Make sure to use a directory where you can store multiple GBs of data. Your - # home directory on a HPC is usually not suited for this purpose, so please - # change the default value in this case! - download_dir: ~/climate_data - - # Rootpaths to the data from different projects - # This default setting will work if files have been downloaded by ESMValTool - # via ``search_esgf``. Lists are also possible. For site-specific entries, - # see the default ``config-user.yml`` file that can be installed with the - # command ``esmvaltool config get_config_user``. For each project, this can - # be either a single path or a list of paths. Comment out these when using a - # site-specific path. - rootpath: - default: ~/climate_data - - # Directory structure for input data --- [default]/ESGF/BADC/DKRZ/ETHZ/etc. - # This default setting will work if files have been downloaded by ESMValTool - # via ``search_esgf``. See ``config-developer.yml`` for definitions. 
Comment - # out/replace as per needed. - drs: - CMIP3: ESGF - CMIP5: ESGF - CMIP6: ESGF - CORDEX: ESGF - obs4MIPs: ESGF - - # Run at most this many tasks in parallel --- [null]/1/2/3/4/... - # Set to ``null`` to use the number of available CPUs. If you run out of - # memory, try setting max_parallel_tasks to ``1`` and check the amount of - # memory you need for that by inspecting the file ``run/resource_usage.txt`` in - # the output directory. Using the number there you can increase the number of - # parallel tasks again to a reasonable number for the amount of memory - # available in your system. - max_parallel_tasks: null - - # Log level of the console --- debug/[info]/warning/error - # For much more information printed to screen set log_level to ``debug``. - log_level: info - - # Exit on warning --- true/[false] - # Only used in NCL diagnostic scripts. - exit_on_warning: false - - # Plot file format --- [png]/pdf/ps/eps/epsi - output_file_type: png - - # Remove the ``preproc`` directory if the run was successful --- [true]/false - # By default this option is set to ``true``, so all preprocessor output files - # will be removed after a successful run. Set to ``false`` if you need those files. - remove_preproc_dir: true - - # Use netCDF compression --- true/[false] - compress_netcdf: false - - # Save intermediary cubes in the preprocessor --- true/[false] - # Setting this to ``true`` will save the output cube from each preprocessing - # step. These files are numbered according to the preprocessing order. - save_intermediary_cubes: false - - # Use a profiling tool for the diagnostic run --- [false]/true - # A profiler tells you which functions in your code take most time to run. - # For this purpose we use ``vprof``, see below for notes. Only available for - # Python diagnostics. - profile_diagnostic: false - - # Path to custom ``config-developer.yml`` file - # This can be used to customise project configurations. See - # ``config-developer.yml`` for an example. 
Set to ``null`` to use the default. - config_developer_file: null - -The ``search_esgf`` setting can be used to disable or enable automatic -downloads from ESGF. -If ``search_esgf`` is set to ``never``, the tool does not download any data -from the ESGF. -If ``search_esgf`` is set to ``when_missing``, the tool will download any CMIP3, -CMIP5, CMIP6, CORDEX, and obs4MIPs data that is required to run a recipe but -not available locally and store it in ``download_dir`` using the ``ESGF`` -directory structure defined in the :ref:`config-developer`. -If ``search_esgf`` is set to ``always``, the tool will first check the ESGF for -the needed data, regardless of any local data availability; if the data found -on ESGF is newer than the local data (if any) or the user specifies a version -of the data that is available only from the ESGF, then that data will be -downloaded; otherwise, local data will be used. - -The ``auxiliary_data_dir`` setting is the path to place any required -additional auxiliary data files. This is necessary because certain -Python toolkits, such as cartopy, will attempt to download data files at run -time, typically geographic data files such as coastlines or land surface maps. -This can fail if the machine does not have access to the wider internet. This -location allows the user to specify where to find such files if they can not be -downloaded at runtime. The example user configuration file already contains two valid -locations for ``auxiliary_data_dir`` directories on CEDA-JASMIN and DKRZ, and a number -of such maps and shapefiles (used by current diagnostics) are already there. You will -need ``esmeval`` group workspace membership to access the JASMIN one (see -`instructions `_ -how to gain access to the group workspace. +2. The user configuration directory: by default ``~/.config/esmvaltool``, but + this can be changed with the ``ESMVALTOOL_CONFIG_DIR`` environment variable. 
+ If ``~/.config/esmvaltool`` does not exist, this will be silently ignored. + +ESMValCore searches for all YAML files within each of these directories and +merges them together using :func:`dask.config.collect`. +This properly considers nested objects; see :func:`dask.config.update` for +details. +Preference follows the order in the list above (i.e., the directory specified +via command line argument is preferred over the user configuration directory). +Within a directory, files are sorted alphabetically, and later files (e.g., +``z.yml``) will take precedence over earlier files (e.g., ``a.yml``). .. warning:: - This setting is not for model or observational datasets, rather it is for - extra data files such as shapefiles or other data sources needed by the diagnostics. + ESMValCore will read **all** YAML files in these configuration directories. + Thus, other YAML files in these directories which are not valid configuration + files (like the old ``config-developer.yml`` files) will lead to errors. + Make sure to move these files to a different directory. -The ``profile_diagnostic`` setting triggers profiling of Python diagnostics, -this will tell you which functions in the diagnostic took most time to run. -For this purpose we use `vprof `_. -For each diagnostic script in the recipe, the profiler writes a ``.json`` file -that can be used to plot a -`flame graph `__ -of the profiling information by running +To get a copy of the default configuration file, you can run .. code-block:: bash - vprof --input-file esmvaltool_output/recipe_output/run/diagnostic/script/profile.json + esmvaltool config get_config_user --path=/target/file.yml -Note that it is also possible to use vprof to understand other resources used -while running the diagnostic, including execution time of different code blocks -and memory usage. +If the option ``--path`` is omitted, the file will be copied to +``~/.config/esmvaltool/config-user.yml``. 
-A detailed explanation of the data finding-related sections of the -``config-user.yml`` (``rootpath`` and ``drs``) is presented in the -:ref:`data-retrieval` section. This section relates directly to the data -finding capabilities of ESMValCore and are very important to be understood by -the user. -.. note:: +Command line arguments +---------------------- + +All :ref:`configuration options ` can also be given as command +line arguments to the ``esmvaltool`` executable. + +Example: + +.. code-block:: bash + + esmvaltool run --search_esgf=when_missing --max_parallel_tasks=2 /path/to/recipe.yml + +Options given via command line arguments will always take precedence over +options specified via YAML files. + + +.. _config_for_api: + +Specify/access configuration for Python API +=========================================== + +When running recipes with the :ref:`experimental Python API +`, configuration options can be specified and accessed via +the :py:data:`~esmvalcore.config.CFG` object. +For example: + +.. code-block:: python + + >>> from esmvalcore.config import CFG + >>> CFG['output_dir'] = '~/esmvaltool_output' + >>> CFG['output_dir'] + PosixPath('/home/user/esmvaltool_output') + +This will also consider YAML configuration files in the user configuration +directory (by default ``~/.config/esmvaltool``, but this can be changed with +the ``ESMVALTOOL_CONFIG_DIR`` environment variable). + +More information about this can be found :ref:`here `. + + +.. _config_options: + +Configuration options +===================== + +Note: the following entries use Python syntax. +For example, Python's ``None`` is YAML's ``null``, Python's ``True`` is YAML's +``true``, and Python's ``False`` is YAML's ``false``. 
+ ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| Option | Description | Type | Default value | ++===============================+========================================+=============================+========================================+ +| ``auxiliary_data_dir`` | Directory where auxiliary data is | :obj:`str` | ``~/auxiliary_data`` | +| | stored [#f1]_ | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``check_level`` | Sensitivity of the CMOR check | :obj:`str` | ``default`` | +| | (``debug``, ``strict``, ``default`` | | | +| | ``relaxed``, ``ignore``), see | | | +| | :ref:`cmor_check_strictness` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``compress_netcdf`` | Use netCDF compression | :obj:`bool` | ``False`` | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``config_developer_file`` | Path to custom | :obj:`str` | ``None`` (default file) | +| | :ref:`config-developer` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``diagnostics`` | Only run the selected diagnostics from | :obj:`list` or :obj:`str` | ``None`` (all diagnostics) | +| | the recipe, see :ref:`running` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``download_dir`` | Directory where downloaded data will | :obj:`str` | ``~/climate_data`` | +| | be stored [#f4]_ | | | 
++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``drs`` | Directory structure for input data | :obj:`dict` | ``{CMIP3: ESGF, CMIP5: ESGF, CMIP6: | +| | [#f2]_ | | ESGF, CORDEX: ESGF, obs4MIPs: ESGF}`` | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``exit_on_warning`` | Exit on warning (only used in NCL | :obj:`bool` | ``False`` | +| | diagnostic scripts) | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``extra_facets_dir`` | Additional custom directory for | :obj:`list` of :obj:`str` | ``[]`` | +| | :ref:`extra_facets` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``log_level`` | Log level of the console (``debug``, | :obj:`str` | ``info`` | +| | ``info``, ``warning``, ``error``) | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``max_datasets`` | Maximum number of datasets to use, see | :obj:`int` | ``None`` (all datasets from recipe) | +| | :ref:`running` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``max_parallel_tasks`` | Maximum number of parallel processes, | :obj:`int` | ``None`` (number of available CPUs) | +| | see also :ref:`task_priority` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``max_years`` | Maximum number of years to use, see | :obj:`int` | ``None`` (all years from recipe) | +| | :ref:`running` | | | 
++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``output_dir`` | Directory where all output will be | :obj:`str` | ``~/esmvaltool_output`` | +| | written, see :ref:`outputdata` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``output_file_type`` | Plot file type | :obj:`str` | ``png`` | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``profile_diagnostic`` | Use a profiling tool for the | :obj:`bool` | ``False`` | +| | diagnostic run [#f3]_ | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``remove_preproc_dir`` | Remove the ``preproc`` directory if | :obj:`bool` | ``True`` | +| | the run was successful, see also | | | +| | :ref:`preprocessed_datasets` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``resume_from`` | Resume previous run(s) by using | :obj:`list` of :obj:`str` | ``[]`` | +| | preprocessor output files from these | | | +| | output directories, see :ref:`running` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``rootpath`` | Rootpaths to the data from different | :obj:`dict` | ``{default: ~/climate_data}`` | +| | projects [#f2]_ | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``run_diagnostic`` | Run diagnostic scripts, see | :obj:`bool` | ``True`` | +| | :ref:`running` | | | 
++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``save_intermediary_cubes`` | Save intermediary cubes from the | :obj:`bool` | ``False`` | +| | preprocessor, see also | | | +| | :ref:`preprocessed_datasets` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``search_esgf`` | Automatic data download from ESGF | :obj:`str` | ``never`` | +| | (``never``, ``when_missing``, | | | +| | ``always``) [#f4]_ | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ +| ``skip_nonexistent`` | Skip non-existent datasets, see | :obj:`bool` | ``False`` | +| | :ref:`running` | | | ++-------------------------------+----------------------------------------+-----------------------------+----------------------------------------+ + +.. [#f1] The ``auxiliary_data_dir`` setting is the path to place any required + additional auxiliary data files. + This is necessary because certain Python toolkits, such as cartopy, will + attempt to download data files at run time, typically geographic data files + such as coastlines or land surface maps. + This can fail if the machine does not have access to the wider internet. + This location allows the user to specify where to find such files if they + can not be downloaded at runtime. + The example configuration file already contains two valid locations for + ``auxiliary_data_dir`` directories on CEDA-JASMIN and DKRZ, and a number of + such maps and shapefiles (used by current diagnostics) are already there. + You will need ``esmeval`` group workspace membership to access the JASMIN + one (see `instructions + `_ + how to gain access to the group workspace). + + .. 
warning:: + + This setting is not for model or observational datasets, rather it is + for extra data files such as shapefiles or other data sources needed by + the diagnostics. +.. [#f2] A detailed explanation of the data finding-related options ``drs`` + and ``rootpath`` is presented in the :ref:`data-retrieval` section. + These sections relate directly to the data finding capabilities of + ESMValCore and are very important to be understood by the user. +.. [#f3] The ``profile_diagnostic`` setting triggers profiling of Python + diagnostics, this will tell you which functions in the diagnostic took most + time to run. + For this purpose we use `vprof `_. + For each diagnostic script in the recipe, the profiler writes a ``.json`` + file that can be used to plot a `flame graph + `__ of the profiling + information by running + + .. code-block:: bash + + vprof --input-file esmvaltool_output/recipe_output/run/diagnostic/script/profile.json + + Note that it is also possible to use vprof to understand other resources + used while running the diagnostic, including execution time of different + code blocks and memory usage. +.. [#f4] The ``search_esgf`` setting can be used to disable or enable automatic + downloads from ESGF. + If ``search_esgf`` is set to ``never``, the tool does not download any data + from the ESGF. + If ``search_esgf`` is set to ``when_missing``, the tool will download any + CMIP3, CMIP5, CMIP6, CORDEX, and obs4MIPs data that is required to run a + recipe but not available locally and store it in ``download_dir`` using the + ``ESGF`` directory structure defined in the :ref:`config-developer`. + If ``search_esgf`` is set to ``always``, the tool will first check the ESGF + for the needed data, regardless of any local data availability; if the data + found on ESGF is newer than the local data (if any) or the user specifies a + version of the data that is available only from the ESGF, then that data + will be downloaded; otherwise, local data will be used. 
- You can choose your ``config-user.yml`` file at run time, so you could have several of - them available with different purposes. One for a formalised run, another for - debugging, etc. You can even provide any config user value as a run flag - ``--argument_name argument_value`` .. _config-dask: @@ -397,7 +467,7 @@ Configuring Dask for debugging For debugging purposes, it can be useful to disable all parallelism, as this will often result in more clear error messages. This can be achieved by -settings ``max_parallel_tasks: 1`` in config-user.yml, +setting ``max_parallel_tasks: 1`` in the configuration, commenting out or removing all content of ``~/.esmvaltool/dask.yml``, and creating a file called ``~/.config/dask/dask.yml`` with the following content: @@ -419,12 +489,10 @@ ESGF configuration The ``esmvaltool run`` command can automatically download the files required to run a recipe from ESGF for the projects CMIP3, CMIP5, CMIP6, CORDEX, and obs4MIPs. -The downloaded files will be stored in the ``download_dir`` specified in the -:ref:`user configuration file`. -To enable automatic downloads from ESGF, set ``search_esgf: when_missing`` or -``search_esgf: always`` in the :ref:`user configuration file`, or provide the -corresponding command line arguments ``--search_esgf=when_missing`` or -``--search_esgf=always`` when running the recipe. +The downloaded files will be stored in the directory specified via the +:ref:`configuration option ` ``download_dir``. +To enable automatic downloads from ESGF, use the :ref:`configuration options +` ``search_esgf: when_missing`` or ``search_esgf: always``. .. note:: @@ -534,22 +602,27 @@ out by CMOR and DRS. For a detailed description of these standards and their adoption in ESMValCore, we refer the user to :ref:`CMOR-DRS` section where we relate these standards to the data retrieval mechanism of the ESMValCore. -By default, esmvaltool looks for it in the home directory, -inside the '.esmvaltool' folder. 
- Users can get a copy of this file with default values by running .. code-block:: bash - esmvaltool config get-config-developer --path=${TARGET_FOLDER} + esmvaltool config get_config_developer --path=${TARGET_FOLDER} If the option ``--path`` is omitted, the file will be created in -```${HOME}/.esmvaltool``. +``~/.esmvaltool``. .. note:: - Remember to change your config-user file if you want to use a custom - config-developer. + Remember to change the configuration option ``config_developer_file`` if you + want to use a custom config developer file. + +.. warning:: + + For now, make sure that the custom ``config-developer.yml`` is **not** saved + in the ESMValTool/Core configuration directories (see + :ref:`config_yaml_files` for details). + This will change in the future due to the :ref:`redesign of ESMValTool/Core's + configuration `. Example of the CMIP6 project configuration: @@ -894,16 +967,15 @@ to support a particular use-case within the ESMValCore project, they will be provided in the sub-folder `extra_facets` inside the package :mod:`esmvalcore.config`. If they are used from the user side, they can be either placed in `~/.esmvaltool/extra_facets` or in any other directory of the users -choosing. In that case this directory must be added to the `config-user.yml` -file under the `extra_facets_dir` setting, which can take a single directory or -a list of directories. +choosing. In that case, the configuration option ``extra_facets_dir`` must be +set, which can take a single directory or a list of directories. The order in which the directories are searched is 1. The internal directory `esmvalcore.config/extra_facets` 2. The default user directory `~/.esmvaltool/extra_facets` -3. The custom user directories in the order in which they are given in - `config-user.yml`. +3. 
The custom user directories given by the configuration option + ``extra_facets_dir`` The extra facets files within each of these directories are processed in lexicographical order according to their file name. diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index e9077884f2..b7708fd95f 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -8,7 +8,7 @@ Overview ======== Data discovery and retrieval is the first step in any evaluation process; ESMValCore uses a `semi-automated` data finding mechanism with inputs from both -the user configuration file and the recipe file: this means that the user will +the configuration and the recipe file: this means that the user will have to provide the tool with a set of parameters related to the data needed and once these parameters have been provided, the tool will automatically find the right data. We will detail below the data finding and retrieval process and @@ -105,8 +105,8 @@ Supported native reanalysis/observational datasets The following native reanalysis/observational datasets are supported under the ``native6`` project. To use these datasets, put the files containing the data in the directory that -you have configured for the ``native6`` project in your :ref:`user -configuration file`, in a subdirectory called +you have :ref:`configured ` for the ``rootpath`` of the +``native6`` project, in a subdirectory called ``Tier{tier}/{dataset}/{version}/{frequency}/{short_name}``. Replace the items in curly braces by the values used in the variable/dataset definition in the :ref:`recipe `. @@ -183,7 +183,7 @@ The default naming conventions for input directories and files for CESM are * input files: ``{case}.{scomp}.{type}.{string}*nc`` as configured in the :ref:`config-developer file ` (using the -default DRS ``drs: default`` in the :ref:`user configuration file`). +:ref:`configuration option ` ``drs: default``). 
More information about CESM naming conventions are given `here `__. @@ -262,7 +262,7 @@ The default naming conventions for input directories and files for EMAC are * input files: ``{exp}*{channel}{postproc_flag}.nc`` as configured in the :ref:`config-developer file ` (using the -default DRS ``drs: default`` in the :ref:`user configuration file`). +:ref:`configuration option ` ``drs: default``). Thus, example dataset entries could look like this: @@ -335,7 +335,7 @@ The default naming conventions for input directories and files for ICON are * input files: ``{exp}_{var_type}*.nc`` as configured in the :ref:`config-developer file ` (using the -default DRS ``drs: default`` in the :ref:`user configuration file`). +:ref:`configuration option ` ``drs: default``). Thus, example dataset entries could look like this: @@ -383,11 +383,10 @@ is always disabled. Usually, ESMValCore will need the corresponding ICON grid file of your simulation to work properly (examples: setting latitude/longitude coordinates if these are not yet present, UGRIDization [see below], etc.). -This grid file can either be specified as absolute or relative (to -``auxiliary_data_dir`` as defined in the :ref:`user configuration file`) path -with the facet ``horizontal_grid`` in the recipe or the extra facets (see -below), or retrieved automatically from the `grid_file_uri` attribute of the -input files. +This grid file can either be specified as absolute or relative (to the +:ref:`configuration option ` ``auxiliary_data_dir``) path with +the facet ``horizontal_grid`` in the recipe or the extra facets (see below), or +retrieved automatically from the `grid_file_uri` attribute of the input files. In the latter case, ESMValCore first searches the input directories specified for ICON for a grid file with that name, and if that was not successful, tries to download the file and cache it. 
@@ -417,8 +416,8 @@ If neither of these variables are available in the input files, it is possible to specify the location of files that include the corresponding `zg` or `zghalf` variables with the facets ``zg_file`` and/or ``zghalf_file`` in the recipe or the extra facets. -The paths to these files can be specified absolute or relative (to -``auxiliary_data_dir`` as defined in the :ref:`user configuration file`). +The paths to these files can be specified absolute or relative (to the +:ref:`configuration option ` ``auxiliary_data_dir``). .. hint:: @@ -453,10 +452,8 @@ Supported keys for extra facets are: Key Description Default value if not specified =================== ================================ =================================== ``horizontal_grid`` Absolute or relative (to If not given, use file attribute - ``auxiliary_data_dir`` defined ``grid_file_uri`` to retrieve ICON - in the grid file (see details above) - :ref:`user configuration file`) - path to the ICON grid file + ``auxiliary_data_dir``) ``grid_file_uri`` to retrieve ICON + path to the ICON grid file grid file (see details above) ``latitude`` Standard name of the latitude ``latitude`` coordinate in the raw input file @@ -479,17 +476,13 @@ Key Description Default value if not specif variable in the raw input in extra facets or recipe if file default DRS is used) ``zg_file`` Absolute or relative (to If possible, use `zg` variable - ``auxiliary_data_dir`` defined provided by the raw input file - in the - :ref:`user configuration file`) - path to the input file that - contains `zg` + ``auxiliary_data_dir``) path to provided by the raw input file + the input file that contains + `zg` ``zghalf_file`` Absolute or relative (to If possible, use `zghalf` variable - ``auxiliary_data_dir`` defined provided by the raw input file - in the - :ref:`user configuration file`) - path to the input file that - contains `zghalf` + ``auxiliary_data_dir``) path to provided by the raw input file + the input
file that contains + `zghalf` =================== ================================ =================================== .. hint:: @@ -630,20 +623,18 @@ retrieval parameters is explained below. Enabling automatic downloads from the ESGF ------------------------------------------ -To enable automatic downloads from ESGF, set ``search_esgf: when_missing`` (use -local files whenever possible) or ``search_esgf: always`` (always search ESGF -for latest version of files and only use local data if it is the latest -version) in the :ref:`user configuration file`, or provide the corresponding -command line arguments ``--search_esgf=when_missing`` or -``--search_esgf=always`` when running the recipe. -The files will be stored in the ``download_dir`` set in -the :ref:`user configuration file`. +To enable automatic downloads from ESGF, use the :ref:`configuration option +` ``search_esgf: when_missing`` (use local files +whenever possible) or ``search_esgf: always`` (always search ESGF for latest +version of files and only use local data if it is the latest version). +The files will be stored in the directory specified via the :ref:`configuration +option ` ``download_dir``. Setting the correct root paths ------------------------------ The first step towards providing ESMValCore the correct set of parameters for -data retrieval is setting the root paths to the data. This is done in the user -configuration file ``config-user.yml``. The two sections where the user will +data retrieval is setting the root paths to the data. This is done in the +configuration. The two sections where the user will set the paths are ``rootpath`` and ``drs``. ``rootpath`` contains pointers to ``CMIP``, ``OBS``, ``default`` and ``RAWOBS`` root paths; ``drs`` sets the type of directory structure the root paths are structured by. 
It is important to @@ -651,10 +642,8 @@ first discuss the ``drs`` parameter: as we've seen in the previous section, the DRS as a standard is used for both file naming conventions and for directory structures. -.. _config-user-drs: - -Explaining ``config-user/drs: CMIP5:`` or ``config-user/drs: CMIP6:`` ---------------------------------------------------------------------- +Explaining ``drs: CMIP5:`` or ``drs: CMIP6:`` +--------------------------------------------- Whereas ESMValCore will by default use the CMOR standard for file naming (please refer above), by setting the ``drs`` parameter the user tells the tool what type of root paths they need the data from, e.g.: @@ -697,10 +686,10 @@ The names of the directories trees that can be used under `drs` are defined in versions of the same file because the files typically have the same name for different versions. -.. _config-user-rootpath: +.. _config_option_rootpath: -Explaining ``config-user/rootpath:`` ------------------------------------- +Explaining ``rootpath:`` +------------------------ ``rootpath`` identifies the root directory for different data types (``ROOT`` as we used it above): @@ -786,7 +775,7 @@ The data finding feature will use this information to find data for **all** the Recap and example ================= Let us look at a practical example for a recap of the information above: -suppose you are using a ``config-user.yml`` that has the following entries for +suppose you are using a configuration that has the following entries for data finding: .. code-block:: yaml diff --git a/doc/quickstart/install.rst b/doc/quickstart/install.rst index 0a821a0df9..c190f35e1e 100644 --- a/doc/quickstart/install.rst +++ b/doc/quickstart/install.rst @@ -103,10 +103,10 @@ For example, the following command would run a recipe ..
code-block:: bash - docker run -e HOME -v "$HOME":"$HOME" -v /data:/data esmvalgroup/esmvalcore:stable -c ~/config-user.yml ~/recipes/recipe_example.yml + docker run -e HOME -v "$HOME":"$HOME" -v /data:/data esmvalgroup/esmvalcore:stable ~/recipes/recipe_example.yml with the environmental variable ``$HOME`` available inside the container and the data -in the directories ``$HOME`` and ``/data``, so these can be used to find the configuration file, recipe, and data. +in the directories ``$HOME`` and ``/data``, so these can be used to find the configuration, recipe, and data. It might be useful to define a `bash alias `_ @@ -131,7 +131,7 @@ following command .. code-block:: bash - singularity run docker://esmvalgroup/esmvalcore:stable -c ~/config-user.yml ~/recipes/recipe_example.yml + singularity run docker://esmvalgroup/esmvalcore:stable ~/recipes/recipe_example.yml Note that the container does not see the data available in the host by default. You can make host data available with ``-B /path:/path/in/container``. @@ -158,7 +158,7 @@ To run the container using the image file ``esmvalcore.sif`` use: .. code-block:: bash - singularity run esmvalcore.sif -c ~/config-user.yml ~/recipes/recipe_example.yml + singularity run esmvalcore.sif ~/recipes/recipe_example.yml .. _installation-from-source: diff --git a/doc/quickstart/output.rst b/doc/quickstart/output.rst index c30e59c046..2698456c6b 100644 --- a/doc/quickstart/output.rst +++ b/doc/quickstart/output.rst @@ -3,9 +3,10 @@ Output ****** -ESMValTool automatically generates a new output directory with every run. The -location is determined by the output_dir option in the config-user.yml file, -the recipe name, and the date and time, using the the format: ``YYYYMMDD_HHMMSS``. +ESMValTool automatically generates a new output directory with every run. +The location is determined by the ``output_dir`` :ref:`configuration option + `, the recipe name, and the date and time, using the +format: ``YYYYMMDD_HHMMSS``.
For instance, a typical output location would be: ``output_directory/recipe_ocean_amoc_20190118_1027/`` @@ -27,6 +28,8 @@ A summary of the output is produced in the file: ``index.html`` +.. _preprocessed_datasets: + Preprocessed datasets ===================== @@ -34,13 +37,13 @@ The preprocessed datasets will be stored to the preproc/ directory. Each variable in each diagnostic will have its own the `metadata.yml`_ interface files saved in the preproc directory. -If the option ``save_intermediary_cubes`` is set to ``true`` in the -config-user.yml file, then the intermediary cubes will also be saved here. -This option is set to false in the default ``config-user.yml`` file. +If the :ref:`configuration option ` ``save_intermediary_cubes`` +is set to ``true``, then the intermediary cubes will also be saved here +(default: ``false``). -If the option ``remove_preproc_dir`` is set to ``true`` in the config-user.yml -file, then the preproc directory will be deleted after the run completes. This -option is set to true in the default ``config-user.yml`` file. +If the :ref:`configuration option ` ``remove_preproc_dir`` is +set to ``true``, then the preproc directory will be deleted after the run +completes (default: ``true``). Run @@ -70,9 +73,9 @@ the results should be saved to the work directory. Plots ===== -The plots directory is where diagnostics save their output figures. These -plots are saved in the format requested by the option `output_file_type` in the -config-user.yml file. +The plots directory is where diagnostics save their output figures. These +plots are saved in the format requested by the :ref:`configuration option +` ``output_file_type``. Settings.yml @@ -82,10 +85,10 @@ The settings.yml file is automatically generated by ESMValTool. Each diagnostic will produce a unique settings.yml file. The settings.yml file passes several global level keys to diagnostic scripts. 
-This includes several flags from the config-user.yml file (such as -'log_level'), several paths which are specific to the -diagnostic being run (such as 'plot_dir' and 'run_dir') and the location on -disk of the metadata.yml file (described below). +This includes several flags from the global configuration (such as +``log_level``), several paths which are specific to the +diagnostic being run (such as ``plot_dir`` and ``run_dir``) and the location on +disk of the ``metadata.yml`` file (described below). .. code-block:: yaml @@ -113,7 +116,7 @@ The metadata.yml files is automatically generated by ESMValTool. Along with the settings.yml file, it passes all the paths, boolean flags, and additional arguments that your diagnostic needs to know in order to run. -The metadata is loaded from cfg as a dictionairy object in python diagnostics. +The metadata is loaded from cfg as a dictionary object in python diagnostics. Here is an example metadata.yml file: diff --git a/doc/quickstart/run.rst b/doc/quickstart/run.rst index fec474f290..61709bc778 100644 --- a/doc/quickstart/run.rst +++ b/doc/quickstart/run.rst @@ -46,24 +46,27 @@ and run that. To work with installed recipes, the ESMValTool package provides the ``esmvaltool recipes`` command, see :ref:`esmvaltool:recipes_command`. -If the configuration file is not in the default location -``~/.esmvaltool/config-user.yml``, you can pass its path explicitly: +By default, ESMValTool searches for :ref:`configuration files +` in ``~/.config/esmvaltool``. +If you'd like to use a custom location, you can specify this via the +``--config_dir`` command line argument: .. code:: bash - esmvaltool run --config_file /path/to/config-user.yml recipe_example.yml + esmvaltool run --config_dir /path/to/custom_config recipe_example.yml -It is also possible to explicitly change values from the config file using flags: +It is also possible to explicitly set configuration options with command line +arguments: .. 
code:: bash esmvaltool run --argument_name argument_value recipe_example.yml -To automatically download the files required to run a recipe from ESGF, set -``search_esgf`` to ``when_missing`` (use local files whenever possible) or -``always`` (always search ESGF for latest version of files and only use local -data if it is the latest version) in the :ref:`user configuration file` or run -the tool with the corresponding commands +To automatically download the files required to run a recipe from ESGF, use the +:ref:`configuration option ` ``search_esgf=when_missing`` (use +local files whenever possible) or ``search_esgf=always`` (always search ESGF +for latest version of files and only use local data if it is the latest +version): .. code:: bash @@ -123,7 +126,7 @@ To run only the preprocessor tasks from a recipe, use .. note:: Only preprocessing :ref:`tasks ` that completed successfully - can be re-used with the ``--resume_from`` option. + can be reused with the ``--resume_from`` option. Preprocessing tasks that completed successfully, contain a file called :ref:`metadata.yml ` in their output directory. diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index e0d63dc06b..dd0f5f643c 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -3,8 +3,8 @@ Overview ******** -After ``config-user.yml``, the ``recipe.yml`` is the second file the user needs -to pass to ``esmvaltool`` as command line option, at each run time point. +The recipe is the main control file of ESMValTool. +It is the only required argument for the ``esmvaltool`` command line program. Recipes contain the data and data analysis information and instructions needed to run the diagnostic(s), as well as specific diagnostic-related instructions. @@ -130,9 +130,9 @@ See :ref:`CMOR-DRS` for more information on this kind of file organization. When (some) files are available locally, the tool will not automatically look for more files on ESGF. 
-To populate a recipe with all available datasets from ESGF, ``search_esgf`` -should be set to ``always`` in the :ref:`user configuration file`. +To populate a recipe with all available datasets from ESGF, the +:ref:`configuration option ` ``search_esgf`` should be set to +``always``. For more control over which datasets are selected, it is recommended to use a Python script or `Jupyter notebook `_ to compose @@ -544,11 +544,14 @@ script will receive the preprocessed air temperature data script will receive the results of diagnostic_a.py and the preprocessed precipitation data (has ancestors ``diagnostic_1/script_a`` and ``diagnostic_2/precip``). +.. _task_priority: + Task priority ------------- Tasks are assigned a priority, with tasks appearing earlier on in the recipe getting higher priority. The tasks will be executed sequentially or in parallel, -depending on the setting of ``max_parallel_tasks`` in the :ref:`user configuration file`. +depending on the :ref:`configuration option ` +``max_parallel_tasks``. When there are fewer than ``max_parallel_tasks`` running, tasks will be started according to their priority. For obvious reasons, only tasks that are not waiting for ancestor tasks can be started. This feature makes it possible to diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index ddd9d2b472..a02bb4a566 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -611,7 +611,7 @@ See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. .. _masking: Masking -======== +======= Introduction to masking ----------------------- @@ -1918,9 +1918,10 @@ Parameters: region to be extracted. If the file contains multiple shapes behaviour depends on the ``decomposed`` parameter. - This path can be relative to ``auxiliary_data_dir`` defined in the - :ref:`user configuration file` or relative to - ``esmvalcore/preprocessor/shapefiles`` (in that priority order). 
+ This path can be relative to the directory specified via the + :ref:`configuration option ` ``auxiliary_data_dir`` or + relative to ``esmvalcore/preprocessor/shapefiles`` (in that priority + order). Alternatively, a string (see "Shapefile name" below) can be given to load one of the following shapefiles that are shipped with ESMValCore: @@ -2422,7 +2423,7 @@ See also :func:`esmvalcore.preprocessor.linear_trend_stderr`. .. _detrend: Detrend -======== +======= ESMValCore also supports detrending along any dimension using the preprocessor function 'detrend'. diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 32b692e070..d0bd6fcf10 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -28,10 +28,13 @@ """ # pylint: disable=import-outside-toplevel +from __future__ import annotations + import logging import os import sys from pathlib import Path +from typing import Optional if (sys.version_info.major, sys.version_info.minor) < (3, 10): from importlib_metadata import entry_points @@ -119,7 +122,7 @@ def process_recipe(recipe_file: Path, session): ) logger.info( "If you experience memory problems, try reducing " - "'max_parallel_tasks' in your user configuration file." + "'max_parallel_tasks' in your configuration." ) check_distributed_config() @@ -159,64 +162,94 @@ class Config: """ @staticmethod - def _copy_config_file(filename, overwrite, path): + def _copy_config_file( + in_file: Path, + out_file: Path, + overwrite: bool, + ): + """Copy a configuration file.""" import shutil from .config._logging import configure_logging configure_logging(console_log_level="info") - if not path: - path = os.path.join(os.path.expanduser("~/.esmvaltool"), filename) - if os.path.isfile(path): + + if out_file.is_file(): if overwrite: - logger.info("Overwriting file %s.", path) + logger.info("Overwriting file %s.", out_file) else: - logger.info("Copy aborted. File %s already exists.", path) + logger.info("Copy aborted. 
File %s already exists.", out_file) return - target_folder = os.path.dirname(path) - if not os.path.isdir(target_folder): + target_folder = out_file.parent + if not target_folder.is_dir(): logger.info("Creating folder %s", target_folder) - os.makedirs(target_folder) + target_folder.mkdir(parents=True, exist_ok=True) - conf_file = os.path.join(os.path.dirname(__file__), filename) - logger.info("Copying file %s to path %s.", conf_file, path) - shutil.copy2(conf_file, path) + logger.info("Copying file %s to path %s.", in_file, out_file) + shutil.copy2(in_file, out_file) logger.info("Copy finished.") @classmethod - def get_config_user(cls, overwrite=False, path=None): - """Copy default config-user.yml file to a given path. + def get_config_user( + cls, + overwrite: bool = False, + path: Optional[str | Path] = None, + ) -> None: + """Copy default configuration to a given path. - Copy default config-user.yml file to a given path or, if a path is - not provided, install it in the default `${HOME}/.esmvaltool` folder. + Copy default configuration to a given path or, if a `path` is not + provided, install it in the default `~/.config/esmvaltool/` directory. Parameters ---------- - overwrite: boolean + overwrite: Overwrite an existing file. - path: str + path: If not provided, the file will be copied to - .esmvaltool in the user's home. + `~/.config/esmvaltool/`. 
+ """ - cls._copy_config_file("config-user.yml", overwrite, path) + from .config._config_object import DEFAULT_CONFIG_DIR + + in_file = DEFAULT_CONFIG_DIR / "config-user.yml" + if path is None: + out_file = ( + Path.home() / ".config" / "esmvaltool" / "config-user.yml" + ) + else: + out_file = Path(path) + if not out_file.suffix: # out_file looks like a directory + out_file = out_file / "config-user.yml" + cls._copy_config_file(in_file, out_file, overwrite) @classmethod - def get_config_developer(cls, overwrite=False, path=None): + def get_config_developer( + cls, + overwrite: bool = False, + path: Optional[str | Path] = None, + ) -> None: """Copy default config-developer.yml file to a given path. Copy default config-developer.yml file to a given path or, if a path is - not provided, install it in the default `${HOME}/.esmvaltool` folder. + not provided, install it in the default `~/.esmvaltool` folder. Parameters ---------- overwrite: boolean Overwrite an existing file. path: str - If not provided, the file will be copied to - .esmvaltool in the user's home. + If not provided, the file will be copied to `~/.esmvaltool`. + """ - cls._copy_config_file("config-developer.yml", overwrite, path) + in_file = Path(__file__).parent / "config-developer.yml" + if path is None: + out_file = Path.home() / ".esmvaltool" / "config-developer.yml" + else: + out_file = Path(path) + if not out_file.suffix: # out_file looks like a directory + out_file = out_file / "config-developer.yml" + cls._copy_config_file(in_file, out_file, overwrite) class Recipes: @@ -358,91 +391,75 @@ def version(self): for project, version in self._extra_packages.items(): print(f"{project}: {version}") - def run( - self, - recipe, - config_file=None, - resume_from=None, - max_datasets=None, - max_years=None, - skip_nonexistent=None, - search_esgf=None, - diagnostics=None, - check_level=None, - **kwargs, - ): + def run(self, recipe, **kwargs): """Execute an ESMValTool recipe. 
`esmvaltool run` executes the given recipe. To see a list of available recipes or create a local copy of any of them, use the `esmvaltool recipes` command group. - Parameters - ---------- - recipe : str - Recipe to run, as either the name of an installed recipe or the - path to a non-installed one. - config_file: str, optional - Configuration file to use. Can be given as absolute or relative - path. In the latter case, search in the current working directory - and `${HOME}/.esmvaltool` (in that order). If not provided, the - file `${HOME}/.esmvaltool/config-user.yml` will be used. - resume_from: list(str), optional - Resume one or more previous runs by using preprocessor output files - from these output directories. - max_datasets: int, optional - Maximum number of datasets to use. - max_years: int, optional - Maximum number of years to use. - skip_nonexistent: bool, optional - If True, the run will not fail if some datasets are not available. - search_esgf: str, optional - If `never`, disable automatic download of data from the ESGF. If - `when_missing`, enable the automatic download of files that are not - available locally. If `always`, always check ESGF for the latest - version of a file, and only use local files if they correspond to - that latest version. - diagnostics: list(str), optional - Only run the selected diagnostics from the recipe. To provide more - than one diagnostic to filter use the syntax 'diag1 diag2/script1' - or '("diag1", "diag2/script1")' and pay attention to the quotes. - check_level: str, optional - Configure the sensitivity of the CMOR check. Possible values are: - `ignore` (all errors will be reported as warnings), - `relaxed` (only fail if there are critical errors), - default (fail if there are any errors), - strict (fail if there are any warnings). 
+ A list of possible flags is given here: + https://docs.esmvaltool.org/projects/ESMValCore/en/latest/quickstart/configure.html#configuration-options + """ from .config import CFG + from .config._config_object import _get_all_config_dirs + from .exceptions import InvalidConfigParameter + + cli_config_dir = kwargs.pop("config_dir", None) + if cli_config_dir is not None: + cli_config_dir = Path(cli_config_dir).expanduser().absolute() + if not cli_config_dir.is_dir(): + raise NotADirectoryError( + f"Invalid --config_dir given: {cli_config_dir} is not an " + f"existing directory" + ) + # TODO: remove in v2.14.0 # At this point, --config_file is already parsed if a valid file has # been given (see # https://github.com/ESMValGroup/ESMValCore/issues/2280), but no error # has been raised if the file does not exist. Thus, reload the file # here with `load_from_file` to make sure a proper error is raised. - CFG.load_from_file(config_file) + if "config_file" in kwargs: + cli_config_dir = kwargs["config_file"] + CFG.load_from_file(kwargs["config_file"]) + + # New in v2.12.0: read additional configuration directory given by CLI + # argument + if CFG.get("config_file") is None: # remove in v2.14.0 + config_dirs = _get_all_config_dirs(cli_config_dir) + try: + CFG.load_from_dirs(config_dirs) + + # Potential errors must come from --config_dir (i.e., + # cli_config_dir) since other sources have already been read (and + # validated) when importing the module with `from .config import + # CFG` + except InvalidConfigParameter as exc: + raise InvalidConfigParameter( + f"Failed to parse configuration directory " + f"{cli_config_dir} (command line argument): " + f"{str(exc)}" + ) from exc recipe = self._get_recipe(recipe) session = CFG.start_session(recipe.stem) - if check_level is not None: - session["check_level"] = check_level - if diagnostics is not None: - session["diagnostics"] = diagnostics - if max_datasets is not None: - session["max_datasets"] = max_datasets - if max_years is 
not None: - session["max_years"] = max_years - if search_esgf is not None: - session["search_esgf"] = search_esgf - if skip_nonexistent is not None: - session["skip_nonexistent"] = skip_nonexistent - session["resume_from"] = parse_resume(resume_from, recipe) session.update(kwargs) + session["resume_from"] = parse_resume(session["resume_from"], recipe) + + self._run(recipe, session, cli_config_dir) + + # Print warnings about deprecated configuration options again + # TODO: remove in v2.14.0 + if CFG.get("config_file") is not None: + CFG.reload() - self._run(recipe, session) - # Print warnings about deprecated configuration options again: - CFG.reload() + # New in v2.12.0 + else: + config_dirs = _get_all_config_dirs(cli_config_dir) # remove v2.14 + CFG.load_from_dirs(config_dirs) @staticmethod def _create_session_dir(session): @@ -464,7 +481,12 @@ def _create_session_dir(session): " unable to find alternative, aborting to prevent data loss." ) - def _run(self, recipe: Path, session) -> None: + def _run( + self, + recipe: Path, + session, + cli_config_dir: Optional[Path], + ) -> None: """Run `recipe` using `session`.""" self._create_session_dir(session) session.run_dir.mkdir() @@ -475,7 +497,7 @@ def _run(self, recipe: Path, session) -> None: log_files = configure_logging( output_dir=session.run_dir, console_log_level=session["log_level"] ) - self._log_header(session["config_file"], log_files) + self._log_header(log_files, cli_config_dir) # configure resource logger and run program from ._task import resource_usage_logger @@ -509,7 +531,7 @@ def _clean_preproc(session): logger.debug( "If this data is further needed, then set " "`save_intermediary_cubes` to `true` and `remove_preproc_dir` " - "to `false` in your user configuration file" + "to `false` in your configuration" ) shutil.rmtree(session._fixed_file_dir) @@ -519,8 +541,7 @@ def _clean_preproc(session): ) logger.info( "If this data is further needed, then set " - "`remove_preproc_dir` to `false` in your user 
configuration " - "file" + "`remove_preproc_dir` to `false` in your configuration" ) shutil.rmtree(session.preproc_dir) @@ -535,7 +556,41 @@ def _get_recipe(recipe) -> Path: recipe = Path(os.path.expandvars(recipe)).expanduser().absolute() return recipe - def _log_header(self, config_file, log_files): + @staticmethod + def _get_config_info(cli_config_dir): + """Get information about config files for logging.""" + from .config import CFG + from .config._config_object import ( + DEFAULT_CONFIG_DIR, + _get_all_config_dirs, + _get_all_config_sources, + ) + + # TODO: remove in v2.14.0 + if CFG.get("config_file") is not None: + config_info = [ + (DEFAULT_CONFIG_DIR, "defaults"), + (CFG["config_file"], "single configuration file [deprecated]"), + ] + + # New in v2.12.0 + else: + config_dirs = [] + for path in _get_all_config_dirs(cli_config_dir): + if not path.is_dir(): + config_dirs.append(f"{path} [NOT AN EXISTING DIRECTORY]") + else: + config_dirs.append(str(path)) + config_info = list( + zip( + config_dirs, + _get_all_config_sources(cli_config_dir), + ) + ) + + return "\n".join(f"{i[0]} ({i[1]})" for i in config_info) + + def _log_header(self, log_files, cli_config_dir): from . 
import __version__ logger.info(HEADER) @@ -545,7 +600,10 @@ def _log_header(self, config_file, log_files): for project, version in self._extra_packages.items(): logger.info("%s: %s", project, version) logger.info("----------------") - logger.info("Using config file %s", config_file) + logger.info( + "Reading configuration files from:\n%s", + self._get_config_info(cli_config_dir), + ) logger.info("Writing program log files to:\n%s", "\n".join(log_files)) diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index 06bb2fd1a4..41002bbc1b 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -785,23 +785,21 @@ def _log_recipe_errors(self, exc): isinstance(err, InputFilesNotFound) for err in exc.failed_tasks ): logger.error( - "Not all input files required to run the recipe could be" - " found." + "Not all input files required to run the recipe could be " + "found." ) logger.error( - "If the files are available locally, please check" - " your `rootpath` and `drs` settings in your user " - "configuration file %s", - self.session["config_file"], + "If the files are available locally, please check " + "your `rootpath` and `drs` settings in your configuration " + "file(s)" ) logger.error( "To automatically download the required files to " - "`download_dir: %s`, set `search_esgf: when_missing` or " - "`search_esgf: always` in %s, or run the recipe with the " - "extra command line argument --search_esgf=when_missing or " - "--search_esgf=always", + "`download_dir: %s`, use `search_esgf: when_missing` or " + "`search_esgf: always` in your configuration file(s), or run " + "the recipe with the command line argument " + "--search_esgf=when_missing or --search_esgf=always", self.session["download_dir"], - self.session["config_file"], ) logger.info( "Note that automatic download is only available for files" diff --git a/esmvalcore/cmor/_fixes/icon/_base_fixes.py b/esmvalcore/cmor/_fixes/icon/_base_fixes.py index be77c9d6c8..9c551ef4a0 
100644 --- a/esmvalcore/cmor/_fixes/icon/_base_fixes.py +++ b/esmvalcore/cmor/_fixes/icon/_base_fixes.py @@ -221,9 +221,8 @@ def add_additional_cubes(self, cubes): Note ---- - Files can be specified as absolute or relative (to - ``auxiliary_data_dir`` as defined in the :ref:`user configuration - file`) paths. + Files can be specified as absolute or relative (to the configuration + option ``auxiliary_data_dir``) paths. Parameters ---------- diff --git a/esmvalcore/config/__init__.py b/esmvalcore/config/__init__.py index f9a632b75c..5d23c6b0e2 100644 --- a/esmvalcore/config/__init__.py +++ b/esmvalcore/config/__init__.py @@ -2,11 +2,14 @@ .. data:: CFG - ESMValCore configuration. + Global ESMValCore configuration object of type + :class:`esmvalcore.config.Config`. - By default, this will be loaded from the file - ``~/.esmvaltool/config-user.yml``. If used within the ``esmvaltool`` - program, this will respect the ``--config_file`` argument. + By default, this will be loaded from YAML files in the user configuration + directory (by default ``~/.config/esmvaltool``, but this can be changed + with the ``ESMVALTOOL_CONFIG_DIR`` environment variable) similar to the way + `Dask handles configuration + `__. 
""" diff --git a/esmvalcore/config/_config.py b/esmvalcore/config/_config.py index 71617c625e..6df9e9bf52 100644 --- a/esmvalcore/config/_config.py +++ b/esmvalcore/config/_config.py @@ -1,4 +1,4 @@ -"""Functions dealing with config-user.yml / config-developer.yml.""" +"""Functions dealing with config-developer.yml and extra facets.""" from __future__ import annotations @@ -52,7 +52,8 @@ def _load_extra_facets(project, extra_facets_dir): def get_extra_facets(dataset, extra_facets_dir): - """Read configuration files with additional variable information.""" + """Read files with additional variable information ("extra facets").""" + extra_facets_dir = tuple(extra_facets_dir) project_details = _load_extra_facets( dataset.facets["project"], extra_facets_dir, diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py index dc78506215..dfe784ef58 100644 --- a/esmvalcore/config/_config_object.py +++ b/esmvalcore/config/_config_object.py @@ -4,29 +4,67 @@ import os import sys +import warnings +from collections.abc import Iterable from datetime import datetime from pathlib import Path -from types import MappingProxyType from typing import Optional +import dask.config import yaml import esmvalcore -from esmvalcore.cmor.check import CheckLevels -from esmvalcore.exceptions import InvalidConfigParameter - -from ._config_validators import ( +from esmvalcore.config._config_validators import ( _deprecated_options_defaults, _deprecators, _validators, ) -from ._validated_config import ValidatedConfig +from esmvalcore.config._validated_config import ValidatedConfig +from esmvalcore.exceptions import ( + ESMValCoreDeprecationWarning, + InvalidConfigParameter, +) URL = ( "https://docs.esmvaltool.org/projects/" "ESMValCore/en/latest/quickstart/configure.html" ) +# Configuration directory in which defaults are stored +DEFAULT_CONFIG_DIR = ( + Path(esmvalcore.__file__).parent / "config" / "configurations" / "defaults" +) + + +def _get_user_config_dir() -> Path: 
+ """Get user configuration directory.""" + if "ESMVALTOOL_CONFIG_DIR" in os.environ: + user_config_dir = ( + Path(os.environ["ESMVALTOOL_CONFIG_DIR"]).expanduser().absolute() + ) + if not user_config_dir.is_dir(): + raise NotADirectoryError( + f"Invalid configuration directory specified via " + f"ESMVALTOOL_CONFIG_DIR environment variable: " + f"{user_config_dir} is not an existing directory" + ) + return user_config_dir + return Path.home() / ".config" / "esmvaltool" + + +def _get_user_config_source() -> str: + """Get source of user configuration directory.""" + if "ESMVALTOOL_CONFIG_DIR" in os.environ: + return "ESMVALTOOL_CONFIG_DIR environment variable" + return "default user configuration directory" + + +# User configuration directory +USER_CONFIG_DIR = _get_user_config_dir() + +# Source of user configuration directory +USER_CONFIG_SOURCE = _get_user_config_source() + class Config(ValidatedConfig): """ESMValTool configuration object. @@ -36,6 +74,7 @@ class Config(ValidatedConfig): """ + # TODO: remove in v2.14.0 _DEFAULT_USER_CONFIG_DIR = Path.home() / ".esmvaltool" _validate = _validators @@ -46,6 +85,16 @@ class Config(ValidatedConfig): ("rootpath", URL), ) + def __init__(self, *args, **kwargs): + """Initialize class instance.""" + super().__init__(*args, **kwargs) + msg = ( + "Do not instantiate `Config` objects directly, this will lead " + "to unexpected behavior. Use `esmvalcore.config.CFG` instead." + ) + warnings.warn(msg, UserWarning) + + # TODO: remove in v2.14.0 @classmethod def _load_user_config( cls, @@ -69,8 +118,15 @@ def _load_user_config( configuration file is given (relevant if used within a script or notebook). 
""" - new = cls() - new.update(CFG_DEFAULT) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Do not instantiate `Config` objects directly", + category=UserWarning, + module="esmvalcore", + ) + new = cls() + new.update(Config._load_default_config()) config_user_path = cls._get_config_user_path(filename) @@ -93,31 +149,26 @@ def _load_user_config( return new + # TODO: remove in v2.14.0 @classmethod def _load_default_config(cls): """Load the default configuration.""" - new = cls() - - package_config_user_path = ( - Path(esmvalcore.__file__).parent / "config-user.yml" - ) - mapping = cls._read_config_file(package_config_user_path) - - # Add defaults that are not available in esmvalcore/config-user.yml - mapping["check_level"] = CheckLevels.DEFAULT - mapping["config_file"] = package_config_user_path - mapping["diagnostics"] = None - mapping["extra_facets_dir"] = tuple() - mapping["max_datasets"] = None - mapping["max_years"] = None - mapping["resume_from"] = [] - mapping["run_diagnostic"] = True - mapping["skip_nonexistent"] = False + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Do not instantiate `Config` objects directly", + category=UserWarning, + module="esmvalcore", + ) + new = cls() + paths = [DEFAULT_CONFIG_DIR] + mapping = dask.config.collect(paths=paths, env={}) new.update(mapping) return new + # TODO: remove in v2.14.0 @staticmethod def _read_config_file(config_user_path: Path) -> dict: """Read configuration file and store settings in a dictionary.""" @@ -131,6 +182,7 @@ def _read_config_file(config_user_path: Path) -> dict: return cfg + # TODO: remove in v2.14.0 @staticmethod def _get_config_user_path( filename: Optional[os.PathLike | str] = None, @@ -201,6 +253,7 @@ def _get_config_user_path( return config_user + # TODO: remove in v2.14.0 @staticmethod def _get_config_path_from_cli() -> None | str: """Try to get configuration path from CLI arguments. 
@@ -237,25 +290,126 @@ def _get_config_path_from_cli() -> None | str: return None + # TODO: remove in v2.14.0 def load_from_file( self, filename: Optional[os.PathLike | str] = None, ) -> None: - """Load user configuration from the given file.""" + """Load user configuration from the given file. + + .. deprecated:: 2.12.0 + This method has been deprecated in ESMValCore version 2.12.0 and is + scheduled for removal in version 2.14.0. Please use + `CFG.load_from_dirs()` instead. + + Parameters + ---------- + filename: + YAML file to load. + + """ + msg = ( + "The method `CFG.load_from_file()` has been deprecated in " + "ESMValCore version 2.12.0 and is scheduled for removal in " + "version 2.14.0. Please use `CFG.load_from_dirs()` instead." + ) + warnings.warn(msg, ESMValCoreDeprecationWarning) self.clear() self.update(Config._load_user_config(filename)) - def reload(self): - """Reload the config file.""" - if "config_file" not in self: - raise ValueError( - "Cannot reload configuration, option 'config_file' is " - "missing; make sure to only use the `CFG` object from the " - "`esmvalcore.config` module" + def load_from_dirs(self, dirs: Iterable[str | Path]) -> None: + """Load configuration object from directories. + + This searches for all YAML files within the given directories and + merges them together using :func:`dask.config.collect`. Nested objects + are properly considered; see :func:`dask.config.update` for details. + Values in the latter directories are preferred to those in the former. + + Options that are not explicitly specified via YAML files are set to the + :ref:`default values `. + + Note + ---- + Just like :func:`dask.config.collect`, this silently ignores + non-existing directories. + + Parameters + ---------- + dirs: + A list of directories to search for YAML configuration files. + + Raises + ------ + esmvalcore.exceptions.InvalidConfigParameter + Invalid configuration option given. 
+ + """ + dirs_str: list[str] = [] + + # Always consider default options; these have the lowest priority + dirs_str.append(str(DEFAULT_CONFIG_DIR)) + + for config_dir in dirs: + config_dir = Path(config_dir).expanduser().absolute() + dirs_str.append(str(config_dir)) + + new_config_dict = dask.config.collect(paths=dirs_str, env={}) + self.clear() + self.update(new_config_dict) + + self.check_missing() + + def reload(self) -> None: + """Reload the configuration object. + + This will read all YAML files in the user configuration directory (by + default ``~/.config/esmvaltool``, but this can be changed with the + ``ESMVALTOOL_CONFIG_DIR`` environment variable) and merges them + together using :func:`dask.config.collect`. Nested objects are properly + considered; see :func:`dask.config.update` for details. + + Options that are not explicitly specified via YAML files are set to the + :ref:`default values `. + + Note + ---- + If the user configuration directory does not exist, this will be + silently ignored. + + Raises + ------ + esmvalcore.exceptions.InvalidConfigParameter + Invalid configuration option given. + + """ + # TODO: remove in v2.14.0 + self.clear() + _deprecated_config_user_path = Config._get_config_user_path() + if _deprecated_config_user_path.is_file(): + deprecation_msg = ( + f"Usage of the single configuration file " + f"~/.esmvaltool/config-user.yml or specifying it via CLI " + f"argument `--config_file` has been deprecated in ESMValCore " + f"version 2.12.0 and is scheduled for removal in version " + f"2.14.0. Please run `mkdir -p ~/.config/esmvaltool && mv " + f"{_deprecated_config_user_path} ~/.config/esmvaltool` (or " + f"alternatively use a custom `--config_dir`) and omit " + f"`--config_file`." 
) - self.load_from_file(self["config_file"]) + warnings.warn(deprecation_msg, ESMValCoreDeprecationWarning) + self.update(Config._load_user_config(raise_exception=False)) + return - def start_session(self, name: str): + # New since v2.12.0 + try: + self.load_from_dirs([USER_CONFIG_DIR]) + except InvalidConfigParameter as exc: + raise InvalidConfigParameter( + f"Failed to parse configuration directory {USER_CONFIG_DIR} " + f"({USER_CONFIG_SOURCE}): {str(exc)}" + ) from exc + + def start_session(self, name: str) -> Session: """Start a new session from this configuration object. Parameters @@ -267,7 +421,15 @@ def start_session(self, name: str): ------- Session """ - return Session(config=self.copy(), name=name) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Do not instantiate `Session` objects directly", + category=UserWarning, + module="esmvalcore", + ) + session = Session(config=self.copy(), name=name) + return session class Session(ValidatedConfig): @@ -302,6 +464,12 @@ def __init__(self, config: dict, name: str = "session"): super().__init__(config) self.session_name: str | None = None self.set_session_name(name) + msg = ( + "Do not instantiate `Session` objects directly, this will lead " + "to unexpected behavior. Use " + "`esmvalcore.config.CFG.start_session` instead." + ) + warnings.warn(msg, UserWarning) def set_session_name(self, name: str = "session"): """Set the name for the session. @@ -337,9 +505,24 @@ def run_dir(self): """Return run directory.""" return self.session_dir / self.relative_run_dir + # TODO: remove in v2.14.0 @property def config_dir(self): - """Return user config directory.""" + """Return user config directory. + + .. deprecated:: 2.12.0 + This attribute has been deprecated in ESMValCore version 2.12.0 and + is scheduled for removal in version 2.14.0. 
+ + """ + msg = ( + "The attribute `Session.config_dir` has been deprecated in " + "ESMValCore version 2.12.0 and is scheduled for removal in " + "version 2.14.0." + ) + warnings.warn(msg, ESMValCoreDeprecationWarning) + if self.get("config_file") is None: + return None return Path(self["config_file"]).parent @property @@ -363,6 +546,41 @@ def _fixed_file_dir(self): return self.session_dir / self._relative_fixed_file_dir +def _get_all_config_dirs(cli_config_dir: Optional[Path]) -> list[Path]: + """Get all configuration directories.""" + config_dirs: list[Path] = [ + DEFAULT_CONFIG_DIR, + USER_CONFIG_DIR, + ] + if cli_config_dir is not None: + config_dirs.append(cli_config_dir) + return config_dirs + + +def _get_all_config_sources(cli_config_dir: Optional[Path]) -> list[str]: + """Get all sources of configuration directories.""" + config_sources: list[str] = [ + "defaults", + USER_CONFIG_SOURCE, + ] + if cli_config_dir is not None: + config_sources.append("command line argument") + return config_sources + + +def _get_global_config() -> Config: + """Get global configuration object.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Do not instantiate `Config` objects directly", + category=UserWarning, + module="esmvalcore", + ) + config_obj = Config() + config_obj.reload() + return config_obj + + # Initialize configuration objects -CFG_DEFAULT = MappingProxyType(Config._load_default_config()) -CFG = Config._load_user_config(raise_exception=False) +CFG = _get_global_config() diff --git a/esmvalcore/config/_config_validators.py b/esmvalcore/config/_config_validators.py index 23034ce5c2..9cc85bee5e 100644 --- a/esmvalcore/config/_config_validators.py +++ b/esmvalcore/config/_config_validators.py @@ -204,10 +204,6 @@ def chained(value): validate_path, docstring="Return a list of paths." 
) -validate_pathtuple = _listify_validator( - validate_path, docstring="Return a tuple of paths.", return_type=tuple -) - validate_int_positive = _chain_validator(validate_int, validate_positive) validate_int_positive_or_none = _make_type_validator( validate_int_positive, allow_none=True @@ -222,7 +218,7 @@ def validate_rootpath(value): if key == "obs4mips": logger.warning( "Correcting capitalization, project 'obs4mips' should be " - "written as 'obs4MIPs' in 'rootpath' in config-user.yml" + "written as 'obs4MIPs' in configured 'rootpath'" ) key = "obs4MIPs" if isinstance(paths, Path): @@ -247,7 +243,7 @@ def validate_drs(value): if key == "obs4mips": logger.warning( "Correcting capitalization, project 'obs4mips' should be " - "written as 'obs4MIPs' in 'drs' in config-user.yml" + "written as 'obs4MIPs' in configured 'drs'" ) key = "obs4MIPs" new_mapping[key] = validate_string(drs) @@ -306,34 +302,47 @@ def validate_diagnostics( } +# TODO: remove in v2.14.0 +def validate_extra_facets_dir(value): + """Validate extra_facets_dir.""" + if isinstance(value, tuple): + msg = ( + "Specifying `extra_facets_dir` as tuple has been deprecated in " + "ESMValCore version 2.12.0 and is scheduled for removal in " + "version 2.14.0. Please use a list instead." 
+ ) + warnings.warn(msg, ESMValCoreDeprecationWarning) + value = list(value) + return validate_pathlist(value) + + _validators = { - # From user config "auxiliary_data_dir": validate_path, + "check_level": validate_check_level, "compress_netcdf": validate_bool, "config_developer_file": validate_config_developer, + "diagnostics": validate_diagnostics, "download_dir": validate_path, "drs": validate_drs, "exit_on_warning": validate_bool, - "extra_facets_dir": validate_pathtuple, + "extra_facets_dir": validate_extra_facets_dir, "log_level": validate_string, + "max_datasets": validate_int_positive_or_none, "max_parallel_tasks": validate_int_or_none, + "max_years": validate_int_positive_or_none, "output_dir": validate_path, "output_file_type": validate_string, "profile_diagnostic": validate_bool, "remove_preproc_dir": validate_bool, + "resume_from": validate_pathlist, "rootpath": validate_rootpath, "run_diagnostic": validate_bool, "save_intermediary_cubes": validate_bool, "search_esgf": validate_search_esgf, - # From CLI - "check_level": validate_check_level, - "diagnostics": validate_diagnostics, - "max_datasets": validate_int_positive_or_none, - "max_years": validate_int_positive_or_none, - "resume_from": validate_pathlist, "skip_nonexistent": validate_bool, # From recipe "write_ncl_interface": validate_bool, + # TODO: remove in v2.14.0 # config location "config_file": validate_path, } @@ -365,12 +374,40 @@ def _handle_deprecation( warnings.warn(deprecation_msg, ESMValCoreDeprecationWarning) +# TODO: remove in v2.14.0 +def deprecate_config_file(validated_config, value, validated_value): + """Deprecate ``config_file`` option. + + Parameters + ---------- + validated_config: ValidatedConfig + ``ValidatedConfig`` instance which will be modified in place. + value: Any + Raw input value for ``config_file`` option. + validated_value: Any + Validated value for ``config_file`` option. 
+ + """ + validated_config # noqa + value # noqa + validated_value # noqa + option = "config_file" + deprecated_version = "2.12.0" + remove_version = "2.14.0" + more_info = " Please use the option `config_dir` instead." + _handle_deprecation(option, deprecated_version, remove_version, more_info) + + # Example usage: see removed files in # https://github.com/ESMValGroup/ESMValCore/pull/2213 -_deprecators: dict[str, Callable] = {} +_deprecators: dict[str, Callable] = { + "config_file": deprecate_config_file, # TODO: remove in v2.14.0 +} # Default values for deprecated options # Example usage: see removed files in # https://github.com/ESMValGroup/ESMValCore/pull/2213 -_deprecated_options_defaults: dict[str, Any] = {} +_deprecated_options_defaults: dict[str, Any] = { + "config_file": None, # TODO: remove in v2.14.0 +} diff --git a/esmvalcore/config-user.yml b/esmvalcore/config/configurations/defaults/config-user.yml similarity index 96% rename from esmvalcore/config-user.yml rename to esmvalcore/config/configurations/defaults/config-user.yml index ecdee818fc..39cffb67fb 100644 --- a/esmvalcore/config-user.yml +++ b/esmvalcore/config/configurations/defaults/config-user.yml @@ -1,5 +1,5 @@ ############################################################################### -# Example user configuration file for ESMValTool +# Default configuration settings ############################################################################### # # Note for users: @@ -13,14 +13,6 @@ # file. # ############################################################################### -# -# Note for developers: -# ------------------- -# Two identical copies of this file (``ESMValTool/config-user-example.yml`` and -# ``ESMValCore/esmvalcore/config-user.yml``) exist. If you change one of it, -# make sure to apply the changes to the other. 
-# -############################################################################### --- # Destination directory where all output will be written diff --git a/esmvalcore/config/configurations/defaults/more_options.yml b/esmvalcore/config/configurations/defaults/more_options.yml new file mode 100644 index 0000000000..c61a70a493 --- /dev/null +++ b/esmvalcore/config/configurations/defaults/more_options.yml @@ -0,0 +1,9 @@ +# Other options not included in config-user.yml +check_level: default +diagnostics: null +extra_facets_dir: [] +max_datasets: null +max_years: null +resume_from: [] +run_diagnostic: true +skip_nonexistent: false diff --git a/esmvalcore/local.py b/esmvalcore/local.py index 61c2782b58..e94a998ed5 100644 --- a/esmvalcore/local.py +++ b/esmvalcore/local.py @@ -469,7 +469,7 @@ def _get_data_sources(project: str) -> list[DataSource]: nonexistent = tuple(p for p in paths if not os.path.exists(p)) if nonexistent and (key, nonexistent) not in _ROOTPATH_WARNED: logger.warning( - "'%s' rootpaths '%s' set in config-user.yml do not exist", + "Configured '%s' rootpaths '%s' do not exist", key, ", ".join(str(p) for p in nonexistent), ) @@ -490,7 +490,7 @@ def _get_data_sources(project: str) -> list[DataSource]: raise KeyError( f"No '{project}' or 'default' path specified under 'rootpath' in " - "the user configuration." + "the configuration." 
) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..5fd7be7460 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,24 @@ +from copy import deepcopy +from pathlib import Path + +import pytest + +from esmvalcore.config import CFG + + +@pytest.fixture +def cfg_default(mocker): + """Configuration object with defaults.""" + cfg = deepcopy(CFG) + cfg.load_from_dirs([]) + return cfg + + +@pytest.fixture +def session(tmp_path: Path, cfg_default, monkeypatch): + """Session object with default settings.""" + for key, value in cfg_default.items(): + monkeypatch.setitem(CFG, key, deepcopy(value)) + monkeypatch.setitem(CFG, "rootpath", {"default": {tmp_path: "default"}}) + monkeypatch.setitem(CFG, "output_dir", tmp_path / "esmvaltool_output") + return CFG.start_session("recipe_test") diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 8787b345ee..e32e3ca3fa 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -5,8 +5,6 @@ import pytest import esmvalcore.local -from esmvalcore.config import CFG -from esmvalcore.config._config_object import CFG_DEFAULT from esmvalcore.local import ( LocalFile, _replace_tags, @@ -15,17 +13,6 @@ ) -@pytest.fixture -def session(tmp_path: Path, monkeypatch): - CFG.clear() - CFG.update(CFG_DEFAULT) - monkeypatch.setitem(CFG, "rootpath", {"default": {tmp_path: "default"}}) - - session = CFG.start_session("recipe_test") - session["output_dir"] = tmp_path / "esmvaltool_output" - return session - - def create_test_file(filename, tracking_id=None): dirname = os.path.dirname(filename) if not os.path.exists(dirname): diff --git a/tests/integration/test_deprecated_config.py b/tests/integration/test_deprecated_config.py index cf50f2ea4c..0ae313511f 100644 --- a/tests/integration/test_deprecated_config.py +++ b/tests/integration/test_deprecated_config.py @@ -1,8 +1,6 @@ import warnings -from pathlib import Path -import esmvalcore -from esmvalcore.config import CFG, 
Config +from esmvalcore.config import CFG from esmvalcore.exceptions import ESMValCoreDeprecationWarning @@ -12,13 +10,3 @@ def test_no_deprecation_default_cfg(): warnings.simplefilter("error", category=ESMValCoreDeprecationWarning) CFG.reload() CFG.start_session("my_session") - - -def test_no_deprecation_user_cfg(): - """Test that user config does not raise any deprecation warnings.""" - config_file = Path(esmvalcore.__file__).parent / "config-user.yml" - with warnings.catch_warnings(): - warnings.simplefilter("error", category=ESMValCoreDeprecationWarning) - cfg = Config(CFG.copy()) - cfg.load_from_file(config_file) - cfg.start_session("my_session") diff --git a/tests/integration/test_diagnostic_run.py b/tests/integration/test_diagnostic_run.py index e66cd925c2..285f86fd15 100644 --- a/tests/integration/test_diagnostic_run.py +++ b/tests/integration/test_diagnostic_run.py @@ -186,12 +186,80 @@ def test_diagnostic_run(tmp_path, script_file, script): # ensure that tags are cleared TAGS.clear() - config_user_file = write_config_user_file(tmp_path) + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True, exist_ok=True) + write_config_user_file(config_dir) + + with arguments( + "esmvaltool", + "run", + "--config_dir", + str(config_dir), + str(recipe_file), + ): + run() + + check(result_file) + + +# TODO: remove in v2.14.0 +@pytest.mark.parametrize( + "script_file, script", + [ + pytest.param( + script_file, + script, + marks=[ + pytest.mark.installation, + pytest.mark.xfail( + interpreter_not_installed(script_file), + run=False, + reason="Interpreter not available", + ), + ], + ) + for script_file, script in SCRIPTS.items() + if script_file != "null" + ], +) +def test_diagnostic_run_old_config(tmp_path, script_file, script): + recipe_file = tmp_path / "recipe_test.yml" + script_file = tmp_path / script_file + result_file = tmp_path / "result.yml" + + # Write script to file + script_file.write_text(str(script)) + + # Create recipe + recipe = dedent( + """ + 
documentation: + title: Recipe without data + description: Recipe with no data. + authors: [andela_bouwe] + + diagnostics: + diagnostic_name: + scripts: + script_name: + script: {} + setting_name: {} + """.format(script_file, result_file) + ) + recipe_file.write_text(str(recipe)) + + # ensure that tags are cleared + TAGS.clear() + + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True, exist_ok=True) + config_file = write_config_user_file(config_dir) + with arguments( "esmvaltool", "run", "--config_file", - config_user_file, + str(config_file), str(recipe_file), ): run() diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py index e0838fd3e2..b15a0e6129 100644 --- a/tests/integration/test_main.py +++ b/tests/integration/test_main.py @@ -9,13 +9,14 @@ import os import shutil import sys +from pathlib import Path from textwrap import dedent from unittest.mock import patch import pytest -import yaml from fire.core import FireExit +import esmvalcore.config._config from esmvalcore._main import Config, ESMValTool, Recipes, run from esmvalcore.exceptions import RecipeError @@ -34,8 +35,10 @@ def empty(*args, **kwargs): def arguments(*args): backup = sys.argv sys.argv = list(args) - yield - sys.argv = backup + try: + yield + finally: + sys.argv = backup def test_setargs(): @@ -62,8 +65,11 @@ def test_run(): run() -def test_empty_run(tmp_path): +def test_empty_run(tmp_path, monkeypatch): """Test real run with no diags.""" + monkeypatch.delitem( # TODO: remove in v2.14.0 + esmvalcore.config.CFG._mapping, "config_file", raising=False + ) recipe_file = tmp_path / "recipe.yml" content = dedent(""" documentation: @@ -79,17 +85,14 @@ def test_empty_run(tmp_path): diagnostics: null """) recipe_file.write_text(content) - Config.get_config_user(path=tmp_path) log_dir = f"{tmp_path}/esmvaltool_output" - config_file = f"{tmp_path}/config-user.yml" - with open(config_file, "r+", encoding="utf-8") as file: - config = yaml.safe_load(file) - 
config["output_dir"] = log_dir - yaml.safe_dump(config, file, sort_keys=False) + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True, exist_ok=True) + config_file = config_dir / "config.yml" + config_file.write_text(f"output_dir: {log_dir}") + with pytest.raises(RecipeError) as exc: - ESMValTool().run( - recipe_file, config_file=f"{tmp_path}/config-user.yml" - ) + ESMValTool().run(recipe_file, config_dir=config_dir) assert str(exc.value) == "The given recipe does not have any diagnostic." log_file = os.path.join( log_dir, os.listdir(log_dir)[0], "run", "main_log.txt" @@ -103,65 +106,6 @@ def test_empty_run(tmp_path): assert not filled_recipe -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_config(): - with arguments( - "esmvaltool", "run", "recipe.yml", "--config_file", "config.yml" - ): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_max_years(): - with arguments( - "esmvaltool", - "run", - "recipe.yml", - "--config_file=config.yml", - "--max_years=2", - ): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_max_datasets(): - with arguments("esmvaltool", "run", "recipe.yml", "--max_datasets=2"): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_search_esgf(): - with arguments("esmvaltool", "run", "recipe.yml", "--search_esgf=always"): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_check_level(): - with arguments("esmvaltool", "run", "recipe.yml", "--check_level=default"): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_skip_nonexistent(): - with arguments( - "esmvaltool", "run", "recipe.yml", "--skip_nonexistent=True" - ): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_with_diagnostics(): - with 
arguments("esmvaltool", "run", "recipe.yml", "--diagnostics=[badt]"): - run() - - -@patch("esmvalcore._main.ESMValTool.run", new=wrapper(ESMValTool.run)) -def test_run_fails_with_other_params(): - with arguments("esmvaltool", "run", "recipe.yml", "--extra_param=dfa"): - with pytest.raises(SystemExit): - run() - - def test_recipes_get(tmp_path, monkeypatch): """Test version command.""" src_recipe = tmp_path / "recipe.yml" @@ -199,6 +143,66 @@ def test_get_config_developer(): run() +def test_get_config_developer_no_path(): + """Test version command.""" + with arguments("esmvaltool", "config", "get_config_developer"): + run() + config_file = Path.home() / ".esmvaltool" / "config-developer.yml" + assert config_file.is_file() + + +def test_get_config_developer_path(tmp_path): + """Test version command.""" + new_path = tmp_path / "subdir" + with arguments( + "esmvaltool", "config", "get_config_developer", f"--path={new_path}" + ): + run() + assert (new_path / "config-developer.yml").is_file() + + +def test_get_config_developer_overwrite(tmp_path): + """Test version command.""" + config_developer = tmp_path / "config-developer.yml" + config_developer.write_text("old text") + with arguments( + "esmvaltool", + "config", + "get_config_developer", + f"--path={tmp_path}", + "--overwrite", + ): + run() + assert config_developer.read_text() != "old text" + + +def test_get_config_developer_no_overwrite(tmp_path): + """Test version command.""" + config_developer = tmp_path / "configuration_file.yml" + config_developer.write_text("old text") + with arguments( + "esmvaltool", + "config", + "get_config_developer", + f"--path={config_developer}", + ): + run() + assert config_developer.read_text() == "old text" + + +@patch( + "esmvalcore._main.Config.get_config_developer", + new=wrapper(Config.get_config_developer), +) +def test_get_config_developer_bad_option_fails(): + """Test version command.""" + with arguments( + "esmvaltool", "config", "get_config_developer", "--bad_option=path" 
+ ): + with pytest.raises(FireExit): + run() + + @patch( "esmvalcore._main.Config.get_config_user", new=wrapper(Config.get_config_user), @@ -209,19 +213,28 @@ def test_get_config_user(): run() +def test_get_config_user_no_path(): + """Test version command.""" + with arguments("esmvaltool", "config", "get_config_user"): + run() + config_file = Path.home() / ".config" / "esmvaltool" / "config-user.yml" + assert config_file.is_file() + + def test_get_config_user_path(tmp_path): """Test version command.""" + new_path = tmp_path / "subdir" with arguments( - "esmvaltool", "config", "get_config_user", f"--path={tmp_path}" + "esmvaltool", "config", "get_config_user", f"--path={new_path}" ): run() - assert (tmp_path / "config-user.yml").is_file() + assert (new_path / "config-user.yml").is_file() def test_get_config_user_overwrite(tmp_path): """Test version command.""" config_user = tmp_path / "config-user.yml" - config_user.touch() + config_user.write_text("old text") with arguments( "esmvaltool", "config", @@ -230,6 +243,18 @@ def test_get_config_user_overwrite(tmp_path): "--overwrite", ): run() + assert config_user.read_text() != "old text" + + +def test_get_config_user_no_overwrite(tmp_path): + """Test version command.""" + config_user = tmp_path / "configuration_file.yml" + config_user.write_text("old text") + with arguments( + "esmvaltool", "config", "get_config_user", f"--path={config_user}" + ): + run() + assert config_user.read_text() == "old text" @patch( diff --git a/tests/sample_data/experimental/test_run_recipe.py b/tests/sample_data/experimental/test_run_recipe.py index 2abdd22197..141cc74c57 100644 --- a/tests/sample_data/experimental/test_run_recipe.py +++ b/tests/sample_data/experimental/test_run_recipe.py @@ -12,7 +12,6 @@ import pytest import esmvalcore._task -from esmvalcore.config._config_object import CFG_DEFAULT from esmvalcore.config._diagnostics import TAGS from esmvalcore.exceptions import RecipeError from esmvalcore.experimental import CFG, Recipe, 
get_recipe @@ -59,7 +58,9 @@ def recipe(): @pytest.mark.use_sample_data @pytest.mark.parametrize("ssh", (True, False)) @pytest.mark.parametrize("task", (None, "example/ta")) -def test_run_recipe(monkeypatch, task, ssh, recipe, tmp_path, caplog): +def test_run_recipe( + monkeypatch, cfg_default, task, ssh, recipe, tmp_path, caplog +): """Test running a basic recipe using sample data. Recipe contains no provenance and no diagnostics. @@ -79,9 +80,7 @@ def test_run_recipe(monkeypatch, task, ssh, recipe, tmp_path, caplog): sample_data_config = esmvaltool_sample_data.get_rootpaths() monkeypatch.setitem(CFG, "rootpath", sample_data_config["rootpath"]) monkeypatch.setitem(CFG, "drs", {"CMIP6": "SYNDA"}) - session = CFG.start_session(recipe.path.stem) - session.clear() - session.update(CFG_DEFAULT) + session = cfg_default.start_session(recipe.path.stem) session["output_dir"] = tmp_path / "esmvaltool_output" session["max_parallel_tasks"] = 1 session["remove_preproc_dir"] = False diff --git a/tests/unit/config/test_config.py b/tests/unit/config/test_config.py index 513fd20595..194724a317 100644 --- a/tests/unit/config/test_config.py +++ b/tests/unit/config/test_config.py @@ -4,7 +4,6 @@ import pytest import yaml -import esmvalcore from esmvalcore.cmor.check import CheckLevels from esmvalcore.config import CFG, _config, _config_validators from esmvalcore.config._config import ( @@ -15,7 +14,7 @@ importlib_files, ) from esmvalcore.dataset import Dataset -from esmvalcore.exceptions import RecipeError +from esmvalcore.exceptions import ESMValCoreDeprecationWarning, RecipeError TEST_DEEP_UPDATE = [ ([{}], {}), @@ -167,32 +166,19 @@ def test_get_project_config(mocker): _config.get_project_config("non-existent-project") -CONFIG_USER_FILE = importlib_files("esmvalcore") / "config-user.yml" - - -@pytest.fixture -def default_config(): - # Load default configuration - CFG.load_from_file(CONFIG_USER_FILE) - # Run test - yield - # Restore default configuration - 
CFG.load_from_file(CONFIG_USER_FILE) - - -def test_load_default_config(monkeypatch, default_config): +def test_load_default_config(cfg_default, monkeypatch): """Test that the default configuration can be loaded.""" project_cfg = {} monkeypatch.setattr(_config, "CFG", project_cfg) default_dev_file = importlib_files("esmvalcore") / "config-developer.yml" - cfg = CFG.start_session("recipe_example") + + session = cfg_default.start_session("recipe_example") default_cfg = { "auxiliary_data_dir": Path.home() / "auxiliary_data", "check_level": CheckLevels.DEFAULT, "compress_netcdf": False, "config_developer_file": default_dev_file, - "config_file": CONFIG_USER_FILE, "diagnostics": None, "download_dir": Path.home() / "climate_data", "drs": { @@ -203,7 +189,7 @@ def test_load_default_config(monkeypatch, default_config): "obs4MIPs": "ESGF", }, "exit_on_warning": False, - "extra_facets_dir": tuple(), + "extra_facets_dir": [], "log_level": "info", "max_datasets": None, "max_parallel_tasks": None, @@ -229,38 +215,39 @@ def test_load_default_config(monkeypatch, default_config): "config_dir", } # Check that only allowed keys are in it - assert set(default_cfg) == set(cfg) + assert set(default_cfg) == set(session) # Check that all required directories are available - assert all(hasattr(cfg, attr) for attr in directory_attrs) + assert all(hasattr(session, attr) for attr in directory_attrs) # Check default values for key in default_cfg: - assert cfg[key] == default_cfg[key] + assert session[key] == default_cfg[key] # Check output directories - assert str(cfg.session_dir).startswith( + assert str(session.session_dir).startswith( str(Path.home() / "esmvaltool_output" / "recipe_example") ) for path in ("preproc", "work", "run"): - assert getattr(cfg, path + "_dir") == cfg.session_dir / path - assert cfg.plot_dir == cfg.session_dir / "plots" - assert cfg.config_dir == Path(esmvalcore.__file__).parent + assert getattr(session, path + "_dir") == session.session_dir / path + assert 
session.plot_dir == session.session_dir / "plots" + with pytest.warns(ESMValCoreDeprecationWarning): + assert session.config_dir is None # Check that projects were configured assert project_cfg -def test_rootpath_obs4mips_case_correction(default_config): +def test_rootpath_obs4mips_case_correction(monkeypatch): """Test that the name of the obs4MIPs project is correct in rootpath.""" - CFG["rootpath"] = {"obs4mips": "/path/to/data"} + monkeypatch.setitem(CFG, "rootpath", {"obs4mips": "/path/to/data"}) assert "obs4mips" not in CFG["rootpath"] assert CFG["rootpath"]["obs4MIPs"] == [Path("/path/to/data")] -def test_drs_obs4mips_case_correction(default_config): +def test_drs_obs4mips_case_correction(monkeypatch): """Test that the name of the obs4MIPs project is correct in rootpath.""" - CFG["drs"] = {"obs4mips": "ESGF"} + monkeypatch.setitem(CFG, "drs", {"obs4mips": "ESGF"}) assert "obs4mips" not in CFG["drs"] assert CFG["drs"]["obs4MIPs"] == "ESGF" diff --git a/tests/unit/config/test_config_object.py b/tests/unit/config/test_config_object.py index ac301fb43e..fa6c3111b3 100644 --- a/tests/unit/config/test_config_object.py +++ b/tests/unit/config/test_config_object.py @@ -1,28 +1,21 @@ -import contextlib import os -import sys from collections.abc import MutableMapping -from copy import deepcopy from pathlib import Path +from textwrap import dedent import pytest import esmvalcore import esmvalcore.config._config_object from esmvalcore.config import Config, Session -from esmvalcore.exceptions import InvalidConfigParameter +from esmvalcore.config._config_object import DEFAULT_CONFIG_DIR +from esmvalcore.exceptions import ( + ESMValCoreDeprecationWarning, + InvalidConfigParameter, +) from tests.integration.test_main import arguments -@contextlib.contextmanager -def environment(**kwargs): - """Temporary environment variables.""" - backup = deepcopy(os.environ) - os.environ = kwargs - yield - os.environ = backup - - def test_config_class(): config = { "log_level": "info", @@ 
-69,14 +62,17 @@ def test_config_init(): assert isinstance(config, MutableMapping) +# TODO: remove in v2.14.0 def test_load_from_file(monkeypatch): - default_config_file = Path(esmvalcore.__file__).parent / "config-user.yml" + default_config_file = DEFAULT_CONFIG_DIR / "config-user.yml" config = Config() assert not config - config.load_from_file(default_config_file) + with pytest.warns(ESMValCoreDeprecationWarning): + config.load_from_file(default_config_file) assert config +# TODO: remove in v2.14.0 def test_load_from_file_filenotfound(monkeypatch): """Test `Config.load_from_file`.""" config = Config() @@ -88,6 +84,7 @@ def test_load_from_file_filenotfound(monkeypatch): config.load_from_file("not_existent_file.yml") +# TODO: remove in v2.14.0 def test_load_from_file_invalidconfigparameter(monkeypatch, tmp_path): """Test `Config.load_from_file`.""" monkeypatch.chdir(tmp_path) @@ -111,23 +108,31 @@ def test_config_key_error(): config["invalid_key"] -def test_reload(): +def test_reload(cfg_default, monkeypatch, tmp_path): """Test `Config.reload`.""" - cfg_path = Path(esmvalcore.__file__).parent / "config-user.yml" - config = Config(config_file=cfg_path) - config.reload() - assert config["config_file"] == cfg_path + monkeypatch.setattr( + esmvalcore.config._config_object, + "USER_CONFIG_DIR", + tmp_path / "this" / "is" / "an" / "empty" / "dir", + ) + cfg = Config() + cfg.reload() -def test_reload_fail(): + assert cfg == cfg_default + + +def test_reload_fail(monkeypatch, tmp_path): """Test `Config.reload`.""" - config = Config() - msg = ( - "Cannot reload configuration, option 'config_file' is missing; make " - "sure to only use the `CFG` object from the `esmvalcore.config` module" + config_file = tmp_path / "invalid_config_file.yml" + config_file.write_text("invalid_option: 1") + monkeypatch.setattr( + esmvalcore.config._config_object, "USER_CONFIG_DIR", tmp_path ) - with pytest.raises(ValueError, match=msg): - config.reload() + cfg = Config() + + with 
pytest.raises(InvalidConfigParameter): + cfg.reload() def test_session(): @@ -146,6 +151,14 @@ def test_session_key_error(): session["invalid_key"] +# TODO: remove in v2.14.0 +def test_session_config_dir(): + session = Session({"config_file": "/path/to/config.yml"}) + with pytest.warns(ESMValCoreDeprecationWarning): + config_dir = session.config_dir + assert config_dir == Path("/path/to") + + TEST_GET_CFG_PATH = [ (None, None, None, "~/.esmvaltool/config-user.yml", False), ( @@ -158,7 +171,7 @@ def test_session_key_error(): ( None, None, - ("esmvaltool", "run", "--max-parallel-tasks=4"), + ("esmvaltool", "run", "--max_parallel_tasks=4"), "~/.esmvaltool/config-user.yml", True, ), @@ -264,6 +277,7 @@ def test_session_key_error(): ] +# TODO: remove in v2.14.0 @pytest.mark.parametrize( "filename,env,cli_args,output,env_var_set", TEST_GET_CFG_PATH ) @@ -271,21 +285,24 @@ def test_get_config_user_path( filename, env, cli_args, output, env_var_set, monkeypatch, tmp_path ): """Test `Config._get_config_user_path`.""" + monkeypatch.delenv("_ESMVALTOOL_USER_CONFIG_FILE_", raising=False) + # Create empty test file monkeypatch.chdir(tmp_path) (tmp_path / "existing_cfg.yml").write_text("") - if env is None: - env = {} - if cli_args is None: - cli_args = sys.argv - if output == "existing_cfg.yml": output = tmp_path / "existing_cfg.yml" else: output = Path(output).expanduser() - with environment(**env), arguments(*cli_args): + if env is not None: + for key, val in env.items(): + monkeypatch.setenv(key, val) + if cli_args is None: + cli_args = ["python"] + + with arguments(*cli_args): config_path = Config._get_config_user_path(filename) if env_var_set: assert os.environ["_ESMVALTOOL_USER_CONFIG_FILE_"] == str(output) @@ -295,6 +312,7 @@ def test_get_config_user_path( assert config_path == output +# TODO: remove in v2.14.0 def test_load_user_config_filenotfound(): """Test `Config._load_user_config`.""" expected_path = Path.home() / ".esmvaltool" / "not_existent_file.yml" @@ -303,6 
+321,13 @@ def test_load_user_config_filenotfound(): Config._load_user_config("not_existent_file.yml") +# TODO: remove in v2.14.0 +def test_load_user_config_no_exception(): + """Test `Config._load_user_config`.""" + Config._load_user_config("not_existent_file.yml", raise_exception=False) + + +# TODO: remove in v2.14.0 def test_load_user_config_invalidconfigparameter(monkeypatch, tmp_path): """Test `Config._load_user_config`.""" monkeypatch.chdir(tmp_path) @@ -315,3 +340,177 @@ def test_load_user_config_invalidconfigparameter(monkeypatch, tmp_path): ) with pytest.raises(InvalidConfigParameter, match=msg): Config._load_user_config(cfg_path) + + +def test_get_user_config_dir_and_source_with_env(tmp_path, monkeypatch): + """Test `_get_user_config_dir` and `_get_user_config_source`.""" + monkeypatch.setenv("ESMVALTOOL_CONFIG_DIR", str(tmp_path)) + + config_dir = esmvalcore.config._config_object._get_user_config_dir() + config_src = esmvalcore.config._config_object._get_user_config_source() + + assert config_dir == tmp_path + assert config_src == "ESMVALTOOL_CONFIG_DIR environment variable" + + +def test_get_user_config_dir_and_source_no_env(tmp_path, monkeypatch): + """Test `_get_user_config_dir` and `_get_user_config_source`.""" + monkeypatch.delenv("ESMVALTOOL_CONFIG_DIR", raising=False) + + config_dir = esmvalcore.config._config_object._get_user_config_dir() + config_src = esmvalcore.config._config_object._get_user_config_source() + + assert config_dir == Path("~/.config/esmvaltool").expanduser() + assert config_src == "default user configuration directory" + + +def test_get_user_config_dir_with_env_fail(tmp_path, monkeypatch): + """Test `_get_user_config_dir` and `_get_user_config_source`.""" + empty_path = tmp_path / "this" / "does" / "not" / "exist" + monkeypatch.setenv("ESMVALTOOL_CONFIG_DIR", str(empty_path)) + + msg = ( + "Invalid configuration directory specified via ESMVALTOOL_CONFIG_DIR " + "environment variable:" + ) + with pytest.raises(NotADirectoryError, 
match=msg): + esmvalcore.config._config_object._get_user_config_dir() + + +# TODO: remove in v2.14.0 +def test_get_global_config_deprecated(mocker, tmp_path): + """Test ``_get_global_config``.""" + config_file = tmp_path / "old_config_user.yml" + config_file.write_text("output_dir: /new/output/dir") + mocker.patch.object( + esmvalcore.config._config_object.Config, + "_get_config_user_path", + return_value=config_file, + ) + with pytest.warns(ESMValCoreDeprecationWarning): + cfg = esmvalcore.config._config_object._get_global_config() + + assert cfg["output_dir"] == Path("/new/output/dir") + + +@pytest.mark.parametrize( + "dirs,output_file_type,rootpath", + [ + ([], "png", {"default": "~/climate_data"}), + (["/this/path/does/not/exist"], "png", {"default": "~/climate_data"}), + (["{tmp_path}/config1"], "1", {"default": "1", "1": "1"}), + ( + ["{tmp_path}/config1", "/this/path/does/not/exist"], + "1", + {"default": "1", "1": "1"}, + ), + ( + ["{tmp_path}/config1", "{tmp_path}/config2"], + "2b", + {"default": "2b", "1": "1", "2": "2b"}, + ), + ( + ["{tmp_path}/config2", "{tmp_path}/config1"], + "1", + {"default": "1", "1": "1", "2": "2b"}, + ), + ], +) +def test_load_from_dirs_always_default( + dirs, output_file_type, rootpath, tmp_path +): + """Test `Config.load_from_dirs`.""" + config1 = tmp_path / "config1" / "1.yml" + config2a = tmp_path / "config2" / "2a.yml" + config2b = tmp_path / "config2" / "2b.yml" + config1.parent.mkdir(parents=True, exist_ok=True) + config2a.parent.mkdir(parents=True, exist_ok=True) + config1.write_text( + dedent( + """ + output_file_type: '1' + rootpath: + default: '1' + '1': '1' + """ + ) + ) + config2a.write_text( + dedent( + """ + output_file_type: '2a' + rootpath: + default: '2a' + '2': '2a' + """ + ) + ) + config2b.write_text( + dedent( + """ + output_file_type: '2b' + rootpath: + default: '2b' + '2': '2b' + """ + ) + ) + + config_dirs = [] + for dir_ in dirs: + config_dirs.append(dir_.format(tmp_path=str(tmp_path))) + for name, path 
in rootpath.items(): + path = Path(path).expanduser().absolute() + rootpath[name] = [path] + + cfg = Config() + assert not cfg + + cfg.load_from_dirs(config_dirs) + + assert cfg["output_file_type"] == output_file_type + assert cfg["rootpath"] == rootpath + + +@pytest.mark.parametrize( + "cli_config_dir,output", + [ + (None, [DEFAULT_CONFIG_DIR, "~/.config/esmvaltool"]), + (Path("/c"), [DEFAULT_CONFIG_DIR, "~/.config/esmvaltool", "/c"]), + ], +) +def test_get_all_config_dirs(cli_config_dir, output, monkeypatch): + """Test `_get_all_config_dirs`.""" + monkeypatch.delenv("ESMVALTOOL_CONFIG_DIR", raising=False) + excepted = [] + for out in output: + excepted.append(Path(out).expanduser().absolute()) + + config_dirs = esmvalcore.config._config_object._get_all_config_dirs( + cli_config_dir + ) + + assert config_dirs == excepted + + +@pytest.mark.parametrize( + "cli_config_dir,output", + [ + (None, ["defaults", "default user configuration directory"]), + ( + Path("/c"), + [ + "defaults", + "default user configuration directory", + "command line argument", + ], + ), + ], +) +def test_get_all_config_sources(cli_config_dir, output, monkeypatch): + """Test `_get_all_config_sources`.""" + monkeypatch.delenv("ESMVALTOOL_CONFIG_DIR", raising=False) + config_srcs = esmvalcore.config._config_object._get_all_config_sources( + cli_config_dir + ) + assert config_srcs == output diff --git a/tests/unit/config/test_config_validator.py b/tests/unit/config/test_config_validator.py index 1a8283ce4b..eb2bad19cd 100644 --- a/tests/unit/config/test_config_validator.py +++ b/tests/unit/config/test_config_validator.py @@ -6,6 +6,7 @@ import esmvalcore from esmvalcore import __version__ as current_version +from esmvalcore.config import CFG from esmvalcore.config._config_validators import ( _handle_deprecation, _listify_validator, @@ -331,3 +332,11 @@ def test_validate_config_developer(tmp_path): # Restore original config-developer file validate_config_developer(None) + + +# TODO: remove in 
v2.14.0 +def test_extra_facets_dir_tuple_deprecated(monkeypatch): + """Test extra_facets_dir.""" + with pytest.warns(ESMValCoreDeprecationWarning): + monkeypatch.setitem(CFG, "extra_facets_dir", ("/extra/facets",)) + assert CFG["extra_facets_dir"] == [Path("/extra/facets")] diff --git a/tests/unit/config/test_esgf_pyclient.py b/tests/unit/config/test_esgf_pyclient.py index f23813bf71..4f71674b58 100644 --- a/tests/unit/config/test_esgf_pyclient.py +++ b/tests/unit/config/test_esgf_pyclient.py @@ -46,7 +46,7 @@ def test_read_config_file(monkeypatch, tmp_path): def test_read_v25_config_file(monkeypatch, tmp_path): """Test function read_config_file for v2.5 and earlier. - For v2.5 and earlier, the config-file contained a single `url` + For v2.5 and earlier, the ESGF config file contained a single `url` instead of a list of `urls` to specify the ESGF index node. """ cfg_file = tmp_path / "esgf-pyclient.yml" diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py deleted file mode 100644 index edc9340fb9..0000000000 --- a/tests/unit/conftest.py +++ /dev/null @@ -1,14 +0,0 @@ -import copy - -import pytest - -from esmvalcore.config import CFG -from esmvalcore.config._config_object import CFG_DEFAULT - - -@pytest.fixture -def session(tmp_path, monkeypatch): - for key, value in CFG_DEFAULT.items(): - monkeypatch.setitem(CFG, key, copy.deepcopy(value)) - monkeypatch.setitem(CFG, "output_dir", tmp_path / "esmvaltool_output") - return CFG.start_session("recipe_test") diff --git a/tests/unit/main/test_esmvaltool.py b/tests/unit/main/test_esmvaltool.py index b6a5b96599..e498cef670 100644 --- a/tests/unit/main/test_esmvaltool.py +++ b/tests/unit/main/test_esmvaltool.py @@ -8,11 +8,12 @@ import esmvalcore._main import esmvalcore._task import esmvalcore.config +import esmvalcore.config._config_object import esmvalcore.config._logging import esmvalcore.esgf from esmvalcore import __version__ from esmvalcore._main import HEADER, ESMValTool -from esmvalcore.exceptions import 
RecipeError +from esmvalcore.exceptions import InvalidConfigParameter, RecipeError LOGGER = logging.getLogger(__name__) @@ -22,9 +23,10 @@ def cfg(mocker, tmp_path): """Mock `esmvalcore.config.CFG`.""" session = mocker.MagicMock() - cfg_dict = {} + cfg_dict = {"resume_from": []} session.__getitem__.side_effect = cfg_dict.__getitem__ session.__setitem__.side_effect = cfg_dict.__setitem__ + session.update.side_effect = cfg_dict.update output_dir = tmp_path / "esmvaltool_output" session.session_dir = output_dir / "recipe_test" @@ -54,7 +56,7 @@ def session(cfg): ("check_level", "strict"), ], ) -def test_run_command_line_config(mocker, cfg, argument, value): +def test_run_command_line_config(mocker, cfg, argument, value, tmp_path): """Check that the configuration is updated from the command line.""" mocker.patch.object( esmvalcore.config, @@ -65,17 +67,19 @@ def test_run_command_line_config(mocker, cfg, argument, value): program = ESMValTool() recipe_file = "/path/to/recipe_test.yml" - config_file = "/path/to/config-user.yml" + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True, exist_ok=True) mocker.patch.object(program, "_get_recipe", return_value=Path(recipe_file)) mocker.patch.object(program, "_run") - program.run(recipe_file, config_file, **{argument: value}) + program.run(recipe_file, config_dir=config_dir, **{argument: value}) - cfg.load_from_file.assert_called_with(config_file) cfg.start_session.assert_called_once_with(Path(recipe_file).stem) program._get_recipe.assert_called_with(recipe_file) - program._run.assert_called_with(program._get_recipe.return_value, session) + program._run.assert_called_with( + program._get_recipe.return_value, session, config_dir + ) assert session[argument] == value @@ -84,7 +88,6 @@ def test_run_command_line_config(mocker, cfg, argument, value): def test_run(mocker, session, search_esgf): session["search_esgf"] = search_esgf session["log_level"] = "default" - session["config_file"] = "/path/to/config-user.yml" 
session["remove_preproc_dir"] = True session["save_intermediary_cubes"] = False session.cmor_log.read_text.return_value = "WARNING: attribute not present" @@ -113,7 +116,7 @@ def test_run(mocker, session, search_esgf): create_autospec=True, ) - ESMValTool()._run(recipe, session=session) + ESMValTool()._run(recipe, session=session, cli_config_dir=None) # Check that the correct functions have been called esmvalcore.config._logging.configure_logging.assert_called_once_with( @@ -150,6 +153,36 @@ def test_run_session_dir_exists_alternative_fails(mocker, session): program._create_session_dir(session) +def test_run_missing_config_dir(tmp_path): + """Test `ESMValTool.run`.""" + config_dir = tmp_path / "path" / "does" / "not" / "exist" + program = ESMValTool() + + msg = ( + f"Invalid --config_dir given: {config_dir} is not an existing " + f"directory" + ) + with pytest.raises(NotADirectoryError, match=msg): + program.run("/recipe_dir/recipe_test.yml", config_dir=config_dir) + + +def test_run_invalid_config_dir(monkeypatch, tmp_path): + """Test `ESMValTool.run`.""" + monkeypatch.delitem( # TODO: remove in v2.14.0 + esmvalcore.config.CFG._mapping, "config_file", raising=False + ) + config_path = tmp_path / "config.yml" + config_path.write_text("invalid: option") + program = ESMValTool() + + msg = ( + rf"Failed to parse configuration directory {tmp_path} \(command line " + rf"argument\): `invalid` is not a valid config parameter." 
+ ) + with pytest.raises(InvalidConfigParameter, match=msg): + program.run("/recipe_dir/recipe_test.yml", config_dir=tmp_path) + + def test_clean_preproc_dir(session): session.preproc_dir.mkdir(parents=True) session._fixed_file_dir.mkdir(parents=True) @@ -173,16 +206,25 @@ def test_do_not_clean_preproc_dir(session): @mock.patch("esmvalcore._main.entry_points") -def test_header(mock_entry_points, caplog): +def test_header(mock_entry_points, monkeypatch, tmp_path, caplog): + tmp_path.mkdir(parents=True, exist_ok=True) + monkeypatch.setattr( + esmvalcore.config._config_object, "USER_CONFIG_DIR", tmp_path + ) + monkeypatch.setattr( + esmvalcore.config._config_object, "USER_CONFIG_SOURCE", "SOURCE" + ) entry_point = mock.Mock() entry_point.dist.name = "MyEntry" entry_point.dist.version = "v42.42.42" entry_point.name = "Entry name" mock_entry_points.return_value = [entry_point] + cli_config_dir = tmp_path / "this" / "does" / "not" / "exist" + with caplog.at_level(logging.INFO): ESMValTool()._log_header( - "path_to_config_file", ["path_to_log_file1", "path_to_log_file2"], + cli_config_dir, ) assert len(caplog.messages) == 8 @@ -192,7 +234,13 @@ def test_header(mock_entry_points, caplog): assert caplog.messages[3] == f"ESMValCore: {__version__}" assert caplog.messages[4] == "MyEntry: v42.42.42" assert caplog.messages[5] == "----------------" - assert caplog.messages[6] == "Using config file path_to_config_file" + assert caplog.messages[6] == ( + f"Reading configuration files from:\n" + f"{Path(esmvalcore.__file__).parent}/config/configurations/defaults " + f"(defaults)\n" + f"{tmp_path} (SOURCE)\n" + f"{cli_config_dir} [NOT AN EXISTING DIRECTORY] (command line argument)" + ) assert caplog.messages[7] == ( "Writing program log files to:\n" "path_to_log_file1\n" diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 66d23306ec..8408c622b9 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -9,7 +9,7 @@ import esmvalcore.dataset 
import esmvalcore.local from esmvalcore.cmor.check import CheckLevels -from esmvalcore.config import CFG +from esmvalcore.config import CFG, Session from esmvalcore.dataset import Dataset from esmvalcore.esgf import ESGFFile from esmvalcore.exceptions import InputFilesNotFound, RecipeError @@ -112,7 +112,7 @@ def test_session_setter(): ds.session - assert isinstance(ds.session, esmvalcore.config.Session) + assert isinstance(ds.session, Session) assert ds.session == ds.supplementaries[0].session