From 541ae51555bfddb62c1d5bf6b404d70075f742c8 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Thu, 9 Apr 2020 16:50:14 -0700 Subject: [PATCH] Add workflow to run model with mean nudging tendency (#215) * Add workflow runfile, configs, Makefile * Update runtime module * Fix fv3net runtime __init__ * Pin pandas in prognostic run to 1.0.1 * Update runfile and Makefile * Update experiment names in configs * Update Makefile * Remove workflow submit_job.py * Refactor nudge file handling to kube_jobs * Add tests for nudge file handling * Use common transfer_local_to_remote function * Add type hints to nudge_to_obs.py * Lint * Update configurations and Makefile to enable remote runs * Remove leftover debugging logging statement * Use common unique_tag() function * Change outdirs in Makefile * Update rule name in Makefile * Change run length to 91 days * Make layout 2,2 for nudge_mean_T * Make runfile work for multiple procs per tile * Add prepare_config.py script to simplify submission * Add get_fs and get_protocol to vcm.cloud.__init__.py * Fix Makefile * Make sure absolute paths are used for config_url * Update runfile for n_proc>6 * Set layout=2,2 in config * Rename dimensions as well * Load all timesteps of nudging data * Add submit_to_kubernetes script * Refactor runfile * Update scripts to allow local and remote runs * Make run length 91 days * Make flush_nc_files=True in namelist * Change nudging tendency year to model year instead of reverse * Update diagnostic to zarr pipeline * Add post-processing script * Lint * Add GFS analysis data to catalog.yml * Add back runtime get_runfile_config function * Add docstring * Add README.md * Add get_runfile_config back to runtime __init__ * Update postprocessing script * Address Jeremy PR comments * Rename nudging_tendency to mean_nudging_tendency * Update fv3config submodule to v0.3.1 * Use fv3config get_timestep and config_from_yaml * Address Noah PR comments * Update HISTORY.rst and workflow readme * Fix typo * Add quotes to filename_pattern in nudge config yamls * Update length of runs --- HISTORY.rst | 8 +- catalog.yml | 17 +++ docker/learned_nudging_run/Dockerfile | 13 ++ docker/learned_nudging_run/requirements.txt | 6 + external/fv3config | 2 +- external/vcm/vcm/cloud/__init__.py | 2 +- fv3net/pipelines/diagnostics_to_zarr.py | 20 +-- fv3net/pipelines/kube_jobs/__init__.py | 1 + fv3net/pipelines/kube_jobs/nudge_to_obs.py | 102 +++++++++++++++ fv3net/runtime/__init__.py | 2 +- fv3net/runtime/config.py | 10 +- tests/test_kube_jobs_nudge_to_obs.py | 57 +++++++++ workflows/diagnostics_to_zarr/README.md | 7 +- workflows/diagnostics_to_zarr/setup.py | 4 +- workflows/run_with_learned_nudging/Makefile | 28 +++++ workflows/run_with_learned_nudging/README.md | 22 ++++ .../fv3config_template.yml | 44 +++++++ .../mean_nudging_runfile.py | 119 ++++++++++++++++++ .../run_with_learned_nudging/postprocess.sh | 47 +++++++ .../prepare_config.py | 89 +++++++++++++ .../run_with_learned_nudging/submit_job.py | 62 +++++++++ workflows/single_fv3gfs_run/40day_nudged.yml | 3 + .../single_fv3gfs_run/free_GFS_SST_2015.yml | 3 + workflows/single_fv3gfs_run/long_nudged.yml | 3 + workflows/single_fv3gfs_run/nudge_T_2015.yml | 3 + .../single_fv3gfs_run/nudge_T_ps_2015.yml | 3 + .../single_fv3gfs_run/nudge_T_ps_u_v_2015.yml | 3 + workflows/single_fv3gfs_run/submit_job.py | 80 ++---------- 28 files changed, 665 insertions(+), 95 deletions(-) create mode 100644 docker/learned_nudging_run/Dockerfile create mode 100644 docker/learned_nudging_run/requirements.txt create
mode 100644 fv3net/pipelines/kube_jobs/nudge_to_obs.py create mode 100644 tests/test_kube_jobs_nudge_to_obs.py create mode 100644 workflows/run_with_learned_nudging/Makefile create mode 100644 workflows/run_with_learned_nudging/README.md create mode 100644 workflows/run_with_learned_nudging/fv3config_template.yml create mode 100644 workflows/run_with_learned_nudging/mean_nudging_runfile.py create mode 100644 workflows/run_with_learned_nudging/postprocess.sh create mode 100644 workflows/run_with_learned_nudging/prepare_config.py create mode 100644 workflows/run_with_learned_nudging/submit_job.py diff --git a/HISTORY.rst b/HISTORY.rst index 70261cff95..c57b699a2b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -10,13 +10,19 @@ latest * Fixed integration tests to use same version tags of the `fv3net` and `prognostic_run` images * Added makefile targets to submit integration tests to cluster from local machine and to get docker image names * Made simple step output directory names the default in the orchestrator +* Add `run_with_learned_nudging` workflow +* Update fv3config submodule to v0.3.1 +* Add `get_config()` function to fv3net.runtime +* Change API of `diagnostics_to_zarr` workflow so that it saves output zarrs in the given run directory +* Add `nudge_to_obs` module to `kube_jobs`, which helps with the configuration of FV3GFS model runs that are nudged towards GFS analysis * Add public function: vcm.convert_timestamps * Add pipeline to load C384 restart data into a zarr + 0.1.1 (2020-03-25) ------------------ * Updates to make end-to-end workflow work with fv3atm (fv3gfs-python:v0.3.1) -* Added bump2version for automated versioning of `fv3net` resources +* Added bump2version for automated versioning of `fv3net` resources * Add CircleCI build/push capabilities for `fv3net` images diff --git a/catalog.yml b/catalog.yml index 6f40acf219..cec4134365 100644 --- a/catalog.yml +++ b/catalog.yml @@ -106,6 +106,23 @@ sources: access: read_only urlpath: "gs://vcm-ml-data/2020-02-25-additional-november-C3072-simulation-C384-diagnostics/atmos_8xdaily_C3072_to_C384.zarr" + GFS_analysis_T85_2015_2016: + description: 4x daily GFS analysis data at approximately 2deg resolution, typically used for nudging FV3GFS. Spans 2015-01-01T00:00:00 to 2017-01-01T00:00:00. + driver: zarr + args: + storage_options: + project: 'vcm-ml' + access: read_only + urlpath: "gs://vcm-ml-data/2020-03-27-T85-GFS-nudging-data-as-zarr/nudging_data_2015-2016.zarr" + + GFS_analysis_T85_2015_2016_1M_mean: + description: Monthly mean GFS analysis data at approximately 2deg resolution. Spans 2015-01 to 2016-12. + driver: zarr + args: + storage_options: + project: 'vcm-ml' + access: read_only + urlpath: "gs://vcm-ml-data/2020-03-27-T85-GFS-nudging-data-as-zarr/nudging_data_mean_1M.zarr" ## Local Data Intake ## # TODO: Could this be replicated with intake caching? Or switch to an ignored file? diff --git a/docker/learned_nudging_run/Dockerfile b/docker/learned_nudging_run/Dockerfile new file mode 100644 index 0000000000..5ec9f03e04 --- /dev/null +++ b/docker/learned_nudging_run/Dockerfile @@ -0,0 +1,13 @@ +FROM us.gcr.io/vcm-ml/fv3gfs-python:v0.4.0 + + +COPY docker/learned_nudging_run/requirements.txt /tmp/requirements.txt +RUN pip3 install -r /tmp/requirements.txt + +# cache external package installation +COPY external/fv3config /fv3net/external/fv3config +COPY external/vcm /fv3net/external/vcm +RUN pip3 install -e /fv3net/external/vcm -e /fv3net/external/fv3config + +COPY . 
/fv3net +RUN pip3 install --no-deps -e /fv3net diff --git a/docker/learned_nudging_run/requirements.txt b/docker/learned_nudging_run/requirements.txt new file mode 100644 index 0000000000..646c32ec72 --- /dev/null +++ b/docker/learned_nudging_run/requirements.txt @@ -0,0 +1,6 @@ +scikit-learn==0.22.1 +dask +zarr +scikit-image +google-cloud-logging +pandas==1.0.1 diff --git a/external/fv3config b/external/fv3config index d41cf2a3e4..f9087baf66 160000 --- a/external/fv3config +++ b/external/fv3config @@ -1 +1 @@ -Subproject commit d41cf2a3e4a24c04a68aea6e963cd1c3f0f08ae9 +Subproject commit f9087baf663f15f69966a9f9236abf340d5c12ec diff --git a/external/vcm/vcm/cloud/__init__.py b/external/vcm/vcm/cloud/__init__.py index 04a7249663..b1594fe9e6 100644 --- a/external/vcm/vcm/cloud/__init__.py +++ b/external/vcm/vcm/cloud/__init__.py @@ -1,4 +1,4 @@ -from .fsspec import get_fs +from .fsspec import get_fs, get_protocol __all__ = [item for item in dir() if not item.startswith("_")] diff --git a/fv3net/pipelines/diagnostics_to_zarr.py b/fv3net/pipelines/diagnostics_to_zarr.py index 76248d82bd..7bef4a54e8 100644 --- a/fv3net/pipelines/diagnostics_to_zarr.py +++ b/fv3net/pipelines/diagnostics_to_zarr.py @@ -16,12 +16,11 @@ INITIAL_CHUNKS = {"time": 192} TILES = range(1, 7) COMMON_SUFFIX = ".tile1.nc" -DEFAULT_DIAGNOSTIC_DIR = "diagnostic_zarr" def run(args, pipeline_args): rundir = args.rundir - diagnostic_dir = _parse_diagnostic_dir(args.diagnostic_dir, rundir) + diagnostic_dir = rundir if args.diagnostic_dir is None else args.diagnostic_dir diagnostic_categories = _parse_categories(args.diagnostic_categories, rundir) logger.info(f"Diagnostic zarrs being written to {diagnostic_dir}") logger.info(f"Diagnostic categories to convert are {diagnostic_categories}") @@ -38,7 +37,7 @@ def run(args, pipeline_args): def open_convert_save(diagnostic_category, rundir, diagnostic_dir): - remote_zarr = os.path.join(diagnostic_dir, diagnostic_category) + remote_zarr = os.path.join(diagnostic_dir, f"{diagnostic_category}.zarr") with tempfile.TemporaryDirectory() as local_zarr: for tile in TILES: logger.info(f"Converting category {diagnostic_category} tile {tile}") @@ -63,13 +62,6 @@ def _parse_categories(diagnostic_categories, rundir): return diagnostic_categories -def _parse_diagnostic_dir(diagnostic_dir, rundir): - if diagnostic_dir is None: - return os.path.join(_get_parent_dir(rundir), DEFAULT_DIAGNOSTIC_DIR) - else: - return diagnostic_dir - - def _get_all_diagnostic_categories(rundir, fs): """ get full paths for all files in rundir that end in COMMON_SUFFIX """ full_paths = fs.glob(os.path.join(rundir, f"*{COMMON_SUFFIX}")) @@ -82,12 +74,6 @@ def _get_category_from_path(path): return basename[: -len(COMMON_SUFFIX)] -def _get_parent_dir(path): - if path[-1] == "/": - path = path[:-1] - return os.path.split(path)[0] - - def _get_fs(path): """Return the fsspec filesystem required to handle a given path.""" if path.startswith("gs://"): @@ -108,7 +94,7 @@ def _get_fs(path): "--diagnostic-dir", type=str, default=None, - help="Location to save zarr stores. Defaults to the parent of rundir.", + help="Location to save zarr stores. 
Defaults to rundir.", ) parser.add_argument( "--diagnostic-categories", diff --git a/fv3net/pipelines/kube_jobs/__init__.py b/fv3net/pipelines/kube_jobs/__init__.py index 245047e669..b19a984da3 100644 --- a/fv3net/pipelines/kube_jobs/__init__.py +++ b/fv3net/pipelines/kube_jobs/__init__.py @@ -5,3 +5,4 @@ update_nested_dict, get_base_fv3config, ) +from .nudge_to_obs import update_config_for_nudging diff --git a/fv3net/pipelines/kube_jobs/nudge_to_obs.py b/fv3net/pipelines/kube_jobs/nudge_to_obs.py new file mode 100644 index 0000000000..ba5cfbb1bc --- /dev/null +++ b/fv3net/pipelines/kube_jobs/nudge_to_obs.py @@ -0,0 +1,102 @@ +from datetime import datetime, timedelta +import os +import numpy as np +from typing import List, Mapping + +import fsspec +import fv3config + + +# this module assumes that analysis files are at 00Z, 06Z, 12Z and 18Z +SECONDS_IN_HOUR = 60 * 60 +NUDGE_HOURS = np.array([0, 6, 12, 18]) # hours at which analysis data is available +NUDGE_FILE_TARGET = "INPUT" # where to put analysis files in rundir + + +def _most_recent_nudge_time(start_time: datetime) -> datetime: + """Return datetime object for the last nudging time preceding or concurrent + with start_time""" + first_nudge_hour = _most_recent_hour(start_time.hour) + return datetime(start_time.year, start_time.month, start_time.day, first_nudge_hour) + + +def _most_recent_hour(current_hour, hour_array=NUDGE_HOURS) -> int: + """Return latest hour in hour_array that precedes or is concurrent with + current_hour""" + first_nudge_hour = hour_array[np.argmax(hour_array > current_hour) - 1] + return first_nudge_hour + + +def _get_nudge_time_list(config: Mapping) -> List[datetime]: + """Return list of datetime objects corresponding to times at which analysis files + are required for nudging for a given model run configuration""" + current_date = config["namelist"]["coupler_nml"]["current_date"] + start_time = datetime(*current_date) + first_nudge_time = _most_recent_nudge_time(start_time) + run_duration = fv3config.get_run_duration(config) + nudge_duration = run_duration + (start_time - first_nudge_time) + nudge_duration_hours = int( + np.ceil(nudge_duration.total_seconds() / SECONDS_IN_HOUR) + ) + nudge_interval = NUDGE_HOURS[1] - NUDGE_HOURS[0] + nudging_hours = range(0, nudge_duration_hours + nudge_interval, nudge_interval) + return [first_nudge_time + timedelta(hours=hour) for hour in nudging_hours] + + +def _get_nudge_filename_list(config: Mapping) -> List[str]: + """Return list of filenames of all nudging files required""" + nudge_filename_pattern = config["gfs_analysis_data"]["filename_pattern"] + time_list = _get_nudge_time_list(config) + return [time.strftime(nudge_filename_pattern) for time in time_list] + + +def _get_nudge_files_asset_list(config: Mapping) -> List[Mapping]: + """Return list of fv3config assets for all nudging files required for a given + model run configuration""" + nudge_url = config["gfs_analysis_data"]["url"] + return [ + fv3config.get_asset_dict(nudge_url, file, target_location=NUDGE_FILE_TARGET) + for file in _get_nudge_filename_list(config) + ] + + +def _get_nudge_files_description_asset(config: Mapping, config_url: str) -> Mapping: + """Return an fv3config asset pointing to the text file that the + model requires to describe the list of nudging files.""" + fname_list_filename = config["namelist"]["fv_nwp_nudge_nml"]["input_fname_list"] + return fv3config.get_asset_dict(config_url, fname_list_filename) + + +def _write_nudge_files_description(config: Mapping, url: str): + """Write a text file 
with list of all nudging files (which the + model requires to know what the nudging files are called).""" + fname_list_contents = "\n".join(_get_nudge_filename_list(config)) + with fsspec.open(url, "w") as f: + f.write(fname_list_contents) + + +def update_config_for_nudging(config: Mapping, config_url: str) -> Mapping: + """Add assets to config for all nudging files and for the text file listing + nudging files. This text file will be written to config_url. + + Args: + config: an fv3config configuration dictionary + config_url: path where text file describing nudging files will be written. + File will be written to {config_url}/{input_fname_list} where + input_fname_list is a namelist parameter in the fv_nwp_nudge_nml namelist + of config. + + Returns: + config dict updated to include all required nudging files + """ + nudge_files_description = _get_nudge_files_description_asset(config, config_url) + nudge_files_description_url = os.path.join( + nudge_files_description["source_location"], + nudge_files_description["source_name"], + ) + _write_nudge_files_description(config, nudge_files_description_url) + if "patch_files" not in config: + config["patch_files"] = [] + config["patch_files"].append(nudge_files_description) + config["patch_files"].extend(_get_nudge_files_asset_list(config)) + return config diff --git a/fv3net/runtime/__init__.py b/fv3net/runtime/__init__.py index 5cb26a73c1..991fdee2c7 100644 --- a/fv3net/runtime/__init__.py +++ b/fv3net/runtime/__init__.py @@ -1,3 +1,3 @@ from . import sklearn_interface as sklearn from .state_io import init_writers, append_to_writers, CF_TO_RESTART_MAP -from .config import get_runfile_config, get_namelist +from .config import get_runfile_config, get_config, get_namelist diff --git a/fv3net/runtime/config.py b/fv3net/runtime/config.py index 39fe1794be..0aa81cfbed 100644 --- a/fv3net/runtime/config.py +++ b/fv3net/runtime/config.py @@ -1,5 +1,8 @@ import yaml import f90nml +import fv3config + +FV3CONFIG_FILENAME = "fv3config.yml" class dotdict(dict): @@ -11,10 +14,15 @@ class dotdict(dict): def get_runfile_config(): - with open("fv3config.yml") as f: + with open(FV3CONFIG_FILENAME) as f: config = yaml.safe_load(f) return dotdict(config["scikit_learn"]) +def get_config(): + """Return fv3config dictionary""" + return fv3config.config_from_yaml(FV3CONFIG_FILENAME) + + def get_namelist(): return f90nml.read("input.nml") diff --git a/tests/test_kube_jobs_nudge_to_obs.py b/tests/test_kube_jobs_nudge_to_obs.py new file mode 100644 index 0000000000..f62e46fb5c --- /dev/null +++ b/tests/test_kube_jobs_nudge_to_obs.py @@ -0,0 +1,57 @@ +import pytest +from datetime import datetime + +from fv3net.pipelines.kube_jobs import nudge_to_obs + + +@pytest.mark.parametrize( + "start_time, expected", + [ + (datetime(2016, 1, 1), datetime(2016, 1, 1, 0)), + (datetime(2016, 1, 1, 1), datetime(2016, 1, 1, 0)), + (datetime(2016, 1, 1, 7), datetime(2016, 1, 1, 6)), + (datetime(2016, 1, 1, 12), datetime(2016, 1, 1, 12)), + (datetime(2016, 1, 2, 18, 1), datetime(2016, 1, 2, 18)), + ], +) +def test__get_first_nudge_file_time(start_time, expected): + assert nudge_to_obs._most_recent_nudge_time(start_time) == expected + + +@pytest.mark.parametrize( + "coupler_nml, expected_length, expected_first_datetime, expected_last_datetime", + [ + ( + {"current_date": [2016, 1, 1, 0, 0, 0], "days": 1}, + 4 + 1, + datetime(2016, 1, 1), + datetime(2016, 1, 2), + ), + ( + {"current_date": [2016, 1, 1, 0, 0, 0], "days": 1, "hours": 5}, + 4 + 1 + 1, + datetime(2016, 1, 1), + datetime(2016, 1, 
2, 6), ( {"current_date": [2016, 1, 1, 0, 0, 0], "days": 1, "hours": 7}, 4 + 2 + 1, datetime(2016, 1, 1), datetime(2016, 1, 2, 12), ), ( {"current_date": [2016, 1, 2, 1, 0, 0], "days": 1}, 4 + 2, datetime(2016, 1, 2), datetime(2016, 1, 3, 6), ), ], ) def test__get_nudge_time_list( coupler_nml, expected_length, expected_first_datetime, expected_last_datetime ): config = {"namelist": {"coupler_nml": coupler_nml}} nudge_file_list = nudge_to_obs._get_nudge_time_list(config) assert len(nudge_file_list) == expected_length assert nudge_file_list[0] == expected_first_datetime assert nudge_file_list[-1] == expected_last_datetime diff --git a/workflows/diagnostics_to_zarr/README.md b/workflows/diagnostics_to_zarr/README.md index ed594f17ea..75005c4882 100644 --- a/workflows/diagnostics_to_zarr/README.md +++ b/workflows/diagnostics_to_zarr/README.md @@ -1,7 +1,7 @@ ## Diagnostics-to-zarr workflow This workflow takes a path/url to a run directory as an input and saves zarr stores -of the diagnostic model output to a specified location. This workflow requires a -specific xarray version (0.14.0) and so to run locally, one must ensure your +of the diagnostic model output to a specified location. This workflow requires a +specific xarray version, so to run locally you must ensure your environment is using that version. For dataflow jobs, a custom setup.py is provided which pins this exact version. @@ -17,8 +17,7 @@ optional arguments: --rundir RUNDIR Location of run directory. May be local or remote path. --diagnostic-dir DIAGNOSTIC_DIR - Location to save zarr stores. Defaults to the parent - of rundir. + Location to save zarr stores. Defaults to rundir. --diagnostic-categories DIAGNOSTIC_CATEGORIES [DIAGNOSTIC_CATEGORIES ...] Optionally specify one or more categories of diagnostic files. Provide part of filename before diff --git a/workflows/diagnostics_to_zarr/setup.py b/workflows/diagnostics_to_zarr/setup.py index f769f7842f..c3a9ce0b00 100644 --- a/workflows/diagnostics_to_zarr/setup.py +++ b/workflows/diagnostics_to_zarr/setup.py @@ -11,7 +11,7 @@ "numba", "scikit-image", "netCDF4", - "xarray==0.14.0", + "xarray==0.15.0", "partd", "pyyaml>=5.0", "xgcm", @@ -19,7 +19,7 @@ ] setup( - name="fv3net", + name="diags-to-zarr", packages=find_packages(), install_requires=dependencies, version="0.1.0", diff --git a/workflows/run_with_learned_nudging/Makefile b/workflows/run_with_learned_nudging/Makefile new file mode 100644 index 0000000000..bd3a88cded --- /dev/null +++ b/workflows/run_with_learned_nudging/Makefile @@ -0,0 +1,28 @@ +IMAGE = us.gcr.io/vcm-ml/learned_nudging_run:v0.1.1 +RUNFILE = mean_nudging_runfile.py +TEMPLATE = fv3config_template.yml +LOCAL_OUTDIR = rundir/nudge_mean_$* +LOCAL_CONFIGDIR = $(LOCAL_OUTDIR)/config +LOCAL_FV3CONFIG = $(LOCAL_CONFIGDIR)/fv3config.yml +GCS_OUTDIR = gs://vcm-ml-data/2020-03-30-learned-nudging-FV3GFS-runs/nudge_mean_$* +GCS_CONFIGDIR = $(GCS_OUTDIR)/config +GCS_FV3CONFIG = $(GCS_CONFIGDIR)/fv3config.yml + +run_all_remote: run_remote_T run_remote_T_ps run_remote_T_ps_u_v + +run_remote_%: prepare_remote_% + python submit_job.py --dockerimage $(IMAGE) --runfile $(RUNFILE) $(GCS_FV3CONFIG) $(GCS_OUTDIR) + +run_local_%: prepare_local_% + fv3run --dockerimage $(IMAGE) --runfile $(RUNFILE) $(LOCAL_FV3CONFIG) $(LOCAL_OUTDIR) + +prepare_remote_%: + python prepare_config.py $(TEMPLATE) $* $(GCS_CONFIGDIR) + +prepare_local_%: + python prepare_config.py $(TEMPLATE) $* $(LOCAL_CONFIGDIR) + +clean: + rm -rf rundir configdir + +.PHONY: run_all_remote diff --git a/workflows/run_with_learned_nudging/README.md b/workflows/run_with_learned_nudging/README.md new file mode 100644 index 0000000000..bc3d019f88 --- /dev/null +++ b/workflows/run_with_learned_nudging/README.md @@ -0,0 +1,22 @@ +## Run with learned nudging workflow + +This workflow (in `workflows/run_with_learned_nudging`) allows an external nudging +tendency to be applied to FV3GFS runs. +Configurations are provided to apply the monthly mean nudging tendency of +temperature; temperature and pressure thickness; or temperature, +pressure thickness and horizontal winds. See the Makefile for examples of how to +submit the jobs. + +This workflow uses output from long nudged simulations generated with the +`single_fv3gfs_run` workflow. This dependency is in the `prepare_config.py` +script, which hard-codes the location of the zarr stores of monthly mean nudging +tendencies from the aforementioned long nudged simulations. + +The `postprocess.sh` script must be called from the root of the fv3net repository. +Furthermore, it must be called manually after the jobs initiated by +`make run_all_remote` finish. Those jobs can be monitored with `kubectl get pods`. +For more detailed info, try `kubectl describe job <job-name>`, where the +`<job-name>`s will be printed to the console by `make run_all_remote`. + +The docker image for this workflow can be built by calling +`make build_image_learned_nudging_run` in the root of the `fv3net` repo.
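For orientation, the following sketch shows how the pieces described in this README fit together, in the spirit of `prepare_config.py`: merge the template over a versioned base config, point the `runtime` section at a mean-tendency zarr store, and register the GFS analysis nudging files. The bucket URLs below are placeholders rather than the hard-coded paths the real script uses, and only the temperature-only ("T") case is shown.

```python
# Illustrative sketch only; URLs are placeholders, not the hard-coded
# locations used by prepare_config.py.
import yaml
from fv3net.pipelines.kube_jobs import (
    get_base_fv3config,
    update_nested_dict,
    update_config_for_nudging,
)

# Merge the workflow template over the versioned base configuration.
with open("fv3config_template.yml") as f:
    template = yaml.safe_load(f)
config = update_nested_dict(get_base_fv3config("v0.3"), template)

# Temperature-only nudging, analogous to the "T" label in prepare_config.py.
config["runtime"]["nudging_zarr_url"] = "gs://example-bucket/mean_tendency.zarr"
config["runtime"]["variables_to_nudge"] = ["air_temperature"]

# Because the template sets fv_core_nml.nudge = true, also add the GFS
# analysis files (and the text file listing them) as fv3config patch_files.
if config["namelist"]["fv_core_nml"].get("nudge", False):
    config = update_config_for_nudging(config, "gs://example-bucket/config")
```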
diff --git a/workflows/run_with_learned_nudging/fv3config_template.yml b/workflows/run_with_learned_nudging/fv3config_template.yml new file mode 100644 index 0000000000..e1b1d06958 --- /dev/null +++ b/workflows/run_with_learned_nudging/fv3config_template.yml @@ -0,0 +1,44 @@ +runtime: + nudging_zarr_url: placeholder + variables_to_nudge: placeholder +diag_table: gs://vcm-fv3config/config/diag_table/control_run/v1.0/diag_table +experiment_name: template +forcing: gs://vcm-fv3config/data/base_forcing/v1.1/ +initial_conditions: gs://vcm-ml-data/2019-12-03-C48-20160101.00Z_IC +gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' +namelist: + atmos_model_nml: + fhout: 1.0 + fhmax: 10000 + coupler_nml: + current_date: + - 2016 + - 1 + - 1 + - 0 + - 0 + - 0 + days: 91 + minutes: 0 + seconds: 0 + dt_atmos: 900 # seconds + dt_ocean: 900 # seconds + fv_core_nml: + nudge: true # only doing this in order to be able to use GFS analysis SST + layout: + - 2 + - 2 + gfs_physics_nml: + fhzero: 1.0 + use_analysis_sst: true + fv_nwp_nudge_nml: + input_fname_list: nudging_file_list + nudge_hght: false + nudge_ps: false + nudge_virt: false + nudge_winds: false + track_file_name: No_File_specified + diag_manager_nml: + flush_nc_files: true diff --git a/workflows/run_with_learned_nudging/mean_nudging_runfile.py b/workflows/run_with_learned_nudging/mean_nudging_runfile.py new file mode 100644 index 0000000000..1a521970ca --- /dev/null +++ b/workflows/run_with_learned_nudging/mean_nudging_runfile.py @@ -0,0 +1,119 @@ +import logging +import numpy as np + +import fv3gfs +import fv3config +import fv3util +from mpi4py import MPI +from fv3net import runtime +import fsspec +import xarray as xr +import cftime + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +CF_TO_NUDGE = { + "air_temperature": "t_dt_nudge", + "specific_humidity": "q_dt_nudge", + "pressure_thickness_of_atmospheric_layer": "delp_dt_nudge", + "eastward_wind_after_physics": "u_dt_nudge", + "northward_wind_after_physics": "v_dt_nudge", +} + +DIMENSION_RENAME_DICT = {"grid_xt": "x", "grid_yt": "y", "pfull": "z"} + + +def _ensure_Julian(date): + return cftime.DatetimeJulian( + date.year, date.month, date.day, date.hour, date.minute, date.second + ) + + +def get_current_nudging_tendency(nudging_tendency, nudging_time, model_time): + """Get nudging tendencies for timestep in nudging_tendency dataset closest to + current model_time. 
Returns a dict of ndarrays.""" + model_year = model_time.year + nudging_time.values = [t.replace(year=model_year) for t in nudging_time.values] + model_time_Julian = _ensure_Julian(model_time) + time_index = np.argmin(np.abs(nudging_time - model_time_Julian)).values.item() + variables = nudging_tendency.keys() + return {var: nudging_tendency[var].sel(time=time_index) for var in variables} + + +def apply_nudging_tendency(state, nudging_tendency, dt): + for variable in nudging_tendency: + state[variable].view[:] += nudging_tendency[variable] * dt.total_seconds() + + +def load_mean_nudging_tendency(url, communicator, variables): + """Given url to zarr store of nudging tendencies, load and scatter""" + rename_dict = {CF_TO_NUDGE[var]: var for var in variables} + rename_dict.update(DIMENSION_RENAME_DICT) + mean_nudging_tendency = {} + rank = communicator.rank + tile = communicator.partitioner.tile_index(rank) + if communicator.tile.rank == 0: + logger.info(f"Loading tile-{tile} nudging tendencies on rank {rank}") + mapper = fsspec.get_mapper(url) + ds_nudging = xr.open_zarr(mapper).isel(tile=tile) + ds_nudging = ds_nudging.rename(rename_dict)[variables].load() + # convert to Quantities so we can use scatter_state + mean_nudging_tendency = { + variable: fv3util.Quantity.from_data_array(ds_nudging[variable]) + for variable in variables + } + mean_nudging_tendency = communicator.tile.scatter_state(mean_nudging_tendency) + # the following handles a bug in fv3gfs-python. See #54 of fv3gfs-python. + while "time" in mean_nudging_tendency: + mean_nudging_tendency.pop("time") + return mean_nudging_tendency + + +def load_time(url): + mapper = fsspec.get_mapper(url) + return xr.open_zarr(mapper)["time"].load() + + +if __name__ == "__main__": + config = runtime.get_config() + nudging_zarr_url = config["runtime"]["nudging_zarr_url"] + variables_to_nudge = config["runtime"]["variables_to_nudge"] + dt = fv3config.get_timestep(config) + communicator = fv3gfs.CubedSphereCommunicator( + MPI.COMM_WORLD, fv3gfs.CubedSpherePartitioner.from_namelist(config["namelist"]) + ) + rank = communicator.rank + if rank == 0: + logger.info(f"Nudging following variables: {variables_to_nudge}") + logger.info(f"Using nudging tendencies from: {nudging_zarr_url}") + mean_nudging_tendency = load_mean_nudging_tendency( + nudging_zarr_url, communicator, variables_to_nudge + ) + mean_nudging_time_coord = load_time(nudging_zarr_url) + fv3gfs.initialize() + for i in range(fv3gfs.get_step_count()): + do_logging = rank == 0 and i % 10 == 0 + + if do_logging: + logger.info(f"Stepping dynamics for timestep {i}") + fv3gfs.step_dynamics() + + if do_logging: + logger.info(f"Computing physics routines for timestep {i}") + fv3gfs.compute_physics() + + if do_logging: + logger.info(f"Adding nudging tendency for timestep {i}") + state = fv3gfs.get_state(names=["time"] + variables_to_nudge) + current_tendency = get_current_nudging_tendency( + mean_nudging_tendency, mean_nudging_time_coord, state["time"] + ) + apply_nudging_tendency(state, current_tendency, dt) + fv3gfs.set_state(state) + + if do_logging: + logger.info(f"Updating atmospheric prognostic state for timestep {i}") + fv3gfs.apply_physics() + + fv3gfs.cleanup() diff --git a/workflows/run_with_learned_nudging/postprocess.sh b/workflows/run_with_learned_nudging/postprocess.sh new file mode 100644 index 0000000000..090cfcabe3 --- /dev/null +++ b/workflows/run_with_learned_nudging/postprocess.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# this script orchestrates two workflows to convert diagnostic 
netCDFs to zarr stores +# and to regrid certain variables to a regular lat-lon grid + +# it must be called from root of fv3net repo + +DO_REGRID=true +DO_TO_ZARR=true + +# constants +ROOT_URL=gs://vcm-ml-data/2020-03-30-learned-nudging-FV3GFS-runs +LATLON_VAR_LIST=DLWRFsfc,DSWRFsfc,DSWRFtoa,LHTFLsfc,PRATEsfc,SHTFLsfc,ULWRFsfc,ULWRFtoa,USWRFsfc,USWRFtoa,TMP2m,TMPsfc,SOILM,PRESsfc,ucomp,vcomp,temp,sphum,ps_dt_nudge,delp_dt_nudge,u_dt_nudge,v_dt_nudge,t_dt_nudge +ARGO_CLUSTER=gke_vcm-ml_us-central1-c_ml-cluster-dev + +# what experiments to do post-processing for +RUNS="nudge_mean_T nudge_mean_T_ps nudge_mean_T_ps_u_v" + +if [ "$DO_REGRID" = true ]; then + for RUN in $RUNS; do + # regrid certain monthly-mean variables to lat-lon grid + argo --cluster $ARGO_CLUSTER submit workflows/fregrid_cube_netcdfs/pipeline.yaml \ + -p source_prefix=$ROOT_URL/$RUN/atmos_monthly \ + -p output_bucket=$ROOT_URL/$RUN/atmos_monthly.latlon.nc \ + -p fields=$LATLON_VAR_LIST \ + -p extra_args="--nlat 90 --nlon 180" + done +fi + +if [ "$DO_TO_ZARR" = true ]; then + for RUN in $RUNS; do + # convert diagnostic output to zarr stores + python -m fv3net.pipelines.diagnostics_to_zarr \ + --rundir $ROOT_URL/$RUN \ + --job_name diags-to-zarr-$(uuidgen) \ + --project vcm-ml \ + --region us-central1 \ + --runner DataflowRunner \ + --setup_file workflows/diagnostics_to_zarr/setup.py \ + --temp_location gs://vcm-ml-data/tmp_dataflow \ + --num_workers 1 \ + --max_num_workers 5 \ + --disk_size_gb 500 \ + --worker_machine_type n1-highmem-16 \ + --extra_package external/vcm/dist/vcm-0.1.0.tar.gz & + done +fi diff --git a/workflows/run_with_learned_nudging/prepare_config.py b/workflows/run_with_learned_nudging/prepare_config.py new file mode 100644 index 0000000000..1d8d3dbf26 --- /dev/null +++ b/workflows/run_with_learned_nudging/prepare_config.py @@ -0,0 +1,89 @@ +import argparse +import logging +import os +import fsspec +import yaml +from fv3net.pipelines.kube_jobs import ( + get_base_fv3config, + update_nested_dict, + update_config_for_nudging, +) +import vcm + +logger = logging.getLogger("run_jobs") + +BUCKET = "gs://vcm-ml-data/2020-01-29-baseline-FV3GFS-runs" +CATEGORY = "diagnostic_zarr/atmos_monthly" + +NUDGING_TENDENCY_URL = { + "T": os.path.join(BUCKET, "nudged-T-2015-C48-npz63-fv3atm", CATEGORY), + "T_ps": os.path.join(BUCKET, "nudged-T-ps-2015-C48-npz63-fv3atm", CATEGORY), + "T_ps_u_v": os.path.join(BUCKET, "nudged-T-ps-u-v-2015-C48-npz63-fv3atm", CATEGORY), +} + +VARIABLES_TO_NUDGE = { + "T": ["air_temperature"], + "T_ps": ["air_temperature", "pressure_thickness_of_atmospheric_layer"], + "T_ps_u_v": [ + "air_temperature", + "pressure_thickness_of_atmospheric_layer", + "eastward_wind_after_physics", + "northward_wind_after_physics", + ], +} + +EXPERIMENT_NAME = { + "T": "nudge-mean-t", + "T_ps": "nudge-mean-t-ps", + "T_ps_u_v": "nudge-mean-t-ps-u-v", +} + + +def prepare_config(template, base_config, nudge_label, config_url): + """Get config objects for current job and upload as necessary""" + config = update_nested_dict(base_config, template) + config["runtime"]["nudging_zarr_url"] = NUDGING_TENDENCY_URL[nudge_label] + config["runtime"]["variables_to_nudge"] = VARIABLES_TO_NUDGE[nudge_label] + config["experiment_name"] = EXPERIMENT_NAME[nudge_label] + if config["namelist"]["fv_core_nml"].get("nudge", False): + config = update_config_for_nudging(config, config_url) + return config + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser() + parser.add_argument( + 
"config_template", type=str, help="Path to fv3config yaml template.", + ) + parser.add_argument( + "nudge_label", + type=str, + help="Label for variables to nudge. Must be one of 'T', 'T_ps' or 'T_ps_u_v'.", + ) + parser.add_argument( + "config_url", + type=str, + help="Local or remote location where config files will be saved for later use.", + ) + parser.add_argument( + "--config-version", + type=str, + required=False, + default="v0.3", + help="Default fv3config.yml version to use as the base configuration. " + "This should be consistent with the fv3gfs-python version in the specified " + "docker image.", + ) + args = parser.parse_args() + config_url = args.config_url + base_config = get_base_fv3config(args.config_version) + with open(args.config_template) as f: + template = yaml.load(f, Loader=yaml.FullLoader) + if vcm.cloud.get_protocol(config_url) == "file": + config_url = os.path.abspath(config_url) + config = prepare_config(template, base_config, args.nudge_label, config_url) + fs = vcm.cloud.get_fs(config_url) + fs.mkdirs(config_url, exist_ok=True) + with fsspec.open(os.path.join(config_url, "fv3config.yml"), "w") as f: + yaml.safe_dump(config, f) diff --git a/workflows/run_with_learned_nudging/submit_job.py b/workflows/run_with_learned_nudging/submit_job.py new file mode 100644 index 0000000000..4e0317e13e --- /dev/null +++ b/workflows/run_with_learned_nudging/submit_job.py @@ -0,0 +1,62 @@ +import argparse +import logging +import os +import fsspec +import yaml +import fv3config +from fv3net.pipelines.common import get_alphanumeric_unique_tag +import vcm + +logger = logging.getLogger("run_jobs") + + +def _get_cpu_count_required(config): + layout = config["namelist"]["fv_core_nml"]["layout"] + return 6 * layout[0] * layout[1] + + +def _get_jobname(config): + experiment_name = config["experiment_name"] + unique_tag = get_alphanumeric_unique_tag(8) + return f"{experiment_name}-{unique_tag}" + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser() + parser.add_argument( + "config", type=str, help="Path to fv3config yaml.", + ) + parser.add_argument( + "outdir", type=str, help="Remote url where output will be saved.", + ) + parser.add_argument( + "--dockerimage", + type=str, + required=False, + default="us.gcr.io/vcm-ml/fv3gfs-python", + ) + parser.add_argument( + "--runfile", type=str, required=False, default=None, + ) + args = parser.parse_args() + with fsspec.open(args.config) as f: + config = yaml.load(f, Loader=yaml.FullLoader) + cpu_count_required = _get_cpu_count_required(config) + jobname = _get_jobname(config) + fs = vcm.cloud.get_fs(args.outdir) + runfile = args.runfile + if runfile is not None: + remote_runfile = os.path.join(args.outdir, "config", "runfile.py") + fs.put(runfile, remote_runfile) + runfile = remote_runfile + fv3config.run_kubernetes( + args.config, + args.outdir, + args.dockerimage, + runfile=runfile, + jobname=jobname, + memory_gb=15, + cpu_count=cpu_count_required, + ) + logger.info(f"Submitted {jobname}") diff --git a/workflows/single_fv3gfs_run/40day_nudged.yml b/workflows/single_fv3gfs_run/40day_nudged.yml index 2f8fb183a7..6147bd4d53 100644 --- a/workflows/single_fv3gfs_run/40day_nudged.yml +++ b/workflows/single_fv3gfs_run/40day_nudged.yml @@ -6,6 +6,9 @@ fv3config: diag_table: diag_table_long_nudged experiment_name: nudged-40day initial_conditions: gfs_example + gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' namelist: 
atmos_model_nml: fhout: 1.0 diff --git a/workflows/single_fv3gfs_run/free_GFS_SST_2015.yml b/workflows/single_fv3gfs_run/free_GFS_SST_2015.yml index 3f037040ac..7420c47fbb 100644 --- a/workflows/single_fv3gfs_run/free_GFS_SST_2015.yml +++ b/workflows/single_fv3gfs_run/free_GFS_SST_2015.yml @@ -6,6 +6,9 @@ fv3config: diag_table: diag_table_long_nudged experiment_name: free-gfs-sst initial_conditions: gs://vcm-ml-data/2020-02-10-C48-20150101.00Z_IC + gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' namelist: atmos_model_nml: fhout: 1.0 diff --git a/workflows/single_fv3gfs_run/long_nudged.yml b/workflows/single_fv3gfs_run/long_nudged.yml index ce9c2f12a0..abbcfc9620 100644 --- a/workflows/single_fv3gfs_run/long_nudged.yml +++ b/workflows/single_fv3gfs_run/long_nudged.yml @@ -6,6 +6,9 @@ fv3config: diag_table: diag_table_long_nudged experiment_name: nudged-2016 initial_conditions: gs://vcm-ml-data/2019-12-03-C48-20160101.00Z_IC + gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' namelist: atmos_model_nml: fhout: 1.0 diff --git a/workflows/single_fv3gfs_run/nudge_T_2015.yml b/workflows/single_fv3gfs_run/nudge_T_2015.yml index 50078142af..6b6df94022 100644 --- a/workflows/single_fv3gfs_run/nudge_T_2015.yml +++ b/workflows/single_fv3gfs_run/nudge_T_2015.yml @@ -6,6 +6,9 @@ fv3config: diag_table: diag_table_long_nudged experiment_name: nudgetemp initial_conditions: gs://vcm-ml-data/2020-02-10-C48-20150101.00Z_IC + gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' namelist: atmos_model_nml: fhout: 1.0 diff --git a/workflows/single_fv3gfs_run/nudge_T_ps_2015.yml b/workflows/single_fv3gfs_run/nudge_T_ps_2015.yml index b3dfd696c0..bd6e18c2b1 100644 --- a/workflows/single_fv3gfs_run/nudge_T_ps_2015.yml +++ b/workflows/single_fv3gfs_run/nudge_T_ps_2015.yml @@ -6,6 +6,9 @@ fv3config: diag_table: diag_table_long_nudged experiment_name: nudgetempps initial_conditions: gs://vcm-ml-data/2020-02-10-C48-20150101.00Z_IC + gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' namelist: atmos_model_nml: fhout: 1.0 diff --git a/workflows/single_fv3gfs_run/nudge_T_ps_u_v_2015.yml b/workflows/single_fv3gfs_run/nudge_T_ps_u_v_2015.yml index dfff229612..79f48f44d2 100644 --- a/workflows/single_fv3gfs_run/nudge_T_ps_u_v_2015.yml +++ b/workflows/single_fv3gfs_run/nudge_T_ps_u_v_2015.yml @@ -6,6 +6,9 @@ fv3config: diag_table: diag_table_long_nudged experiment_name: nudgetemppsuv initial_conditions: gs://vcm-ml-data/2020-02-10-C48-20150101.00Z_IC + gfs_analysis_data: + url: gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data + filename_pattern: '%Y%m%d_%HZ_T85LR.nc' namelist: atmos_model_nml: fhout: 1.0 diff --git a/workflows/single_fv3gfs_run/submit_job.py b/workflows/single_fv3gfs_run/submit_job.py index cdbeaa12a6..5b19456fc2 100644 --- a/workflows/single_fv3gfs_run/submit_job.py +++ b/workflows/single_fv3gfs_run/submit_job.py @@ -1,12 +1,16 @@ import argparse -from datetime import datetime, timedelta import logging import os -import uuid import fsspec import yaml import fv3config -from fv3net.pipelines.kube_jobs import get_base_fv3config, update_nested_dict +from fv3net.pipelines.kube_jobs import ( + get_base_fv3config, + transfer_local_to_remote, + update_nested_dict, + update_config_for_nudging, +) +from fv3net.pipelines.common 
import get_alphanumeric_unique_tag logger = logging.getLogger("run_jobs") @@ -20,71 +24,12 @@ "image_pull_policy": "Always", } -HOURS_IN_DAY = 24 -NUDGE_INTERVAL = 6 # hours -NUDGE_FILENAME_PATTERN = "%Y%m%d_%HZ_T85LR.nc" -NUDGE_BUCKET = "gs://vcm-ml-data/2019-12-02-year-2016-T85-nudging-data" - def get_kubernetes_config(config_update): - """Get default kubernetes config and updatedwith provided config_update""" + """Get default kubernetes config and update with provided config_update""" return update_nested_dict(KUBERNETES_CONFIG_DEFAULT, config_update) -def _upload_if_necessary(path, bucket_url): - if not path.startswith("gs://"): - remote_path = os.path.join(bucket_url, os.path.basename(path)) - fsspec.filesystem("gs").put(path, remote_path) - path = remote_path - return path - - -def _get_nudge_file_list(config): - """Return python list of filenames of all nudging files required""" - start_date = _get_0z_start_date(config) - run_duration = fv3config.get_run_duration(config) - run_duration_hours = run_duration.days * HOURS_IN_DAY - nudging_hours = range(0, run_duration_hours + NUDGE_INTERVAL, NUDGE_INTERVAL) - time_list = [start_date + timedelta(hours=hour) for hour in nudging_hours] - return [time.strftime(NUDGE_FILENAME_PATTERN) for time in time_list] - - -def _get_0z_start_date(config): - """Return datetime object for 00:00:00 on current_date""" - current_date = config["namelist"]["coupler_nml"]["current_date"] - return datetime(current_date[0], current_date[1], current_date[2]) - - -def _get_nudge_files_asset_list(config): - """Return list of fv3config assets for all nudging files required""" - return [ - fv3config.get_asset_dict(NUDGE_BUCKET, file, target_location="INPUT") - for file in _get_nudge_file_list(config) - ] - - -def _get_and_write_nudge_files_description_asset(config, config_bucket): - """Write a text file with list of all nudging files required (which the - model requires to know what the nudging files are called) and return an fv3config - asset pointing to this text file.""" - input_fname_list = config["namelist"]["fv_nwp_nudge_nml"]["input_fname_list"] - with fsspec.open(os.path.join(config_bucket, input_fname_list), "w") as remote_file: - remote_file.write("\n".join(_get_nudge_file_list(config))) - return fv3config.get_asset_dict(config_bucket, input_fname_list) - - -def _update_config_for_nudging(model_config, config_bucket): - """Add assets to config for all nudging files and for the text file - listing nudging files""" - if "patch_files" not in model_config: - model_config["patch_files"] = [] - model_config["patch_files"].append( - _get_and_write_nudge_files_description_asset(model_config, config_bucket) - ) - model_config["patch_files"].extend(_get_nudge_files_asset_list(model_config)) - return model_config - - def _get_and_upload_run_config(bucket, run_config, base_model_config): """Get config objects for current job and upload as necessary""" config_bucket = os.path.join(bucket, "config") @@ -93,14 +38,14 @@ def _get_and_upload_run_config(bucket, run_config, base_model_config): # if necessary, upload runfile and diag_table. 
In future, this should be # replaced with an fv3config function to do the same for all elements of config if kubernetes_config["runfile"] is not None: - kubernetes_config["runfile"] = _upload_if_necessary( + kubernetes_config["runfile"] = transfer_local_to_remote( kubernetes_config["runfile"], config_bucket ) - model_config["diag_table"] = _upload_if_necessary( + model_config["diag_table"] = transfer_local_to_remote( model_config["diag_table"], config_bucket ) if model_config["namelist"]["fv_core_nml"].get("nudge", False): - model_config = _update_config_for_nudging(model_config, config_bucket) + model_config = update_config_for_nudging(model_config, config_bucket) with fsspec.open(os.path.join(config_bucket, "fv3config.yml"), "w") as config_file: config_file.write(yaml.dump(model_config)) return {"kubernetes": kubernetes_config, "fv3config": model_config}, config_bucket @@ -110,7 +55,8 @@ def submit_job(bucket, run_config, base_model_config): run_config, config_bucket = _get_and_upload_run_config( bucket, run_config, base_model_config ) - job_name = run_config["fv3config"]["experiment_name"] + f".{uuid.uuid4()}" + tag = get_alphanumeric_unique_tag(8) + job_name = run_config["fv3config"]["experiment_name"] + f"-{tag}" run_config["kubernetes"]["jobname"] = job_name fv3config.run_kubernetes( os.path.join(config_bucket, "fv3config.yml"),
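A final note on the job-naming change in the last hunk: the old suffix `f".{uuid.uuid4()}"` produced long, dotted names, which are awkward for Kubernetes, whose resource names are generally restricted to short lowercase alphanumeric strings with hyphens. The patch switches to `get_alphanumeric_unique_tag(8)` from `fv3net.pipelines.common`; that helper's implementation is not part of this diff, so the version below is only a plausible sketch of what it might look like.

```python
# Hypothetical sketch of fv3net.pipelines.common.get_alphanumeric_unique_tag;
# the actual implementation is not shown in this patch.
import random
import string


def get_alphanumeric_unique_tag(tag_length: int) -> str:
    """Return a random lowercase alphanumeric tag, e.g. for job names."""
    if tag_length < 1:
        raise ValueError("tag_length must be >= 1")
    first = random.choice(string.ascii_lowercase)  # start with a letter
    rest = "".join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(tag_length - 1)
    )
    return first + rest


# Example: produces a job name like "nudge-mean-t-a1b2c3d4"
print(f"nudge-mean-t-{get_alphanumeric_unique_tag(8)}")
```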