Skip to content

Commit

Permalink
Feature #2219 SeriesAnalysis multiple input files (#2408)
Browse files Browse the repository at this point in the history
  • Loading branch information
georgemccabe authored Nov 9, 2023
1 parent 7382be7 commit 91beb24
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 47 deletions.
15 changes: 15 additions & 0 deletions internal/tests/pytests/util/time_util/test_time_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,18 @@ def test_ti_calculate(input_dict, expected_time_info):
@pytest.mark.util
def test_ti_get_seconds_from_relativedelta(lead, valid_time, expected_val):
assert time_util.ti_get_seconds_from_relativedelta(lead, valid_time) == expected_val

@pytest.mark.parametrize(
    'time_info, expected_result', [
        # no time info at all -> range of run times
        ({}, False),
        # concrete init, valid, and lead -> single run time
        ({'init': datetime(2023, 1, 1), 'valid': datetime(2023, 1, 2), 'lead': relativedelta(days=1)}, True),
        # wildcard init -> range
        ({'init': '*', 'valid': datetime(2023, 1, 2), 'lead': relativedelta(days=1)}, False),
        # wildcard valid -> range
        ({'init': datetime(2023, 1, 1), 'valid': '*', 'lead': relativedelta(days=1)}, False),
        # wildcard lead -> range
        ({'init': datetime(2023, 1, 1), 'valid': datetime(2023, 1, 2), 'lead': '*'}, False),
        # missing valid -> range
        ({'init': datetime(2023, 1, 1), 'lead': relativedelta(days=1)}, False),
        # only init set -> range
        ({'init': datetime(2023, 1, 1)}, False),
    ]
)
@pytest.mark.util
def test_is_single_run_time(time_info, expected_result):
    # True is expected only when init, valid, and lead are all concrete values
    actual = time_util.is_single_run_time(time_info)
    assert actual == expected_result
4 changes: 2 additions & 2 deletions metplus/util/string_manip.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ def log_terminal_includes_info(config):
@returns True if log level is set to include INFO messages. False if not.
"""
log_terminal_level = logging.getLevelName(
config.getstr('config', 'LOG_LEVEL_TERMINAL',
config.getstr('runtime', 'LOG_LEVEL_TERMINAL'))
config.getstr_nocheck('config', 'LOG_LEVEL_TERMINAL',
config.getstr('runtime', 'LOG_LEVEL_TERMINAL'))
)
return log_terminal_level <= logging.INFO
20 changes: 20 additions & 0 deletions metplus/util/time_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,23 @@ def add_field_info_to_time_info(time_info, var_info):
value = format_thresh(value)

time_info[key] = value


def is_single_run_time(time_info):
    """!Determine if a specific run time (init or valid + lead) is being
    processed or if a range of run times are being processed. If a wildcard
    character is set for any of init/valid/lead or if any of them are unset,
    then it is assumed that a range of these values are being processed.
    This should be true if the runtime frequency is set to RUN_ONCE_FOR_EACH.
    Note that even if a missing time value can be calculated, e.g. init and
    lead can be used to compute valid, then this function will still return
    False. Input to this function should be run through time_util.ti_calculate
    first to compute the missing time values.

    @param time_info dictionary containing time information to read
    @returns True if init, valid, and lead are all set to values other than
     the wildcard character ('*'). False if any of them are unset or set to
     the wildcard character.
    """
    # a missing key defaults to '*' so an unset value is treated as a wildcard
    return all(
        str(time_info.get(key, '*')) != '*' for key in ('init', 'valid', 'lead')
    )
152 changes: 107 additions & 45 deletions metplus/wrappers/series_analysis_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@
from ..util import do_string_sub, parse_template, get_tags
from ..util import get_lead_sequence, get_lead_sequence_groups
from ..util import ti_get_hours_from_lead, ti_get_seconds_from_lead
from ..util import ti_get_lead_string, ti_calculate
from ..util import ti_get_seconds_from_relativedelta
from ..util import ti_get_lead_string
from ..util import parse_var_list
from ..util import add_to_time_input
from ..util import add_to_time_input, is_single_run_time
from ..util import field_read_prob_info, add_field_info_to_time_info
from .plot_data_plane_wrapper import PlotDataPlaneWrapper
from . import RuntimeFreqWrapper
Expand Down Expand Up @@ -793,9 +792,11 @@ def build_and_run_series_request(self, time_info, fcst_path, obs_path):
add_field_info_to_time_info(time_info, var_info)

# get formatted field dictionary to pass into the MET config file
fcst_field, obs_field = self.get_formatted_fields(var_info,
fcst_path,
obs_path)
fcst_field, obs_field = (
self.get_formatted_fields(var_info, time_info, fcst_path, obs_path)
)
if fcst_field is None:
continue

self.format_field('FCST', fcst_field)
self.format_field('OBS', obs_field)
Expand Down Expand Up @@ -978,20 +979,28 @@ def get_fcst_file_info(self, fcst_path):
num = str(len(files_of_interest))

data_type = 'BOTH' if self.c_dict['USING_BOTH'] else 'FCST'
template = os.path.join(self.c_dict[f'{data_type}_INPUT_DIR'],
self.c_dict[f'{data_type}_INPUT_TEMPLATE'])

# handle multiple templates
templates = []
for template in self.c_dict[f'{data_type}_INPUT_TEMPLATE'].split(','):
templates.append(os.path.join(self.c_dict[f'{data_type}_INPUT_DIR'], template.strip()))

smallest_fcst = 99999999
largest_fcst = -99999999
beg = None
end = None
for filepath in files_of_interest:
filepath = filepath.strip()
file_time_info = parse_template(template,
filepath,
self.logger)
if not file_time_info:
found = False
for template in templates:
file_time_info = parse_template(template, filepath, self.logger)
if file_time_info:
found = True
break

if not found:
continue

lead = ti_get_seconds_from_lead(file_time_info.get('lead'),
file_time_info.get('valid'))
if lead < smallest_fcst:
Expand Down Expand Up @@ -1024,71 +1033,124 @@ def _get_netcdf_min_max(filepath, variable_name):
except (FileNotFoundError, KeyError):
return None, None

def get_formatted_fields(self, var_info, time_info, fcst_path, obs_path):
    """! Get forecast and observation field information for var_info and
    format it so it can be passed into the MET config file

    @param var_info dictionary containing info to format
    @param time_info dictionary containing time information
    @param fcst_path path to file list file for forecast data
    @param obs_path path to file list file for observation data
    @returns tuple containing strings of the formatted forecast and
     observation information or (None, None) if something went wrong
    """
    # NOTE: the fcst field list is built using the obs file list path and
    # vice versa, so each data type can fill in time info from the other
    # data type's files
    fcst_field_list = (
        self._get_field_list('fcst', var_info, time_info, obs_path)
    )
    obs_field_list = (
        self._get_field_list('obs', var_info, time_info, fcst_path)
    )

    if not fcst_field_list or not obs_field_list:
        self.log_error('Could not build formatted fcst and obs field lists')
        return None, None

    # join each list into a single comma-separated string for the MET config
    fcst_fields = ','.join(fcst_field_list)
    obs_fields = ','.join(obs_field_list)

    return fcst_fields, obs_fields

def _get_field_list(self, data_type, var_info, time_info, file_list_path):
    """!Get formatted field information in a list.
    If no time (init/valid/lead) filename template tags were found in the
    level value or if the time info contains all init/valid/lead values
    (none are wildcards), then return a single formatted field item.
    Otherwise, loop through the file list files and use the input template
    to extract time information to use for each field entry.
    The latter is done when processing one data type that has individual
    files for each time and one data type has a single file with all times.

    @param data_type type of data to process, e.g. fcst or obs
    @param var_info dictionary containing info to format
    @param time_info dictionary containing time information
    @param file_list_path path to file list file to parse
    @returns list containing formatted field info to pass to MET config
    """
    # the other data type's templates are used to parse times from its files
    other = 'OBS' if data_type == 'fcst' else 'FCST'

    # if there are no time tags (init/valid/lead) in the field level
    # or if init, valid, and lead have values in time_info,
    # get field info for a single field to pass to the MET config file
    if (not self._has_time_tag(var_info[f'{data_type}_level']) or
            is_single_run_time(time_info)):
        return self._get_field_sub_level(data_type, var_info, time_info)

    # handle multiple templates (comma-separated list)
    templates = [
        os.path.join(self.c_dict[f'{other}_INPUT_DIR'], template.strip())
        for template in self.c_dict[f'{other}_INPUT_TEMPLATE'].split(',')
    ]

    # loop through fcst/obs files to extract time info
    # for each file apply time info to field info and add to list
    field_list = []
    for file_time_info in self._get_times_from_file_list(file_list_path,
                                                         templates):
        field = self._get_field_sub_level(data_type, var_info, file_time_info)
        if field:
            field_list.extend(field)

    return field_list

@staticmethod
def _has_time_tag(string_to_parse):
    """!Get all filename template tags from raw string and check if any of
    the time info tags (init/valid/lead) were found.

    @param string_to_parse string to search for filename template tags
    @returns True if init, valid, or lead tags, e.g. {lead?fmt=%H},
     were found in string. False if none of them were found.
    """
    return any(item in ('init', 'valid', 'lead')
               for item in get_tags(string_to_parse))

def _get_field_sub_level(self, data_type, var_info, time_dict):
    """!Get formatted field information for data type, substituting time
    information into level value.

    @param data_type type of data to find, e.g. fcst or obs
    @param var_info dictionary containing info to format
    @param time_dict dictionary containing time information
    @returns string with formatted field info or None
    """
    # fill in any time tags (init/valid/lead) found in the level string
    sub_level = do_string_sub(var_info[f'{data_type}_level'], **time_dict)
    return self.get_field_info(
        d_type=data_type.upper(),
        v_name=var_info[f'{data_type}_name'],
        v_level=sub_level,
        v_thresh=var_info[f'{data_type}_thresh'],
        v_extra=var_info[f'{data_type}_extra'],
    )

@staticmethod
def _get_times_from_file_list(file_path, templates):
    """!Generator that yields time info dictionaries.
    Loops through file paths found in text file and use list of filename
    templates to parse time information from each file.

    @param file_path path to file list file to parse
    @param templates list of filename templates to use to parse time info
     out of file paths found in file_path file
    """
    with open(file_path, 'r') as file_handle:
        # skip the first line of the file list file
        # (assumed to be a header line -- matches MET file list format)
        file_list = file_handle.read().splitlines()[1:]

    for file_name in file_list:
        # yield time info from the first template that matches the file;
        # files that match none of the templates are silently skipped
        for template in templates:
            file_time_info = parse_template(template, file_name)
            if file_time_info:
                yield file_time_info
                break

0 comments on commit 91beb24

Please sign in to comment.