From e1345e6adddab508f5a188964eb03a646e79b168 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Fri, 4 Nov 2022 09:10:03 -0600 Subject: [PATCH 1/7] #2285 Removed executable permission from data files --- data/sample_obs/ST4/ST4.2010010112.24h | Bin data/sample_obs/ascii/precip24_2010010112.ascii | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 data/sample_obs/ST4/ST4.2010010112.24h mode change 100755 => 100644 data/sample_obs/ascii/precip24_2010010112.ascii diff --git a/data/sample_obs/ST4/ST4.2010010112.24h b/data/sample_obs/ST4/ST4.2010010112.24h old mode 100755 new mode 100644 diff --git a/data/sample_obs/ascii/precip24_2010010112.ascii b/data/sample_obs/ascii/precip24_2010010112.ascii old mode 100755 new mode 100644 From ddfbbd2a9f8caf403729d1afb392d92d3bea06e5 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Wed, 9 Nov 2022 08:44:52 -0700 Subject: [PATCH 2/7] #2285 The class met_point_obs is renamed to base_met_point_obs. csv_point_obs and met_point_obs are derived from base_met_point_obs. Added convert_point_data --- scripts/python/met_point_obs.py | 389 ++++++++++++++++++++++++++++---- 1 file changed, 341 insertions(+), 48 deletions(-) diff --git a/scripts/python/met_point_obs.py b/scripts/python/met_point_obs.py index b3cf9b5d6a..5cfc5f7141 100755 --- a/scripts/python/met_point_obs.py +++ b/scripts/python/met_point_obs.py @@ -1,3 +1,4 @@ + ''' Created on Nov 10, 2021 @@ -25,12 +26,13 @@ MET_PYTHON_OBS_ARGS = "MET_POINT_PYTHON_ARGS" -class met_point_obs(ABC): + +class base_met_point_obs(object): ''' classdocs ''' ERROR_P = " ==ERROR_PYTHON==" - INFO_P = " ==INFO_PYTHON==" + INFO_P = " ==INFO_PYTHON==" python_prefix = 'PYTHON_POINT_RAW' @@ -39,7 +41,10 @@ def __init__(self, use_var_id=True): Constructor ''' self.input_name = None + self.ignore_input_file = False self.use_var_id = use_var_id # True if variable index, False if GRIB code + self.error_msg = "" + self.has_error = False # Header self.nhdr = 0 @@ -75,62 +80,65 @@ def __init__(self, use_var_id=True): self.hdr_irpt_typ = [] # optional self.hdr_inst_typ = [] # optional - @abstractmethod - def read_data(self, args): - # args can be input_file_name, list, or dictionary - # - The variables at __init__ should be filled as python list or numpy array - # - set self.input_name - # - # Here is a template - ''' - if isinstance(args, dict): - in_filename = args.get('in_name',None) - elif isinstance(args, list): - in_filename = args[0] + def add_error_msg(self, error_msg): + self.has_error = True + self.log_error_msg(error_msg) + if 0 == len(self.error_msg): + self.error_msg = error_msg else: - in_filename = args - self.input_name = in_filename - ''' - pass + self.error_msg = "{m1}\n{m2}".format(m1=self.error_msg, m2=error_msg) + + def add_error_msgs(self, error_msgs): + self.has_error = True + for error_msg in error_msgs: + self.add_error_msg(error_msg) def check_data_member_float(self, local_var, var_name): if 0 == len(local_var): - self.log_error("{v} is empty".format(v=var_name)) + self.add_error_msg("{v} is empty (float)".format(v=var_name)) elif isinstance(local_var, list): - if 0 <= str(type(local_var[0])).find('numpy'): + if isinstance(local_var[0], str) and not self.is_number(local_var[0]): + self.add_error_msg("Not supported data type ({v} string) for {n}[0] (int or float only)".format( + n=var_name, v=local_var[0])) + elif 0 <= str(type(local_var[0])).find('numpy'): self.log_info("Recommend using numpy instead of python list for {v} ({t}) to avoid side effect".format( v=var_name,
t=type(local_var[0]))) elif not isinstance(local_var[0], (int, float)): - self.log_error("Not supported data type ({t}) for {v}[0] (int or float only)".format( - v=var_name, t=local_var[0].type)) + # self.add_error_msg("Not supported data type ({t}) for {v}[0] (int or float only)".format( + # v=var_name, t=local_var[0].type)) + self.add_error_msg("Not supported data type ({t}) for {v}[0] (int or float only)".format( + v=var_name, t=type(local_var[0]))) elif not isinstance(local_var, np.ndarray): - self.log_error("Not supported data type ({t}) for {v} (list and numpy.ndarray)".format( + self.add_error_msg("Not supported data type ({t}) for {v} (list and numpy.ndarray)".format( v=var_name, t=type(local_var))) def check_data_member_int(self, local_var, var_name): if 0 == len(local_var): - self.log_error("{v} is empty".format(v=var_name)) + self.add_error_msg("{v} is empty (int)".format(v=var_name)) elif isinstance(local_var, list): - if 0 <= str(type(local_var[0])).find('numpy'): + if isinstance(local_var[0], str) and not self.is_number(local_var[0]): + self.add_error_msg("Not supported data type ({v} string) for {n}[0] (int or float only)".format( + n=var_name, v=local_var[0])) + elif 0 <= str(type(local_var[0])).find('numpy'): self.log_info("Recommend using numpy instead of python list for {v} ({t}) to avoid side effect".format( v=var_name, t=type(local_var[0]))) elif not isinstance(local_var[0], int): - self.log_error("Not supported data type ({t}) for {v}[0] (int only)".format( + self.add_error_msg("Not supported data type ({t}) for {v}[0] (int only)".format( v=var_name, t=type(local_var[0]))) elif not isinstance(local_var, np.ndarray): - self.log_error("Not supported data type ({t}) for {v} (list and numpy.ndarray)".format( + self.add_error_msg("Not supported data type ({t}) for {v} (list and numpy.ndarray)".format( v=var_name, t=type(local_var))) def check_data_member_string(self, local_var, var_name): if 0 == len(local_var): - self.log_error("{v} is empty".format(v=var_name)) + self.add_error_msg("{v} is empty (string)".format(v=var_name)) elif not isinstance(local_var, (list)): - self.log_error("Not supported data type ({t}) for {v} (list)".format( + self.add_error_msg("Not supported data type ({t}) for {v} (list)".format( v=var_name, t=type(local_var))) def check_point_data(self): - if self.input_name is not None and not os.path.exists(self.input_name): - self.log_error('The netcdf input {f} does not exist'.format(f=self.input_name)) + if not self.ignore_input_file and self.input_name is not None and not os.path.exists(self.input_name): + self.add_error_msg('The netcdf input {f} does not exist'.format(f=self.input_name)) else: self.check_data_member_int(self.hdr_typ,'hdr_typ') self.check_data_member_int(self.hdr_sid,'hdr_sid') @@ -149,7 +157,8 @@ def check_point_data(self): self.check_data_member_float(self.obs_hgt,'obs_hgt') self.check_data_member_float(self.obs_val,'obs_val') self.check_data_member_string(self.obs_qty_table,'obs_qty_table') - self.check_data_member_string(self.obs_var_table,'obs_var_table') + if self.use_var_id: + self.check_data_member_string(self.obs_var_table,'obs_var_table') def get_point_data(self): if self.nhdr <= 0: @@ -171,6 +180,15 @@ def get_point_data(self): self.check_point_data() return self.__dict__ + def get_type(self, value): + return 'string' if isinstance('str') else type(value) + + def is_number(self, num_str): + return num_str.replace('-','1').replace('+','2').replace('.','3').isdigit() + + def log_error_msg(self, err_msg): + print('{p} 
{m}'.format(p=self.ERROR_P, m=err_msg)) + def log_error(self, err_msgs): print(self.ERROR_P) for err_line in err_msgs.split('\n'): @@ -227,33 +245,302 @@ def print_point_data(met_point_data, print_subset=True): met_point_obs.print_data('obs_lvl',met_point_data['obs_lvl']) met_point_obs.print_data('obs_hgt',met_point_data['obs_hgt']) met_point_obs.print_data('obs_val',met_point_data['obs_val']) - else: print('All',met_point_data) - print(" nhdr: ",met_point_data['nhdr']) - print(" nobs: ",met_point_data['nobs']) - print('use_var_id: ',met_point_data['use_var_id']) - print('hdr_typ: ',met_point_data['hdr_typ']) + print(" nhdr: ",met_point_data['nhdr']) + print(" nobs: ",met_point_data['nobs']) + print(' use_var_id: ',met_point_data['use_var_id']) + print(' hdr_typ: ',met_point_data['hdr_typ']) print('hdr_typ_table: ',met_point_data['hdr_typ_table']) - print('hdr_sid: ',met_point_data['hdr_sid']) + print(' hdr_sid: ',met_point_data['hdr_sid']) print('hdr_sid_table: ',met_point_data['hdr_sid_table']) - print('hdr_vld: ',met_point_data['hdr_vld']) + print(' hdr_vld: ',met_point_data['hdr_vld']) print('hdr_vld_table: ',met_point_data['hdr_vld_table']) - print('hdr_lat: ',met_point_data['hdr_lat']) - print('hdr_lon: ',met_point_data['hdr_lon']) - print('hdr_elv: ',met_point_data['hdr_elv']) - print('obs_hid: ',met_point_data['obs_hid']) - print('obs_vid: ',met_point_data['obs_vid']) + print(' hdr_lat: ',met_point_data['hdr_lat']) + print(' hdr_lon: ',met_point_data['hdr_lon']) + print(' hdr_elv: ',met_point_data['hdr_elv']) + print(' obs_hid: ',met_point_data['obs_hid']) + print(' obs_vid: ',met_point_data['obs_vid']) print('obs_var_table: ',met_point_data['obs_var_table']) - print('obs_qty: ',met_point_data['obs_qty']) + print(' obs_qty: ',met_point_data['obs_qty']) print('obs_qty_table: ',met_point_data['obs_qty_table']) - print('obs_lvl: ',met_point_data['obs_lvl']) - print('obs_hgt: ',met_point_data['obs_hgt']) - print('obs_val: ',met_point_data['obs_val']) + print(' obs_lvl: ',met_point_data['obs_lvl']) + print(' obs_hgt: ',met_point_data['obs_hgt']) + print(' obs_val: ',met_point_data['obs_val']) print(' === MET point data by python embedding ===') +class csv_point_obs(ABC, base_met_point_obs): + + def __init__(self, point_data): + super(csv_point_obs, self).__init__() + + hdr_cnt = obs_cnt = len(point_data) + self.point_data = point_data + self.nhdr = self.nobs = obs_cnt + self.nhdr_typ = self.nhdr_sid = self.nhdr_vld = hdr_cnt + self.nobs_qty = self.nobs_var = obs_cnt + self.input_file = None + self.ignore_input_file = True + + self.obs_qty = [ 0 for _ in range(0, obs_cnt) ] # (nobs_qty) integer, index of self.obs_qty_table + self.obs_hid = [ 0 for _ in range(0, obs_cnt) ] # (nobs) integer + self.obs_vid = [ 0 for _ in range(0, obs_cnt) ] # (nobs) integer, veriable index from self.obs_var_table or GRIB code + self.obs_lvl = [ nc_tools.FILL_VALUE for _ in range(0, obs_cnt) ] # (nobs) float + self.obs_hgt = [ nc_tools.FILL_VALUE for _ in range(0, obs_cnt) ] # (nobs) float + self.obs_val = [ nc_tools.FILL_VALUE for _ in range(0, obs_cnt) ] # (nobs) float + + def check_csv_record(self, csv_point_data, index): + error_msgs = [] + # names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'] + # dtype={'typ':'str', 'sid':'str', 'vld':'str', 'var':'str', 'qc':'str'} + if 11 > len(csv_point_data): + error_msgs.append("{i}-th data: missing columns. 
should be 11 columns, not {c} columns".format( + i=index, c=len(csv_point_data))) + elif 11 < len(csv_point_data): + print("{i}-th data: ignore after 11-th columns out of {c} columns".format( + i=index, c=len(csv_point_data))) + if not isinstance(csv_point_data[0], str): + error_msgs.append("{i}-th data: message_type is not string".format(i=index)) + if not isinstance(csv_point_data[1], str): + error_msgs.append("{i}-th data: station_id is not string".format(i=index)) + if not isinstance(csv_point_data[2], str): + error_msgs.append("{i}-th data: valid_time is not string".format(i=index)) + if isinstance(csv_point_data[3], str): + error_msgs.append("{i}-th data: latitude can not be a string".format(i=index)) + elif csv_point_data[3] < -90.0 or csv_point_data[3] > 90.0: + error_msgs.append("{i}-th data: latitude ({l}) is out of range".format(i=index, l=csv_point_data[3])) + if isinstance(csv_point_data[4], str): + error_msgs.append("{i}-th data: longitude can not be a string".format(i=index)) + elif csv_point_data[4] < -180.0 or csv_point_data[4] > 360.0: + error_msgs.append("{i}-th data: longitude ({l}) is out of range".format(i=index, l=csv_point_data[4])) + if not isinstance(csv_point_data[6], str): + error_msgs.append("{i}-th data: grib_code/var_name is not string".format(i=index)) + if not isinstance(csv_point_data[9], str): + error_msgs.append("{i}-th data: quality_mark is not string".format(i=index)) + is_string, is_num = self.is_num_string(csv_point_data[5]) + if is_string and not is_num: + error_msgs.append("{i}-th data: elevation: only NA is accepted as string".format(i=index)) + is_string, is_num = self.is_num_string(csv_point_data[7]) + if is_string and not is_num: + error_msgs.append("{i}-th data: obs_level: only NA is accepted as string".format(i=index)) + is_string, is_num = self.is_num_string(csv_point_data[8]) + if is_string and not is_num: + error_msgs.append("{i}-th data: obs_height: only NA is accepted as string".format(i=index)) + is_string, is_num = self.is_num_string(csv_point_data[10]) + if is_string and not is_num: + error_msgs.append("{i}-th data: obs_value: only NA is accepted as string".format(i=index)) + return error_msgs + + def check_csv_point_data(self, all_records=False): + if 0 == len(self.point_data): + self.add_error_msg("No data!") + elif all_records: + data_idx = 0 + for csv_point_data in self.point_data: + data_idx += 1 + error_messages = self.check_csv_record(csv_point_data, data_idx) + if len(error_messages) > 0: + self.add_error_msgs(error_messages) + else: + error_messages = self.check_csv_record(self.point_data[0], index=1) + if len(error_messages) > 0: + self.add_error_msgs(error_messages) + if 1 < len(self.point_data): + error_messages = self.check_csv_record(self.point_data[-1], index=len(self.point_data)) + if len(error_messages) > 0: + self.add_error_msgs(error_messages) + + def convert_point_data(self): + hdr_cnt = hdr_typ_cnt = hdr_sid_cnt = hdr_vld_cnt = 0 + var_name_cnt = qc_cnt = 0 + + hdr_map = {} + hdr_typ_map = {} + hdr_sid_map = {} + hdr_vld_map = {} + obs_var_map = {} + obs_qty_map = {} + + index = 0 + # Build map + # names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'] + for csv_point_data in self.point_data: + hdr_typ_str = csv_point_data[0] + hdr_typ_idx = hdr_typ_map.get(hdr_typ_str,-1) + if hdr_typ_idx < 0: + hdr_typ_idx = hdr_typ_cnt + hdr_typ_map[hdr_typ_str] = hdr_typ_idx + hdr_typ_cnt += 1 + + hdr_sid_str = csv_point_data[1] + hdr_sid_idx = hdr_sid_map.get(hdr_sid_str,-1) + if hdr_sid_idx < 0: + 
hdr_sid_idx = hdr_sid_cnt + hdr_sid_map[hdr_sid_str] = hdr_sid_idx + hdr_sid_cnt += 1 + + hdr_vld_str = csv_point_data[2] + hdr_vld_idx = hdr_vld_map.get(hdr_vld_str,-1) + if hdr_vld_idx < 0: + hdr_vld_idx = hdr_vld_cnt + hdr_vld_map[hdr_vld_str] = hdr_vld_idx + hdr_vld_cnt += 1 + + lat = csv_point_data[3] + lon = csv_point_data[4] + elv = self.get_num_value(csv_point_data[5] ) + hdr_key = (hdr_typ_idx,hdr_sid_idx,hdr_vld_idx,lat,lon,elv) + hdr_idx = hdr_map.get(hdr_key,-1) + if hdr_idx < 0: + hdr_idx = hdr_cnt + hdr_map[hdr_key] = hdr_idx + hdr_cnt += 1 + + var_id_str = csv_point_data[6] + if self.use_var_id: + var_id = obs_var_map.get(var_id_str,-1) + if var_id < 0: + var_id = var_name_cnt + obs_var_map[var_id_str] = var_id + var_name_cnt += 1 + else: + var_id = int(var_id_str) + + qc_str = csv_point_data[9] + qc_id = obs_qty_map.get(qc_str,-1) + if qc_id < 0: + qc_id = qc_cnt + obs_qty_map[qc_str] = qc_id + qc_cnt += 1 + + # names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'] + self.obs_vid[index] = var_id + self.obs_hid[index] = hdr_idx + self.obs_lvl[index] = self.get_num_value(csv_point_data[7]) + self.obs_hgt[index] = self.get_num_value(csv_point_data[8]) + self.obs_val[index] = self.get_num_value(csv_point_data[10]) + self.obs_qty[index] = qc_id + + index += 1 + + self.nhdr = hdr_cnt + self.nhdr_typ = hdr_typ_cnt + self.nhdr_sid = hdr_sid_cnt + self.nhdr_vld = hdr_vld_cnt + self.nobs_var = var_name_cnt + self.nobs_qty = qc_cnt + + # Fill header array and table array based on the map + self.hdr_typ = [ 0 for _ in range(0, hdr_cnt) ] + self.hdr_sid = [ 0 for _ in range(0, hdr_cnt) ] + self.hdr_vld = [ 0 for _ in range(0, hdr_cnt) ] + self.hdr_lat = [ nc_tools.FILL_VALUE for _ in range(0, hdr_cnt) ] + self.hdr_lon = [ nc_tools.FILL_VALUE for _ in range(0, hdr_cnt) ] + self.hdr_elv = [ nc_tools.FILL_VALUE for _ in range(0, hdr_cnt) ] + for key, idx in hdr_map.items(): + self.hdr_typ[idx] = key[0] + self.hdr_sid[idx] = key[1] + self.hdr_vld[idx] = key[2] + self.hdr_lat[idx] = key[3] + self.hdr_lon[idx] = key[4] + self.hdr_elv[idx] = key[5] + + self.hdr_typ_table = [ "" for _ in range(0, hdr_typ_cnt) ] + self.hdr_sid_table = [ "" for _ in range(0, hdr_sid_cnt) ] + self.hdr_vld_table = [ "" for _ in range(0, hdr_vld_cnt) ] + self.obs_qty_table = [ "" for _ in range(0, qc_cnt) ] + self.obs_var_table = [ "" for _ in range(0, var_name_cnt) ] + for key, idx in hdr_typ_map.items(): + self.hdr_typ_table[idx] = key + for key, idx in hdr_sid_map.items(): + self.hdr_sid_table[idx] = key + for key, idx in hdr_vld_map.items(): + self.hdr_vld_table[idx] = key + for key, idx in obs_qty_map.items(): + self.obs_qty_table[idx] = key + for key, idx in obs_var_map.items(): + self.obs_var_table[idx] = key + + return self.get_point_data() + + def get_num_value(self, column_value): + num_value = column_value + if isinstance(column_value, str): + if column_value.lower() == 'na': + num_value = nc_tools.FILL_VALUE + elif self.is_number(column_value): + num_value = float(column_value) + num_value = nc_tools.FILL_VALUE + return num_value + + def is_grib_code(self): + grib_code = True + for _point_data in self.point_data: + if isinstance(_point_data[6], int): + continue + elif isinstance(_point_data[6], str) and not _point_data[6].isdecimal(): + grib_code = False + break; + return grib_code + + def is_num_string(self, column_value): + is_string = isinstance(column_value, str) + if is_string: + is_num = True if self.is_number(column_value) or column_value.lower() == 'na' else False + 
else: + is_num = True + return is_string, is_num + + +class met_point_obs(ABC, base_met_point_obs): + + @abstractmethod + def read_data(self, args): + # args can be input_file_name, list, or dictionary + # - The variables at __init__ should be filled as python list or numpy array + # - set self.input_name + # + # Here is a template + ''' + if isinstance(args, dict): + in_filename = args.get('in_name',None) + elif isinstance(args, list): + in_filename = args[0] + else: + in_filename = args + self.input_name = in_filename + ''' + pass + + +# Note: caller should import netCDF4 +# the argements nc_group(dataset) and nc_var should not be None +class nc_tools(): + + FILL_VALUE = -9999. + met_missing = -99999999. + + @staticmethod + def get_num_array(nc_group, var_name): + nc_var = nc_group.variables.get(var_name, None) + #return [] if nc_var is None else nc_var[:].filled(nc_var._FillValue if '_FillValue' in nc_var.ncattrs() else nc_tools.met_missing) + return [] if nc_var is None else nc_var[:] + + @staticmethod + def get_ncbyte_array_to_str(nc_var): + nc_str_data = nc_var[:] + if nc_var.datatype.name == 'bytes8': + nc_str_data = [ str(s.compressed(),"utf-8") for s in nc_var[:] ] + return nc_str_data + + @staticmethod + def get_string_array(nc_group, var_name): + nc_var = nc_group.variables.get(var_name, None) + return [] if nc_var is None else nc_tools.get_ncbyte_array_to_str(nc_var) + + # This is a sample drived class class sample_met_point_obs(met_point_obs): @@ -281,6 +568,12 @@ def read_data(self, arg_map={}): self.obs_var_table = [ "TMP", "RH" ] self.obs_qty_table = [ "NA" ] +def convert_point_data(point_data, check_all_records=False): + _csv_point_data = csv_point_obs(point_data) + if _csv_point_data.is_grib_code(): + _csv_point_data.use_var_id = False + _csv_point_data.check_csv_point_data(check_all_records) + return _csv_point_data.convert_point_data() def main(): args = {} # or args = [] From 038e97b513f4f1e9578b5455552687d6adf927cb Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Wed, 9 Nov 2022 08:45:29 -0700 Subject: [PATCH 3/7] #2285 generated met_point_data by calling convert_point_data --- scripts/python/read_ascii_point.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/python/read_ascii_point.py b/scripts/python/read_ascii_point.py index ee7d2b7b9f..7fb8eb076a 100755 --- a/scripts/python/read_ascii_point.py +++ b/scripts/python/read_ascii_point.py @@ -1,6 +1,7 @@ import pandas as pd import os import sys +from met_point_obs import convert_point_data ######################################################################## @@ -36,8 +37,10 @@ point_data = pd.read_csv(input_file, header=None, delim_whitespace=True, keep_default_na=False, names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'], dtype={'typ':'str', 'sid':'str', 'vld':'str', 'var':'str', 'qc':'str'}).values.tolist() - print("Data Length:\t" + repr(len(point_data))) - print("Data Type:\t" + repr(type(point_data))) + print(" point_data: Data Length:\t" + repr(len(point_data))) + print(" point_data: Data Type:\t" + repr(type(point_data))) + met_point_data = convert_point_data(point_data) + print(" met_point_data: Data Type:\t" + repr(type(met_point_data))) except NameError: print("Can't find the input file") sys.exit(1) From 911ac9b39df36bc6a68e147e10218e9dae1fb0cc Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Thu, 10 Nov 2022 13:08:55 -0700 Subject: [PATCH 4/7] #2285 Modified python_plot_point_obs_CONFIG_XXX to python_plot_point_obs_with_point_data --- 
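
Note: the updated unit test below exercises plot_point_obs through Python embedding of the ASCII point observations. For reference while reading the XML diff, the test now runs roughly the following command, where the &...; entities are placeholders resolved by the unit test framework:

.. code-block:: none

    plot_point_obs \
      'PYTHON_NUMPY=&MET_BASE;/python/read_ascii_point.py &MET_DATA;/sample_obs/ascii/precip24_2010010112.ascii' \
      &OUTPUT_DIR;/python/precip24_2010010112.ps \
      -config &CONFIG_DIR;/PlotPointObsConfig \
      -plot_grid &DATA_DIR_MODEL;/grib2/nam/nam_2012040900_F012.grib2 \
      -title "Precip24 from python embedding" -gc 61 \
      -v 3
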
internal/test_unit/xml/unit_python.xml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/internal/test_unit/xml/unit_python.xml b/internal/test_unit/xml/unit_python.xml index 94dfac5e68..05f9d2650d 100644 --- a/internal/test_unit/xml/unit_python.xml +++ b/internal/test_unit/xml/unit_python.xml @@ -495,22 +495,21 @@ - + &MET_BIN;/plot_point_obs TO_GRID NONE \ - &OUTPUT_DIR;/pb2nc/ndas.20120409.t12z.prepbufr.tm00.nc \ - &OUTPUT_DIR;/python/nam_and_ndas.20120409.t12z.prepbufr_CONFIG.ps \ - -point_obs &OUTPUT_DIR;/ascii2nc/trmm_2012040912_3hr.nc \ - -plot_grid &DATA_DIR_MODEL;/grib2/nam/nam_2012040900_F012.grib2 \ + 'PYTHON_NUMPY=&MET_BASE;/python/read_ascii_point.py &MET_DATA;/sample_obs/ascii/precip24_2010010112.ascii' \ + &OUTPUT_DIR;/python/precip24_2010010112.ps \ -config &CONFIG_DIR;/PlotPointObsConfig \ - -title "NAM 2012040900 F12 vs NDAS 500mb RH and TRMM 3h > 0" \ + -plot_grid &DATA_DIR_MODEL;/grib2/nam/nam_2012040900_F012.grib2 \ + -title "Precip24 from python embedding" -gc 61 \ -v 3 - &OUTPUT_DIR;/python/nam_and_ndas.20120409.t12z.prepbufr_CONFIG.ps + &OUTPUT_DIR;/python/precip24_2010010112.ps From 7082da11cb273255eb22f3e6b582181cd3019309 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 18 Nov 2022 14:11:37 -0700 Subject: [PATCH 5/7] Updates documentation about Python Embedding for point observations ations in Appendix F, and adds a link for the section on Python embedding for MPR data. --- docs/Users_Guide/appendixF.rst | 59 +++++++++++----------------------- 1 file changed, 18 insertions(+), 41 deletions(-) diff --git a/docs/Users_Guide/appendixF.rst b/docs/Users_Guide/appendixF.rst index 1c751dad96..5fc381aea0 100644 --- a/docs/Users_Guide/appendixF.rst +++ b/docs/Users_Guide/appendixF.rst @@ -250,54 +250,31 @@ The Ensemble-Stat, Series-Analysis, and MTD tools support the use of file lists Python Embedding for Point Observations ======================================= -The ASCII2NC tool supports the "-format python" option. With this option, point observations may be passed as input. An example of this is provided in :numref:`ascii2nc-pyembed`. That example uses the **read_ascii_point.py** sample script which is included with the MET code. It reads ASCII data in MET's 11-column point observation format and stores it in a Pandas dataframe to be read by the ASCII2NC tool with Python. +The ASCII2NC tool supports the "-format python" option. With this option, point observations may be passed as input. An example of this is shown below: -The **read_ascii_point.py** sample script can be found in: +.. code-block:: none -• MET installation directory in *MET_BASE/python*. + ascii2nc -format python \ + "MET_BASE/python/read_ascii_point.py sample_ascii_obs.txt" \ + sample_ascii_obs_python.nc -• `MET GitHub repository `_ in *met/scripts/python*. +The example uses the **read_ascii_point.py** sample script which is included with the MET code. It reads ASCII data in MET's 11-column point observation format and stores it in a Pandas DataFrame to be read by the ASCII2NC tool with Python. -The Point2Grid, Plot-Point-Obs, Ensemble-Stat, and Point-Stat tools also process point observations. They support python embedding of point observations directly on the command line by replacing the input MET NetCDF point observation file name with the python command to be run. The command must begin with the prefix 'PYTHON_NUMPY=' and be followed by the path to python script and any arguments. 
The full command should be enclosed in single quotes to prevent embedded whitespace from causing parsing errors. The customized python script is expected to extend MET_BASE/python/met_point_obs.py. That script creates a python variable named **met_point_data** which is a dictionary containing formatted point observation data. .. code-block:: none met_point_data = { 'use_var_id': True/False, # obs_vid are variable index if True, otherwise GRIB codes # Header data 'nhdr': integer_value, # number of headers 'pbhdr': integer_value, # number of PREPBUFR specific headers 'nhdr_typ': integer_value, # number of message types 'nhdr_sid': integer_value, # number of station IDs 'nhdr_vld': integer_value, # number of valid times 'hdr_typ': nympy_integer_array, # index of message type 'hdr_sid': nympy_integer_array, # index of station ID 'hdr_vld': nympy_integer_array, # index of valid time 'hdr_lat': nympy_float_array, # latitude 'hdr_lon': nympy_float_array, # longitude 'hdr_elv': nympy_float_array, # station elevation 'hdr_typ_table': string_value, # message types 'hdr_sid_table': string_value, # station IDs 'hdr_vld_table': string_value, # valid times "yyyymmdd_hhmmss" 'hdr_prpt_typ': nympy_integer_array, # optional 'hdr_irpt_typ': nympy_integer_array, # optional 'hdr_inst_typ': nympy_integer_array, # optional # Observation data 'nobs': integer_value, # number of observation 'nobs_qty': integer_value # number of quality marks 'nobs_var': integer_value # number of variable names 'obs_qty': nympy_integer_array, # index of quality mark 'obs_hid': nympy_integer_array, # index of header 'obs_vid': nympy_integer_array, # index of veriable or GRIB code 'obs_lvl': nympy_float_array, # pressure level 'obs_hgt': nympy_float_array, # height of observation data 'obs_val' nympy_float_array, # observatin value 'obs_qty_table': string_array, # quality marks 'obs_var_table': string_array, # variable names } +The Point2Grid, Plot-Point-Obs, Ensemble-Stat, and Point-Stat tools also process point observations. They support Python embedding of point observations directly on the command line by replacing the input MET NetCDF point observation file name with the Python command to be run. The Python command must begin with the prefix 'PYTHON_NUMPY=' and be followed by the path to the user's Python script and any arguments. The full command should be enclosed in single quotes to prevent embedded whitespace from causing parsing errors. An example of this is shown below: .. code-block:: none + plot_point_obs \ + "PYTHON_NUMPY=MET_BASE/python/read_ascii_point.py sample_ascii_obs.txt" \ + output_image.ps +Both of the above examples use the **read_ascii_point.py** sample script which is included with the MET code. It reads ASCII data in MET's 11-column point observation format and stores it in a Pandas DataFrame to be read by the MET tools using Python embedding for point data. The **read_ascii_point.py** sample script can be found in: + +• MET installation directory in *MET_BASE/python*. + +• `MET GitHub repository `_ in *met/scripts/python*. + +.. _pyembed-mpr-data: Python Embedding for MPR data ============================= From af71f3bd4897e45ac4bb4102f2bacbef01aa40eb Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 18 Nov 2022 14:12:50 -0700 Subject: [PATCH 6/7] Removes a redundant section on Python Embedding for Point Observations that is better suited for Appendix F. Appendix F is already referenced in describing ASCII2NC.
Cleaned up some wording regarding Point2Grid Python embedding. --- docs/Users_Guide/reformat_point.rst | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/docs/Users_Guide/reformat_point.rst b/docs/Users_Guide/reformat_point.rst index f4430cd8e1..ff0be64878 100644 --- a/docs/Users_Guide/reformat_point.rst +++ b/docs/Users_Guide/reformat_point.rst @@ -538,19 +538,6 @@ In this example, the ASCII2NC tool will reformat the input **sample_ascii_obs.tx .. _ascii2nc-pyembed: -Python Embedding for Point Observations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Here is an example of processing the same set of observations but using Python embedding instead: - -.. code-block:: none - - ascii2nc -format python \ - "MET_BASE/python/read_ascii_point.py sample_ascii_obs.txt" \ - sample_ascii_obs_python.nc - -Please refer to :numref:`Appendix F, Section %s ` for more details about Python embedding in MET. - ascii2nc configuration file --------------------------- @@ -1070,7 +1057,7 @@ For the GOES-16 and GOES-17 data, the computing lat/long is time consuming. So t When processing GOES-16 data, the **-qc** option may also be used to specify the acceptable quality control flag values. The example above regrids the GOES-16 AOD values to NCEP Grid number 212 (which QC flags are high, medium, and low), writing to the output the maximum AOD value falling inside each grid box. -Listed below is an example of processing the same set of observations but using python embedding instead: +Listed below is an example of processing the same set of observations but using Python embedding instead: .. code-block:: none @@ -1079,11 +1066,8 @@ Listed below is an example of processing the same set of observations but using G212 python_gridded_ascii_python.nc -config Point2GridConfig_edr \ -field 'name="200"; level="*"; valid_time="20130827_205959";' -method MAX -v 1 -The user should replace the python script with the customized python script for the custom point observation data. This is an example for the python embedding. - Please refer to :numref:`Appendix F, Section %s ` for more details about Python embedding in MET. - point2grid output ----------------- From db06e81ff006f0ca2571017bd738431f2b84cabc Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 18 Nov 2022 14:24:57 -0700 Subject: [PATCH 7/7] Removes extra text missed the first time. --- docs/Users_Guide/appendixF.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/Users_Guide/appendixF.rst b/docs/Users_Guide/appendixF.rst index 5fc381aea0..9f37d24374 100644 --- a/docs/Users_Guide/appendixF.rst +++ b/docs/Users_Guide/appendixF.rst @@ -250,54 +250,31 @@ The ASCII2NC tool supports the "-format python" option. With this option, point observations may be passed as input. An example of this is shown below: .. code-block:: none ascii2nc -format python \ "MET_BASE/python/read_ascii_point.py sample_ascii_obs.txt" \ sample_ascii_obs_python.nc -The example uses the **read_ascii_point.py** sample script which is included with the MET code. It reads ASCII data in MET's 11-column point observation format and stores it in a Pandas DataFrame to be read by the ASCII2NC tool with Python. - The Point2Grid, Plot-Point-Obs, Ensemble-Stat, and Point-Stat tools also process point observations. They support Python embedding of point observations directly on the command line by replacing the input MET NetCDF point observation file name with the Python command to be run. The Python command must begin with the prefix 'PYTHON_NUMPY=' and be followed by the path to the user's Python script and any arguments.
The full command should be enclosed in single quotes to prevent embedded whitespace from causing parsing errors. An example of this is shown below: .. code-block:: none
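
Taken together, PATCH 2/7 adds the convert_point_data() helper (built on the new csv_point_obs class) and PATCH 3/7 calls it from read_ascii_point.py, so a reader script can hand the MET tools a fully converted point-data dictionary. The following is a rough usage sketch only, mirroring the pattern used in read_ascii_point.py: the input file name is a placeholder, and met_point_obs.py (from scripts/python) is assumed to be importable.

.. code-block:: python

    import pandas as pd

    from met_point_obs import convert_point_data

    # Placeholder: any file in MET's 11-column point observation format
    input_file = "sample_ascii_obs.txt"

    # Read the 11 columns into a list of rows, as read_ascii_point.py does
    point_data = pd.read_csv(input_file, header=None, delim_whitespace=True,
                             keep_default_na=False,
                             names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv',
                                    'var', 'lvl', 'hgt', 'qc', 'obs'],
                             dtype={'typ':'str', 'sid':'str', 'vld':'str',
                                    'var':'str', 'qc':'str'}).values.tolist()

    # Validate the rows and build the MET point-data dictionary
    # (header/observation arrays plus lookup tables) for the MET tools
    met_point_data = convert_point_data(point_data)

    print("     point_data: Data Length:\t" + repr(len(point_data)))
    print(" met_point_data: Data Type:\t" + repr(type(met_point_data)))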