-
Notifications
You must be signed in to change notification settings - Fork 1
/
hms_parser.py
433 lines (363 loc) · 17.5 KB
/
hms_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# HMS Parser - Parses metadata from an HMS project and the associated simulation runs.
# The files associated with an HMS project resemble YAML but are not valid YAML, so they are parsed by finding headers and pulling the wanted nested fields from each header.
import glob
import copy
import yaml, json
import os
import argparse
from utils import get_wkt_crs
from datetime import datetime
def _gage_blocks(lines):
    """Group the lines of a .gage file into blocks terminated by ``End:``.

    Lines after the last ``End:`` are ignored. Blank separator lines at the
    start of a block are dropped so that the first entry of every block is
    its header line; blocks that end up empty are skipped.
    """
    blocks = []
    current = []
    for line in lines:
        if line.strip() != 'End:':
            current.append(line)
            continue
        while current and not current[0].strip():
            current.pop(0)
        if current:
            blocks.append(current)
        current = []
    return blocks


def _gage_field(block, field):
    """Return the value of the first line in *block* that contains *field*.

    The value is everything after the FIRST colon on that line, so values
    that themselves contain colons (e.g. ``C:\\data\\precip.dss``) are kept
    whole. Returns ``None`` when no line mentions *field*.
    """
    for line in block:
        if field in line:
            return line.partition(":")[2].strip()
    return None


def gage_file_parse(prj_dir, prj_name):
    """Collect the DSS files referenced by ``<prj_name>.gage``.

    Args:
        prj_dir: Directory that contains the HMS project files.
        prj_name: Project name; the gage file is ``<prj_dir>/<prj_name>.gage``.

    Returns:
        A list of dictionaries with the keys ``title``, ``source_dataset``,
        ``location`` and ``description``, one per unique combination of gage
        type and DSS file name, in the order they first appear in the file.
        An empty list is returned when the gage file cannot be read; a
        message is printed in that case.
    """
    gage_path = os.path.join(prj_dir, f'{prj_name}.gage')
    try:
        with open(gage_path, 'r') as r:
            gage_lines = [line.rstrip('\r\n') for line in r]
    except OSError:
        # Report the path that failed and return "no gage DSS files" so the
        # callers, which extend/assign lists with the result, keep working.
        print(f"Gage File not found: {gage_path}")
        return []

    gage_dss_json_list = []
    seen = set()
    for gage in _gage_blocks(gage_lines):
        dss_file = _gage_field(gage, "DSS File Name")
        if not dss_file:
            # Header blocks and gages without a DSS file contribute nothing.
            continue
        # A gage without a type still references an input file; give it a
        # generic title rather than failing.
        gage_type = _gage_field(gage, "Gage Type") or "Gage"
        if (gage_type, dss_file) in seen:
            continue
        seen.add((gage_type, dss_file))
        gage_dss_json_list.append(
            {
                "title": gage_type + " DSS File",
                "source_dataset": None,
                "location": dss_file,
                "description": f"Parsed from {prj_name}.gage file"
            }
        )
    return gage_dss_json_list
def get_extra_dss_files(input_dss_dir):
    """Describe every ``*.dss`` file found directly inside *input_dss_dir*.

    Args:
        input_dss_dir: Directory holding additional input DSS files. A falsy
            value (``None`` or ``""``) yields an empty list.

    Returns:
        A list of dictionaries with the keys ``description``, ``location``,
        ``source_dataset`` and ``title`` (the file name without its ``.dss``
        extension), sorted by path. These records have the same layout as the
        other file-detail entries the project parser collects, so the result
        can be appended to that list directly.
    """
    if not input_dss_dir:
        return []

    # Escape the directory so glob metacharacters in its name are literal.
    pattern = os.path.join(glob.escape(input_dss_dir), '*.dss')

    dss_common_files_input = []
    # glob order is arbitrary; sort so the output is reproducible.
    for dss_path in sorted(glob.glob(pattern)):
        dss_title = os.path.splitext(os.path.basename(dss_path))[0]
        dss_common_files_input.append(
            {
                "description": "User Added from Input DSS File Directory",
                "location": dss_path,
                "source_dataset": None,
                "title": dss_title
            }
        )
    return dss_common_files_input
def _prj_blocks(lines):
    """Group the lines of an HMS project file into blocks ended by ``End:``.

    Blank separator lines at the start of a block are dropped so the first
    entry of every block is its ``Header: Title`` line; empty blocks are
    skipped and lines after the last ``End:`` are ignored.
    """
    blocks = []
    current = []
    for line in lines:
        if line.strip() != 'End:':
            current.append(line)
            continue
        while current and not current[0].strip():
            current.pop(0)
        if current:
            blocks.append(current)
        current = []
    return blocks


def _prj_field(block, field, default='', ignore_case=False):
    """Return the value of the first line in *block* that contains *field*.

    The value is the text after the first colon of that line, stripped.
    *default* is returned when no line mentions *field*.
    """
    needle = field.lower() if ignore_case else field
    for line in block:
        haystack = line.lower() if ignore_case else line
        if needle in haystack:
            return line.partition(":")[2].strip()
    return default


def parse_prj(prj, wkt, crs, extra_dss_files_list, output_dir,
              template_path=r"C:\py\hec_meta_extract\example\input\json\hms_model_application.json"):
    """Write ``<project>_model_application.json`` for an HMS project file.

    Args:
        prj: Path to the HMS project (``.hms``) file.
        wkt: Accepted for interface compatibility; not used by this function.
        crs: Accepted for interface compatibility; not used by this function.
        extra_dss_files_list: Optional extra input DSS files. Entries may be
            file-detail dictionaries or plain path strings; strings are
            wrapped in the same dictionary layout. ``None`` adds nothing.
        output_dir: Directory the JSON file is written to.
        template_path: Path to the model-application JSON template.

    Raises:
        OSError: If the project file or the template cannot be read (a
            message is printed first for the project file).
        ValueError: If the project file contains no ``Project`` block.
    """
    prj_dir, prj_file_tail = os.path.split(prj)
    prj_name = prj_file_tail.split(".")[0]
    try:
        with open(prj, "r") as f:
            lines = [line.rstrip('\r\n') for line in f]
    except OSError:
        # Nothing can be produced without the project file, so report and stop.
        print (f'.HMS file not found: {prj}')
        raise

    # Headers whose blocks carry a file name and a description.
    headers_with_same_parsing = ['Control', 'Basin', 'Precipitation']
    kv = {'Project': None, 'Components': {}}
    for subList in _prj_blocks(lines):
        header, _, title = subList[0].partition(":")
        header = header.strip()
        title = title.strip()
        description = _prj_field(subList, 'Description')
        if header == 'Project':
            kv['Project'] = {
                'Title': title,
                'Project Output DSS File': _prj_field(subList, 'File Name'),
                'Description': description
            }
        elif header in headers_with_same_parsing:
            kv['Components'][(header, title)] = {
                'File Name': _prj_field(subList, 'filename', ignore_case=True),
                'Description': description
            }
    if kv['Project'] is None:
        raise ValueError(f'No "Project" block found in {prj}')

    # NOTE(review): the component records and the application date (project
    # file modification date) are collected here but are not written to the
    # output JSON - confirm which template fields they belong in.
    modTimeUnix = os.path.getmtime(prj)
    kv['application_date'] = datetime.fromtimestamp(modTimeUnix).strftime('%Y-%m-%d')

    # Open the model application JSON template, drop unneeded keys, then fill it in.
    with open(template_path, 'r') as f:
        model_template_json = json.load(f)
    drop_keys = ['_id', 'linked_resources', 'common_parameters', 'common_software_version', 'authors',
                 'spatial_extent_resolved', 'spatial_valid_extent_resolved', 'temporal_extent', 'temporal_resolution',
                 'spatial_valid_extent', 'common_files_details', 'grid']
    for key in drop_keys:
        # pop() tolerates templates that lack one of the keys.
        model_template_json.pop(key, None)

    # Set basic keywords.
    model_template_json['keywords'] = ['hec-hms','hec','hms','hydrology','model','lwi']
    model_template_json['purpose'] = kv['Project']['Description']
    model_template_json['description'] = kv['Project']['Description']
    model_template_json['title'] = f"{kv['Project']['Title']} HEC-HMS Model"

    # common_files_details is one flat list of file-detail dictionaries.
    common_files_details = [
        {
            "description": "The HMS Project File",
            "location": prj_file_tail,
            "source_dataset": None,
            "title": "Project File"
        },
        {
            "description": "There may be multiple basins in the HMS model project",
            "location": f"{prj_name}/*.basin",
            "source_dataset": None,
            "title": "Basin Files"
        },
        {
            "description": "There may be multiple Meteorological Models",
            "location": f"{prj_name}/*.met",
            "source_dataset": None,
            "title": "Meteorological Model Files"
        },
        {
            "description": "There may be multiple control specifications.",
            "location": f"{prj_name}/*.control",
            "source_dataset": None,
            "title": "Control Specification Files"
        }
    ]

    # Add optional input DSS files to the list of input files.
    if extra_dss_files_list is not None:
        for extra in extra_dss_files_list:
            if isinstance(extra, str):
                # A bare path: wrap it so every entry has the same layout.
                extra = {
                    "description": "User Added from Input DSS File Directory",
                    "location": extra,
                    "source_dataset": None,
                    "title": os.path.splitext(os.path.basename(extra))[0]
                }
            common_files_details.append(extra)

    # Open the .gage file and pull input DSS files.
    common_files_details.extend(gage_file_parse(prj_dir, prj_name))
    model_template_json['common_files_details'] = common_files_details

    # Output the model application JSON.
    output_prj_json = os.path.join(output_dir,f'{prj_name}_model_application.json')
    with open(output_prj_json, "w") as outfile:
        json.dump(model_template_json, outfile)
    print (f'\nmodel_application file output to: {output_prj_json}')
def _run_blocks(lines):
    """Group HMS text lines into blocks, each terminated by an ``End:`` line.

    Blank separator lines at the start of a block are dropped, empty blocks
    are skipped and lines after the last ``End:`` are ignored.
    """
    blocks = []
    current = []
    for line in lines:
        if line.strip() != 'End:':
            current.append(line)
            continue
        while current and not current[0].strip():
            current.pop(0)
        if current:
            blocks.append(current)
        current = []
    return blocks


def _run_read_lines(path):
    """Return the lines of the text file at *path* without line endings."""
    with open(path, 'r') as handle:
        return [line.rstrip('\r\n') for line in handle]


def _run_field(lines, field, source):
    """Return the value stored under *field* in *lines*.

    A line whose key (the text before the first colon) equals *field* wins;
    otherwise the first ``key: value`` line that merely contains *field* is
    used. The value is everything after the first colon, stripped.

    Raises:
        ValueError: If no line provides *field*; *source* names the place
            that was searched in the message.
    """
    fallback = None
    for line in lines:
        key, sep, value = line.partition(":")
        if not sep:
            continue
        if key.strip() == field:
            return value.strip()
        if fallback is None and field in line:
            fallback = value.strip()
    if fallback is None:
        raise ValueError(f'Field "{field}" not found in {source}')
    return fallback


def _run_component_path(prj_dir, component_name, extension):
    """Build the path of the file that stores the named HMS component.

    Spaces and parentheses in the component name are replaced by underscores.
    """
    stem = component_name.replace(" ", "_").replace("(", "_").replace(")", "_")
    return os.path.join(prj_dir, stem + extension)


def _run_basin_parameters(basin_lines):
    """Collect the distinct method names used for each basin parameter.

    Returns a list of ``{"parameter": name, "value": [methods...]}`` records,
    one per parameter, with the methods in order of first appearance.
    """
    wanted = ['Canopy', 'LossRate', 'Transform', 'Baseflow', 'Route']
    found = {key: [] for key in wanted}
    for block in _run_blocks(basin_lines):
        for line in block:
            for key in wanted:
                if f'{key}: ' in line:
                    value = line.split(': ')[-1].strip()
                    if value not in found[key]:
                        found[key].append(value)
    return [{"parameter": key, "value": found[key]} for key in wanted]


def parse_runs(prj, output_dir, template_path=None):
    """Write one ``<project>_<run>_simulation.json`` per run in the .run file.

    For every run block in ``<project>.run`` the referenced basin and control
    files are read from the project directory, and a copy of the simulation
    JSON template is filled in and written to *output_dir*.

    Args:
        prj: Path to the HMS project (``.hms``) file; the run file is looked
            up next to it as ``<project>.run``.
        output_dir: Directory the simulation JSON files are written to.
        template_path: Path to the simulation JSON template. Defaults to
            ``example/input/json/hms_simulation.json`` relative to the
            current working directory.

    Raises:
        OSError: If the template, a basin file or a control file cannot be read.
        ValueError: If a run or control file lacks a required field, or a
            date does not match the ``%d %B %Y`` format.
    """
    # Get project name.
    prj_dir, prj_file_tail = os.path.split(prj)
    prj_name = prj_file_tail.split(".")[0]

    # Open the .run file; without it there are no simulations to describe.
    run_file_name = os.path.join(prj_dir,f'{prj_name}.run')
    try:
        run_file = _run_read_lines(run_file_name)
    except OSError:
        print (f'Run file not found: {run_file_name}')
        return

    runList = _run_blocks(run_file)
    if not runList:
        return

    # Template and gage inputs are identical for every run: load them once.
    if template_path is None:
        template_path = os.path.join('example', 'input', 'json', 'hms_simulation.json')
    with open(template_path, 'r') as f:
        simulation_template = json.load(f)
    input_files = gage_file_parse(prj_dir, prj_name)

    # Keys to drop from the JSON template.
    drop_keys = ['_id', 'model_application', 'model_software', 'linked_resources','type']

    for subList in runList:
        title = subList[0].partition(":")[2].strip()
        run_source = f'run "{title}" of {run_file_name}'
        sim = {}
        for find_key in ['Basin', 'DSS File', 'Precip', 'Control']:
            sim[find_key] = _run_field(subList, find_key, run_source)

        # Add data from the simulation's basin file.
        basin_file = _run_component_path(prj_dir, sim['Basin'], '.basin')
        sim['parameters'] = _run_basin_parameters(_run_read_lines(basin_file))

        # Add data from the simulation's control file.
        control_file = _run_component_path(prj_dir, sim['Control'], '.control')
        c_file = _run_read_lines(control_file)
        for c_find_key in ['Description', 'Start Date', 'End Date', 'Time Interval']:
            sim[c_find_key] = _run_field(c_file, c_find_key, control_file)

        # Work on a private copy so one run never leaks into the next.
        simulation_template_json = copy.deepcopy(simulation_template)
        for key in drop_keys:
            simulation_template_json.pop(key, None)

        simulation_template_json['description'] = sim['Description']
        simulation_template_json['title'] = f"{prj_name} HEC-HMS Simulation: {title}"
        simulation_template_json['output_files'] = [
            {
                "title": "Output DSS File",
                "source_dataset": None,
                "location": sim['DSS File'],
                "description": None
            }
        ]
        simulation_template_json['input_files'] = copy.deepcopy(input_files)
        simulation_template_json['temporal_extent'] = [
            datetime.strptime(sim['Start Date'], '%d %B %Y').strftime('%Y-%m-%d'),
            datetime.strptime(sim['End Date'], '%d %B %Y').strftime('%Y-%m-%d')
        ]
        simulation_template_json["temporal_resolution"] = sim['Time Interval'] + ' Minutes'
        simulation_template_json["parameters"] = sim['parameters']

        # Output each simulation JSON.
        output_sim_json = os.path.join(output_dir,f'{prj_name}_{title}_simulation.json')
        with open(output_sim_json, "w") as outfile:
            json.dump(simulation_template_json, outfile)
        print (f'{prj_name}_{title}_simulation.json')
def parse(prj, shp, dss):
    """Run the full metadata extraction for one HMS project.

    Creates ``<cwd>/output/hms/<project name>``, obtains the WKT and CRS from
    the boundary file, then writes the model-application JSON and one
    simulation JSON per run into that directory.

    Args:
        prj: Path to the HMS project (``.hms``) file.
        shp: Path to the model boundary file handed to
            ``get_wkt_crs.parse_shp``.
        dss: Optional directory with additional input DSS files; ``None``
            skips the lookup.
    """
    # Get project name.
    prj_name = os.path.basename(prj).split(".")[0]

    # Set output directory. exist_ok avoids the check-then-create race.
    output_dir = os.path.join(os.getcwd(), 'output', 'hms', prj_name)
    os.makedirs(output_dir, exist_ok=True)

    # Get WKT and CRS from shp.
    # NOTE(review): parse_shp also receives the output directory, so it
    # presumably writes files there - confirm against utils.get_wkt_crs.
    wkt, crs = get_wkt_crs.parse_shp(shp, prj_name, output_dir)

    # If a DSS directory was given, collect its input files.
    extra_dss_files_list = get_extra_dss_files(dss) if dss is not None else None

    # Parse project file.
    parse_prj(prj, wkt, crs, extra_dss_files_list, output_dir)

    # Parse run file.
    parse_runs(prj, output_dir)
if __name__ == '__main__':
    # Parse Command Line Arguments.
    # The example paths contain backslashes, so the help strings are raw
    # literals; otherwise sequences such as "\H" are invalid escapes.
    p = argparse.ArgumentParser(
        description=(
            "HEC-HMS metadata extraction. "
            "Requires an HMS project file (*.hms) and an ESRI shapefile (*.shp) or GeoJson "
            "for the spatial boundary of the model."
        )
    )
    p.add_argument(
        "--hms",
        help=r"The HEC-HMS project file. (Ex: C:\HMS_Models\Amite\Amite_HMS.hms)",
        required=True,
        type=str
    )
    p.add_argument(
        "--shp",
        help=(
            "The HEC-HMS model boundary spatial extent as an ESRI shapefile or GeoJson. "
            r"(Ex: C:\HMS_Models\Amite\maps\Amite_HMS_Basin_Outline.shp)"
        ),
        required=True,
        type=str
    )
    p.add_argument(
        "--dss",
        help=(
            "Optional. The directory containing any additional input DSS files beyond what is "
            "linked in the .gage file for the HEC-HMS model such as Observed Timeseries, "
            "Reservoir Releases, Gridded Rainfall, or Specified Hyetographs. "
            r"(Ex: C:\HMS_Models\Amite\data)"
        ),
        required=False,
        type=str
    )
    args = p.parse_args()
    parse(args.hms, args.shp, args.dss)