Skip to content

Commit

Permalink
Merge pull request #64 from RTIInternational/60-tzdata-error-pyarrow-…
Browse files Browse the repository at this point in the history
…windows

60 tzdata error pyarrow windows
  • Loading branch information
samlamont authored Aug 24, 2023
2 parents 3d5f6ed + 62843d5 commit 55b6157
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 18 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.3] - 2023-08-23

### Changed

- Removed pyarrow from time calculations in `nwm_point_data.py` loading due to a Windows bug
- Updated the output file name in `nwm_point_data.py` to include the forecast hour when `process_by_z_hour=False`

## [0.2.2] - 2023-08-23

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ $ python3 -m pip install --upgrade pip
# Build and install from source
$ python3 -m pip install --upgrade build
$ python -m build
$ python -m pip install dist/teehr-0.2.2.tar.gz
$ python -m pip install dist/teehr-0.2.3.tar.gz
```

Install from GitHub
Expand All @@ -29,8 +29,8 @@ $ pip install 'teehr @ git+https://github.com/RTIInternational/teehr@[BRANCH_TAG

Use Docker
```bash
$ docker build -t teehr:v0.2.2 .
$ docker run -it --rm --volume $HOME:$HOME -p 8888:8888 teehr:v0.2.2 jupyter lab --ip 0.0.0.0 $HOME
$ docker build -t teehr:v0.2.3 .
$ docker run -it --rm --volume $HOME:$HOME -p 8888:8888 teehr:v0.2.3 jupyter lab --ip 0.0.0.0 $HOME
```

## Examples
Expand Down
32 changes: 18 additions & 14 deletions src/teehr/loading/nwm22/nwm_point_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ def process_chunk_of_files(
configuration: str,
variable_name: str,
output_parquet_dir: str,
process_by_z_hour: bool
) -> None:
"""Assemble a table of NWM values for a chunk of NWM files"""
"""Assemble a table for a chunk of NWM files"""

location_ids = np.array(location_ids).astype(int)

Expand All @@ -89,16 +90,18 @@ def process_chunk_of_files(
output = dask.compute(*results)
output_table = pa.concat_tables(output)

max_ref = pa.compute.max(output_table["reference_time"])
min_ref = pa.compute.min(output_table["reference_time"])

if max_ref != min_ref:
min_ref_str = pa.compute.strftime(min_ref, format="%Y%m%dT%HZ")
max_ref_str = pa.compute.strftime(max_ref, format="%Y%m%dT%HZ")
filename = f"{min_ref_str}_{max_ref_str}.parquet"
if process_by_z_hour:
row = df.iloc[0]
filename = f"{row.day}T{row.z_hour[1:3]}Z.parquet"
else:
min_ref_str = pa.compute.strftime(min_ref, format="%Y%m%dT%HZ")
filename = f"{min_ref_str}.parquet"
# Use start and end dates including forecast hour
# for the output file name
filepath_list = df.filepath.sort_values().tolist()
start_json = filepath_list[0].split("/")[-1].split(".")
start = f"{start_json[1]}T{start_json[3][1:3]}Z{start_json[6][1:]}F"
end_json = filepath_list[-1].split("/")[-1].split(".")
end = f"{end_json[1]}T{end_json[3][1:3]}Z{end_json[6][1:]}F"
filename = f"{start}_{end}.parquet"

pq.write_table(output_table, Path(output_parquet_dir, filename))

Expand Down Expand Up @@ -169,6 +172,7 @@ def fetch_and_format_nwm_points(
configuration,
variable_name,
output_parquet_dir,
process_by_z_hour,
)


Expand Down Expand Up @@ -291,10 +295,8 @@ def nwm_to_parquet(
json_dir = "/mnt/sf_shared/data/ciroh/jsons"
output_parquet_dir = "/mnt/sf_shared/data/ciroh/parquet"

# Start dask client here first? Need to install dask[distributed]
# python -m pip install "dask[distributed]" --upgrade
# from dask.distributed import Client
# client = Client(n_workers=10)
process_by_z_hour = False
stepsize = 100

nwm_to_parquet(
configuration,
Expand All @@ -306,4 +308,6 @@ def nwm_to_parquet(
json_dir,
output_parquet_dir,
t_minus_hours=[0],
process_by_z_hour=process_by_z_hour,
stepsize=stepsize,
)
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.2
0.2.3

0 comments on commit 55b6157

Please sign in to comment.