Skip to content

Commit

Permalink
Merge pull request #62 from Sage-Bionetworks/etl-490
Browse files Browse the repository at this point in the history
[ETL-490] Delete local copy of JSON after uploading to S3
  • Loading branch information
philerooski authored Jul 7, 2023
2 parents 07dc05f + dad93b2 commit 40e4e9d
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/glue/jobs/s3_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def write_file_to_json_dataset(
json_path: str,
dataset_identifier: str,
metadata: dict,
workflow_run_properties: dict) -> str:
workflow_run_properties: dict,
delete_upon_successful_upload: bool=True) -> str:
"""
Write a JSON from a zipfile to a JSON dataset.
Expand All @@ -34,6 +35,9 @@ def write_file_to_json_dataset(
dataset_identifier (str): The data type of `json_path`.
metadata (dict): Metadata derived from the file basename.
workflow_run_properties (dict): The workflow arguments
delete_upon_successful_upload (bool): Whether to delete the local
copy of the JSON file after uploading to S3. Set to False
during testing.
Returns:
output_path (str) The local path the file was written to.
Expand Down Expand Up @@ -121,6 +125,8 @@ def write_file_to_json_dataset(
Key = s3_output_key,
Metadata = s3_metadata)
logger.debug("S3 Put object response: %s", json.dumps(response))
if delete_upon_successful_upload:
os.remove(output_path)
return output_path

def get_metadata(basename: str) -> dict:
Expand Down
31 changes: 31 additions & 0 deletions tests/test_s3_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def test_write_healthkitv2samples_file_to_json_dataset(self, s3_obj, namespace,
dataset_identifier="HealthKitV2Samples",
metadata=sample_metadata["Metadata"],
workflow_run_properties=workflow_run_properties,
delete_upon_successful_upload=False,
)

with open(output_file, "r") as f_out:
Expand All @@ -121,6 +122,33 @@ def test_write_healthkitv2samples_file_to_json_dataset(self, s3_obj, namespace,
)
break

def test_write_file_to_json_dataset_delete_local_copy(self, s3_obj, namespace, monkeypatch):
    """Verify that the local JSON copy is removed after a successful S3 upload.

    Calls `write_file_to_json_dataset` with
    `delete_upon_successful_upload=True` and asserts that the returned
    local output path no longer exists on disk afterwards.
    """
    # Stub out boto3 so no real AWS calls are made during the upload step.
    monkeypatch.setattr("boto3.client", lambda x: MockAWSClient())

    # Metadata mirroring what `get_metadata` would derive from the basename.
    sample_metadata = {
        "Metadata": {
            "type": "HealthKitV2Samples",
            "subtype": "Weight",
            "start_date": datetime.datetime(2022, 1, 12, 0, 0),
            "end_date": datetime.datetime(2023, 1, 14, 0, 0),
        }
    }
    workflow_run_properties = {
        "namespace": namespace,
        "json_prefix": "raw-json",
        "json_bucket": "json-bucket",
    }

    archive = zipfile.ZipFile(io.BytesIO(s3_obj["Body"]))
    with archive as open_archive:
        local_path = s3_to_json.write_file_to_json_dataset(
            z=open_archive,
            json_path="HealthKitV2Samples_Weight_20230112-20230114.json",
            dataset_identifier="HealthKitV2Samples",
            metadata=sample_metadata["Metadata"],
            workflow_run_properties=workflow_run_properties,
            delete_upon_successful_upload=True,
        )

    # The function should have cleaned up its local copy.
    assert not os.path.exists(local_path)

def test_write_symptom_log_file_to_json_dataset(self, s3_obj, namespace, monkeypatch):
monkeypatch.setattr("boto3.client", lambda x: MockAWSClient())
sample_metadata = {
Expand All @@ -142,6 +170,7 @@ def test_write_symptom_log_file_to_json_dataset(self, s3_obj, namespace, monkeyp
dataset_identifier="SymptomLog",
metadata=sample_metadata["Metadata"],
workflow_run_properties=workflow_run_properties,
delete_upon_successful_upload=False,
)

with open(output_file, "r") as f_out:
Expand Down Expand Up @@ -179,6 +208,7 @@ def test_write_enrolled_participants_file_to_json_dataset(self, s3_obj, namespac
dataset_identifier="EnrolledParticipants",
metadata=sample_metadata["Metadata"],
workflow_run_properties=workflow_run_properties,
delete_upon_successful_upload=False,
)

with open(output_file, "r") as f_out:
Expand Down Expand Up @@ -218,6 +248,7 @@ def test_write_file_to_json_dataset_record_consistency(self, s3_obj, namespace,
dataset_identifier="FitbitDevices",
metadata=sample_metadata["Metadata"],
workflow_run_properties=workflow_run_properties,
delete_upon_successful_upload=False,
)

with open(output_file, "r") as f_out:
Expand Down

0 comments on commit 40e4e9d

Please sign in to comment.