diff --git a/src/lambda_function/raw/app.py b/src/lambda_function/raw/app.py
index 9e695d5..e3a9f14 100644
--- a/src/lambda_function/raw/app.py
+++ b/src/lambda_function/raw/app.py
@@ -69,10 +69,14 @@ def construct_raw_key(path: str, key: str, raw_key_prefix: str):
     key_components = key.split("/")
     # input bucket keys are formatted like `{namespace}/{cohort}/{export_basename}`
     cohort = key_components[1]
+    # This matches the logic used in S3 to JSON
     file_basename = os.path.basename(path)
-    # The first underscore-delimited component of the JSON basename is the datatype
-    data_type = file_basename.split("_")[0]
-    raw_basename = f"{ os.path.splitext(file_basename)[0] }.ndjson.gz"
+    file_identifier = os.path.splitext(file_basename)[0]
+    basename_components = file_identifier.split("_")
+    data_type = basename_components[0]
+    if "HealthKitV2" in data_type and basename_components[-2] == "Deleted":
+        data_type = "{}_Deleted".format(data_type)
+    raw_basename = f"{file_identifier}.ndjson.gz"
     raw_key = os.path.join(
         raw_key_prefix,
         f"dataset={data_type}",
diff --git a/tests/test_lambda_raw.py b/tests/test_lambda_raw.py
index 9af6592..14dfa15 100644
--- a/tests/test_lambda_raw.py
+++ b/tests/test_lambda_raw.py
@@ -22,6 +22,18 @@ def test_construct_raw_key():
     assert result == expected_raw_key
 
 
+def test_construct_raw_key_HealthKitV2_Deleted():
+    path = "HealthKitV2Heartbeat_Deleted_20230701-20230702.json"
+    key = "some_namespace/some_cohort/export.zip"
+    raw_key_prefix = "main/json"
+    expected_raw_key = (
+        "main/json/dataset=HealthKitV2Heartbeat_Deleted/cohort=some_cohort/"
+        "HealthKitV2Heartbeat_Deleted_20230701-20230702.ndjson.gz"
+    )
+    result = app.construct_raw_key(path=path, key=key, raw_key_prefix=raw_key_prefix)
+    assert result == expected_raw_key
+
+
 @pytest.fixture
 def s3_setup():
     # Fixture to set up a mock S3 client and a test bucket.