diff --git a/src/glue/jobs/s3_to_json.py b/src/glue/jobs/s3_to_json.py
index 414b2ff7..4b51dad0 100644
--- a/src/glue/jobs/s3_to_json.py
+++ b/src/glue/jobs/s3_to_json.py
@@ -130,10 +130,16 @@ def get_metadata(basename: str) -> dict:
     if metadata["type"] == "HealthKitV2Samples":
         metadata["subtype"] = basename_components[1]
     if (
-        metadata["type"] == "HealthKitV2Samples"
-        and basename_components[-2] == "Deleted"
-    ):
-        metadata["type"] = "HealthKitV2Samples_Deleted"
+        metadata["type"]
+        in [
+            "HealthKitV2Samples",
+            "HealthKitV2Heartbeat",
+            "HealthKitV2Electrocardiogram",
+            "HealthKitV2Workouts",
+        ]
+        and basename_components[-2] == "Deleted"
+    ):
+        metadata["type"] = "{}_Deleted".format(metadata["type"])
     logger.debug("metadata = %s", metadata)
     return metadata

diff --git a/tests/Dockerfile b/tests/Dockerfile
new file mode 100644
index 00000000..2e365572
--- /dev/null
+++ b/tests/Dockerfile
@@ -0,0 +1,4 @@
+FROM amazon/aws-glue-libs:glue_libs_3.0.0_image_01
+
+RUN pip3 install pytest-datadir
+ENTRYPOINT ["bash", "-l"]
diff --git a/tests/README.md b/tests/README.md
index dae41175..4f6dae61 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,9 +1,62 @@
 ### Running tests
-Tests are defined in the `tests` folder in this project. Use pipenv to install the
-[pytest](https://docs.pytest.org/en/latest/) and run tests.
+Tests are defined in the `tests` folder in this project.
+
+#### Running tests using Docker
+All tests can be run inside a Docker container which includes all the necessary
+Glue/Spark dependencies and simulates the environment in which the Glue jobs
+will run. A Dockerfile is included in the `tests` directory.
+
+To run the tests locally, first configure your AWS credentials, then launch and attach
+to the Docker container (see the following commands).
+
+Run the following commands to run the tests for the `s3_to_json` script (in develop).
+
+1. Navigate to the directory with the Dockerfile:
+
+```shell script
+cd tests
+```
+
+2. Build the docker image from the Dockerfile:
+
+```shell script
+docker build -t <image_name> .
+```
+
+3. Run the newly built image:
+
+```shell script
+docker run --rm -it \
+    -v ~/.aws:/home/glue_user/.aws \
+    -v ~/recover/:/home/glue_user/workspace/recover \
+    -e DISABLE_SSL=true -p 4040:4040 -p 18080:18080 <image_name>
+```
+
+4. Navigate to your repo in the image:
+
+```shell script
+cd /home/glue_user/workspace/recover
+```
+
+5. Finally, run the following (now that you are inside the running container)
+to execute the tests:
+
+```shell script
+python3 -m pytest
+```
+
+#### Running tests using pipenv
+Use [pipenv](https://pipenv.pypa.io/en/latest/index.html) to install
+[pytest](https://docs.pytest.org/en/latest/) and run the tests locally outside of
+a Docker image.
+
+Note that only the lambda function tests can be run locally with pipenv; the other
+tests will fail under pytest because `test_s3_to_json.py` must be run inside the
+Docker container.
 
 Run the following command from the repo root to run tests for the lambda function (in develop).
+You can run this locally or inside the Docker image.
 ```shell script
-$ python -m pytest tests/lambda_function/ -v
+python3 -m pytest tests/test_s3_to_glue_lambda.py -v
 ```
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/lambda_function/__init__.py b/tests/lambda_function/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/lambda_function/s3_to_glue/__init__.py b/tests/lambda_function/s3_to_glue/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/lambda_function/s3_to_glue/test_app.py b/tests/test_s3_to_glue_lambda.py
similarity index 100%
rename from tests/lambda_function/s3_to_glue/test_app.py
rename to tests/test_s3_to_glue_lambda.py
diff --git a/tests/test_s3_to_json.py b/tests/test_s3_to_json.py
new file mode 100644
index 00000000..4ad31431
--- /dev/null
+++ b/tests/test_s3_to_json.py
@@ -0,0 +1,54 @@
+import os
+import io
+import json
+import zipfile
+
+import boto3
+import pytest
+
+from src.glue.jobs import s3_to_json
+
+
+class TestS3ToJsonS3:
+    def test_get_metadata_type(self):
+        assert (
+            s3_to_json.get_metadata("HealthKitV2Samples_AppleExerciseTime_20201022-20211022.json")["type"]
+            == "HealthKitV2Samples"
+        )
+
+        assert (
+            s3_to_json.get_metadata(
+                "HealthKitV2Statistics_20201022-20211022.json"
+            )["type"]
+            == "HealthKitV2Statistics"
+        )
+
+        # these assertions verify that deleted HealthKit data
+        # is assigned a type with the "_Deleted" suffix
+        assert (
+            s3_to_json.get_metadata(
+                "HealthKitV2Samples_Deleted_20201022-20211022.json"
+            )["type"]
+            == "HealthKitV2Samples_Deleted"
+        )
+
+        assert (
+            s3_to_json.get_metadata(
+                "HealthKitV2Heartbeat_Samples_Deleted_20201022-20211022.json"
+            )["type"]
+            == "HealthKitV2Heartbeat_Deleted"
+        )
+
+        assert (
+            s3_to_json.get_metadata(
+                "HealthKitV2Electrocardiogram_Samples_Deleted_20201022-20211022.json"
+            )["type"]
+            == "HealthKitV2Electrocardiogram_Deleted"
+        )
+
+        assert (
+            s3_to_json.get_metadata(
+                "HealthKitV2Workouts_Deleted_20201022-20211022.json"
+            )["type"]
+            == "HealthKitV2Workouts_Deleted"
+        )
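
Below is a minimal, self-contained sketch of the "_Deleted" renaming behavior this diff introduces and the new tests exercise. It is an illustration only: the helper name `resolve_type`, the `DELETED_AWARE_TYPES` constant, and the simplified basename parsing are assumptions, not the actual `get_metadata` implementation in `src/glue/jobs/s3_to_json.py`.

```python
# Hypothetical, simplified illustration of the renaming rule added in
# src/glue/jobs/s3_to_json.py -- not the actual Glue job code.
DELETED_AWARE_TYPES = [
    "HealthKitV2Samples",
    "HealthKitV2Heartbeat",
    "HealthKitV2Electrocardiogram",
    "HealthKitV2Workouts",
]


def resolve_type(basename: str) -> str:
    """Return the export type for a file basename, appending "_Deleted"
    when the file holds deleted records of a HealthKit type."""
    basename_components = basename.split("_")
    data_type = basename_components[0]
    if data_type in DELETED_AWARE_TYPES and basename_components[-2] == "Deleted":
        data_type = "{}_Deleted".format(data_type)
    return data_type


# Examples mirroring the cases in tests/test_s3_to_json.py:
# resolve_type("HealthKitV2Samples_AppleExerciseTime_20201022-20211022.json")
#   -> "HealthKitV2Samples"
# resolve_type("HealthKitV2Heartbeat_Samples_Deleted_20201022-20211022.json")
#   -> "HealthKitV2Heartbeat_Deleted"
```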