chore: Create Deleted DatasetVersion "Clean up" Job #7315

Draft: wants to merge 25 commits into base: main from lvreynoso/dataset_ver…

Changes from all commits (25 commits)
64af399  Initial commit (lvreynoso, Jul 27, 2024)
53bd355  Merge remote-tracking branch 'origin/main' into lvreynoso/dataset_ver… (lvreynoso, Jul 27, 2024)
a416fc5  Add dataset_version_cleanup to happy config.json (lvreynoso, Jul 30, 2024)
3f9e639  Merge remote-tracking branch 'origin/main' into lvreynoso/dataset_ver… (lvreynoso, Jul 30, 2024)
6501ed4  Add LambdaProvider, invoke lambda from schema_migration.py (lvreynoso, Jul 31, 2024)
b27c0fd  Merge remote-tracking branch 'origin/main' into lvreynoso/dataset_ver… (lvreynoso, Jul 31, 2024)
1bf2688  Correct port for make command to test lambda (lvreynoso, Jul 31, 2024)
9a868c0  Add dataset_version_cleanup to github build-images action (lvreynoso, Aug 6, 2024)
5c7c83e  Add cleanup slice to happy config (lvreynoso, Aug 6, 2024)
373863d  Fix image label in docker-compose.yml (lvreynoso, Aug 6, 2024)
bd94950  Fix name of method to invoke cleanup lambda (lvreynoso, Aug 6, 2024)
3f11ecb  Merge remote-tracking branch 'origin/main' into lvreynoso/dataset_ver… (lvreynoso, Aug 15, 2024)
df6d717  TEMP test migration (lvreynoso, Aug 15, 2024)
746baa3  Merge remote-tracking branch 'origin/main' into lvreynoso/dataset_ver… (lvreynoso, Aug 22, 2024)
9b8e6d1  Make lambda_provider a keyword argument to business_logic (lvreynoso, Aug 22, 2024)
2686a75  Revert python dependencies (lvreynoso, Aug 23, 2024)
b705956  Merge remote-tracking branch 'origin/main' into lvreynoso/dataset_ver… (lvreynoso, Aug 27, 2024)
8e48276  Fix schema_migrate test mocks (lvreynoso, Aug 27, 2024)
203e837  Only assert a call for failed dataset (lvreynoso, Aug 27, 2024)
ef437b1  TEMP test rdev migration (lvreynoso, Aug 27, 2024)
37ec94f  Use updated test migration (lvreynoso, Aug 28, 2024)
3243d66  Fix IAM roles for lambda (lvreynoso, Aug 29, 2024)
4b93034  Fix reference to schema migration batch role arn (lvreynoso, Aug 29, 2024)
55ff5ef  Update principal to batch (lvreynoso, Aug 29, 2024)
c84aa4c  Try STS principal (lvreynoso, Aug 29, 2024)
1 change: 1 addition & 0 deletions .github/workflows/build-images.yml
@@ -33,6 +33,7 @@ jobs:
          - processing
          - wmg_processing
          - cellguide_pipeline
          - dataset_version_cleanup
    runs-on: ubuntu-22.04
    outputs:
      image_tag: ${{ steps.push_images.outputs.image_tag }}
4 changes: 4 additions & 0 deletions .happy/config.json
@@ -12,6 +12,7 @@
"cellguide_pipeline",
"processing",
"dataset_submissions",
"dataset_version_cleanup",
"upload_failures",
"upload_success",
"wmg_processing"
@@ -46,6 +47,9 @@
    },
    "upload_success": {
      "profile": "upload_success"
    },
    "dataset_version_cleanup": {
      "profile": "dataset_version_cleanup"
    }
  },
  "environments": {
95 changes: 95 additions & 0 deletions .happy/terraform/modules/ecs-stack/main.tf
@@ -55,6 +55,7 @@ locals {
  lambda_upload_success_repo          = local.secret["ecrs"]["upload_success"]["url"]
  lambda_upload_repo                  = local.secret["ecrs"]["upload_failures"]["url"]
  lambda_dataset_submissions_repo     = local.secret["ecrs"]["dataset_submissions"]["url"]
  lambda_dataset_version_cleanup_repo = local.secret["ecrs"]["dataset_version_cleanup"]["url"]
  wmg_upload_image_repo               = local.secret["ecrs"]["wmg_processing"]["url"]
  cg_upload_image_repo                = local.secret["ecrs"]["cellguide_pipeline"]["url"]
  batch_role_arn                      = local.secret["batch_queues"]["upload"]["role_arn"]
@@ -420,6 +421,22 @@ module dataset_submissions_lambda {
  security_groups       = local.security_groups
}

module dataset_version_cleanup_lambda {
  source                = "../lambda"
  image                 = "${local.lambda_dataset_version_cleanup_repo}:${local.image_tag}"
  name                  = "dataset-version-cleanup"
  custom_stack_name     = local.custom_stack_name
  remote_dev_prefix     = local.remote_dev_prefix
  deployment_stage      = local.deployment_stage
  artifact_bucket       = local.artifact_bucket
  cellxgene_bucket      = local.cellxgene_bucket
  datasets_bucket       = local.datasets_bucket
  lambda_execution_role = aws_iam_role.dataset_version_cleanup_lambda_service_role.arn
  step_function_arn     = module.upload_sfn.step_function_arn
  subnets               = local.subnets
  security_groups       = local.security_groups
}

module schema_migration {
  source = "../schema_migration"
  image  = "${local.upload_image_repo}:${local.image_tag}"
@@ -546,3 +563,81 @@ resource "aws_s3_bucket_notification" "on_dataset_submissions_object_created" {

  depends_on = [aws_lambda_permission.allow_dataset_submissions_lambda_execution]
}

resource "aws_iam_role" "dataset_version_cleanup_lambda_service_role" {
name = "corpora-dataset-version-cleanup-service-role-${local.custom_stack_name}"
path = "/service-role/"
assume_role_policy = data.aws_iam_policy_document.assume_role.json
}

data "aws_iam_policy_document" "lambda_db_s3_policy_document" {
statement {
sid = "cw"
effect = "Allow"
actions = [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
]
resources = [
"arn:aws:logs:*:*:*"
]
}
statement {
sid = "networking"
effect = "Allow"
actions = [
"ec2:DescribeNetworkInterfaces",
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DescribeInstances",
"ec2:AttachNetworkInterface",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeVpcs"
]
resources = ["*"]
}
statement {
actions = [
"secretsmanager:DescribeSecret",
"secretsmanager:GetSecretValue"
]
resources = [
"arn:aws:secretsmanager:us-west-2:${var.aws_account_id}:secret:corpora/backend/${var.deployment_stage}/*"
]
}
statement {
sid = "s3"
effect = "Allow"
actions = [
"s3:ListBucket",
"s3:DeleteObject"
]
resources = [
"arn:aws:s3:::${local.artifact_bucket}",
"arn:aws:s3:::${local.artifact_bucket}/*",
"arn:aws:s3:::${local.cellxgene_bucket}",
"arn:aws:s3:::${local.cellxgene_bucket}/*",
"arn:aws:s3:::${local.datasets_bucket}",
"arn:aws:s3:::${local.datasets_bucket}/*"
]
}
}

resource "aws_iam_policy" "lambda_db_s3_policy" {
name = "lambda-db-s3-policy-${local.custom_stack_name}"
policy = data.aws_iam_policy_document.lambda_db_s3_policy_document.json
}

resource "aws_iam_role_policy_attachment" "lambda_db_s3_policy_attachment" {
role = aws_iam_role.dataset_version_cleanup_lambda_service_role.name
policy_arn = aws_iam_policy.lambda_db_s3_policy.arn
}

resource "aws_lambda_permission" "allow_dataset_version_cleanup_lambda_execution" {
action = "lambda:InvokeFunction"
function_name = module.dataset_version_cleanup_lambda.arn
principal = "sts.amazonaws.com"
source_arn = local.schema_migration_batch_role_arn
}
22 changes: 22 additions & 0 deletions Dockerfile.dataset_version_cleanup
@@ -0,0 +1,22 @@
FROM public.ecr.aws/lambda/python:3.10

# Update AWS root Certificates
ADD https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem /etc/ssl/certs/rds-global-bundle.pem

COPY backend/layers/processing/dataset_version_cleanup/ ${LAMBDA_TASK_ROOT}
COPY backend/layers ${LAMBDA_TASK_ROOT}/backend/layers

COPY /python_dependencies/dataset_version_cleanup/ ${LAMBDA_TASK_ROOT}
RUN pip3 install -r requirements.txt

COPY backend/__init__.py ${LAMBDA_TASK_ROOT}/backend/__init__.py
COPY backend/common ${LAMBDA_TASK_ROOT}/backend/common

ARG HAPPY_BRANCH="unknown"
ARG HAPPY_COMMIT=""
LABEL branch=${HAPPY_BRANCH}
LABEL commit=${HAPPY_COMMIT}
ENV COMMIT_SHA=${HAPPY_COMMIT}
ENV COMMIT_BRANCH=${HAPPY_BRANCH}

CMD ["app.dataset_version_cleanup_handler"]
5 changes: 5 additions & 0 deletions Makefile
@@ -226,6 +226,11 @@ local-uploadsuccess: .env.ecr ## Run the upload success lambda with a dataset id
	docker compose $(COMPOSE_OPTS) up -d upload_success
	curl -v -XPOST "http://127.0.0.1:9001/2015-03-31/functions/function/invocations" -d '{"dataset_id": "$(DATASET_ID)"}'

.PHONY: local-datasetversioncleanup
local-datasetversioncleanup: .env.ecr ## Run the dataset version cleanup lambda with a list of dataset version ids
	docker compose $(COMPOSE_OPTS) up -d dataset_version_cleanup
	curl -v -XPOST "http://127.0.0.1:9003/2015-03-31/functions/function/invocations" -d '{"dataset_version_ids": [$(DATASET_VERSION_IDS)]}'

.PHONY: local-cxguser-cookie
local-cxguser-cookie: ## Get cxguser-cookie
	docker compose $(COMPOSE_OPTS) run --rm backend bash -c "cd /single-cell-data-portal && python login.py"
2 changes: 1 addition & 1 deletion backend/common/census_cube/utils.py
@@ -12,7 +12,7 @@
from backend.common.census_cube.data.snapshot import CensusCubeSnapshot

# exported and used by all modules related to the census cube
-ontology_parser = OntologyParser()
+ontology_parser = OntologyParser(schema_version="v5.1.0")


def find_all_dim_option_values(snapshot, organism: str, dimension: str) -> list:
3 changes: 3 additions & 0 deletions backend/layers/business/business.py
@@ -80,6 +80,7 @@
from backend.layers.common.regex import S3_URI_REGEX
from backend.layers.persistence.persistence_interface import DatabaseProviderInterface
from backend.layers.thirdparty.batch_job_provider import BatchJobProviderInterface
from backend.layers.thirdparty.lambda_provider import LambdaProviderInterface
from backend.layers.thirdparty.s3_exceptions import S3DeleteException
from backend.layers.thirdparty.s3_provider_interface import S3ProviderInterface
from backend.layers.thirdparty.step_function_provider import StepFunctionProviderInterface
@@ -104,13 +105,15 @@ def __init__(
        step_function_provider: StepFunctionProviderInterface,
        s3_provider: S3ProviderInterface,
        uri_provider: UriProviderInterface,
        lambda_provider: Optional[LambdaProviderInterface] = None,
    ) -> None:
        self.batch_job_provider = batch_job_provider
        self.crossref_provider = crossref_provider
        self.database_provider = database_provider
        self.step_function_provider = step_function_provider
        self.s3_provider = s3_provider
        self.uri_provider = uri_provider
        self.lambda_provider = lambda_provider
        super().__init__()

    @staticmethod
5 changes: 5 additions & 0 deletions backend/layers/persistence/persistence_interface.py
@@ -176,6 +176,11 @@ def get_dataset_version(self, dataset_version_id: DatasetVersionId, get_tombston
        Returns a dataset version by id.
        """

    def get_dataset_versions_by_id(self, ids: List[DatasetVersionId], get_tombstoned: bool) -> List[DatasetVersion]:
        """
        Returns a list of dataset versions for the given ids.
        """

    def get_all_dataset_versions_for_collection(
        self, collection_id: CollectionId, from_date: datetime
    ) -> List[DatasetVersion]:
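
Note: the concrete DatabaseProvider implementation of get_dataset_versions_by_id is not shown in this excerpt. As a rough sketch of the intent, one batched query could back the new interface method; the helper names below (_manage_session, DatasetVersionTable, _row_to_dataset_version) are illustrative assumptions, not code from this PR:

def get_dataset_versions_by_id(self, ids: List[DatasetVersionId], get_tombstoned: bool = False) -> List[DatasetVersion]:
    # Hypothetical sketch: batch the lookup into a single query instead of N calls.
    id_strings = [str(i) for i in ids]
    with self._manage_session() as session:  # assumed session helper
        rows = session.query(DatasetVersionTable).filter(DatasetVersionTable.id.in_(id_strings)).all()
        # Tombstone filtering is elided here; the get_tombstoned flag would gate it.
        return [self._row_to_dataset_version(row) for row in rows]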
Empty file.
30 changes: 30 additions & 0 deletions backend/layers/processing/dataset_version_cleanup/app.py
@@ -0,0 +1,30 @@
import logging
from typing import List, TypedDict

from backend.layers.business.business import BusinessLogic
from backend.layers.common.entities import (
    DatasetVersionId,
)
from backend.layers.persistence.persistence import DatabaseProvider
from backend.layers.thirdparty.s3_provider import S3Provider

logger = logging.getLogger("processing")

DatasetVersionCleanupEvent = TypedDict("DatasetVersionCleanupEvent", {"dataset_version_ids": List[str]})


def dataset_version_cleanup_handler(event: DatasetVersionCleanupEvent, _context) -> None:
    """
    Lambda function invoked by the migration step function that deletes
    DatasetArtifacts and DatasetVersions for an input list of DatasetVersionIds.
    :param event: Lambda's event object
    :param _context: Lambda's context object
    :return:
    """
    business_logic = BusinessLogic(DatabaseProvider(), None, None, None, S3Provider(), None)

    dataset_version_ids = [DatasetVersionId(entity_id=id) for id in event.get("dataset_version_ids", [])]
    dataset_versions = business_logic.database_provider.get_dataset_versions_by_id(
        dataset_version_ids, get_tombstoned=False
    )
    business_logic.delete_dataset_versions(dataset_versions)
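
The handler's event contract is plain JSON carrying string ids. A minimal local smoke-test sketch (the UUID below is a placeholder, and the call assumes reachable database and S3 endpoints, e.g. via the docker-compose service added later in this PR):

# Illustrative only: placeholder id; assumes DB and S3 are reachable.
from backend.layers.processing.dataset_version_cleanup.app import dataset_version_cleanup_handler

event = {"dataset_version_ids": ["00000000-0000-0000-0000-000000000000"]}
dataset_version_cleanup_handler(event, None)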
3 changes: 3 additions & 0 deletions backend/layers/processing/process.py
@@ -28,6 +28,7 @@
from backend.layers.processing.process_seurat import ProcessSeurat
from backend.layers.processing.process_validate import ProcessValidate
from backend.layers.processing.schema_migration import SchemaMigrate
from backend.layers.thirdparty.lambda_provider import LambdaProvider
from backend.layers.thirdparty.s3_provider import S3Provider, S3ProviderInterface
from backend.layers.thirdparty.schema_validator_provider import (
    SchemaValidatorProvider,
@@ -182,6 +183,7 @@
database_provider = DatabaseProvider()
s3_provider = S3Provider()
uri_provider = UriProvider()
lambda_provider = LambdaProvider()

business_logic = BusinessLogic(
    database_provider,
@@ -190,6 +192,7 @@
    None,
    s3_provider,
    uri_provider,
    lambda_provider=lambda_provider,
)

schema_validator = SchemaValidatorProvider()
5 changes: 3 additions & 2 deletions backend/layers/processing/schema_migration.py
@@ -271,8 +271,9 @@ def log_errors_and_cleanup(self, collection_version_id: str) -> list:
        self._store_sfn_response("report/errors", collection_version_id, errors)
        # clean up artifacts for any now-orphaned, rolled back datasets
        if rolled_back_datasets:
-            # TODO: replace with async job to delete orphaned dataset version DB rows + artifacts
-            self.business_logic.delete_dataset_versions(rolled_back_datasets)
+            self.business_logic.lambda_provider.invoke_dataset_version_cleanup_handler(
+                [dataset.version_id for dataset in rolled_back_datasets]
+            )
        return errors

    def _store_sfn_response(self, directory: str, file_name: str, response: Dict[str, str]):
42 changes: 42 additions & 0 deletions backend/layers/thirdparty/lambda_provider.py
@@ -0,0 +1,42 @@
import json
import os
from typing import List, TypedDict

import boto3

from backend.layers.common.entities import DatasetVersionId

AsyncLambdaInvocationResponse = TypedDict("AsyncLambdaInvocationResponse", {"StatusCode": int})


class LambdaProviderInterface:
    def invoke_dataset_version_cleanup_handler(
        self, dataset_version_ids: List[DatasetVersionId]
    ) -> AsyncLambdaInvocationResponse:  # type: ignore
        pass


class LambdaProvider(LambdaProviderInterface):
    def __init__(self) -> None:
        self.client = boto3.client("lambda")

    def invoke_dataset_version_cleanup_handler(
        self, dataset_version_ids: List[DatasetVersionId]
    ) -> AsyncLambdaInvocationResponse:
        """
        Starts a lambda that will ingest a list of DatasetVersion ids and delete those
        DatasetVersions and associated artifacts in the db and S3
        """
        deployment_stage = os.environ.get("DEPLOYMENT_STAGE")
        stack_name = "stagestack" if deployment_stage == "staging" else f"{deployment_stage}stack"
        if os.environ.get("REMOTE_DEV_PREFIX") is not None:
            stack_name = os.environ.get("REMOTE_DEV_PREFIX").replace("/", "")
        function_name = f"dp-{deployment_stage}-{stack_name}-dataset-version-cleanup"

        lambda_payload = {
            "dataset_version_ids": [str(version_id) for version_id in dataset_version_ids],
        }
        response: AsyncLambdaInvocationResponse = self.client.invoke(
            FunctionName=function_name, InvocationType="Event", Payload=json.dumps(lambda_payload)
        )
        return response
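
Because the provider wraps boto3 directly, a unit test can stub the client and assert on the derived function name and the async invocation type. A minimal sketch, assuming DEPLOYMENT_STAGE=dev and REMOTE_DEV_PREFIX unset; the test name and structure are illustrative, not part of this PR's test suite:

import os
from unittest.mock import MagicMock, patch

from backend.layers.common.entities import DatasetVersionId
from backend.layers.thirdparty.lambda_provider import LambdaProvider


def test_invoke_builds_expected_function_name():
    # Illustrative: pin the env the name-building logic reads.
    with patch.dict(os.environ, {"DEPLOYMENT_STAGE": "dev"}, clear=True):
        provider = LambdaProvider.__new__(LambdaProvider)  # skip real boto3 client creation
        provider.client = MagicMock()
        provider.invoke_dataset_version_cleanup_handler([DatasetVersionId(entity_id="abc")])
    _, kwargs = provider.client.invoke.call_args
    assert kwargs["FunctionName"] == "dp-dev-devstack-dataset-version-cleanup"
    assert kwargs["InvocationType"] == "Event"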
2 changes: 1 addition & 1 deletion backend/portal/api/enrichment.py
@@ -11,7 +11,7 @@

from backend.layers.common.entities import DatasetVersion

-ONTOLOGY_PARSER = OntologyParser()
+ONTOLOGY_PARSER = OntologyParser(schema_version="v5.1.0")

ACCEPTED_TISSUE_ANCESTORS = {
    term
33 changes: 33 additions & 0 deletions docker-compose.yml
@@ -146,6 +146,39 @@ services:
      corporanet:
        aliases:
          - uploadsuccess.corporanet.local
  dataset_version_cleanup:
    image: "${DOCKER_REPO}corpora-dataset-version-cleanup"
    platform: linux/amd64
    profiles:
      - dataset_version_cleanup
    build:
      context: .
      # cache_from:
      #   - "${DOCKER_REPO}corpora-dataset-version-cleanup:branch-main"
      dockerfile: Dockerfile.dataset_version_cleanup
      args:
        - BUILDKIT_INLINE_CACHE=1
        - HAPPY_COMMIT=$HAPPY_COMMIT
        - HAPPY_BRANCH=$HAPPY_BRANCH
        - HAPPY_TAG
    restart: "no"
    ports:
      - "9003:8080"
    environment:
      - PYTHONUNBUFFERED=1
      - CORPORA_LOCAL_DEV=true
      - AWS_REGION=us-west-2
      - AWS_DEFAULT_REGION=us-west-2
      - AWS_ACCESS_KEY_ID=test
      - AWS_SECRET_ACCESS_KEY=test
      - BOTO_ENDPOINT_URL=http://localstack:4566
      - DEPLOYMENT_STAGE=test
      - ARTIFACT_BUCKET=artifact-bucket
      - CELLXGENE_BUCKET=cellxgene-bucket
    networks:
      corporanet:
        aliases:
          - datasetversioncleanup.corporanet.local
  dataset_submissions:
    image: "${DOCKER_REPO}dataset-submissions"
    platform: linux/amd64
7 changes: 7 additions & 0 deletions python_dependencies/dataset_version_cleanup/requirements.txt
@@ -0,0 +1,7 @@
boto3
dataclasses-json
psycopg2-binary==2.*
python-json-logger
requests
SQLAlchemy==2.*
tenacity
2 changes: 1 addition & 1 deletion python_dependencies/processing/requirements.txt
@@ -2,7 +2,7 @@ anndata==0.10.8
awscli
boto3>=1.11.17
botocore>=1.14.17
-cellxgene-schema
+git+https://github.com/chanzuckerberg/single-cell-curation/@lvreynoso/temp-migration-test#subdirectory=cellxgene_schema_cli
dataclasses-json
ddtrace==2.1.4
numba==0.59.1