Skip to content

Commit

Permalink
Remove resources required for Learning to Rank
Browse files Browse the repository at this point in the history
Learning to Rank is no longer used in search, so we don't need to train or host models any more.

This won't remove any of the actual SageMaker resources, as those were created by [the deploy pipeline](https://github.com/alphagov/search-api/blob/f037f11c6f0299b8bd4d065a1d947b8f827bbc0d/ltr/sagemaker/deploy.py) rather than by this Terraform project. We'll remove these separately.
  • Loading branch information
sihugh committed May 16, 2024
1 parent 98085d7 commit d854f7a
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 283 deletions.
27 changes: 0 additions & 27 deletions terraform/projects/app-search/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,34 +26,11 @@ Search application servers

| Name | Type |
|------|------|
| [aws_autoscaling_group.learntorank-generation](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/autoscaling_group) | resource |
| [aws_ecr_repository.repo](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/ecr_repository) | resource |
| [aws_ecr_repository_policy.policy](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/ecr_repository_policy) | resource |
| [aws_iam_instance_profile.learntorank-generation](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_instance_profile) | resource |
| [aws_iam_policy.scale-learntorank-generation-asg-policy](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_policy) | resource |
| [aws_iam_policy.search_relevancy_bucket_access](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_policy) | resource |
| [aws_iam_policy.sitemaps_bucket_access](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_policy) | resource |
| [aws_iam_policy.use_sagemaker](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_policy) | resource |
| [aws_iam_role.learntorank](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role) | resource |
| [aws_iam_role_policy_attachment.learntorank-bucket](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.learntorank-ecr](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.learntorank-sagemaker](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.scale-learntorank-generation](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.search_relevancy_bucket_access_iam_role_policy_attachment](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.sitemaps_bucket_access_iam_role_policy_attachment](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.use_sagemaker](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/iam_role_policy_attachment) | resource |
| [aws_key_pair.learntorank-generation-key](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/key_pair) | resource |
| [aws_launch_template.learntorank-generation](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/launch_template) | resource |
| [aws_s3_bucket.search_relevancy_bucket](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/s3_bucket) | resource |
| [aws_s3_bucket.sitemaps_bucket](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/resources/s3_bucket) | resource |
| [null_resource.user_data](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [aws_ami.ubuntu_focal](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/ami) | data source |
| [aws_iam_policy_document.ecr-usage](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.learntorank-assume-role](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.scale-learntorank-generation-asg](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.search_relevancy_bucket_policy](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.sitemaps_bucket_policy](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.use_sagemaker](https://registry.terraform.io/providers/hashicorp/aws/2.46.0/docs/data-sources/iam_policy_document) | data source |
| [terraform_remote_state.infra_monitoring](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/data-sources/remote_state) | data source |
| [terraform_remote_state.infra_networking](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/data-sources/remote_state) | data source |
| [terraform_remote_state.infra_root_dns_zones](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/data-sources/remote_state) | data source |
Expand Down Expand Up @@ -87,8 +64,4 @@ Search application servers

| Name | Description |
|------|-------------|
| <a name="output_ecr_repository_url"></a> [ecr\_repository\_url](#output\_ecr\_repository\_url) | URL of the ECR repository |
| <a name="output_ltr_role_arn"></a> [ltr\_role\_arn](#output\_ltr\_role\_arn) | LTR role ARN |
| <a name="output_scale_learntorank_asg_policy_arn"></a> [scale\_learntorank\_asg\_policy\_arn](#output\_scale\_learntorank\_asg\_policy\_arn) | ARN of the policy used to scale the ASG for learn to rank |
| <a name="output_search_relevancy_s3_policy_arn"></a> [search\_relevancy\_s3\_policy\_arn](#output\_search\_relevancy\_s3\_policy\_arn) | ARN of the policy used to access the search-relevancy S3 bucket |
| <a name="output_sitemaps_s3_policy_arn"></a> [sitemaps\_s3\_policy\_arn](#output\_sitemaps\_s3\_policy\_arn) | ARN of the policy used to access the sitemaps S3 bucket |
236 changes: 0 additions & 236 deletions terraform/projects/app-search/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -89,239 +89,3 @@ data "aws_iam_policy_document" "sitemaps_bucket_policy" {
]
}
}

# Attaches the use_sagemaker policy to the IAM role exposed by the
# `search` module for its instances.
resource "aws_iam_role_policy_attachment" "use_sagemaker" {
  policy_arn = aws_iam_policy.use_sagemaker.arn
  role       = module.search.instance_iam_role_name
}

# Per-environment managed policy whose body is rendered from the
# use_sagemaker policy document.
resource "aws_iam_policy" "use_sagemaker" {
  name        = "govuk-${var.aws_environment}-search-use-sagemaker-policy"
  description = "Allows invoking and describing SageMaker endpoints"
  policy      = data.aws_iam_policy_document.use_sagemaker.json
}

# Policy document that allows describing and invoking SageMaker endpoints.
data "aws_iam_policy_document" "use_sagemaker" {
  statement {
    sid = "InvokeSagemaker"

    actions = [
      "sagemaker:DescribeEndpoint",
      "sagemaker:InvokeEndpoint",
    ]

    # Both actions operate only on endpoint resources, so scope the grant to
    # endpoint ARNs (any region/account) rather than every SageMaker ARN.
    resources = ["arn:aws:sagemaker:*:*:endpoint/*"]
  }
}

# Per-environment search-relevancy bucket.
# - Access logs are written to the logging bucket exported by the
#   infra_monitoring remote state.
# - Objects under the `data/` and `model/` prefixes expire after 7 days.
resource "aws_s3_bucket" "search_relevancy_bucket" {
  bucket = "govuk-${var.aws_environment}-search-relevancy"
  region = var.aws_region

  logging {
    target_bucket = data.terraform_remote_state.infra_monitoring.outputs.aws_logging_bucket_id
    target_prefix = "s3/govuk-${var.aws_environment}-search-relevancy/"
  }

  lifecycle_rule {
    id      = "expire_training_data"
    enabled = true
    prefix  = "data/"

    expiration {
      days = 7
    }
  }

  lifecycle_rule {
    id      = "expire_models"
    enabled = true
    prefix  = "model/"

    expiration {
      days = 7
    }
  }

  tags = {
    Name            = "govuk-${var.aws_environment}-search-relevancy"
    Description     = "S3 bucket for Search Relevancy"
    aws_environment = var.aws_environment
  }
}

# Gives the `search` module's instance role access to the search-relevancy
# bucket via the search_relevancy_bucket_access policy.
resource "aws_iam_role_policy_attachment" "search_relevancy_bucket_access_iam_role_policy_attachment" {
  policy_arn = aws_iam_policy.search_relevancy_bucket_access.arn
  role       = module.search.instance_iam_role_name
}

# Per-environment managed policy rendered from the
# search_relevancy_bucket_policy document.
resource "aws_iam_policy" "search_relevancy_bucket_access" {
  name        = "govuk-${var.aws_environment}-search-relevancy-bucket-access-policy"
  description = "Allows reading and writing of the search relevancy bucket"
  policy      = data.aws_iam_policy_document.search_relevancy_bucket_policy.json
}

# Policy document for the search-relevancy bucket:
#   1. list all buckets in the account;
#   2. delete/put/get/list on the bucket itself and on every object in it.
data "aws_iam_policy_document" "search_relevancy_bucket_policy" {
  statement {
    sid       = "ReadListOfBuckets"
    resources = ["*"]
    actions   = ["s3:ListAllMyBuckets"]
  }

  statement {
    sid = "SearchRelevancyAccess"

    # First ARN targets the bucket, second targets the objects inside it.
    resources = [
      "arn:aws:s3:::${aws_s3_bucket.search_relevancy_bucket.id}",
      "arn:aws:s3:::${aws_s3_bucket.search_relevancy_bucket.id}/*",
    ]

    actions = [
      "s3:DeleteObject",
      "s3:Put*",
      "s3:Get*",
      "s3:List*",
    ]
  }
}

# Daily learn-to-rank

# IAM role for learn-to-rank; who may assume it is defined by the
# learntorank-assume-role policy document.
resource "aws_iam_role" "learntorank" {
  assume_role_policy = data.aws_iam_policy_document.learntorank-assume-role.json
  name               = "govuk-${var.aws_environment}-search-learntorank-role"
}

# Trust policy: lets the SageMaker and EC2 services assume the role.
data "aws_iam_policy_document" "learntorank-assume-role" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      identifiers = ["sagemaker.amazonaws.com"]
      type        = "Service"
    }

    principals {
      identifiers = ["ec2.amazonaws.com"]
      type        = "Service"
    }
  }
}

# Lets the learn-to-rank role use the search-relevancy bucket.
resource "aws_iam_role_policy_attachment" "learntorank-bucket" {
  policy_arn = aws_iam_policy.search_relevancy_bucket_access.arn
  role       = aws_iam_role.learntorank.name
}

# Attaches the AWS-managed AmazonSageMakerFullAccess policy to the
# learn-to-rank role.
# NOTE: this is much broader than the role needs; consider replacing it with
# a narrower policy in the future.
resource "aws_iam_role_policy_attachment" "learntorank-sagemaker" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess"
  role       = aws_iam_role.learntorank.name
}

# Attaches the AWS-managed AmazonEC2ContainerRegistryPowerUser policy to the
# learn-to-rank role.
resource "aws_iam_role_policy_attachment" "learntorank-ecr" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryPowerUser"
  role       = aws_iam_role.learntorank.name
}

# ECR repository named "search"; image tags may be overwritten (MUTABLE).
resource "aws_ecr_repository" "repo" {
  image_tag_mutability = "MUTABLE"
  name                 = "search"
}

# Applies the ecr-usage policy document as the repository policy of the
# "search" ECR repository.
resource "aws_ecr_repository_policy" "policy" {
  policy     = data.aws_iam_policy_document.ecr-usage.json
  repository = aws_ecr_repository.repo.name
}

# Repository policy document: grants read-style ECR actions to the
# learn-to-rank role and to the SageMaker service.
data "aws_iam_policy_document" "ecr-usage" {
  statement {
    sid = "read"

    principals {
      identifiers = [aws_iam_role.learntorank.arn]
      type        = "AWS"
    }

    principals {
      identifiers = ["sagemaker.amazonaws.com"]
      type        = "Service"
    }

    actions = [
      "ecr:BatchCheckLayerAvailability",
      "ecr:BatchGetImage",
      "ecr:DescribeRepositories",
      "ecr:GetDownloadUrlForLayer",
      "ecr:GetRepositoryPolicy",
      "ecr:ListImages",
    ]
  }
}

# Instance profile that wraps the learn-to-rank role so EC2 instances can
# run with it.
resource "aws_iam_instance_profile" "learntorank-generation" {
  role = aws_iam_role.learntorank.name
  name = "govuk-${var.aws_environment}-search-ltr-generation"
}

# EC2 key pair registered for the learn-to-rank generation launch template.
# Only the public half is stored here; its trailing comment field labels the
# key as `concourse-worker`.
resource "aws_key_pair" "learntorank-generation-key" {
key_name = "govuk-${var.aws_environment}-search-ltr-generation-key"
public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGNF9QVcjq9LUioccV8Fw161BrM3EHPRtwzp+2IRaYrgpwgBoIxgK2q1LLrLSwcoLfbDyU3dW0cMN0wpbxFzHTFuDqzm5fdAzhijZJCKMyPtMSPOhUev+/JfHVZj7JGXHV2SOMM1Q1XkEBwgenPmR2Hz6fMs3+R/LdeNkMTU1H/fOXl6WU9DY1XAUdYzfufRXiDt2aCGCOknAWqOAdT+22FZcmgc657tt9xbOJYzVoEAqBArCxixpf5N7Tha0QUac8QGQQxw01LENHRN1S4NLtvUEBqI3m99f8NleOlO4eD7XBkcwPXMrFP7/4IqAPq+JgoD2OrDSX3HiE8HNtJTLr0vmP5plBiwH3Bd+32oILQiw4HqXt8JpTfr/fAJXlsHCmYkxlEzhhZ46H1VZsgU9BM69C/bWTvGWCFAYrWbu2vt9Gbo1nbZVTQjLBfKgY3vxk5Tmj4b43AGI1tprPdBh43IdQvvYu9oiTodzxetaQoK8fUMKPVoQruPJNfKcu3Yukm8DvVmwQqoAgik5iYk7up9gX1L//L0dJIpjWSlU5ytpmG+M5k+Abbg+kkIjnCXXkS2Icwnh3BEIvxLIt9MaMf89Lxi4Jin1uNu727Z9cXGRp8Fyz5GdDEKz37P5k7jFEV70KYLwl3r7qxp66RafXgRx/fRRVHdTNf43O6UqDUQ== concourse-worker"
}

# Looks up the newest AMI whose name matches the Ubuntu 20.04 (focal) amd64
# HVM SSD server image pattern, restricted to the listed owner accounts.
data "aws_ami" "ubuntu_focal" {
  owners      = ["099720109477", "696911096973"] # Canonical
  most_recent = true

  filter {
    name   = "name"
    values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"]
  }
}

# Launch template for the learn-to-rank generation instances: a c5.large
# built from the Ubuntu focal AMI with a 32 GB volume on /dev/sda1, running
# under the learntorank-generation instance profile.
resource "aws_launch_template" "learntorank-generation" {
  name          = "govuk-${var.aws_environment}-search-ltr-generation"
  instance_type = "c5.large"
  image_id      = data.aws_ami.ubuntu_focal.id
  key_name      = aws_key_pair.learntorank-generation-key.key_name

  vpc_security_group_ids = [data.terraform_remote_state.infra_security_groups.outputs.sg_search-ltr-generation_id]

  # An instance that shuts itself down is terminated rather than stopped.
  instance_initiated_shutdown_behavior = "terminate"

  iam_instance_profile {
    name = aws_iam_instance_profile.learntorank-generation.name
  }

  block_device_mappings {
    device_name = "/dev/sda1"

    ebs {
      volume_size = 32
    }
  }

  lifecycle {
    create_before_destroy = true
  }
}

# Auto Scaling group for the learn-to-rank generation instance. It holds
# zero instances by default and at most one, launched from the latest
# version of the learntorank-generation launch template into the public
# subnets exported by the infra_networking remote state.
resource "aws_autoscaling_group" "learntorank-generation" {
  name             = "govuk-${var.aws_environment}-search-ltr-generation"
  min_size         = 0
  max_size         = 1
  desired_capacity = 0

  vpc_zone_identifier = data.terraform_remote_state.infra_networking.outputs.public_subnet_ids

  launch_template {
    id      = aws_launch_template.learntorank-generation.id
    version = "$Latest"
  }

  tag {
    key                 = "Name"
    value               = "govuk-${var.aws_environment}-search-ltr-generation"
    propagate_at_launch = true
  }

  # The learn-to-rank role is granted autoscaling:SetDesiredCapacity on this
  # group, so capacity is changed outside Terraform. Ignore that drift so a
  # `terraform apply` does not scale a running instance back down to 0.
  lifecycle {
    ignore_changes = [desired_capacity]
  }
}

# Policy document for scaling the learn-to-rank ASG: read-only describe
# calls on any resource, plus SetDesiredCapacity limited to that one group.
data "aws_iam_policy_document" "scale-learntorank-generation-asg" {
  statement {
    resources = ["*"]

    actions = [
      "autoscaling:DescribeAutoScalingGroups",
      "ec2:DescribeInstances",
      "ec2:DescribeInstanceStatus",
    ]
  }

  statement {
    resources = [aws_autoscaling_group.learntorank-generation.arn]
    actions   = ["autoscaling:SetDesiredCapacity"]
  }
}

# Per-environment managed policy rendered from the
# scale-learntorank-generation-asg policy document.
resource "aws_iam_policy" "scale-learntorank-generation-asg-policy" {
  policy = data.aws_iam_policy_document.scale-learntorank-generation-asg.json
  name   = "govuk-${var.aws_environment}-scale-search-ltr-generation-asg"
}

# Lets the learn-to-rank role scale its own generation ASG.
resource "aws_iam_role_policy_attachment" "scale-learntorank-generation" {
  policy_arn = aws_iam_policy.scale-learntorank-generation-asg-policy.arn
  role       = aws_iam_role.learntorank.name
}
20 changes: 0 additions & 20 deletions terraform/projects/app-search/outputs.tf
Original file line number Diff line number Diff line change
@@ -1,23 +1,3 @@
# Exposes the ARN of the managed policy that permits scaling the
# learn-to-rank generation ASG.
output "scale_learntorank_asg_policy_arn" {
  value       = aws_iam_policy.scale-learntorank-generation-asg-policy.arn
  description = "ARN of the policy used to scale the ASG for learn to rank"
}

# Exposes the ARN of the learn-to-rank IAM role.
output "ltr_role_arn" {
  description = "LTR role ARN"
  value       = aws_iam_role.learntorank.arn
}

# Exposes the URL of the "search" ECR repository.
output "ecr_repository_url" {
  description = "URL of the ECR repository"
  value       = aws_ecr_repository.repo.repository_url
}

# Exposes the ARN of the search-relevancy bucket access policy.
output "search_relevancy_s3_policy_arn" {
  description = "ARN of the policy used to access the search-relevancy S3 bucket"
  value       = aws_iam_policy.search_relevancy_bucket_access.arn
}

output "sitemaps_s3_policy_arn" {
value = aws_iam_policy.sitemaps_bucket_access.arn
description = "ARN of the policy used to access the sitemaps S3 bucket"
Expand Down

0 comments on commit d854f7a

Please sign in to comment.