From 8aaad0c5e006405ea47f1f4593b3478de324b96f Mon Sep 17 00:00:00 2001 From: mmoutama09 <103177006+mmoutama09@users.noreply.github.com> Date: Sat, 3 Aug 2024 10:44:49 +0200 Subject: [PATCH] feat: add docker autoscaler executor (#1118) ## Description Provides a new executor using the new GitLab autoscaler executor. I've been using the fleeting plugin for AWS only. Prerequisite: Docker must already be installed on the AMI used by worker machines (the Docker autoscaler does not install it, unlike the Docker machine). Additionally, the user used to connect to the workers must also be added to the Docker group. Related to issue https://github.com/cattle-ops/terraform-aws-gitlab-runner/issues/624 ## Verification Built an AMI with Docker based on Amazon Linux 2023. Set up the new executor according to the example. Works! --------- Co-authored-by: Matthias Kay --- .cspell.json | 8 + .github/workflows/ci.yml | 2 +- data.tf | 16 ++ docker_autoscaler.tf | 198 ++++++++++++++++++ docs/usage.md | 73 +++++++ examples/README.md | 6 +- examples/runner-certificates/README.md | 2 +- examples/runner-default/generated/.gitkeep | 0 .../.terraform-version | 0 examples/runner-fleeting-plugin/README.md | 27 +++ examples/runner-fleeting-plugin/main.tf | 135 ++++++++++++ .../providers.tf | 0 .../variables.tf | 6 +- .../versions.tf | 0 .../runner-pre-registered/.terraform.lock.hcl | 177 ---------------- examples/runner-pre-registered/README.md | 62 ------ .../runner-pre-registered/generated/.gitkeep | 0 examples/runner-pre-registered/main.tf | 71 ------- main.tf | 195 ++++++++++++----- outputs.tf | 4 +- packer_images/README.md | 22 ++ packer_images/amz-linux-docker.json | 47 +++++ packer_images/ubuntu-docker.json | 49 +++++ .../instance-docker-autoscaler-policy.json | 34 +++ security_groups.tf | 2 +- template/gitlab-runner.tftpl | 34 +++ template/runner-config.tftpl | 41 +--- .../runner-docker-autoscaler-config.tftpl | 22 ++ template/runner-docker-machine-config.tftpl | 37 ++++ variables.tf | 142 ++++++++++++- 30 files changed, 1002 insertions(+), 410 deletions(-) create mode 100644 docker_autoscaler.tf delete mode 100644 examples/runner-default/generated/.gitkeep rename examples/{runner-pre-registered => runner-fleeting-plugin}/.terraform-version (100%) create mode 100644 examples/runner-fleeting-plugin/README.md create mode 100644 examples/runner-fleeting-plugin/main.tf rename examples/{runner-pre-registered => runner-fleeting-plugin}/providers.tf (100%) rename examples/{runner-pre-registered => runner-fleeting-plugin}/variables.tf (81%) rename examples/{runner-pre-registered => runner-fleeting-plugin}/versions.tf (100%) delete mode 100644 examples/runner-pre-registered/.terraform.lock.hcl delete mode 100644 examples/runner-pre-registered/README.md delete mode 100644 examples/runner-pre-registered/generated/.gitkeep delete mode 100644 examples/runner-pre-registered/main.tf create mode 100644 packer_images/README.md create mode 100644 packer_images/amz-linux-docker.json create mode 100644 packer_images/ubuntu-docker.json create mode 100644 policies/instance-docker-autoscaler-policy.json create mode 100644 template/runner-docker-autoscaler-config.tftpl create mode 100644 template/runner-docker-machine-config.tftpl diff --git a/.cspell.json b/.cspell.json index b071e186b..53437c4af 100644 --- a/.cspell.json +++ b/.cspell.json @@ -10,11 +10,14 @@ "awscli", "boto", "botocore", + "buildx", "certdir", "checkmarx", "concat", + "containerd", "devskim", "dind", + "dpkg", "endfor", "filesha", "formatlist", @@ -22,6 +25,7 @@ 
"glrunners", "hmarr", "instancelifecycle", + "keyrings", "kics", "joho", "jsonencode", @@ -57,6 +61,7 @@ "tonumber", "trimprefix", "trivy", + "usermod", "userns", "xanzy", "xvda" @@ -64,8 +69,10 @@ "words": [ "aquasecurity", "automerge", + "autoscaler", "backports", "blockquotes", + "bluegreen", "codeowners", "cpu", "cpus", @@ -73,6 +80,7 @@ "gitter", "Niek", "oxsecurity", + "rebalance", "signoff", "typecheck", "userdata", diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d7454dfa4..b36d1269b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,7 @@ jobs: [ "runner-default", "runner-docker", - "runner-pre-registered", + "runner-fleeting-plugin", "runner-public", "runner-certificates", ] diff --git a/data.tf b/data.tf index 25b5e4661..d43a06b5a 100644 --- a/data.tf +++ b/data.tf @@ -41,3 +41,19 @@ data "aws_ami" "docker-machine" { owners = var.runner_worker_docker_machine_ami_owners } + +data "aws_ami" "docker-autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + most_recent = "true" + + dynamic "filter" { + for_each = var.runner_worker_docker_autoscaler_ami_filter + content { + name = filter.key + values = filter.value + } + } + + owners = var.runner_worker_docker_autoscaler_ami_owners +} diff --git a/docker_autoscaler.tf b/docker_autoscaler.tf new file mode 100644 index 000000000..f6b3bf738 --- /dev/null +++ b/docker_autoscaler.tf @@ -0,0 +1,198 @@ +# +# This file is responsible for creating the resources needed to run the docker autoscaler plugin from GitLab. It replaces the +# outdated docker+machine driver. The docker+machine driver is a legacy driver that is no longer maintained by GitLab. +# + +resource "aws_security_group" "docker_autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + description = "Docker autoscaler security group" + vpc_id = var.vpc_id + name = "${local.name_sg}-docker-autoscaler" + + tags = merge( + local.tags, + { + "Name" = format("%s", local.name_sg) + }, + ) +} + +resource "aws_security_group_rule" "autoscaler_egress" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + description = "All egress traffic docker autoscaler" + type = "egress" + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + security_group_id = join("", aws_security_group.docker_autoscaler[*].id) +} + +resource "aws_security_group_rule" "autoscaler_ingress" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + description = "All ingress traffic from runner security group" + type = "ingress" + from_port = 0 + to_port = 0 + protocol = "-1" + source_security_group_id = aws_security_group.runner.id + security_group_id = join("", aws_security_group.docker_autoscaler[*].id) +} + +resource "aws_security_group_rule" "extra_autoscaler_ingress" { + count = var.runner_worker.type == "docker-autoscaler" ? 
length(var.runner_worker_docker_autoscaler_asg.sg_ingresses) : 0 + + description = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].description + type = "ingress" + from_port = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].from_port + to_port = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].to_port + protocol = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].protocol + cidr_blocks = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].cidr_blocks + security_group_id = join("", aws_security_group.docker_autoscaler[*].id) +} + +#################################### +###### Launch template Workers ##### +#################################### +resource "aws_launch_template" "this" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + name = "${local.name_runner_agent_instance}-worker-launch-template" + user_data = base64gzip(var.runner_worker_docker_autoscaler_instance.start_script) + image_id = data.aws_ami.docker-autoscaler[0].id + instance_type = var.runner_worker_docker_autoscaler_asg.types[0] + key_name = aws_key_pair.autoscaler[0].key_name + ebs_optimized = var.runner_worker_docker_autoscaler_instance.ebs_optimized + + monitoring { + enabled = var.runner_worker_docker_autoscaler_instance.monitoring + } + + iam_instance_profile { + name = aws_iam_instance_profile.docker_autoscaler[0].name + } + + network_interfaces { + security_groups = [aws_security_group.docker_autoscaler[0].id] + associate_public_ip_address = !var.runner_worker_docker_autoscaler_instance.private_address_only + } + + block_device_mappings { + device_name = var.runner_worker_docker_autoscaler_instance.root_device_name + + ebs { + volume_size = var.runner_worker_docker_autoscaler_instance.root_size + volume_type = var.runner_worker_docker_autoscaler_instance.volume_type + iops = contains(["gp3", "io1", "io2"], var.runner_worker_docker_autoscaler_instance.volume_type) ? var.runner_worker_docker_autoscaler_instance.volume_iops : null + throughput = var.runner_worker_docker_autoscaler_instance.volume_type == "gp3" ? var.runner_worker_docker_autoscaler_instance.volume_throughput : null + } + } + + tag_specifications { + resource_type = "instance" + tags = local.tags + } + + tag_specifications { + resource_type = "volume" + tags = local.tags + } + + tags = local.tags + + metadata_options { + http_tokens = var.runner_worker_docker_autoscaler_instance.http_tokens + http_put_response_hop_limit = var.runner_worker_docker_autoscaler_instance.http_put_response_hop_limit + instance_metadata_tags = "enabled" + } + + lifecycle { + create_before_destroy = true + } +} + +######################################### +# Autoscaling group with launch template +######################################### +# false positive, tags are created with "dynamic" block +# kics-scan ignore-line +resource "aws_autoscaling_group" "autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + name = "${local.name_runner_agent_instance}-asg" + capacity_rebalance = false + protect_from_scale_in = true + + dynamic "launch_template" { + for_each = var.runner_worker_docker_autoscaler_asg.enable_mixed_instances_policy ? [] : [1] + content { + id = aws_launch_template.this[0].id + version = aws_launch_template.this[0].latest_version + } + } + + dynamic "mixed_instances_policy" { + for_each = var.runner_worker_docker_autoscaler_asg.enable_mixed_instances_policy ? 
[1] : [] + + content { + instances_distribution { + on_demand_base_capacity = var.runner_worker_docker_autoscaler_asg.on_demand_base_capacity + on_demand_percentage_above_base_capacity = var.runner_worker_docker_autoscaler_asg.on_demand_percentage_above_base_capacity + spot_allocation_strategy = var.runner_worker_docker_autoscaler_asg.spot_allocation_strategy + spot_instance_pools = var.runner_worker_docker_autoscaler_asg.spot_instance_pools + } + launch_template { + launch_template_specification { + launch_template_id = aws_launch_template.this[0].id + version = aws_launch_template.this[0].latest_version + } + dynamic "override" { + for_each = var.runner_worker_docker_autoscaler_asg.types + content { + instance_type = override.value + } + } + } + } + } + + dynamic "instance_refresh" { + for_each = var.runner_worker_docker_autoscaler_asg.upgrade_strategy == "rolling" ? [1] : [] + content { + strategy = "Rolling" + preferences { + min_healthy_percentage = var.runner_worker_docker_autoscaler_asg.instance_refresh_min_healthy_percentage + } + triggers = var.runner_worker_docker_autoscaler_asg.instance_refresh_triggers + } + } + + vpc_zone_identifier = var.runner_worker_docker_autoscaler_asg.subnet_ids + max_size = var.runner_worker.max_jobs + min_size = 0 + desired_capacity = 0 # managed by the fleeting plugin + health_check_grace_period = var.runner_worker_docker_autoscaler_asg.health_check_grace_period + health_check_type = var.runner_worker_docker_autoscaler_asg.health_check_type + force_delete = true + + dynamic "tag" { + for_each = local.tags + content { + key = tag.key + value = tag.value + propagate_at_launch = true + } + } + + lifecycle { + # do not change these values as we would immediately scale up/down, which is not wanted + ignore_changes = [ + desired_capacity, + min_size, + max_size + ] + } +} diff --git a/docs/usage.md b/docs/usage.md index 67f3f9923..645164a0a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -144,6 +144,79 @@ module "runner" { } ``` +### Scenario: Use of Docker autoscaler + +As docker machine is no longer maintained by docker, gitlab recently developed docker autoscaler to replace docker machine (still in beta). An option is available to test it out. + +Tested with amazon-linux-2-x86 as runner manager and ubuntu-server-22-lts-x86 for runner worker. The following commands have been added to the original AMI for the runner worker for the docker-autoscaler to work correctly: + +```bash +# Install docker +# Add Docker's official GPG key: +apt-get update +apt-get install -y ca-certificates curl +install -m 0755 -d /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +chmod a+r /etc/apt/keyrings/docker.asc + +# Add the repository to Apt sources: +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \
+  tee /etc/apt/sources.list.d/docker.list > /dev/null
+apt-get update
+
+apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+usermod -aG docker ubuntu
+```
+
+#### Configuration example
+
+```hcl
+module "runner" {
+  # https://registry.terraform.io/modules/cattle-ops/gitlab-runner/aws/
+  source = "cattle-ops/gitlab-runner/aws"
+
+  vpc_id    = module.vpc.vpc_id
+  subnet_id = element(module.vpc.private_subnets, 0)
+
+  runner_gitlab = {
+    tag_list = "runner_worker"
+    type     = "instance"
+    url      = "https://gitlab.com"
+
+    preregistered_runner_token_ssm_parameter_name = "my-gitlab-runner-token-ssm-parameter-name"
+  }
+
+  runner_manager = {
+    maximum_concurrent_jobs = 5
+  }
+
+  runner_worker = {
+    max_jobs            = 5
+    request_concurrency = 5
+    type                = "docker-autoscaler"
+  }
+
+  runner_worker_docker_autoscaler_asg = {
+    on_demand_percentage_above_base_capacity = 0
+    enable_mixed_instances_policy            = true
+    subnet_ids                               = module.vpc.private_subnets
+    types                                    = ["t3a.medium", "t3.medium"]
+  }
+
+  runner_worker_docker_autoscaler_instance = {
+    volume_type          = "gp3"
+    private_address_only = true
+    ebs_optimized        = true
+    root_size            = 40
+  }
+
+  # remove idle Runner Workers after 10 minutes
+  runner_worker_docker_autoscaler_autoscaling_options = [
+    {
+      periods   = ["* * * * *"]
+      idle_time = "10m"
+    }
+  ]
+
+  runner_worker_docker_autoscaler = {
+    connector_config_user = "ubuntu"
+  }
+}
+```
+
 ## Examples
 
 A few [examples](https://github.com/cattle-ops/terraform-aws-gitlab-runner/tree/main/examples/) are provided. Use the
diff --git a/examples/README.md b/examples/README.md
index 62bb22fd9..7397f7889 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,7 +2,9 @@
 
 Please see the readme in per example for more details:
 
+- [runner-certificates](runner-certificates/) Example showing how to add custom TLS certificates to the runner.
 - [runner-default](runner-default/) The default setup, private subnet, auto register, runner on spot instances.
-- [runner-public](runner-public/) Runner in a public subnet, auto register, runner on spot instances.
 - [runner-docker](runner-docker/) Runners run on the same instance as the agent.
-- [runner-pre-registered](runner-pre-registered) Runner needs to be preregistered, old setup DEPRECATED.
+- [runner-fleeting-plugin](runner-fleeting-plugin/) Runners using the AWS fleeting plugin.
+- [runner-public](runner-public/) Runner in a public subnet, auto register, runner on spot instances.
+
diff --git a/examples/runner-certificates/README.md b/examples/runner-certificates/README.md
index 1d67d74ec..590e2a734 100644
--- a/examples/runner-certificates/README.md
+++ b/examples/runner-certificates/README.md
@@ -33,7 +33,7 @@ Create a PEM-encoded `.crt` file containing the public certificate of your Gitla
   ```hcl
   module {
     # ...
- # Public cert of my companys gitlab instance + # Public cert of my company's gitlab instance runner_gitlab = { certificate = file("${path.module}/my_gitlab_instance_cert.crt") } diff --git a/examples/runner-default/generated/.gitkeep b/examples/runner-default/generated/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/runner-pre-registered/.terraform-version b/examples/runner-fleeting-plugin/.terraform-version similarity index 100% rename from examples/runner-pre-registered/.terraform-version rename to examples/runner-fleeting-plugin/.terraform-version diff --git a/examples/runner-fleeting-plugin/README.md b/examples/runner-fleeting-plugin/README.md new file mode 100644 index 000000000..8c132c2c7 --- /dev/null +++ b/examples/runner-fleeting-plugin/README.md @@ -0,0 +1,27 @@ +# Example - AWS Fleeting Plugin - Docker + +This example shows how to deploy a GitLab Runner using the [AWS Fleeting Plugin](https://docs.gitlab.com/runner/configuration/autoscale.html) +with Docker and spot instances. + +This examples shows: + +- You can log into the instance via SSM (Session Manager). +- register the Runner manually in GitLab +- Auto scaling using AWS Fleeting Plugin. + +Multi region deployment is, of course, possible. Just instantiate the module multiple times with different AWS providers. In case +you use the cache, make sure to have one cache per region. + +Attention: You have to built your own AMI before. See the [README.md](../../packer_images/README.md) for more information. + +## Prerequisite + +The Terraform version is managed using [tfenv](https://github.com/Zordrak/tfenv). If you are not using `tfenv` please +check `.terraform-version` for the tested version. + + + + + + + diff --git a/examples/runner-fleeting-plugin/main.tf b/examples/runner-fleeting-plugin/main.tf new file mode 100644 index 000000000..302f1b1c5 --- /dev/null +++ b/examples/runner-fleeting-plugin/main.tf @@ -0,0 +1,135 @@ +data "aws_availability_zones" "available" { + state = "available" +} + +data "aws_security_group" "default" { + name = "default" + vpc_id = module.vpc.vpc_id +} + +# VPC Flow logs are not needed here +# kics-scan ignore-line +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "5.9.0" + + name = "vpc-${var.environment}" + cidr = "10.0.0.0/16" + + azs = [data.aws_availability_zones.available.names[0]] + private_subnets = ["10.0.1.0/24"] + public_subnets = ["10.0.101.0/24"] + map_public_ip_on_launch = false + + enable_nat_gateway = true + single_nat_gateway = true + + tags = { + Environment = var.environment + } +} + +module "vpc_endpoints" { + source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints" + version = "5.9.0" + + vpc_id = module.vpc.vpc_id + + endpoints = { + s3 = { + service = "s3" + tags = { Name = "s3-vpc-endpoint" } + } + } + + tags = { + Environment = var.environment + } +} + +module "runner" { + source = "../../" + + environment = var.environment + + vpc_id = module.vpc.vpc_id + subnet_id = element(module.vpc.private_subnets, 0) + + runner_instance = { + collect_autoscaling_metrics = ["GroupDesiredCapacity", "GroupInServiceCapacity"] + name = var.runner_name + ssm_access = true + } + + runner_networking = { + allow_incoming_ping_security_group_ids = [data.aws_security_group.default.id] + } + + runner_gitlab = { + url = var.gitlab_url + + preregistered_runner_token_ssm_parameter_name = var.preregistered_runner_token_ssm_parameter_name + } + + runner_worker = { + type = "docker-autoscaler" + } + + runner_worker_gitlab_pipeline = { + 
pre_build_script = < - - - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 1.3 | -| [aws](#requirement\_aws) | 5.59.0 | -| [local](#requirement\_local) | 2.5.1 | -| [null](#requirement\_null) | 3.2.2 | -| [random](#requirement\_random) | 3.6.2 | -| [tls](#requirement\_tls) | 4.0.5 | - -## Providers - -| Name | Version | -|------|---------| -| [aws](#provider\_aws) | 5.59.0 | - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [runner](#module\_runner) | ../../ | n/a | -| [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | 5.9.0 | -| [vpc\_endpoints](#module\_vpc\_endpoints) | terraform-aws-modules/vpc/aws//modules/vpc-endpoints | 5.9.0 | - -## Resources - -| Name | Type | -|------|------| -| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/5.59.0/docs/data-sources/availability_zones) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [aws\_region](#input\_aws\_region) | AWS region. | `string` | `"eu-west-1"` | no | -| [environment](#input\_environment) | A name that identifies the environment, will used as prefix and for tagging. | `string` | `"ci-runners"` | no | -| [gitlab\_url](#input\_gitlab\_url) | URL of the gitlab instance to connect to. | `string` | n/a | yes | -| [preregistered\_runner\_token\_ssm\_parameter\_name](#input\_preregistered\_runner\_token\_ssm\_parameter\_name) | The name of the SSM parameter to read the preregistered GitLab Runner token from. | `string` | n/a | yes | -| [runner\_name](#input\_runner\_name) | Name of the runner, will be used in the runner config.toml | `string` | n/a | yes | -| [timezone](#input\_timezone) | Timezone that will be set for the runner. | `string` | `"Europe/Amsterdam"` | no | - -## Outputs - -No outputs. 
- diff --git a/examples/runner-pre-registered/generated/.gitkeep b/examples/runner-pre-registered/generated/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/runner-pre-registered/main.tf b/examples/runner-pre-registered/main.tf deleted file mode 100644 index a97254dc5..000000000 --- a/examples/runner-pre-registered/main.tf +++ /dev/null @@ -1,71 +0,0 @@ -data "aws_availability_zones" "available" { - state = "available" -} - -# VPC Flow logs are not needed here -# kics-scan ignore-line -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - version = "5.9.0" - - name = "vpc-${var.environment}" - cidr = "10.0.0.0/16" - - azs = [data.aws_availability_zones.available.names[0]] - private_subnets = ["10.0.1.0/24"] - public_subnets = ["10.0.101.0/24"] - map_public_ip_on_launch = false - - enable_nat_gateway = true - single_nat_gateway = true - - tags = { - Environment = var.environment - } -} - -module "vpc_endpoints" { - source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints" - version = "5.9.0" - - vpc_id = module.vpc.vpc_id - - endpoints = { - s3 = { - service = "s3" - tags = { Name = "s3-vpc-endpoint" } - } - } - - tags = { - Environment = var.environment - } -} - -module "runner" { - source = "../../" - - vpc_id = module.vpc.vpc_id - subnet_id = element(module.vpc.private_subnets, 0) - environment = var.environment - - runner_instance = { - name = var.runner_name - } - - runner_gitlab = { - url = var.gitlab_url - - preregistered_runner_token_ssm_parameter_name = var.preregistered_runner_token_ssm_parameter_name - } - - # working 9 to 5 :) - runner_worker_docker_machine_autoscaling_options = [ - { - periods = ["* * 0-9,17-23 * * mon-fri *", "* * * * * sat,sun *"] - idle_count = 0 - idle_time = 60 - timezone = var.timezone - } - ] -} diff --git a/main.tf b/main.tf index 9ba847c29..c66d8f695 100644 --- a/main.tf +++ b/main.tf @@ -46,6 +46,7 @@ locals { template_gitlab_runner = templatefile("${path.module}/template/gitlab-runner.tftpl", { + aws_region = data.aws_region.current.name gitlab_runner_version = var.runner_gitlab.runner_version docker_machine_version = var.runner_install.docker_machine_version docker_machine_download_url = var.runner_install.docker_machine_download_url @@ -81,6 +82,56 @@ locals { private_key = var.runner_worker_docker_machine_fleet.enable == true ? tls_private_key.fleet[0].private_key_pem : "" use_new_runner_authentication_gitlab_16 = var.runner_gitlab_registration_config.type != "" user_data_trace_log = var.debug.trace_runner_user_data + fleeting_plugin_version = var.runner_worker_docker_autoscaler.fleeting_plugin_version + }) + + template_runner_docker_machine = templatefile("${path.module}/template/runner-docker-machine-config.tftpl", + { + runners_idle_count = var.runner_worker_docker_machine_instance.idle_count + runners_idle_time = var.runner_worker_docker_machine_instance.idle_time + runners_max_builds = local.runners_max_builds_string + docker_machine_name = format("%s-%s", local.runner_tags_merged["Name"], "%s") # %s is always needed + runners_instance_types = var.runner_worker_docker_machine_instance.types + aws_region = data.aws_region.current.name + runners_aws_zone = data.aws_availability_zone.runners.name_suffix + runners_userdata = var.runner_worker_docker_machine_instance.start_script + + runners_vpc_id = var.vpc_id + runners_subnet_id = var.subnet_id + runners_subnet_ids = length(var.runner_worker_docker_machine_instance.subnet_ids) > 0 ? 
var.runner_worker_docker_machine_instance.subnet_ids : [var.subnet_id] + runners_instance_profile = var.runner_worker.type == "docker+machine" ? aws_iam_instance_profile.docker_machine[0].name : "" + + runners_use_private_address_only = var.runner_worker_docker_machine_instance.private_address_only + runners_use_private_address = !var.runner_worker_docker_machine_instance.private_address_only + runners_request_spot_instance = var.runner_worker_docker_machine_instance_spot.enable + runners_spot_price_bid = var.runner_worker_docker_machine_instance_spot.max_price == "on-demand-price" || var.runner_worker_docker_machine_instance_spot.max_price == null ? "" : var.runner_worker_docker_machine_instance_spot.max_price + runners_security_group_name = var.runner_worker.type == "docker+machine" ? aws_security_group.docker_machine[0].name : "" + + runners_tags = replace(replace(local.runner_tags_string, ",,", ","), "/,$/", "") + runners_ebs_optimized = var.runner_worker_docker_machine_instance.ebs_optimized + runners_monitoring = var.runner_worker_docker_machine_instance.monitoring + runners_iam_instance_profile_name = var.runner_worker_docker_machine_role.profile_name + runners_root_size = var.runner_worker_docker_machine_instance.root_size + runners_volume_type = var.runner_worker_docker_machine_instance.volume_type + runners_ami = var.runner_worker.type == "docker+machine" ? data.aws_ami.docker-machine[0].id : "" + use_fleet = var.runner_worker_docker_machine_fleet.enable + launch_template = var.runner_worker_docker_machine_fleet.enable == true ? aws_launch_template.fleet_gitlab_runner[0].name : "" + docker_machine_options = length(local.docker_machine_options_string) == 1 ? "" : local.docker_machine_options_string + runners_max_growth_rate = var.runner_worker_docker_machine_instance.max_growth_rate + }) + + template_runner_docker_autoscaler = templatefile("${path.module}/template/runner-docker-autoscaler-config.tftpl", + { + docker_autoscaling_name = var.runner_worker.type == "docker-autoscaler" ? aws_autoscaling_group.autoscaler[0].name : "" + connector_config_user = var.runner_worker_docker_autoscaler.connector_config_user + runners_capacity_per_instance = 1 + runners_max_use_count = var.runner_worker_docker_autoscaler.max_use_count + runners_max_instances = var.runner_worker.max_jobs + runners_autoscaling = [for config in var.runner_worker_docker_autoscaler_autoscaling_options : { + for key, value in config : + # Convert key from snake_case to PascalCase which is the casing for this section. + join("", [for subkey in split("_", key) : title(subkey)]) => jsonencode(value) if value != null + }] }) template_runner_config = templatefile("${path.module}/template/runner-config.tftpl", @@ -94,55 +145,30 @@ locals { # Convert key from snake_case to PascalCase which is the casing for this section. join("", [for subkey in split("_", key) : title(subkey)]) => jsonencode(value) if value != null }] - runners_vpc_id = var.vpc_id - runners_subnet_id = var.subnet_id - runners_subnet_ids = length(var.runner_worker_docker_machine_instance.subnet_ids) > 0 ? var.runner_worker_docker_machine_instance.subnet_ids : [var.subnet_id] - runners_aws_zone = data.aws_availability_zone.runners.name_suffix - runners_instance_types = var.runner_worker_docker_machine_instance.types - runners_spot_price_bid = var.runner_worker_docker_machine_instance_spot.max_price == "on-demand-price" || var.runner_worker_docker_machine_instance_spot.max_price == null ? 
"" : var.runner_worker_docker_machine_instance_spot.max_price - runners_ami = var.runner_worker.type == "docker+machine" ? data.aws_ami.docker-machine[0].id : "" - runners_security_group_name = var.runner_worker.type == "docker+machine" ? aws_security_group.docker_machine[0].name : "" - runners_max_growth_rate = var.runner_worker_docker_machine_instance.max_growth_rate - runners_monitoring = var.runner_worker_docker_machine_instance.monitoring - runners_ebs_optimized = var.runner_worker_docker_machine_instance.ebs_optimized - runners_instance_profile = var.runner_worker.type == "docker+machine" ? aws_iam_instance_profile.docker_machine[0].name : "" - docker_machine_options = length(local.docker_machine_options_string) == 1 ? "" : local.docker_machine_options_string - docker_machine_name = format("%s-%s", local.runner_tags_merged["Name"], "%s") # %s is always needed - runners_name = var.runner_instance.name - runners_tags = replace(replace(local.runner_tags_string, ",,", ","), "/,$/", "") - runners_token = var.runner_gitlab.registration_token - runners_userdata = var.runner_worker_docker_machine_instance.start_script - runners_executor = var.runner_worker.type - runners_limit = var.runner_worker.max_jobs - runners_concurrent = var.runner_manager.maximum_concurrent_jobs - runners_idle_count = var.runner_worker_docker_machine_instance.idle_count - runners_idle_time = var.runner_worker_docker_machine_instance.idle_time - runners_max_builds = local.runners_max_builds_string - runners_root_device_name = var.runner_worker_docker_machine_instance.root_device_name - runners_root_size = var.runner_worker_docker_machine_instance.root_size - runners_volume_type = var.runner_worker_docker_machine_instance.volume_type - runners_iam_instance_profile_name = var.runner_worker_docker_machine_role.profile_name - runners_use_private_address_only = var.runner_worker_docker_machine_instance.private_address_only - runners_use_private_address = !var.runner_worker_docker_machine_instance.private_address_only - runners_request_spot_instance = var.runner_worker_docker_machine_instance_spot.enable - runners_environment_vars = jsonencode(var.runner_worker.environment_variables) - runners_pre_build_script = var.runner_worker_gitlab_pipeline.pre_build_script - runners_post_build_script = var.runner_worker_gitlab_pipeline.post_build_script - runners_pre_clone_script = var.runner_worker_gitlab_pipeline.pre_clone_script - runners_request_concurrency = var.runner_worker.request_concurrency - runners_output_limit = var.runner_worker.output_limit - runners_check_interval = var.runner_manager.gitlab_check_interval - runners_volumes_tmpfs = join("\n", [for v in var.runner_worker_docker_volumes_tmpfs : format("\"%s\" = \"%s\"", v.volume, v.options)]) - runners_services_volumes_tmpfs = join("\n", [for v in var.runner_worker_docker_services_volumes_tmpfs : format("\"%s\" = \"%s\"", v.volume, v.options)]) - runners_docker_services = local.runners_docker_services - runners_docker_options = local.runners_docker_options_toml - bucket_name = local.bucket_name - shared_cache = var.runner_worker_cache.shared - sentry_dsn = var.runner_manager.sentry_dsn - prometheus_listen_address = var.runner_manager.prometheus_listen_address - auth_type = var.runner_worker_cache.authentication_type - use_fleet = var.runner_worker_docker_machine_fleet.enable - launch_template = var.runner_worker_docker_machine_fleet.enable == true ? 
aws_launch_template.fleet_gitlab_runner[0].name : "" + + runners_name = var.runner_instance.name + runners_token = var.runner_gitlab.registration_token + runners_executor = var.runner_worker.type + runners_limit = var.runner_worker.max_jobs + runners_concurrent = var.runner_manager.maximum_concurrent_jobs + runners_environment_vars = jsonencode(var.runner_worker.environment_variables) + runners_pre_build_script = var.runner_worker_gitlab_pipeline.pre_build_script + runners_post_build_script = var.runner_worker_gitlab_pipeline.post_build_script + runners_pre_clone_script = var.runner_worker_gitlab_pipeline.pre_clone_script + runners_request_concurrency = var.runner_worker.request_concurrency + runners_output_limit = var.runner_worker.output_limit + runners_check_interval = var.runner_manager.gitlab_check_interval + runners_volumes_tmpfs = join("\n", [for v in var.runner_worker_docker_volumes_tmpfs : format("\"%s\" = \"%s\"", v.volume, v.options)]) + runners_services_volumes_tmpfs = join("\n", [for v in var.runner_worker_docker_services_volumes_tmpfs : format("\"%s\" = \"%s\"", v.volume, v.options)]) + runners_docker_services = local.runners_docker_services + runners_docker_options = local.runners_docker_options_toml + bucket_name = local.bucket_name + shared_cache = var.runner_worker_cache.shared + sentry_dsn = var.runner_manager.sentry_dsn + prometheus_listen_address = var.runner_manager.prometheus_listen_address + auth_type = var.runner_worker_cache.authentication_type + runners_docker_autoscaler = var.runner_worker.type == "docker-autoscaler" ? local.template_runner_docker_autoscaler : "" + runners_docker_machine = var.runner_worker.type == "docker+machine" ? local.template_runner_docker_machine : "" } ) } @@ -307,6 +333,13 @@ resource "tls_private_key" "fleet" { rsa_bits = 4096 } +resource "tls_private_key" "autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + algorithm = "RSA" + rsa_bits = 4096 +} + resource "aws_key_pair" "fleet" { count = var.runner_worker_docker_machine_fleet.enable == true && var.runner_worker.type == "docker+machine" ? 1 : 0 @@ -316,6 +349,15 @@ resource "aws_key_pair" "fleet" { tags = local.tags } +resource "aws_key_pair" "autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + + key_name = "${var.environment}-${var.runner_worker_docker_autoscaler.key_pair_name}" + public_key = tls_private_key.autoscaler[0].public_key_openssh + + tags = local.tags +} + resource "aws_launch_template" "fleet_gitlab_runner" { # checkov:skip=CKV_AWS_88:User can decide to add a public IP. # checkov:skip=CKV_AWS_79:User can decide to enable Metadata service V2. V2 is the default. @@ -471,6 +513,24 @@ resource "aws_iam_policy" "instance_docker_machine_policy" { tags = local.tags } +resource "aws_iam_policy" "instance_docker_autoscaler_policy" { + count = var.runner_worker.type == "docker-autoscaler" && var.runner_role.create_role_profile ? 1 : 0 + + name = "${local.name_iam_objects}-docker-autoscaler" + path = "/" + description = "Policy for docker autoscaler." 
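+  # Scopes SetDesiredCapacity/TerminateInstanceInAutoScalingGroup to the worker ASG and limits EC2 Instance Connect /
+  # GetPasswordData to instances of that ASG (see policies/instance-docker-autoscaler-policy.json).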
+ # see https://gitlab.com/gitlab-org/fleeting/plugins/aws#recommended-iam-policy for needed policies + policy = templatefile("${path.module}/policies/instance-docker-autoscaler-policy.json", + { + aws_region = data.aws_region.current.name + partition = data.aws_partition.current.partition + autoscaler_asg_arn = aws_autoscaling_group.autoscaler[0].arn + autoscaler_asg_name = aws_autoscaling_group.autoscaler[0].name + }) + + tags = local.tags +} + resource "aws_iam_role_policy_attachment" "instance_docker_machine_policy" { count = var.runner_worker.type == "docker+machine" && var.runner_role.create_role_profile ? 1 : 0 @@ -478,6 +538,13 @@ resource "aws_iam_role_policy_attachment" "instance_docker_machine_policy" { policy_arn = aws_iam_policy.instance_docker_machine_policy[0].arn } +resource "aws_iam_role_policy_attachment" "instance_docker_autoscaler_policy" { + count = var.runner_worker.type == "docker-autoscaler" && var.runner_role.create_role_profile ? 1 : 0 + + role = aws_iam_role.instance[0].name + policy_arn = aws_iam_policy.instance_docker_autoscaler_policy[0].arn +} + ################################################################################ ### Policies for runner agent instance to allow connection via Session Manager ################################################################################ @@ -541,8 +608,6 @@ resource "aws_iam_role" "docker_machine" { tags = local.tags } - - resource "aws_iam_instance_profile" "docker_machine" { count = var.runner_worker.type == "docker+machine" ? 1 : 0 name = "${local.name_iam_objects}-docker-machine" @@ -550,6 +615,22 @@ resource "aws_iam_instance_profile" "docker_machine" { tags = local.tags } +resource "aws_iam_role" "docker_autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + name = "${local.name_iam_objects}-docker-autoscaler" + assume_role_policy = length(var.runner_worker_docker_autoscaler_role.assume_role_policy_json) > 0 ? var.runner_worker_docker_autoscaler_role.assume_role_policy_json : templatefile("${path.module}/policies/instance-role-trust-policy.json", {}) + permissions_boundary = var.iam_permissions_boundary == "" ? null : "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.iam_permissions_boundary}" + + tags = local.tags +} + +resource "aws_iam_instance_profile" "docker_autoscaler" { + count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0 + name = "${local.name_iam_objects}-docker-autoscaler" + role = aws_iam_role.docker_autoscaler[0].name + tags = local.tags +} + ################################################################################ ### Add user defined policies ################################################################################ @@ -560,6 +641,13 @@ resource "aws_iam_role_policy_attachment" "docker_machine_user_defined_policies" policy_arn = var.runner_worker_docker_machine_role.policy_arns[count.index] } +resource "aws_iam_role_policy_attachment" "docker_autoscaler_user_defined_policies" { + count = var.runner_worker.type == "docker-autoscaler" ? length(var.runner_worker_docker_autoscaler_role.policy_arns) : 0 + + role = aws_iam_role.docker_autoscaler[0].name + policy_arn = var.runner_worker_docker_autoscaler_role.policy_arns[count.index] +} + ################################################################################ resource "aws_iam_role_policy_attachment" "docker_machine_session_manager_aws_managed" { count = (var.runner_worker.type == "docker+machine" && var.runner_worker.ssm_access) ? 
1 : 0
@@ -568,7 +656,12 @@ resource "aws_iam_role_policy_attachment" "docker_machine_session_manager_aws_ma
 
   role       = aws_iam_role.docker_machine[0].name
   policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSSMManagedInstanceCore"
 }
 
+resource "aws_iam_role_policy_attachment" "docker_autoscaler_session_manager_aws_managed" {
+  count = (var.runner_worker.type == "docker-autoscaler" && var.runner_worker.ssm_access) ? 1 : 0
+  role       = aws_iam_role.docker_autoscaler[0].name
+  policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSSMManagedInstanceCore"
+}
 ################################################################################
 ### Service linked policy, optional
diff --git a/outputs.tf b/outputs.tf
index 2504aa0fc..c97a56b14 100644
--- a/outputs.tf
+++ b/outputs.tf
@@ -25,12 +25,12 @@ output "runner_agent_role_name" {
 
 output "runner_role_arn" {
   description = "ARN of the role used for the docker machine runners."
-  value       = length(aws_iam_role.docker_machine) > 0 ? aws_iam_role.docker_machine[0].arn : null
+  value       = length(aws_iam_role.docker_machine) > 0 ? aws_iam_role.docker_machine[0].arn : (length(aws_iam_role.docker_autoscaler) > 0 ? aws_iam_role.docker_autoscaler[0].arn : null)
 }
 
 output "runner_role_name" {
   description = "Name of the role used for the docker machine runners."
-  value       = length(aws_iam_role.docker_machine) > 0 ? aws_iam_role.docker_machine[0].name : null
+  value       = length(aws_iam_role.docker_machine) > 0 ? aws_iam_role.docker_machine[0].name : (length(aws_iam_role.docker_autoscaler) > 0 ? aws_iam_role.docker_autoscaler[0].name : null)
 }
 
 output "runner_agent_sg_id" {
diff --git a/packer_images/README.md b/packer_images/README.md
new file mode 100644
index 000000000..9d8a7df08
--- /dev/null
+++ b/packer_images/README.md
@@ -0,0 +1,22 @@
+To create an Amazon Machine Image (AMI) with Docker installed, you can use HashiCorp Packer. Packer is a tool for creating machine images for multiple platforms from a single source configuration.
+
+Here is a step-by-step guide:
+
+Prerequisites
+Make sure Packer is installed on your local machine. You can download it from the Packer website.
+
+Ensure the AWS CLI is installed and configured with appropriate credentials to create and manage resources in your AWS account.
+
+Use the provided template amz-linux-docker.json for building an Amazon Linux 2023 AMI and run Packer by executing the following command in your terminal:
+
+```bash
+packer build -var 'vpc_id=your_vpc_id' -var 'subnet_id=your_subnet_id' -var 'docker_registry_mirror=docker_registry_url' amz-linux-docker.json
+```
+
+Use the provided template ubuntu-docker.json for building an Ubuntu AMI and run Packer by executing the following command in your terminal:
+
+```bash
+packer build -var 'vpc_id=your_vpc_id' -var 'subnet_id=your_subnet_id' -var 'docker_registry_mirror=docker_registry_url' ubuntu-docker.json
+```
+
+The `docker_registry_mirror` variable is optional.
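+
+Once an image has been built, the module can be pointed at it instead of the default Ubuntu AMI filter. A minimal sketch (placed inside the `module "runner"` block; it assumes the `ami_name` set in the Packer templates above and that the image was built in the same AWS account):
+
+```hcl
+# Select the custom Amazon Linux 2023 image for the docker-autoscaler Runner Workers.
+runner_worker_docker_autoscaler_ami_filter = {
+  name = ["amazon-linux-2023-with-docker"]
+}
+runner_worker_docker_autoscaler_ami_owners = ["self"]
+
+# SSH user baked into the image: "ec2-user" for the Amazon Linux template, "ubuntu" for the Ubuntu template.
+runner_worker_docker_autoscaler = {
+  connector_config_user = "ec2-user"
+}
+```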
diff --git a/packer_images/amz-linux-docker.json b/packer_images/amz-linux-docker.json new file mode 100644 index 000000000..7f47ee3fa --- /dev/null +++ b/packer_images/amz-linux-docker.json @@ -0,0 +1,47 @@ +{ + "variables": { + "aws_region": "eu-west-3", + "instance_type": "t3.micro", + "ssh_username": "ec2-user", + "vpc_id": "", + "subnet_id": "", + "docker_registry_mirror": "" + }, + "builders": [ + { + "type": "amazon-ebs", + "region": "{{user `aws_region`}}", + "source_ami_filter": { + "filters": { + "name": "al2023-ami-2023.*-kernel-6.1-x86_64", + "architecture": "x86_64", + "virtualization-type": "hvm", + "root-device-type": "ebs" + }, + "owners": ["137112412989"], + "most_recent": true + }, + "instance_type": "{{user `instance_type`}}", + "ssh_username": "{{user `ssh_username`}}", + "ami_name": "amazon-linux-2023-with-docker", + "ami_description": "Amazon Linux 2023 with Docker installed", + "vpc_id": "{{user `vpc_id`}}", + "subnet_id": "{{user `subnet_id`}}" + } + ], + "provisioners": [ + { + "type": "shell", + "inline": [ + "sudo yum update -y", + "sudo yum install -y docker", + "sudo systemctl start docker", + "sudo systemctl enable docker", + "sudo usermod -aG docker ec2-user", + "{{if user `docker_registry_mirror`}}sudo mkdir -p /etc/docker{{end}}", + "{{if user `docker_registry_mirror`}}echo '{\"registry-mirrors\": [\"{{user `docker_registry_mirror`}}\"] }' | sudo tee /etc/docker/daemon.json{{end}}", + "{{if user `docker_registry_mirror`}}sudo systemctl restart docker{{end}}" + ] + } + ] +} diff --git a/packer_images/ubuntu-docker.json b/packer_images/ubuntu-docker.json new file mode 100644 index 000000000..70763db14 --- /dev/null +++ b/packer_images/ubuntu-docker.json @@ -0,0 +1,49 @@ +{ + "variables": { + "aws_region": "eu-west-3", + "instance_type": "t3.micro", + "ssh_username": "ubuntu", + "vpc_id": "", + "subnet_id": "", + "docker_registry_mirror": "" + }, + "builders": [ + { + "type": "amazon-ebs", + "region": "{{user `aws_region`}}", + "source_ami_filter": { + "filters" : { + "name" : "ubuntu/images/*ubuntu-jammy-22.04-amd64-server-*", + "root-device-type" : "ebs", + "virtualization-type" : "hvm" + }, + "most_recent" : true, + "owners" : ["099720109477"] + }, + "instance_type": "{{user `instance_type`}}", + "ssh_username": "{{user `ssh_username`}}", + "ami_name": "ubuntu-with-docker", + "ami_description": "Ubuntu with Docker installed", + "vpc_id": "{{user `vpc_id`}}", + "subnet_id": "{{user `subnet_id`}}" + } + ], + "provisioners": [ + { + "type": "shell", + "inline": [ + "sudo apt-get update", + "sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common", + "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -", + "sudo add-apt-repository \"deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\"", + "sudo apt-get update", + "sudo apt-get install -y docker-ce", + "sudo usermod -aG docker ubuntu", + "{{if user `docker_registry_mirror`}}sudo mkdir -p /etc/docker{{end}}", + "{{if user `docker_registry_mirror`}}echo '{\"registry-mirrors\": [\"{{user `docker_registry_mirror`}}\"] }' | sudo tee /etc/docker/daemon.json{{end}}", + "{{if user `docker_registry_mirror`}}sudo systemctl restart docker{{end}}" + ] + } + ] +} + diff --git a/policies/instance-docker-autoscaler-policy.json b/policies/instance-docker-autoscaler-policy.json new file mode 100644 index 000000000..4d3707d90 --- /dev/null +++ b/policies/instance-docker-autoscaler-policy.json @@ -0,0 +1,34 @@ +{ + "Version": 
"2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "autoscaling:SetDesiredCapacity", + "autoscaling:TerminateInstanceInAutoScalingGroup" + ], + "Resource": "${autoscaler_asg_arn}" + }, + { + "Effect": "Allow", + "Action": [ + "autoscaling:DescribeAutoScalingGroups", + "ec2:DescribeInstances" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:GetPasswordData", + "ec2-instance-connect:SendSSHPublicKey" + ], + "Resource": "arn:${partition}:ec2:${aws_region}:*:instance/*", + "Condition": { + "StringEquals": { + "ec2:ResourceTag/aws:autoscaling:groupName": "${autoscaler_asg_name}" + } + } + } + ] +} diff --git a/security_groups.tf b/security_groups.tf index 868123f5f..37fbed416 100644 --- a/security_groups.tf +++ b/security_groups.tf @@ -66,7 +66,7 @@ resource "aws_security_group_rule" "runner_ping_group" { resource "aws_security_group" "docker_machine" { # checkov:skip=CKV2_AWS_5:Security group is used within an template and assigned to the docker machines - count = var.runner_worker.type == "docker+machine" ? 1 : 0 + count = contains(["docker+machine", "docker-autoscaler"], var.runner_worker.type) ? 1 : 0 name_prefix = "${local.name_sg}-docker-machine" vpc_id = var.vpc_id diff --git a/template/gitlab-runner.tftpl b/template/gitlab-runner.tftpl index ee4ff5e1e..780cb617e 100644 --- a/template/gitlab-runner.tftpl +++ b/template/gitlab-runner.tftpl @@ -193,6 +193,40 @@ then unset USER fi +if [[ "${runners_executor}" == "docker-autoscaler" ]]; then + echo "installing docker" + yum install docker -y + usermod -a -G docker ec2-user + service docker start + + echo "Installing Docker fleeting plugin" + # Determine architecture + if [[ "$(uname -m)" == "x86_64" ]]; then + ARCH="amd64" + elif [[ "$(uname -m)" == "i686" ]]; then + ARCH="386" + elif [[ "$(uname -m)" == "armv7l" ]]; then + ARCH="arm" + elif [[ "$(uname -m)" == "aarch64" ]]; then + ARCH="arm64" + else + echo "Unsupported architecture" + exit 1 + fi + + wget "https://gitlab.com/gitlab-org/fleeting/plugins/aws/-/releases/v${fleeting_plugin_version}/downloads/fleeting-plugin-aws-$(uname -s | tr '[:upper:]' '[:lower:]')-$ARCH" + chmod +x fleeting-plugin-aws-* + mv fleeting-plugin-aws-* /bin/fleeting-plugin-aws + + mkdir ~/.aws + cat < ~/.aws/config +[default] +region = "${aws_region}" + +EOF + +fi + # A small script to remove this runner from being registered with Gitlab. Executed at shutdown. 
cat < /etc/systemd/system/remove-gitlab-registration.service [Unit] diff --git a/template/runner-config.tftpl b/template/runner-config.tftpl index 92c92c832..aabf0f3de 100644 --- a/template/runner-config.tftpl +++ b/template/runner-config.tftpl @@ -41,44 +41,9 @@ listen_address = "${prometheus_listen_address}" BucketLocation = "${aws_region}" Insecure = false - [runners.machine] - IdleCount = ${runners_idle_count} - IdleTime = ${runners_idle_time} - ${runners_max_builds} - MachineDriver = "amazonec2" - MachineName = "${docker_machine_name}" - MachineOptions = [ - %{~ for instance_type in runners_instance_types ~} - "amazonec2-instance-type=${instance_type}", - %{~ endfor ~} - "amazonec2-region=${aws_region}", - "amazonec2-zone=${runners_aws_zone}", - "amazonec2-vpc-id=${runners_vpc_id}", - %{~ for subnet_id in runners_subnet_ids ~} - "amazonec2-subnet-id=${subnet_id}", - %{~ endfor ~} - "amazonec2-private-address-only=${runners_use_private_address_only}", - "amazonec2-use-private-address=${runners_use_private_address}", - "amazonec2-request-spot-instance=${runners_request_spot_instance}", - %{~ if runners_spot_price_bid != "" ~}"amazonec2-spot-price=${runners_spot_price_bid}",%{~ endif ~} - "amazonec2-security-group=${runners_security_group_name}", - "amazonec2-tags=${join(",", compact([runners_tags, "__PARENT_TAG__"]))}", - "amazonec2-use-ebs-optimized-instance=${runners_ebs_optimized}", - "amazonec2-monitoring=${runners_monitoring}", - "amazonec2-iam-instance-profile=%{ if runners_iam_instance_profile_name != "" }${runners_iam_instance_profile_name}%{ else }${runners_instance_profile}%{ endif ~}", - "amazonec2-device-name=${runners_root_device_name}", - "amazonec2-root-size=${runners_root_size}", - "amazonec2-volume-type=${runners_volume_type}", - "amazonec2-userdata=%{~ if runners_userdata != "" ~}/etc/gitlab-runner/runners_userdata.sh%{~ endif ~}", - "amazonec2-ami=${runners_ami}" - %{~ if use_fleet == true ~} - ,"amazonec2-ssh-keypath=/root/.ssh/id_rsa", - "amazonec2-use-fleet=${use_fleet}", - "amazonec2-launch-template=${launch_template}" - %{~ endif ~} - ${docker_machine_options} - ] - MaxGrowthRate = ${runners_max_growth_rate} + ${runners_docker_autoscaler} + + ${runners_docker_machine} %{~ for config in runners_machine_autoscaling ~} [[runners.machine.autoscaling]] diff --git a/template/runner-docker-autoscaler-config.tftpl b/template/runner-docker-autoscaler-config.tftpl new file mode 100644 index 000000000..736fcc76b --- /dev/null +++ b/template/runner-docker-autoscaler-config.tftpl @@ -0,0 +1,22 @@ +# Autoscaler config + [runners.autoscaler] + plugin = "fleeting-plugin-aws" + + capacity_per_instance = ${runners_capacity_per_instance} + max_use_count = ${runners_max_use_count} + max_instances = ${runners_max_instances} + + [runners.autoscaler.plugin_config] # plugin specific configuration (see plugin documentation) + name = "${docker_autoscaling_name}" # AWS Autoscaling Group name + + [runners.autoscaler.connector_config] + username = "${connector_config_user}" + use_external_addr = false + +%{~ for config in runners_autoscaling ~} + [[runners.autoscaler.policy]] + %{~ for key, value in config ~} + ${key} = ${value} + %{~ endfor ~} +%{~ endfor ~} + diff --git a/template/runner-docker-machine-config.tftpl b/template/runner-docker-machine-config.tftpl new file mode 100644 index 000000000..a13487e91 --- /dev/null +++ b/template/runner-docker-machine-config.tftpl @@ -0,0 +1,37 @@ +[runners.machine] + IdleCount = ${runners_idle_count} + IdleTime = ${runners_idle_time} + 
${runners_max_builds} + MachineDriver = "amazonec2" + MachineName = "${docker_machine_name}" + MachineOptions = [ + %{~ for instance_type in runners_instance_types ~} + "amazonec2-instance-type=${instance_type}", + %{~ endfor ~} + "amazonec2-region=${aws_region}", + "amazonec2-zone=${runners_aws_zone}", + "amazonec2-vpc-id=${runners_vpc_id}", + %{~ for subnet_id in runners_subnet_ids ~} + "amazonec2-subnet-id=${subnet_id}", + %{~ endfor ~} + "amazonec2-private-address-only=${runners_use_private_address_only}", + "amazonec2-use-private-address=${runners_use_private_address}", + "amazonec2-request-spot-instance=${runners_request_spot_instance}", + %{~ if runners_spot_price_bid != "" ~}"amazonec2-spot-price=${runners_spot_price_bid}",%{~ endif ~} + "amazonec2-security-group=${runners_security_group_name}", + "amazonec2-tags=${join(",", compact([runners_tags, "__PARENT_TAG__"]))}", + "amazonec2-use-ebs-optimized-instance=${runners_ebs_optimized}", + "amazonec2-monitoring=${runners_monitoring}", + "amazonec2-iam-instance-profile=%{ if runners_iam_instance_profile_name != "" }${runners_iam_instance_profile_name}%{ else }${runners_instance_profile}%{ endif ~}", + "amazonec2-root-size=${runners_root_size}", + "amazonec2-volume-type=${runners_volume_type}", + "amazonec2-userdata=%{~ if runners_userdata != "" ~}/etc/gitlab-runner/runners_userdata.sh%{~ endif ~}", + "amazonec2-ami=${runners_ami}" + %{~ if use_fleet == true ~} + ,"amazonec2-ssh-keypath=/root/.ssh/id_rsa", + "amazonec2-use-fleet=${use_fleet}", + "amazonec2-launch-template=${launch_template}" + %{~ endif ~} + ${docker_machine_options} + ] + MaxGrowthRate = ${runners_max_growth_rate} diff --git a/variables.tf b/variables.tf index 1b5fc8e84..082ad0627 100644 --- a/variables.tf +++ b/variables.tf @@ -392,7 +392,7 @@ variable "runner_worker" { output_limit = Sets the maximum build log size in kilobytes. Default is 4MB (output_limit). request_concurrency = Limit number of concurrent requests for new jobs from GitLab (default 1) (request_concurrency). ssm_access = Allows to connect to the Runner Worker via SSM. - type = The Runner Worker type to use. Currently supports `docker+machine` or `docker`. + type = The Runner Worker type to use. Currently supports `docker+machine` or `docker` or `docker-autoscaler`. EOT type = object({ environment_variables = optional(list(string), []) @@ -405,7 +405,7 @@ variable "runner_worker" { default = {} validation { - condition = contains(["docker+machine", "docker"], var.runner_worker.type) + condition = contains(["docker+machine", "docker", "docker-autoscaler"], var.runner_worker.type) error_message = "The executor currently supports `docker+machine` and `docker`." } } @@ -595,6 +595,98 @@ variable "runner_worker_docker_machine_fleet" { } } +variable "runner_worker_docker_autoscaler" { + description = <<-EOT + fleeting_plugin_version = The version of aws fleeting plugin + connector_config_user = User to connect to worker machine + key_pair_name = The name of the key pair used by the Runner to connect to the docker-machine Runner Workers. This variable is only supported when `enables` is set to `true`. 
+ max_use_count = Max job number that can run on a worker + EOT + type = object({ + fleeting_plugin_version = optional(string, "1.0.0") + connector_config_user = optional(string, "ec2-user") + key_pair_name = optional(string, "runner-worker-key") + max_use_count = optional(number, 100) + }) + default = {} +} + +variable "runner_worker_docker_autoscaler_instance" { + description = <<-EOT + ebs_optimized = Enable EBS optimization for the Runner Worker. + http_tokens = Whether or not the metadata service requires session tokens + http_put_response_hop_limit = The desired HTTP PUT response hop limit for instance metadata requests. The larger the number, the further instance metadata requests can travel. + monitoring = Enable detailed monitoring for the Runner Worker. + private_address_only = Restrict Runner Worker to the use of a private IP address. If `runner_instance.use_private_address_only` is set to `true` (default), + root_device_name = The name of the root volume for the Runner Worker. + root_size = The size of the root volume for the Runner Worker. + start_script = Cloud-init user data that will be passed to the Runner Worker. Should not be base64 encrypted. + volume_type = The type of volume to use for the Runner Worker. `gp2`, `gp3`, `io1` or `io2` are supported + volume_iops = Guaranteed IOPS for the volume. Only supported when using `gp3`, `io1` or `io2` as `volume_type`. + volume_throughput = Throughput in MB/s for the volume. Only supported when using `gp3` as `volume_type`. +EOT + + type = object({ + ebs_optimized = optional(bool, true) + http_tokens = optional(string, "required") + http_put_response_hop_limit = optional(number, 2) + monitoring = optional(bool, false) + private_address_only = optional(bool, true) + root_device_name = optional(string, "/dev/sda1") + root_size = optional(number, 8) + start_script = optional(string, "") + volume_type = optional(string, "gp2") + volume_throughput = optional(number, 125) + volume_iops = optional(number, 3000) + }) + default = {} +} + +variable "runner_worker_docker_autoscaler_asg" { + description = <<-EOT + enable_mixed_instances_policy = Make use of autoscaling-group mixed_instances_policy capacities to leverage pools and spot instances. + health_check_grace_period = Time (in seconds) after instance comes into service before checking health + health_check_type = Controls how health checking is done. Values are - EC2 and ELB + instance_refresh_min_healthy_percentage = The amount of capacity in the Auto Scaling group that must remain healthy during an instance refresh to allow the operation to continue, as a percentage of the desired capacity of the Auto Scaling group. + instance_refresh_triggers = Set of additional property names that will trigger an Instance Refresh. A refresh will always be triggered by a change in any of launch_configuration, launch_template, or mixed_instances_policy. + max_growth_rate = The maximum number of machines that can be added to the runner in parallel. + on_demand_base_capacity = Absolute minimum amount of desired capacity that must be fulfilled by on-demand instances. + on_demand_percentage_above_base_capacity = Percentage split between on-demand and Spot instances above the base on-demand capacity. + override_instance_types = List to override the instance type in the Launch Template. Allow to spread spot instances on several types, to reduce interruptions + profile_name = profile_name = Name of the IAM profile to attach to the Runner Workers. 
+ sg_ingresses = Extra security group rule for workers + spot_allocation_strategy = How to allocate capacity across the Spot pools. 'lowest-price' to optimize cost, 'capacity-optimized' to reduce interruptions + spot_instance_pools = Number of Spot pools per availability zone to allocate capacity. EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify. + subnet_ids = The list of subnet IDs to use for the Runner Worker when the fleet mode is enabled. + types = The type of instance to use for the Runner Worker. In case of fleet mode, multiple instance types are supported. + upgrade_strategy = Auto deploy new instances when launch template changes. Can be either 'bluegreen', 'rolling' or 'off' + EOT + type = object({ + enable_mixed_instances_policy = optional(bool, false) + health_check_grace_period = optional(number, 300) + health_check_type = optional(string, "EC2") + instance_refresh_min_healthy_percentage = optional(number, 90) + instance_refresh_triggers = optional(list(string), []) + max_growth_rate = optional(number, 0) + on_demand_base_capacity = optional(number, 0) + on_demand_percentage_above_base_capacity = optional(number, 100) + profile_name = optional(string, "") + spot_allocation_strategy = optional(string, "lowest-price") + spot_instance_pools = optional(number, 2) + subnet_ids = optional(list(string), []) + types = optional(list(string), ["m5.large"]) + upgrade_strategy = optional(string, "rolling") + sg_ingresses = optional(list(object({ + description = string + from_port = number + to_port = number + protocol = string + cidr_blocks = list(string) + })), []) + }) + default = {} +} + variable "runner_worker_docker_machine_role" { description = <<-EOT additional_tags = Map of tags that will be added to the Runner Worker. @@ -611,6 +703,22 @@ variable "runner_worker_docker_machine_role" { default = {} } +variable "runner_worker_docker_autoscaler_role" { + description = <<-EOT + additional_tags = Map of tags that will be added to the Runner Worker. + assume_role_policy_json = Assume role policy for the Runner Worker. + policy_arns = List of ARNs of IAM policies to attach to the Runner Workers. + profile_name = Name of the IAM profile to attach to the Runner Workers. + EOT + type = object({ + additional_tags = optional(map(string), {}) + assume_role_policy_json = optional(string, "") + policy_arns = optional(list(string), []) + profile_name = optional(string, "") + }) + default = {} +} + variable "runner_worker_docker_machine_extra_egress_rules" { description = "List of egress rules for the Runner Workers." type = list(object({ @@ -654,6 +762,15 @@ variable "runner_worker_docker_machine_ami_filter" { } } +variable "runner_worker_docker_autoscaler_ami_filter" { + description = "List of maps used to create the AMI filter for the Runner Worker." + type = map(list(string)) + + default = { + name = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"] + } +} + variable "runner_worker_docker_machine_ami_owners" { description = "The list of owners used to select the AMI of the Runner Worker." type = list(string) @@ -662,6 +779,14 @@ variable "runner_worker_docker_machine_ami_owners" { default = ["099720109477"] } +variable "runner_worker_docker_autoscaler_ami_owners" { + description = "The list of owners used to select the AMI of the Runner Worker." 
+ type = list(string) + + # Canonical + default = ["099720109477"] +} + variable "runner_worker_docker_machine_instance" { description = <<-EOT For detailed documentation check https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runnersmachine-section @@ -765,6 +890,19 @@ variable "runner_worker_docker_machine_autoscaling_options" { default = [] } +variable "runner_worker_docker_autoscaler_autoscaling_options" { + description = "Set autoscaling parameters based on periods, see https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runnersautoscalerpolicy-sections" + type = list(object({ + periods = list(string) + timezone = optional(string, "UTC") + idle_count = optional(number) + idle_time = optional(string) + scale_factor = optional(number) + scale_factor_limit = optional(number, 0) + })) + default = [] +} + variable "debug" { description = <<-EOT trace_runner_user_data: Enable bash trace for the user data script on the Agent. Be aware this could log sensitive data such as you GitLab runner token.