Skip to content

Commit

Permalink
feat(packages): Clean older registry packages
Browse files Browse the repository at this point in the history
To avoid exceed the limit of package storage for the organization on
the defined registry, all packages not tagged as latest or version will
be removed to the minimum limit defined.

Default is allow last 3 intermediary packages (snapshots) remains on
registry.

Signed-off-by: Helio Chissini de Castro <[email protected]>
  • Loading branch information
heliocastro committed Feb 8, 2024
1 parent 35bcb54 commit 586763b
Show file tree
Hide file tree
Showing 5 changed files with 382 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .github/actions/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"ruff.format.args": [
"--verbose"
],
"python.analysis.typeCheckingMode": "basic"
}
65 changes: 65 additions & 0 deletions .github/actions/clean_up_package_registry/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE

name: "Delete old non-release packages from Github package registry"
description: "Delete older packages set by a minimal level input"
author: "The ORT Project Authors"

inputs:
registry:
description: "Github container registry"
default: "ghcr.io"
token:
description: "Github token"
required: true
keep:
description: "Number of non-release packages to keep"
required: false
default: "3"
packages:
description: "Name of the packages to be cleaned up"
required: true
dry-run:
description: "Execute a dry run operation to check the execution is correct"
default: "true"
ignore-skip-tagged:
description: "DANGEROUS: Clean up even the packages marked to skip"
default: "false"

runs:
using: "composite"

steps:
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
cache: "pip"

- name: Execute the operation
id: check_image
shell: bash
env:
INPUT_REGISTRY: ${{ inputs.registry }}
INPUT_TOKEN: ${{ inputs.token }}
INPUT_KEEP: ${{ inputs.keep }}
INPUT_PACKAGES: ${{ inputs.packages }}
INPUT_DRY_RUN: ${{ inputs.dry-run}}
INPUT_IGNORE_SKIP_TAGGED: ${{ inputs.ignore-skip-tagged }}
run: |
pip install -q -U pip requests rich
python ./.github/actions/clean_up_package_registry/clean_up_package_registry.py
264 changes: 264 additions & 0 deletions .github/actions/clean_up_package_registry/clean_up_package_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
# Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE


import os
import re
import sys
from time import sleep
from typing import Any
from urllib.parse import parse_qs, urlparse

import requests
from requests.structures import CaseInsensitiveDict
from rich import print

""" Use current Github API to list packages
in registry and remove all but last 3 or custom
set number of packages.
Reference: https://docs.github.com/en/rest/packages/packages?apiVersion=2022-11-28#about-github-packages
"""

dry_run: bool = False if os.getenv("INPUT_DRY_RUN") == "false" else True
input_keep: str | None = os.getenv("INPUT_KEEP")
org = os.getenv("GITHUB_REPOSITORY_OWNER")
input_packages: str | None = os.getenv("INPUT_PACKAGES")
token = os.getenv("INPUT_TOKEN")
ignore_skip: bool = True if os.getenv("INPUT_IGNORE_SKIP_TAGGED") == "true" else False

if not input_packages:
print(":cross_mark: No packages input.")
sys.exit(1)

packages = input_packages.split(",")
keep: int = int(input_keep) if input_keep else 0

headers = {
"Accept": "application/vnd.github+json",
"Authorization": f"Bearer {token}",
"X-GitHub-Api-Version": "2022-11-28",
}

# Assembly organization packages url string
pkg_url: str = f"https://api.github.com/orgs/{org}/packages"

# List of packages that will be deleted
urls_to_be_deleted: list = []

# Exclusion image list
exclusion_list: list = []


def get_last_page(headers: CaseInsensitiveDict[str]) -> int:
"""
Get the last page number from the headers.
Args:
headers (CaseInsensitiveDict[str]): The headers containing the link information.
Returns:
int: The last page number.
"""
if "link" not in headers:
return 1

links = headers["link"].split(", ")

last_page = None
for link in links:
if 'rel="last"' in link:
last_page = link
break

if last_page:
parsed_url = urlparse(
last_page[last_page.index("<") + 1 : last_page.index(">")]
)
return int(parse_qs(parsed_url.query)["page"][0])

return 1


def get_package_layers(package: str, tag: str) -> None:
url = f"https://ghcr.io/v2/{org}/{package}/manifests/{tag}"

# Get ghcr temprary token
ghcr_headers = {"Authorization": f"Bearer {token}"}
auth_response = requests.get(
f"https://ghcr.io/token?service=ghcr.io&scope=repository:{org}/ort:pull",
headers=ghcr_headers,
)
if auth_response.status_code == 200:
access_token = auth_response.json()["token"]

ghcr_headers = {
"Authorization": f"Bearer {access_token}",
"Accept": "application/vnd.oci.image.index.v1+json",
}
url = f"https://ghcr.io/v2/{org}/{package}/manifests/{tag}"
if "DEBUG" in os.environ:
print(url)
response = requests.get(url, headers=ghcr_headers)

main_manifest: dict[str, Any] = {}
if response.status_code == 200:
main_manifest: dict[str, Any] = response.json()
else:
print(f"Failed to get manifest: {response.status_code}, {response.text}")

for manifest in main_manifest["manifests"]:
if "platform" in manifest and manifest["platform"]["architecture"] in [
"amd64",
"arm64",
]:
ghcr_headers["Accept"] = "application/vnd.oci.image.manifest.v1+json"
url = (
f"https://ghcr.io/v2/{org}/{package}/manifests/{manifest['digest']}"
)
response = requests.get(url, headers=ghcr_headers)
if "DEBUG" in os.environ:
from rich.pretty import pprint

pprint(response.json())
layer_manifest = response.json()
if 'layers' in layer_manifest:
for layer in layer_manifest['layers']:
exclusion_list.append(layer['digest'])
print(f":locked: Added digest to exclusion list {layer['digest']}")



def delete_packages():
"""
Deletes packages from the package registry.
This function iterates over the packages and deletes them from the package registry.
It retrieves the versions of each package, sorts them by ID, and deletes the excess versions
based on the specified 'keep' value. It also skips deleting the latest or non-snapshot tagged images.
The function prints the status of each deletion operation and the total number of packages deleted.
Args:
None
Returns:
None
"""
# Number of packages deleted
packages_deleted: int = 0

for package in packages:
# Start page is 1 as stated by documentation
url = f"{pkg_url}/container/{package.replace('/', '%2F')}/versions?page=1&per_page=50"

# Get the header
response = requests.head(url, headers=headers)
pages: int = get_last_page(response.headers)

for page in range(pages, 0, -1):
print(f"Page: {page}")
url = f"{pkg_url}/container/{package.replace('/', '%2F')}/versions?page={page}&per_page=50"

try:
response = requests.get(url, headers=headers)
except requests.exceptions.RequestException as e:
print(f":cross_mark: Connection Error. {e}")
sys.exit(1)

if response.status_code == 404:
print(f":cross_mark: Not found - {url}")
continue
elif response.status_code == 401:
print(f":cross_mark: Requires authentication - {url}")
sys.exit(1)
elif response.status_code == 403:
print(f":cross_mark: Forbidden - {url}")
sys.exit(1)

# Sort all images on id.
images = sorted(response.json(), key=lambda x: x["id"], reverse=True)

# Slice and remove all
if len(images) > keep:
for image in images if page != 1 else images[keep + 1 :]:
url = f"{pkg_url}/container/{package.replace('/', '%2F')}/versions/{image['id']}"

# Never remove latest or non snapshot tagged images
if restrict_delete_tags(image["metadata"]["container"]["tags"]):
print(
f":package: Skip tagged {package} id {image['id']} tags {image['metadata']['container']['tags']}"
)
# Mark sublayers to not be deleted
get_package_layers(
package, image["metadata"]["container"]["tags"][0]
)
continue
urls_to_be_deleted.append(url)
tags = image["metadata"]["container"]["tags"]

if tags:
print(
f":white_heavy_check_mark: Deleted tagged package {package} version id {image['id']}"
f" with tags {tags}."
)
else:
print(
f":white_heavy_check_mark: Deleted untagged package {package} version id {image['id']}"
)
# Make a slow operation to avoid rate limit
sleep(1)

# Effectively delete the packages
if not dry_run:
for url in urls_to_be_deleted:
response = requests.delete(url, headers=headers)
if response.status_code == 404:
print(f":cross_mark: Failed to delete package {url}.")
continue
elif response.status_code == 401:
print(f":cross_mark: Requires authentication - {url}")
sys.exit(1)
elif response.status_code == 403:
print(f":cross_mark: Forbidden - {url}")
sys.exit(1)

packages_deleted = packages_deleted + 1
# Make a slow operation to avoid rate limit
sleep(1)

print(f":package: Deleted {packages_deleted} packages in the organization.")


def restrict_delete_tags(tags: list) -> bool:
if not tags:
return False
for tag in tags:
if tag == "latest":
return True
elif ".sha." in tag:
return False
elif "SNAPSHOT" in tag:
return False
else:
pattern = re.compile(r"^\d+\.\d+\.\d+$")
if pattern.match(tag):
return True
return False


if __name__ == "__main__":
delete_packages()
9 changes: 9 additions & 0 deletions .github/actions/clean_up_package_registry/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
certifi==2023.7.22
charset-normalizer==3.3.2
idna==3.4
markdown-it-py==3.0.0
mdurl==0.1.2
Pygments==2.16.1
requests==2.31.0
rich==13.6.0
urllib3==2.1.0
38 changes: 38 additions & 0 deletions .github/workflows/clean_up_package_registry.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE

name: Clean up packages in Github package registry

on:
workflow_dispatch:
pull_request:
# Runs always Sunday Midnight
# schedule:
# - cron: "0 0 * * 0"

jobs:
clean_all:
name: Clean up package registry
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/clean_up_package_registry
with:
token: ${{ secrets.GITHUB_TOKEN }}
dry-run: "false"
ignore-skip-tagged: "true"
packages: "ort-extended"

0 comments on commit 586763b

Please sign in to comment.