diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index d3b075936..a964b50fa 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -1,6 +1,99 @@ +import datetime +import typing from pathlib import Path __version__ = "2023.10.1" CONDA_STORE_DIR = Path.home() / ".conda-store" + + +class BuildKey: + """ + Used to configure the build key format, which identifies a particular + environment build + + Avoids a cyclic dependency between the `orm` module and the module defining + `CondaStore.build_key_version`. Because the `orm` module is loaded early on + startup, we want to delay initialization of the `Build.build_key_version` + field until `CondaStore.build_key_version` has been read from the config. + + Because the build key version needs to be the same for the entire + application, this class implements the singleton pattern. Users are expected + to use class methods instead of creating class instances. All implementation + details are hidden within the class, preventing potential issues caused by + cyclic imports. + """ + + # Default version, must be None here. Initialized in CondaStore.build_key_version + _current_version = None + + _version2_hash_size = 8 + + def _version1_fmt(build: "Build") -> str: # noqa: F821 + datetime_format = "%Y%m%d-%H%M%S-%f" + hash = build.specification.sha256 + timestamp = build.scheduled_on.strftime(datetime_format) + id = build.id + name = build.specification.name + return f"{hash}-{timestamp}-{id}-{name}" + + def _version2_fmt(build: "Build") -> str: # noqa: F821 + tzinfo = datetime.timezone.utc + hash = build.specification.sha256[: BuildKey._version2_hash_size] + timestamp = int(build.scheduled_on.replace(tzinfo=tzinfo).timestamp()) + id = build.id + name = build.specification.name + return f"{hash}-{timestamp}-{id}-{name}" + + # version -> fmt function + _fmt = { + 1: _version1_fmt, + 2: _version2_fmt, + } + + @classmethod + def _check_version(cls, build_key_version: int): + if build_key_version not in cls.versions(): + raise ValueError( + f"invalid build key version: {build_key_version}, " + f"expected: {cls.versions()}" + ) + + @classmethod + def set_current_version(cls, build_key_version: int) -> int: + """Sets provided build key version as current and returns it""" + cls._check_version(build_key_version) + cls._current_version = build_key_version + return build_key_version + + @classmethod + def current_version(cls) -> int: + """Returns currently selected build key version""" + # None means the value is not set, likely due to an import error + assert cls._current_version is not None + return cls._current_version + + @classmethod + def versions(cls) -> typing.Tuple[int]: + """Returns available build key versions""" + return tuple(cls._fmt.keys()) + + @classmethod + def get_build_key(cls, build: "Build") -> str: # noqa: F821 + """Returns build key for this build""" + cls._check_version(build.build_key_version) + return cls._fmt.get(build.build_key_version)(build) + + @classmethod + def parse_build_key(cls, build_key: str) -> int: + """Returns build id from build key""" + parts = build_key.split("-") + # Note: cannot rely on the number of dashes to differentiate between + # versions because name can contain dashes. Instead, this relies on the + # hash size to infer the format. The name is the last field, so indexing + # to find the id is okay. + if build_key[cls._version2_hash_size] == "-": # v2 + return int(parts[2]) # build_id + else: # v1 + return int(parts[4]) # build_id diff --git a/conda-store-server/conda_store_server/alembic/versions/30b37e725c32_add_build_key_version.py b/conda-store-server/conda_store_server/alembic/versions/30b37e725c32_add_build_key_version.py new file mode 100644 index 000000000..0313a95f0 --- /dev/null +++ b/conda-store-server/conda_store_server/alembic/versions/30b37e725c32_add_build_key_version.py @@ -0,0 +1,28 @@ +"""add build_key_version + +Revision ID: 30b37e725c32 +Revises: d78e9889566a +Create Date: 2023-11-17 14:34:40.688865 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "30b37e725c32" +down_revision = "d78e9889566a" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column( + "build", + sa.Column( + "build_key_version", sa.Integer(), nullable=False, server_default="1" + ), + ) + + +def downgrade(): + op.drop_column("build", "build_key_version") diff --git a/conda-store-server/conda_store_server/app.py b/conda-store-server/conda_store_server/app.py index a3aa62564..72edef33d 100644 --- a/conda-store-server/conda_store_server/app.py +++ b/conda-store-server/conda_store_server/app.py @@ -8,6 +8,7 @@ from celery import Celery, group from conda_store_server import ( CONDA_STORE_DIR, + BuildKey, api, conda_utils, environment, @@ -106,6 +107,19 @@ class CondaStore(LoggingConfigurable): config=True, ) + build_key_version = Integer( + BuildKey.set_current_version(2), + help="Build key version to use: 1 (long, legacy), 2 (short, default)", + config=True, + ) + + @validate("build_key_version") + def _check_build_key_version(self, proposal): + try: + return BuildKey.set_current_version(proposal.value) + except Exception as e: + raise TraitError(f"c.CondaStore.build_key_version: {e}") + conda_command = Unicode( "mamba", help="conda executable to use for solves", @@ -364,6 +378,7 @@ def session_factory(self): url=self.database_url, poolclass=QueuePool, ) + return self._session_factory # Do not define this as a FastAPI dependency! That would cause Sessions diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 557451c5c..8ab5d1a20 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -183,6 +183,22 @@ class Build(Base): ended_on = Column(DateTime, default=None) deleted_on = Column(DateTime, default=None) + @staticmethod + def _get_build_key_version(): + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + return BuildKey.current_version() + + build_key_version = Column(Integer, default=_get_build_key_version, nullable=False) + + @validates("build_key_version") + def validate_build_key_version(self, key, build_key_version): + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + return BuildKey.set_current_version(build_key_version) + build_artifacts = relationship( "BuildArtifact", back_populates="build", cascade="all, delete-orphan" ) @@ -234,15 +250,17 @@ def build_key(self): The build key should be a key that allows for the environment build to be easily identified and found in the database. """ - datetime_format = "%Y%m%d-%H%M%S-%f" - return f"{self.specification.sha256}-{self.scheduled_on.strftime(datetime_format)}-{self.id}-{self.specification.name}" + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + return BuildKey.get_build_key(self) @staticmethod def parse_build_key(key): - parts = key.split("-") - if len(parts) < 5: - return None - return int(parts[4]) # build_id + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + return BuildKey.parse_build_key(key) @property def log_key(self): diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 288808fcf..e3a455489 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -1,13 +1,24 @@ import asyncio +import datetime import pathlib import re import sys import pytest -from conda_store_server import action, api, conda_utils, orm, schema, server, utils +from conda_store_server import ( + BuildKey, + action, + api, + conda_utils, + orm, + schema, + server, + utils, +) from conda_store_server.server.auth import DummyAuthentication from fastapi import Request from fastapi.responses import RedirectResponse +from traitlets import TraitError def test_action_decorator(): @@ -226,17 +237,32 @@ def test_add_lockfile_packages( @pytest.mark.parametrize( - "is_legacy_build", + "is_legacy_build, build_key_version", [ - False, - True, + (False, 0), # invalid + (False, 1), # long (legacy) + (False, 2), # short (default) + (True, 1), # build_key_version doesn't matter because there's no lockfile ], ) def test_api_get_build_lockfile( - request, conda_store, db, simple_specification_with_pip, conda_prefix, is_legacy_build + request, conda_store, db, simple_specification_with_pip, conda_prefix, is_legacy_build, build_key_version ): + # sets build_key_version + if build_key_version == 0: # invalid + with pytest.raises(TraitError, match=( + r"c.CondaStore.build_key_version: invalid build key version: 0, " + r"expected: \(1, 2\)" + )): + conda_store.build_key_version = build_key_version + return # invalid, nothing more to test + conda_store.build_key_version = build_key_version + assert BuildKey.current_version() == build_key_version + assert BuildKey.versions() == (1, 2) + # initializes data needed to get the lockfile specification = simple_specification_with_pip + specification.name = "this-is-a-long-environment-name" namespace = "pytest" class MyAuthentication(DummyAuthentication): @@ -252,6 +278,11 @@ def authorize_request(self, *args, **kwargs): ) db.commit() build = api.get_build(db, build_id=build_id) + # makes this more visible in the lockfile + build_id = 12345678 + build.id = build_id + # makes sure the timestamp in build_key is always the same + build.scheduled_on = datetime.datetime(2023, 11, 5, 3, 54, 10, 510258) environment = api.get_environment(db, namespace=namespace) # adds packages (returned in the lockfile) @@ -295,12 +326,23 @@ def authorize_request(self, *args, **kwargs): assert re.match("http.*//.*tar.bz2#.*", lines[2]) is not None else: # new build: redirects to lockfile generated by conda-lock - lockfile_url_pattern = ( - "lockfile/" - "89e5a99aa094689b7aafc66c47987fa186e08f9d619a02ab1a469d0759da3b8b-" - ".*-test.yml" - ) + def lockfile_url(build_key): + return f"lockfile/{build_key}.yml" + if build_key_version == 1: + build_key = ( + "c7afdeffbe2bda7d16ca69beecc8bebeb29280a95d4f3ed92849e4047710923b-" + "20231105-035410-510258-12345678-this-is-a-long-environment-name" + ) + elif build_key_version == 2: + build_key = "c7afdeff-1699156450-12345678-this-is-a-long-environment-name" + else: + raise ValueError(f"unexpected build_key_version: {build_key_version}") assert type(res) is RedirectResponse assert key == res.headers['location'] - assert re.match(lockfile_url_pattern, res.headers['location']) is not None + assert build.build_key == build_key + assert BuildKey.get_build_key(build) == build_key + assert build.parse_build_key(build_key) == 12345678 + assert BuildKey.parse_build_key(build_key) == 12345678 + assert lockfile_url(build_key) == build.conda_lock_key + assert lockfile_url(build_key) == res.headers['location'] assert res.status_code == 307 diff --git a/docs/administration.md b/docs/administration.md index 427adc26b..cb520a72d 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -96,6 +96,9 @@ for symlinking Conda environment builds. Available keys: store_directory, namespace, name. The default will put all environments in the same namespace within the same directory. +`CondaStore.build_key_version` is the [build key version](#build-key-versions) +to use: 1 (long, legacy), 2 (short, default). + `CondaStore.validate_specification` callable function taking `conda_store` and `specification` as input arguments to apply for validating and modifying a given specification. If there are @@ -571,6 +574,61 @@ affect both the `build_path` length and the filename length. [relocatable]: https://docs.conda.io/projects/conda-build/en/latest/resources/make-relocatable.html +### Build key versions + +The part of the build path that identifies a particular environment build is the +build key. Originally, conda-store used the following format, known as version +1: + +``` +c7afdeffbe2bda7d16ca69beecc8bebeb29280a95d4f3ed92849e4047710923b-20231105-035410-510258-12345678-this-is-a-long-environment-name +^ (1) ^ (2) ^ (3) ^ (4) +``` + +It consists of: +1. a SHA-256 hash of the environment specification + (`CondaSpecification`, which represents a user-provided environment, is + converted to a dict and passed to `datastructure_hash`, which recursively sorts + it and calculates the SHA-256 hash) +2. a human-readable timestamp (year, month, day, `-`, hour, minute, second, `-`, microsecond) +3. the id of a build +4. the environment name. + +To help mitigate build path length issues, a shorter build key format was +introduced, known as version 2: + +``` +c7afdeff-1699156450-12345678-this-is-a-long-environment-name +^ (1) ^ (2) ^ (3) ^ (4) +``` + +It consists of: +1. a truncated SHA-256 hash of the environment specification + (`CondaSpecification`, which represents a user-provided environment, is + converted to a dict and passed to `datastructure_hash`, which recursively sorts + it and calculates the SHA-256 hash) +2. a Unix timestamp +3. the id of a build +4. the environment name. + +The version 2 format is now the default. Environments created using the version +1 format will continue to be accessible in the UI, but new builds will use the +version 2 format. No changes are needed for existing deployments of conda-store. + +There is no real reason to use the version 1 format anymore, but it can be +explicitly set via the config: + +``` +c.CondaStore.build_key_version = 1 +``` + +The version 2 format can also be explicitly set if needed (this is the same as +the default): + +``` +c.CondaStore.build_key_version = 2 +``` + ### Long paths on Windows conda-store supports Windows in standalone mode. However, when creating