Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use shorter build_key #652

Merged
merged 12 commits into from
Nov 28, 2023
81 changes: 79 additions & 2 deletions conda-store-server/conda_store_server/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
from pathlib import Path

__version__ = "2023.10.1"
Expand All @@ -6,5 +7,81 @@
CONDA_STORE_DIR = Path.home() / ".conda-store"


# Default build_key_version. Must be None here, initialized from the config file
_BUILD_KEY_VERSION = None
class BuildKey:
jaimergp marked this conversation as resolved.
Show resolved Hide resolved
# Avoids a cyclic dependency between the orm module and the module defining
# CondaStore.build_key_version. Because the orm module is loaded early on
# startup, we want to delay initialization of the Build.build_key_version
# field until CondaStore.build_key_version has been read from the config.

# Default version, must be None here. Initialized in CondaStore.build_key_version
_current_version = None

_version2_hash_size = 8

def _version1_fmt(build: "Build"): # noqa: F821
datetime_format = "%Y%m%d-%H%M%S-%f"
hash = build.specification.sha256
timestamp = build.scheduled_on.strftime(datetime_format)
id = build.id
name = build.specification.name
return f"{hash}-{timestamp}-{id}-{name}"

def _version2_fmt(build: "Build"): # noqa: F821
tzinfo = datetime.timezone.utc
hash = build.specification.sha256[: BuildKey._version2_hash_size]
timestamp = int(build.scheduled_on.replace(tzinfo=tzinfo).timestamp())
id = build.id
name = build.specification.name
return f"{hash}-{timestamp}-{id}-{name}"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: without tzinfo, this would vary between machines because timestamp uses the system clock by default, which might be not in UTC.


# version -> fmt function
_fmt = {
1: _version1_fmt,
2: _version2_fmt,
}

@classmethod
def _check_version(cls, build_key_version):
nkaretnikov marked this conversation as resolved.
Show resolved Hide resolved
if build_key_version not in cls.versions():
raise ValueError(
f"invalid build key version: {build_key_version}, "
f"expected: {cls.versions()}"
)

@classmethod
def set_current_version(cls, build_key_version: int):
"""Sets provided build key version as current and returns it"""
cls._check_version(build_key_version)
cls._current_version = build_key_version
return build_key_version

@classmethod
def current_version(cls):
"""Returns currently selected build key version"""
# None means the value is not set, likely due to an import error
assert cls._current_version is not None
return cls._current_version

@classmethod
def versions(cls):
"""Returns available build key versions"""
return tuple(cls._fmt.keys())

@classmethod
def get_build_key(cls, build: "Build"): # noqa: F821
"""Returns build key for this build"""
cls._check_version(build.build_key_version)
return cls._fmt.get(build.build_key_version)(build)

@classmethod
def parse_build_key(cls, build_key: str):
"""Returns build id from build key"""
parts = build_key.split("-")
# Note: cannot rely on the number of dashes to differentiate between
# versions because name can contain dashes. Instead, this relies on the
# hash size to infer the format. The name is the last field, so indexing
# to find the id is okay.
if build_key[cls._version2_hash_size] == "-": # v2
return int(parts[2]) # build_id
else: # v1
return int(parts[4]) # build_id
18 changes: 6 additions & 12 deletions conda-store-server/conda_store_server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from celery import Celery, group
from conda_store_server import (
CONDA_STORE_DIR,
BuildKey,
api,
conda_utils,
environment,
Expand Down Expand Up @@ -107,20 +108,17 @@ class CondaStore(LoggingConfigurable):
)

build_key_version = Integer(
2,
BuildKey.set_current_version(2),
help="Build key version to use: 1 (long, legacy), 2 (short, default)",
config=True,
)

@validate("build_key_version")
def _check_build_key_version(self, proposal):
expected = [1, 2]
if proposal.value not in expected:
raise TraitError(
f"c.CondaStore.build_key_version: invalid build key version: "
f"{proposal.value}, expected: {expected}"
)
return proposal.value
try:
return BuildKey.set_current_version(proposal.value)
except Exception as e:
raise TraitError(f"c.CondaStore.build_key_version: {e}")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current version is now set right after build_key_version is processed, which is better than before, when it was done only when creating a DB session.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it better to use raise X from e here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, yes. But it doesn't work with traitlets. E.g.,:

            raise TraitError("c.CondaStore.build_key_version") from e

would print

[CondaStoreServer] CRITICAL | Bad config encountered during initialization: c.CondaStore.build_key_version

No traceback is included. No additional information is printed.


conda_command = Unicode(
"mamba",
Expand Down Expand Up @@ -381,10 +379,6 @@ def session_factory(self):
poolclass=QueuePool,
)

# Sets the default build_key_version value in the DB based on the config
import conda_store_server

conda_store_server._BUILD_KEY_VERSION = self.build_key_version
return self._session_factory

# Do not define this as a FastAPI dependency! That would cause Sessions
Expand Down
56 changes: 18 additions & 38 deletions conda-store-server/conda_store_server/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,6 @@ class Solve(Base):
)


_BUILD_KEY_V2_HASH_SIZE = 8


# Avoids a cyclic dependency between the orm module and the module defining
# CondaStore.build_key_version. Because the orm module is loaded early on
# startup, we want to delay initialization of the build_key_version field until
# it's been read from the config.
def _get_build_key_version():
from conda_store_server import _BUILD_KEY_VERSION

# None means the value is not set, likely due to an import error
assert _BUILD_KEY_VERSION is not None
return _BUILD_KEY_VERSION


class Build(Base):
"""The state of a build of a given specification"""

Expand Down Expand Up @@ -197,14 +182,21 @@ class Build(Base):
started_on = Column(DateTime, default=None)
ended_on = Column(DateTime, default=None)
deleted_on = Column(DateTime, default=None)

@staticmethod
def _get_build_key_version():
# Uses local import to make sure current version is initialized
from conda_store_server import BuildKey

return BuildKey.current_version()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All conda_store_server imports are local in this file to avoid a cyclic import problem mentioned in a BuildKey class comment.


build_key_version = Column(Integer, default=_get_build_key_version, nullable=False)

@validates("build_key_version")
def validate_build_key_version(self, key, build_key_version):
if build_key_version not in [1, 2]:
raise ValueError(f"invalid build_key_version={build_key_version}")
from conda_store_server import BuildKey

return build_key_version
return BuildKey.set_current_version(build_key_version)

build_artifacts = relationship(
"BuildArtifact", back_populates="build", cascade="all, delete-orphan"
Expand Down Expand Up @@ -257,29 +249,17 @@ def build_key(self):
The build key should be a key that allows for the environment
build to be easily identified and found in the database.
"""
if self.build_key_version == 1:
datetime_format = "%Y%m%d-%H%M%S-%f"
return f"{self.specification.sha256}-{self.scheduled_on.strftime(datetime_format)}-{self.id}-{self.specification.name}"
elif self.build_key_version == 2:
hash = self.specification.sha256[:_BUILD_KEY_V2_HASH_SIZE]
timestamp = int(self.scheduled_on.timestamp())
id = self.id
name = self.specification.name
return f"{hash}-{timestamp}-{id}-{name}"
else:
raise ValueError(f"invalid build key version: {self.build_key_version}")
# Uses local import to make sure BuildKey is initialized
from conda_store_server import BuildKey

return BuildKey.get_build_key(self)

@staticmethod
def parse_build_key(key):
parts = key.split("-")
# Note: cannot rely on the number of dashes to differentiate between
# versions because name can contain dashes. Instead, this relies on the
# hash size to infer the format. The name is the last field, so indexing
# to find the id is okay.
if key[_BUILD_KEY_V2_HASH_SIZE] == "-": # v2
return int(parts[2]) # build_id
else: # v1
return int(parts[4]) # build_id
# Uses local import to make sure BuildKey is initialized
from conda_store_server import BuildKey

return BuildKey.parse_build_key(key)

@property
def log_key(self):
Expand Down
31 changes: 25 additions & 6 deletions conda-store-server/tests/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,20 @@
import sys

import pytest
from conda_store_server import action, api, conda_utils, orm, schema, server, utils
from conda_store_server import (
BuildKey,
action,
api,
conda_utils,
orm,
schema,
server,
utils,
)
from conda_store_server.server.auth import DummyAuthentication
from fastapi import Request
from fastapi.responses import RedirectResponse
from traitlets import TraitError


def test_action_decorator():
Expand Down Expand Up @@ -238,6 +248,18 @@ def test_add_lockfile_packages(
def test_api_get_build_lockfile(
request, conda_store, db, simple_specification_with_pip, conda_prefix, is_legacy_build, build_key_version
):
# sets build_key_version
if build_key_version == 0: # invalid
with pytest.raises(TraitError, match=(
r"c.CondaStore.build_key_version: invalid build key version: 0, "
r"expected: \(1, 2\)"
)):
conda_store.build_key_version = build_key_version
return # invalid, nothing more to test
conda_store.build_key_version = build_key_version
assert BuildKey.current_version() == build_key_version
assert BuildKey.versions() == (1, 2)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This now initializes build_key_version via conda_store as one would in real life (via the config)


# initializes data needed to get the lockfile
specification = simple_specification_with_pip
specification.name = "this-is-a-long-environment-name"
Expand All @@ -259,11 +281,6 @@ def authorize_request(self, *args, **kwargs):
# makes this more visible in the lockfile
build_id = 12345678
build.id = build_id
if build_key_version == 0: # invalid
with pytest.raises(ValueError, match=r"invalid build_key_version=0"):
build.build_key_version = build_key_version
return # invalid, nothing more to test
build.build_key_version = build_key_version
# makes sure the timestamp in build_key is always the same
build.scheduled_on = datetime.datetime(2023, 11, 5, 3, 54, 10, 510258)
environment = api.get_environment(db, namespace=namespace)
Expand Down Expand Up @@ -323,7 +340,9 @@ def lockfile_url(build_key):
assert type(res) is RedirectResponse
assert key == res.headers['location']
assert build.build_key == build_key
assert BuildKey.get_build_key(build) == build_key
assert build.parse_build_key(build_key) == 12345678
assert BuildKey.parse_build_key(build_key) == 12345678
assert lockfile_url(build_key) == build.conda_lock_key
assert lockfile_url(build_key) == res.headers['location']
assert res.status_code == 307
Loading