Skip to content

Commit

Permalink
Merge pull request #747 from nkaretnikov/status-canceled-547
Browse files Browse the repository at this point in the history
Add status `CANCELED`
  • Loading branch information
Nikita Karetnikov authored Feb 29, 2024
2 parents bf36f67 + 61be69f commit f4a563b
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""add canceled status
Revision ID: 03c839888c82
Revises: 57cd11b949d5
Create Date: 2024-01-29 03:56:36.889909
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "03c839888c82"
down_revision = "57cd11b949d5"
branch_labels = None
depends_on = None


# Migrating from/to VARCHAR having the same length might look strange, but it
# serves a purpose. This will be a no-op in SQLite because it represents Python
# enums as VARCHAR, but it will convert the enum in PostgreSQL to VARCHAR. The
# old type is set to VARCHAR here because you can cast an enum to VARCHAR, which
# is needed for the migration to work. In the end, both DBs will use VARCHAR to
# represent the Python enum, which makes it easier to support both DBs at the
# same time.
def upgrade():
with op.batch_alter_table(
"build",
schema=None,
) as batch_op:
batch_op.alter_column(
"status",
existing_type=sa.VARCHAR(length=9),
type_=sa.VARCHAR(length=9),
existing_nullable=False,
)
if not str(op.get_bind().engine.url).startswith("sqlite"):
op.execute("DROP TYPE IF EXISTS buildstatus")


def downgrade():
# There are foreign key constraints linking build ids to other tables. So
# just mark the builds as failed, which was the status previously used for
# canceled builds
op.execute("UPDATE build SET status = 'FAILED' WHERE status = 'CANCELED'")
27 changes: 22 additions & 5 deletions conda-store-server/conda_store_server/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ def set_build_failed(
db.commit()


def set_build_canceled(
db: Session, build: orm.Build, status_info: typing.Optional[str] = None
):
build.status = schema.BuildStatus.CANCELED
build.status_info = status_info
build.ended_on = datetime.datetime.utcnow()
db.commit()


def set_build_completed(db: Session, conda_store, build: orm.Build):
build.status = schema.BuildStatus.COMPLETED
build.ended_on = datetime.datetime.utcnow()
Expand All @@ -65,17 +74,22 @@ def set_build_completed(db: Session, conda_store, build: orm.Build):


def build_cleanup(
db: Session, conda_store, build_ids: typing.List[str] = None, reason: str = None
db: Session,
conda_store,
build_ids: typing.List[str] = None,
reason: str = None,
is_canceled: bool = False,
):
"""Walk through all builds in BUILDING state and check that they are actively running
Build can get stuck in the building state due to worker
spontaineously dying due to memory errors, killing container, etc.
"""
status = "CANCELED" if is_canceled else "FAILED"
reason = (
reason
or """
Build marked as FAILED on cleanup due to being stuck in BUILDING state
or f"""
Build marked as {status} on cleanup due to being stuck in BUILDING state
and not present on workers. This happens for several reasons: build is
canceled, a worker crash from out of memory errors, worker was killed,
or error in conda-store
Expand Down Expand Up @@ -113,15 +127,18 @@ def build_cleanup(
)
):
conda_store.log.warning(
f"marking build {build.id} as FAILED since stuck in BUILDING state and not present on workers"
f"marking build {build.id} as {status} since stuck in BUILDING state and not present on workers"
)
append_to_logs(
db,
conda_store,
build,
reason,
)
set_build_failed(db, build)
if is_canceled:
set_build_canceled(db, build)
else:
set_build_failed(db, build)


def build_conda_environment(db: Session, conda_store, build):
Expand Down
1 change: 1 addition & 0 deletions conda-store-server/conda_store_server/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ class BuildStatus(enum.Enum):
BUILDING = "BUILDING"
COMPLETED = "COMPLETED"
FAILED = "FAILED"
CANCELED = "CANCELED"


class BuildArtifact(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ <h3 class="card-title">Conda Packages
</div>
{% endif %}

{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED'] %}
{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED', 'CANCELED'] %}
<div class="card my-2">
<div class="card-body">
<h3 class="card-title">Conda Environment Artifacts</h3>
Expand Down Expand Up @@ -89,7 +89,7 @@ <h3 class="card-title">Conda Environment Artifacts</h3>
</div>
{% endif %}

{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED'] %}
{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED', 'CANCELED'] %}
<div class="card my-2">
<div class="card-body">
<a href="{{ url_for('api_get_build_logs', build_id=build.id) }}" class="btn btn-primary btn-block">Full Logs</a>
Expand Down
3 changes: 2 additions & 1 deletion conda-store-server/conda_store_server/server/views/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,8 +986,9 @@ async def api_put_build_cancel(
tasks.task_cleanup_builds.si(
build_ids=[build_id],
reason=f"""
build {build_id} marked as FAILED due to being canceled from the REST API
build {build_id} marked as CANCELED due to being canceled from the REST API
""",
is_canceled=True,
).apply_async(countdown=5)

return {
Expand Down
9 changes: 7 additions & 2 deletions conda-store-server/conda_store_server/worker/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,15 @@ def task_update_storage_metrics(self):


@shared_task(base=WorkerTask, name="task_cleanup_builds", bind=True)
def task_cleanup_builds(self, build_ids: typing.List[str] = None, reason: str = None):
def task_cleanup_builds(
self,
build_ids: typing.List[str] = None,
reason: str = None,
is_canceled: bool = False,
):
conda_store = self.worker.conda_store
with conda_store.session_factory() as db:
build_cleanup(db, conda_store, build_ids, reason)
build_cleanup(db, conda_store, build_ids, reason, is_canceled)


"""
Expand Down
1 change: 1 addition & 0 deletions conda-store-server/tests/user_journeys/utils/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class BuildStatus(Enum):
BUILDING = "BUILDING"
COMPLETED = "COMPLETED"
FAILED = "FAILED"
CANCELED = "CANCELED"


class API:
Expand Down
14 changes: 10 additions & 4 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,7 @@ def test_api_cancel_build_auth(testclient):
assert r.status == schema.APIStatus.OK
assert r.message == f"build {new_build_id} canceled"

failed = False
canceled = False
for _ in range(10):
# Delay to ensure the build is marked as failed
time.sleep(5)
Expand All @@ -1214,8 +1214,14 @@ def test_api_cancel_build_auth(testclient):
r = schema.APIGetBuild.parse_obj(response.json())
assert r.status == schema.APIStatus.OK
assert r.data.id == new_build_id
if r.data.status == schema.BuildStatus.FAILED.value:
failed = True
if r.data.status == schema.BuildStatus.CANCELED.value:
canceled = True
response = testclient.get(f"api/v1/build/{new_build_id}/logs", timeout=10)
response.raise_for_status()
assert (
f"build {new_build_id} marked as CANCELED "
f"due to being canceled from the REST API"
) in response.text
break

assert failed is True
assert canceled is True

0 comments on commit f4a563b

Please sign in to comment.