Skip to content

Commit

Permalink
Merge pull request #1352 from dandi/asset-services
Browse files Browse the repository at this point in the history
  • Loading branch information
danlamanna authored Nov 9, 2022
2 parents f86d5b7 + 566afeb commit a140937
Show file tree
Hide file tree
Showing 9 changed files with 322 additions and 174 deletions.
10 changes: 6 additions & 4 deletions dandiapi/api/management/commands/create_dev_dandiset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from django.core.files.uploadedfile import SimpleUploadedFile
import djclick as click

from dandiapi.api.models import Asset, AssetBlob
from dandiapi.api.models import AssetBlob
from dandiapi.api.services.asset import add_asset_to_version
from dandiapi.api.services.dandiset import create_dandiset
from dandiapi.api.tasks import calculate_sha256, validate_asset_metadata, validate_version_metadata

Expand Down Expand Up @@ -40,10 +41,11 @@ def create_dev_dandiset(name: str, owner: str):
'schemaVersion': settings.DANDI_SCHEMA_VERSION,
'encodingFormat': 'text/plain',
'schemaKey': 'Asset',
'path': 'foo/bar.txt',
}
asset = Asset(blob=asset_blob, metadata=asset_metadata, path='foo/bar.txt')
asset.save()
draft_version.assets.add(asset)
asset = add_asset_to_version(
user=owner, version=draft_version, asset_blob=asset_blob, metadata=asset_metadata
)

calculate_sha256(blob_id=asset_blob.blob_id)
validate_asset_metadata(asset_id=asset.id)
Expand Down
4 changes: 2 additions & 2 deletions dandiapi/api/management/commands/refresh_metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import djclick as click

from dandiapi.api.models import Asset, Version
from dandiapi.api.services.asset.metadata import bulk_recalculate_asset_metadata


@click.command()
Expand All @@ -16,8 +17,7 @@ def refresh_metadata(assets: bool, versions: bool):
"""
if assets:
click.echo('Refreshing asset metadata')
for asset in Asset.objects.all():
asset.save()
bulk_recalculate_asset_metadata(assets=Asset.objects.all())

if versions:
click.echo('Refreshing draft version metadata')
Expand Down
39 changes: 39 additions & 0 deletions dandiapi/api/models/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime
import re
from typing import TYPE_CHECKING
from urllib.parse import urlparse, urlunparse
import uuid

Expand Down Expand Up @@ -39,6 +40,10 @@ def validate_asset_path(path: str):
return path


if TYPE_CHECKING:
from dandiapi.zarr.models import EmbargoedZarrArchive, ZarrArchive


class BaseAssetBlob(TimeStampedModel):
SHA256_REGEX = r'[0-9a-f]{64}'
ETAG_REGEX = r'[0-9a-f]{32}(-[1-9][0-9]*)?'
Expand Down Expand Up @@ -210,6 +215,40 @@ def s3_url(self) -> str:
else:
return self.zarr.s3_url

def is_different_from(
    self,
    *,
    asset_blob: AssetBlob | EmbargoedAssetBlob | None = None,
    zarr_archive: ZarrArchive | EmbargoedZarrArchive | None = None,
    metadata: dict,
) -> bool:
    """
    Report whether the given blob/zarr archive/metadata differ from this asset's.

    A supplied blob or zarr archive is compared only against the field of the
    matching kind, and only when that field is currently set on this asset.
    Metadata is compared last, using plain inequality.

    Raises NotImplementedError when an EmbargoedZarrArchive is supplied and no
    earlier blob/zarr comparison has already reported a difference.
    """
    # Imported at call time; the module-level import of these names is
    # guarded by TYPE_CHECKING and therefore unavailable at runtime.
    from dandiapi.zarr.models import EmbargoedZarrArchive, ZarrArchive

    # NOTE(review): a change of *kind* (e.g. this asset holds a blob and a zarr
    # archive is supplied) is not reported here, because the matching field is
    # None and its guard is skipped -- confirm that this is intended.
    if isinstance(asset_blob, AssetBlob):
        if self.blob is not None and self.blob != asset_blob:
            return True

    if isinstance(asset_blob, EmbargoedAssetBlob):
        if self.embargoed_blob is not None and self.embargoed_blob != asset_blob:
            return True

    if isinstance(zarr_archive, ZarrArchive):
        if self.zarr is not None and self.zarr != zarr_archive:
            return True

    # Embargoed zarr archives are not supported by this comparison.
    if isinstance(zarr_archive, EmbargoedZarrArchive):
        raise NotImplementedError

    return self.metadata != metadata

def _populate_metadata(self):
download_url = settings.DANDI_API_URL + reverse(
'asset-download',
Expand Down
166 changes: 166 additions & 0 deletions dandiapi/api/services/asset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from django.db import transaction

from dandiapi.api.asset_paths import add_asset_paths, delete_asset_paths, update_asset_paths
from dandiapi.api.models.asset import Asset, AssetBlob, EmbargoedAssetBlob
from dandiapi.api.models.version import Version
from dandiapi.api.services.asset.exceptions import (
AssetAlreadyExists,
DandisetOwnerRequired,
DraftDandisetNotModifiable,
ZarrArchiveBelongsToDifferentDandiset,
)
from dandiapi.api.services.asset.metadata import _maybe_validate_asset_metadata
from dandiapi.zarr.models import ZarrArchive


def _create_asset(
    *,
    path: str,
    asset_blob: AssetBlob | None = None,
    embargoed_asset_blob: EmbargoedAssetBlob | None = None,
    zarr_archive: ZarrArchive | None = None,
    metadata: dict,
):
    """
    Build, validate and save a new Asset in the PENDING state.

    The metadata is passed through ``Asset.strip_metadata`` before being stored.
    Returns the saved asset.
    """
    new_asset = Asset(
        path=path,
        blob=asset_blob,
        embargoed_blob=embargoed_asset_blob,
        zarr=zarr_archive,
        metadata=Asset.strip_metadata(metadata),
        status=Asset.Status.PENDING,
    )

    # Field-level validation only; constraint validation is explicitly disabled.
    new_asset.full_clean(validate_constraints=False)
    new_asset.save()
    return new_asset


def change_asset(
    *,
    user,
    asset: Asset,
    version: Version,
    new_asset_blob: AssetBlob | EmbargoedAssetBlob | None = None,
    new_zarr_archive: ZarrArchive | None = None,
    new_metadata: dict,
) -> tuple[Asset, bool]:
    """
    Change the blob/zarr/metadata/path of an asset if necessary.

    Returns a tuple of the asset, and whether or not it was changed. When changing an asset, a
    new asset is created automatically and the old one is removed from the version; the new
    asset's ``previous`` points at the old asset.

    Raises:
        DandisetOwnerRequired: ``user`` lacks the 'owner' permission on the version's dandiset.
        DraftDandisetNotModifiable: ``version`` is not the draft version.
        AssetAlreadyExists: another asset in ``version`` already uses the requested path.

    Errors raised by ``add_asset_to_version`` also propagate from here.
    """
    assert (
        new_asset_blob or new_zarr_archive
    ), 'One of new_zarr_archive or new_asset_blob must be given to change_asset'
    assert 'path' in new_metadata, 'Path must be present in new_metadata'

    if not user.has_perm('owner', version.dandiset):
        raise DandisetOwnerRequired()
    elif version.version != 'draft':
        raise DraftDandisetNotModifiable()

    path = new_metadata['path']
    new_metadata_stripped = Asset.strip_metadata(new_metadata)

    # The comparison below only looks at blob/zarr and the *stripped* metadata, so the path is
    # compared explicitly here: a rename must count as a change even when nothing else differs.
    # NOTE(review): presumably strip_metadata drops 'path'; if it does not, this extra check is
    # simply redundant.
    path_unchanged = asset.path == path
    if path_unchanged and not asset.is_different_from(
        asset_blob=new_asset_blob, zarr_archive=new_zarr_archive, metadata=new_metadata_stripped
    ):
        return asset, False

    # Verify we aren't changing path to the same value as an existing asset
    if version.assets.filter(path=path).exclude(asset_id=asset.asset_id).exists():
        raise AssetAlreadyExists()

    with transaction.atomic():
        # The old asset must leave the version first, otherwise re-adding an asset at the same
        # path would trip the duplicate-path check in add_asset_to_version.
        remove_asset_from_version(user=user, asset=asset, version=version)

        new_asset = add_asset_to_version(
            user=user,
            version=version,
            asset_blob=new_asset_blob,
            zarr_archive=new_zarr_archive,
            metadata=new_metadata,
        )
        # Set previous asset and save
        new_asset.previous = asset
        new_asset.save()
        update_asset_paths(old_asset=asset, new_asset=new_asset, version=version)

    return new_asset, True


def add_asset_to_version(
    *,
    user,
    version: Version,
    asset_blob: AssetBlob | EmbargoedAssetBlob | None = None,
    zarr_archive: ZarrArchive | None = None,
    metadata: dict,
) -> Asset:
    """
    Create an asset and attach it to a draft version.

    Raises:
        DandisetOwnerRequired: ``user`` lacks the 'owner' permission on the version's dandiset.
        DraftDandisetNotModifiable: ``version`` is not the draft version.
        AssetAlreadyExists: the version already has an asset at ``metadata['path']``.
        ZarrArchiveBelongsToDifferentDandiset: the zarr archive's dandiset is not the version's.
    """
    assert (
        asset_blob or zarr_archive
    ), 'One of zarr_archive or asset_blob must be given to add_asset_to_version'
    assert 'path' in metadata, 'Path must be present in metadata'

    target_dandiset = version.dandiset
    if not user.has_perm('owner', target_dandiset):
        raise DandisetOwnerRequired()
    if version.version != 'draft':
        raise DraftDandisetNotModifiable()

    # Paths must be unique within a version
    path_taken = version.assets.filter(path=metadata['path']).exists()
    if path_taken:
        raise AssetAlreadyExists()

    # A zarr archive may only be used within the dandiset it belongs to
    if zarr_archive and zarr_archive.dandiset != version.dandiset:
        raise ZarrArchiveBelongsToDifferentDandiset()

    # Route the incoming blob to whichever of the two blob fields matches its type
    is_embargoed = isinstance(asset_blob, EmbargoedAssetBlob)
    embargoed_asset_blob = asset_blob if is_embargoed else None
    regular_asset_blob = None if is_embargoed else asset_blob

    with transaction.atomic():
        created = _create_asset(
            path=metadata['path'],
            asset_blob=regular_asset_blob,
            embargoed_asset_blob=embargoed_asset_blob,
            zarr_archive=zarr_archive,
            metadata=metadata,
        )
        version.assets.add(created)
        add_asset_paths(created, version)

        # Mark the version as pending so its metadata gets validated again,
        # and save it so that its modified field is updated.
        version.status = Version.Status.PENDING
        version.save()

    _maybe_validate_asset_metadata(created)

    return created


def remove_asset_from_version(*, user, asset: Asset, version: Version) -> Version:
    """
    Detach an asset from a draft version and mark the version as pending.

    Raises DandisetOwnerRequired if ``user`` lacks the 'owner' permission on the
    version's dandiset, and DraftDandisetNotModifiable if ``version`` is not the
    draft version. Returns the saved version.
    """
    is_owner = user.has_perm('owner', version.dandiset)
    if not is_owner:
        raise DandisetOwnerRequired()
    if version.version != 'draft':
        raise DraftDandisetNotModifiable()

    with transaction.atomic():
        # Drop the asset's path entries, then the asset itself, from the version
        delete_asset_paths(asset, version)
        version.assets.remove(asset)

        # Mark the version as pending so its metadata gets validated again,
        # and save it so that its modified field is updated.
        version.status = Version.Status.PENDING
        version.save()

    return version
22 changes: 22 additions & 0 deletions dandiapi/api/services/asset/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from rest_framework import status

from dandiapi.api.services.exceptions import DandiException


class DandisetOwnerRequired(DandiException):
    """Raised when the acting user is not an owner of the dandiset (HTTP 403)."""

    http_status_code = status.HTTP_403_FORBIDDEN


class DraftDandisetNotModifiable(DandiException):
    """Raised when a modification targets a version other than the draft (HTTP 405)."""

    message = 'Only draft versions can be modified.'
    http_status_code = status.HTTP_405_METHOD_NOT_ALLOWED


class AssetAlreadyExists(DandiException):
    """Raised when an asset path is already taken within the version (HTTP 409)."""

    message = 'An asset with that path already exists'
    http_status_code = status.HTTP_409_CONFLICT


class ZarrArchiveBelongsToDifferentDandiset(DandiException):
    """Raised when a zarr archive's dandiset is not the target dandiset (HTTP 400)."""

    message = 'The zarr archive belongs to a different dandiset'
    http_status_code = status.HTTP_400_BAD_REQUEST
35 changes: 35 additions & 0 deletions dandiapi/api/services/asset/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from django.db import transaction
from django.db.models.query import QuerySet

from dandiapi.api.models.asset import Asset
from dandiapi.api.tasks import validate_asset_metadata


def _maybe_validate_asset_metadata(asset: Asset):
    """
    Validate the asset's metadata, but only once its blob has a sha256.

    While the checksum is still missing nothing happens here; the checksum code
    is then responsible for triggering validation of every asset that points at
    the blob. Assets that are neither a blob nor an embargoed blob are skipped.
    """
    if asset.is_blob:
        checksum_source = asset.blob
    elif asset.is_embargoed_blob:
        checksum_source = asset.embargoed_blob
    else:
        # TODO: assert? zarr?
        return

    if checksum_source.sha256 is not None:
        # The checksum is present, so the metadata is ready to validate.
        # Called directly rather than delayed because it should run very quickly.
        validate_asset_metadata(asset.id)


def bulk_recalculate_asset_metadata(*, assets: QuerySet[Asset]):
    """Re-save every asset in ``assets`` inside a single transaction."""
    with transaction.atomic():
        # iterator() streams the rows instead of caching the whole queryset
        for stale_asset in assets.iterator():
            # Saving is what (re)populates the asset's metadata
            stale_asset.save()
2 changes: 0 additions & 2 deletions dandiapi/api/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ def collect_validation_errors(

@shared_task(soft_time_limit=10)
@atomic
# This method takes both a version_id and an asset_id because asset metadata renders differently
# depending on which version the asset belongs to.
def validate_asset_metadata(asset_id: int) -> None:
logger.info('Validating asset metadata for asset %s', asset_id)
asset: Asset = Asset.objects.get(id=asset_id)
Expand Down
2 changes: 1 addition & 1 deletion dandiapi/api/tests/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ def test_asset_create_zarr_wrong_dandiset(
format='json',
)
assert resp.status_code == 400
assert resp.json() == ['The zarr archive belongs to a different dandiset']
assert resp.json() == 'The zarr archive belongs to a different dandiset'


@pytest.mark.django_db
Expand Down
Loading

0 comments on commit a140937

Please sign in to comment.