Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADD: Repository methods for repo CLI and other features #5156

Merged
merged 4 commits into from
Oct 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 14 additions & 20 deletions aiida/backends/general/migrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import os
import pathlib
import re
import typing
from typing import Dict, Iterable, List, Optional, Union

from disk_objectstore import Container
from disk_objectstore.utils import LazyOpener
Expand All @@ -42,8 +42,8 @@ def __init__(
self,
name: str = '',
file_type: FileType = FileType.DIRECTORY,
key: typing.Union[str, None, LazyOpener] = None,
objects: typing.Dict[str, 'File'] = None
key: Union[str, None, LazyOpener] = None,
objects: Dict[str, 'File'] = None
):
# pylint: disable=super-init-not-called
if not isinstance(name, str):
Expand Down Expand Up @@ -86,43 +86,37 @@ class NoopRepositoryBackend(AbstractRepositoryBackend):
"""

@property
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository.

.. note:: A sandbox folder does not have the concept of a unique identifier and so always returns ``None``.
"""
return None

def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.
@property
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository.

    This backend always reports ``None``.
    """
    return None

:param kwargs: parameters for the initialisation.
"""
def initialise(self, **kwargs) -> None:
    """Initialisation is not supported by this backend.

    :param kwargs: parameters for the initialisation (unused).
    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

@property
def is_initialised(self) -> bool:
    """Return whether the repository has been initialised.

    This backend always reports ``True``.
    """
    return True

def erase(self):
    """Erasing is not supported by this backend.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

def _put_object_from_filelike(self, handle: io.BufferedIOBase) -> str:
    """Return a lazy opener for the file behind ``handle`` instead of storing its content.

    :param handle: filelike object; only its ``name`` attribute is read here.
    :return: a ``LazyOpener`` constructed from ``handle.name``.
    """
    # NOTE(review): the return annotation is ``str`` but a ``LazyOpener`` instance is returned.
    opener = LazyOpener(handle.name)
    return opener

def has_object(self, key: str) -> bool:
"""Return whether the repository has an object with the given key.
def has_objects(self, keys: List[str]) -> List[bool]:
    """Checking for the existence of objects is not supported by this backend.

    :param keys: list of fully qualified identifiers for objects within the repository (unused).
    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

:param key: fully qualified identifier for the object within the repository.
:return: True if the object exists, False otherwise.
"""
def delete_objects(self, keys: List[str]) -> None:
    """Deleting objects is not supported by this backend.

    :param keys: list of fully qualified identifiers for the objects within the repository (unused).
    :raises NotImplementedError: always.
    """
    raise NotImplementedError()

def list_objects(self) -> Iterable[str]:
    """Listing objects is not supported by this backend.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()


Expand Down
61 changes: 52 additions & 9 deletions aiida/repository/backend/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import hashlib
import io
import pathlib
import typing
from typing import BinaryIO, Iterable, Iterator, List, Optional, Union

from aiida.common.hashing import chunked_file_hash

Expand All @@ -30,9 +30,19 @@ class AbstractRepositoryBackend(metaclass=abc.ABCMeta):

@property
@abc.abstractmethod
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository."""

@property
@abc.abstractmethod
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository.

    This matters when migrating between backends (e.g. archive -> main): if the key formats of the two
    backends are not equal, all the `Node.repository_metadata` have to be re-computed before importing,
    otherwise they will not match with the repository.
    """

@abc.abstractmethod
def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.
Expand All @@ -58,7 +68,7 @@ def erase(self) -> None:
def is_readable_byte_stream(handle) -> bool:
    """Return whether ``handle`` looks like a readable stream opened in binary mode.

    :param handle: the object to inspect.
    :return: True if the handle has a ``read`` attribute and a ``mode`` attribute that contains ``'b'``,
        False otherwise.
    """
    if not hasattr(handle, 'read'):
        return False
    if not hasattr(handle, 'mode'):
        return False
    return 'b' in handle.mode

def put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def put_object_from_filelike(self, handle: BinaryIO) -> str:
"""Store the byte contents of a file in the repository.

:param handle: filelike object with the byte content to be stored.
Expand All @@ -70,10 +80,10 @@ def put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
return self._put_object_from_filelike(handle)

@abc.abstractmethod
def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def _put_object_from_filelike(self, handle: BinaryIO) -> str:
pass

def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str:
def put_object_from_file(self, filepath: Union[str, pathlib.Path]) -> str:
"""Store a new object with contents of the file located at `filepath` on this file system.

:param filepath: absolute path of file whose contents to copy to the repository.
Expand All @@ -84,15 +94,33 @@ def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str
return self.put_object_from_filelike(handle)

@abc.abstractmethod
def has_objects(self, keys: List[str]) -> List[bool]:
    """Return whether the repository has objects with the given keys.

    :param keys: list of fully qualified identifiers for objects within the repository.
    :return: list of booleans, in the same order as the keys provided, with value True if the respective
        object exists and False otherwise.
    """

def has_object(self, key: str) -> bool:
    """Return whether the repository has an object with the given key.

    Delegates to ``has_objects`` with a single-element list and returns the first entry of its result.

    :param key: fully qualified identifier for the object within the repository.
    :return: True if the object exists, False otherwise.
    """
    found = self.has_objects([key])
    return found[0]

@abc.abstractmethod
def list_objects(self) -> Iterable[str]:
    """Return an iterable over the keys of all objects available in the repository.

    :return: an iterable that yields the key of each available object.
    """

@contextlib.contextmanager
def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
def open(self, key: str) -> Iterator[BinaryIO]:
ramirezfranciscof marked this conversation as resolved.
Show resolved Hide resolved
"""Open a file handle to an object stored under the given key.

.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand Down Expand Up @@ -130,12 +158,27 @@ def get_object_hash(self, key: str) -> str:
with self.open(key) as handle: # pylint: disable=not-context-manager
return chunked_file_hash(handle, hashlib.sha256)

def delete_object(self, key: str):
@abc.abstractmethod
def delete_objects(self, keys: List[str]) -> None:
    """Delete the objects from the repository.

    This base implementation only verifies that every key corresponds to an existing object; it does not
    remove anything itself.

    :param keys: list of fully qualified identifiers for the objects within the repository.
    :raise FileNotFoundError: if any of the files does not exist.
    :raise OSError: if any of the files could not be deleted.
    """
    keys_exist = self.has_objects(keys)
    # Pair each key with its existence flag instead of indexing into ``keys``, so that building the error
    # report does not fail for collections that do not support indexing.
    missing = [key for key, exists in zip(keys, keys_exist) if not exists]
    if missing:
        details = ''.join(f' > object with key `{key}` does not exist.\n' for key in missing)
        raise FileNotFoundError(
            'some of the keys provided do not correspond to any object in the repository:\n' + details
        )

def delete_object(self, key: str) -> None:
"""Delete the object from the repository.

:param key: fully qualified identifier for the object within the repository.
:raise FileNotFoundError: if the file does not exist.
:raise OSError: if the file could not be deleted.
"""
if not self.has_object(key):
raise FileNotFoundError(f'object with key `{key}` does not exist.')
return self.delete_objects([key])
36 changes: 16 additions & 20 deletions aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend."""
import contextlib
import shutil
import typing
from typing import BinaryIO, Iterable, Iterator, List, Optional

from disk_objectstore import Container

Expand All @@ -27,12 +27,16 @@ def __str__(self) -> str:
return 'DiskObjectStoreRepository: <uninitialised>'

@property
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository."""
if not self.is_initialised:
return None
return self.container.container_id

@property
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository, as given by the container's ``hash_type``."""
    hash_type = self.container.hash_type
    return hash_type

def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.

Expand All @@ -56,25 +60,20 @@ def erase(self):
except FileNotFoundError:
pass

def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def _put_object_from_filelike(self, handle: BinaryIO) -> str:
"""Store the byte contents of a file in the repository.

:param handle: filelike object with the byte content to be stored.
:return: the generated fully qualified identifier for the object within the repository.
:raises TypeError: if the handle is not a byte stream.
"""
return self.container.add_object(handle.read())
return self.container.add_streamed_object(handle)

def has_object(self, key: str) -> bool:
"""Return whether the repository has an object with the given key.

:param key: fully qualified identifier for the object within the repository.
:return: True if the object exists, False otherwise.
"""
return self.container.has_object(key)
def has_objects(self, keys: List[str]) -> List[bool]:
    """Return whether the repository has objects with the given keys.

    The check is delegated to the container's ``has_objects`` method.

    :param keys: list of fully qualified identifiers for objects within the repository.
    :return: the result of the container's ``has_objects`` call for the given keys.
    """
    container = self.container
    return container.has_objects(keys)

@contextlib.contextmanager
def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
def open(self, key: str) -> Iterator[BinaryIO]:
"""Open a file handle to an object stored under the given key.

.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand All @@ -90,15 +89,12 @@ def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
with self.container.get_object_stream(key) as handle:
yield handle # type: ignore[misc]

def delete_object(self, key: str):
"""Delete the object from the repository.
def delete_objects(self, keys: List[str]) -> None:
    """Delete the objects from the repository.

    The parent implementation is called first, then the keys are passed to the container's
    ``delete_objects`` method.

    :param keys: list of fully qualified identifiers for the objects within the repository.
    """
    super().delete_objects(keys)
    self.container.delete_objects(keys)

:param key: fully qualified identifier for the object within the repository.
:raise FileNotFoundError: if the file does not exist.
:raise OSError: if the file could not be deleted.
"""
super().delete_object(key)
self.container.delete_objects([key])
def list_objects(self) -> Iterable[str]:
    """Return an iterable over the keys of all objects in the repository.

    :return: the result of the container's ``list_all_objects`` call.
    """
    container = self.container
    return container.list_all_objects()

def get_object_hash(self, key: str) -> str:
"""Return the SHA-256 hash of an object stored under the given key.
Expand Down
41 changes: 21 additions & 20 deletions aiida/repository/backend/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import contextlib
import os
import shutil
import typing
from typing import BinaryIO, Iterable, Iterator, List, Optional
import uuid

from .abstract import AbstractRepositoryBackend
Expand All @@ -16,7 +16,7 @@ class SandboxRepositoryBackend(AbstractRepositoryBackend):

def __init__(self):
from aiida.common.folders import SandboxFolder
self._sandbox: typing.Optional[SandboxFolder] = None
self._sandbox: Optional[SandboxFolder] = None

def __str__(self) -> str:
"""Return the string representation of this repository."""
Expand All @@ -29,13 +29,17 @@ def __del__(self):
self.erase()

@property
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository.

.. note:: A sandbox folder does not have the concept of a unique identifier and so always returns ``None``.
"""
return None

@property
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository, which for this backend is always ``'uuid4'``."""
    return 'uuid4'

def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.

Expand Down Expand Up @@ -70,7 +74,7 @@ def erase(self):
finally:
self._sandbox = None

def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def _put_object_from_filelike(self, handle: BinaryIO) -> str:
"""Store the byte contents of a file in the repository.

:param handle: filelike object with the byte content to be stored.
Expand All @@ -85,16 +89,15 @@ def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:

return key

def has_object(self, key: str) -> bool:
"""Return whether the repository has an object with the given key.

:param key: fully qualified identifier for the object within the repository.
:return: True if the object exists, False otherwise.
"""
return key in os.listdir(self.sandbox.abspath)
def has_objects(self, keys: List[str]) -> List[bool]:
    """Return whether the repository has objects with the given keys.

    :param keys: list of fully qualified identifiers for objects within the repository.
    :return: list of booleans, in the same order as the keys provided, with value True if an entry with
        that name exists in the sandbox directory and False otherwise.
    """
    # List the directory once and keep the names in a set, so each membership test is a hash lookup
    # rather than a scan over the whole listing.
    present = set(os.listdir(self.sandbox.abspath))
    return [key in present for key in keys]

@contextlib.contextmanager
def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
def open(self, key: str) -> Iterator[BinaryIO]:
"""Open a file handle to an object stored under the given key.

.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand All @@ -110,12 +113,10 @@ def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
with self.sandbox.open(key, mode='rb') as handle:
yield handle

def delete_object(self, key: str):
"""Delete the object from the repository.
def delete_objects(self, keys: List[str]) -> None:
    """Delete the objects from the repository.

    The parent implementation is called first, then the file named after each key is removed from the
    sandbox directory.

    :param keys: list of fully qualified identifiers for the objects within the repository.
    """
    super().delete_objects(keys)
    for object_key in keys:
        filepath = os.path.join(self.sandbox.abspath, object_key)
        os.remove(filepath)

:param key: fully qualified identifier for the object within the repository.
:raise FileNotFoundError: if the file does not exist.
:raise OSError: if the file could not be deleted.
"""
super().delete_object(key)
os.remove(os.path.join(self.sandbox.abspath, key))
def list_objects(self) -> Iterable[str]:
    """Return an iterable over the keys of all objects in the repository.

    :return: the result of the sandbox folder's ``get_content_list`` call.
    """
    sandbox = self.sandbox
    return sandbox.get_content_list()
Loading