diff --git a/aiida/backends/general/migrations/utils.py b/aiida/backends/general/migrations/utils.py index 734f40839a..e947276282 100644 --- a/aiida/backends/general/migrations/utils.py +++ b/aiida/backends/general/migrations/utils.py @@ -15,7 +15,7 @@ import os import pathlib import re -import typing +from typing import Dict, Iterable, List, Optional, Union from disk_objectstore import Container from disk_objectstore.utils import LazyOpener @@ -42,8 +42,8 @@ def __init__( self, name: str = '', file_type: FileType = FileType.DIRECTORY, - key: typing.Union[str, None, LazyOpener] = None, - objects: typing.Dict[str, 'File'] = None + key: Union[str, None, LazyOpener] = None, + objects: Dict[str, 'File'] = None ): # pylint: disable=super-init-not-called if not isinstance(name, str): @@ -86,43 +86,37 @@ class NoopRepositoryBackend(AbstractRepositoryBackend): """ @property - def uuid(self) -> typing.Optional[str]: + def uuid(self) -> Optional[str]: """Return the unique identifier of the repository. .. note:: A sandbox folder does not have the concept of a unique identifier and so always returns ``None``. """ return None - def initialise(self, **kwargs) -> None: - """Initialise the repository if it hasn't already been initialised. + @property + def key_format(self) -> Optional[str]: + return None - :param kwargs: parameters for the initialisation. - """ + def initialise(self, **kwargs) -> None: raise NotImplementedError() @property def is_initialised(self) -> bool: - """Return whether the repository has been initialised.""" return True def erase(self): raise NotImplementedError() def _put_object_from_filelike(self, handle: io.BufferedIOBase) -> str: - """Store the byte contents of a file in the repository. - - :param handle: filelike object with the byte content to be stored. - :return: the generated fully qualified identifier for the object within the repository. - :raises TypeError: if the handle is not a byte stream. - """ return LazyOpener(handle.name) - def has_object(self, key: str) -> bool: - """Return whether the repository has an object with the given key. + def has_objects(self, keys: List[str]) -> List[bool]: + raise NotImplementedError() - :param key: fully qualified identifier for the object within the repository. - :return: True if the object exists, False otherwise. - """ + def delete_objects(self, keys: List[str]) -> None: + raise NotImplementedError() + + def list_objects(self) -> Iterable[str]: raise NotImplementedError() diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py index 9a3527c97a..20621fb584 100644 --- a/aiida/repository/backend/abstract.py +++ b/aiida/repository/backend/abstract.py @@ -10,7 +10,7 @@ import hashlib import io import pathlib -import typing +from typing import BinaryIO, Iterable, Iterator, List, Optional, Union from aiida.common.hashing import chunked_file_hash @@ -30,9 +30,19 @@ class AbstractRepositoryBackend(metaclass=abc.ABCMeta): @property @abc.abstractmethod - def uuid(self) -> typing.Optional[str]: + def uuid(self) -> Optional[str]: """Return the unique identifier of the repository.""" + @property + @abc.abstractmethod + def key_format(self) -> Optional[str]: + """Return the format for the keys of the repository. + + Important for when migrating between backends (e.g. archive -> main), as if they are not equal then it is + necessary to re-compute all the `Node.repository_metadata` before importing (otherwise they will not match + with the repository). + """ + @abc.abstractmethod def initialise(self, **kwargs) -> None: """Initialise the repository if it hasn't already been initialised. @@ -58,7 +68,7 @@ def erase(self) -> None: def is_readable_byte_stream(handle) -> bool: return hasattr(handle, 'read') and hasattr(handle, 'mode') and 'b' in handle.mode - def put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + def put_object_from_filelike(self, handle: BinaryIO) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. @@ -70,10 +80,10 @@ def put_object_from_filelike(self, handle: typing.BinaryIO) -> str: return self._put_object_from_filelike(handle) @abc.abstractmethod - def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + def _put_object_from_filelike(self, handle: BinaryIO) -> str: pass - def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str: + def put_object_from_file(self, filepath: Union[str, pathlib.Path]) -> str: """Store a new object with contents of the file located at `filepath` on this file system. :param filepath: absolute path of file whose contents to copy to the repository. @@ -84,15 +94,33 @@ def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str return self.put_object_from_filelike(handle) @abc.abstractmethod + def has_objects(self, keys: List[str]) -> List[bool]: + """Return whether the repository has an object with the given key. + + :param keys: + list of fully qualified identifiers for objects within the repository. + :return: + list of logicals, in the same order as the keys provided, with value True if the respective + object exists and False otherwise. + """ + def has_object(self, key: str) -> bool: """Return whether the repository has an object with the given key. :param key: fully qualified identifier for the object within the repository. :return: True if the object exists, False otherwise. """ + return self.has_objects([key])[0] + + @abc.abstractmethod + def list_objects(self) -> Iterable[str]: + """Return iterable that yields all available objects by key. + + :return: An iterable for all the available object keys. + """ @contextlib.contextmanager - def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: + def open(self, key: str) -> Iterator[BinaryIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method @@ -130,12 +158,27 @@ def get_object_hash(self, key: str) -> str: with self.open(key) as handle: # pylint: disable=not-context-manager return chunked_file_hash(handle, hashlib.sha256) - def delete_object(self, key: str): + @abc.abstractmethod + def delete_objects(self, keys: List[str]) -> None: + """Delete the objects from the repository. + + :param keys: list of fully qualified identifiers for the objects within the repository. + :raise FileNotFoundError: if any of the files does not exist. + :raise OSError: if any of the files could not be deleted. + """ + keys_exist = self.has_objects(keys) + if not all(keys_exist): + error_message = 'some of the keys provided do not correspond to any object in the repository:\n' + for indx, key_exists in enumerate(keys_exist): + if not key_exists: + error_message += f' > object with key `{keys[indx]}` does not exist.\n' + raise FileNotFoundError(error_message) + + def delete_object(self, key: str) -> None: """Delete the object from the repository. :param key: fully qualified identifier for the object within the repository. :raise FileNotFoundError: if the file does not exist. :raise OSError: if the file could not be deleted. """ - if not self.has_object(key): - raise FileNotFoundError(f'object with key `{key}` does not exist.') + return self.delete_objects([key]) diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index c4c5306317..387475aff3 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -2,7 +2,7 @@ """Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend.""" import contextlib import shutil -import typing +from typing import BinaryIO, Iterable, Iterator, List, Optional from disk_objectstore import Container @@ -27,12 +27,16 @@ def __str__(self) -> str: return 'DiskObjectStoreRepository: ' @property - def uuid(self) -> typing.Optional[str]: + def uuid(self) -> Optional[str]: """Return the unique identifier of the repository.""" if not self.is_initialised: return None return self.container.container_id + @property + def key_format(self) -> Optional[str]: + return self.container.hash_type + def initialise(self, **kwargs) -> None: """Initialise the repository if it hasn't already been initialised. @@ -56,25 +60,20 @@ def erase(self): except FileNotFoundError: pass - def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + def _put_object_from_filelike(self, handle: BinaryIO) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. :return: the generated fully qualified identifier for the object within the repository. :raises TypeError: if the handle is not a byte stream. """ - return self.container.add_object(handle.read()) + return self.container.add_streamed_object(handle) - def has_object(self, key: str) -> bool: - """Return whether the repository has an object with the given key. - - :param key: fully qualified identifier for the object within the repository. - :return: True if the object exists, False otherwise. - """ - return self.container.has_object(key) + def has_objects(self, keys: List[str]) -> List[bool]: + return self.container.has_objects(keys) @contextlib.contextmanager - def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: + def open(self, key: str) -> Iterator[BinaryIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method @@ -90,15 +89,12 @@ def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: with self.container.get_object_stream(key) as handle: yield handle # type: ignore[misc] - def delete_object(self, key: str): - """Delete the object from the repository. + def delete_objects(self, keys: List[str]) -> None: + super().delete_objects(keys) + self.container.delete_objects(keys) - :param key: fully qualified identifier for the object within the repository. - :raise FileNotFoundError: if the file does not exist. - :raise OSError: if the file could not be deleted. - """ - super().delete_object(key) - self.container.delete_objects([key]) + def list_objects(self) -> Iterable[str]: + return self.container.list_all_objects() def get_object_hash(self, key: str) -> str: """Return the SHA-256 hash of an object stored under the given key. diff --git a/aiida/repository/backend/sandbox.py b/aiida/repository/backend/sandbox.py index f4577f4783..719ff6f7bb 100644 --- a/aiida/repository/backend/sandbox.py +++ b/aiida/repository/backend/sandbox.py @@ -3,7 +3,7 @@ import contextlib import os import shutil -import typing +from typing import BinaryIO, Iterable, Iterator, List, Optional import uuid from .abstract import AbstractRepositoryBackend @@ -16,7 +16,7 @@ class SandboxRepositoryBackend(AbstractRepositoryBackend): def __init__(self): from aiida.common.folders import SandboxFolder - self._sandbox: typing.Optional[SandboxFolder] = None + self._sandbox: Optional[SandboxFolder] = None def __str__(self) -> str: """Return the string representation of this repository.""" @@ -29,13 +29,17 @@ def __del__(self): self.erase() @property - def uuid(self) -> typing.Optional[str]: + def uuid(self) -> Optional[str]: """Return the unique identifier of the repository. .. note:: A sandbox folder does not have the concept of a unique identifier and so always returns ``None``. """ return None + @property + def key_format(self) -> Optional[str]: + return 'uuid4' + def initialise(self, **kwargs) -> None: """Initialise the repository if it hasn't already been initialised. @@ -70,7 +74,7 @@ def erase(self): finally: self._sandbox = None - def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + def _put_object_from_filelike(self, handle: BinaryIO) -> str: """Store the byte contents of a file in the repository. :param handle: filelike object with the byte content to be stored. @@ -85,16 +89,15 @@ def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: return key - def has_object(self, key: str) -> bool: - """Return whether the repository has an object with the given key. - - :param key: fully qualified identifier for the object within the repository. - :return: True if the object exists, False otherwise. - """ - return key in os.listdir(self.sandbox.abspath) + def has_objects(self, keys: List[str]) -> List[bool]: + result = list() + dirlist = os.listdir(self.sandbox.abspath) + for key in keys: + result.append(key in dirlist) + return result @contextlib.contextmanager - def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: + def open(self, key: str) -> Iterator[BinaryIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method @@ -110,12 +113,10 @@ def open(self, key: str) -> typing.Iterator[typing.BinaryIO]: with self.sandbox.open(key, mode='rb') as handle: yield handle - def delete_object(self, key: str): - """Delete the object from the repository. + def delete_objects(self, keys: List[str]) -> None: + super().delete_objects(keys) + for key in keys: + os.remove(os.path.join(self.sandbox.abspath, key)) - :param key: fully qualified identifier for the object within the repository. - :raise FileNotFoundError: if the file does not exist. - :raise OSError: if the file could not be deleted. - """ - super().delete_object(key) - os.remove(os.path.join(self.sandbox.abspath, key)) + def list_objects(self) -> Iterable[str]: + return self.sandbox.get_content_list() diff --git a/tests/repository/backend/test_abstract.py b/tests/repository/backend/test_abstract.py index 3a548bd515..293d40fc61 100644 --- a/tests/repository/backend/test_abstract.py +++ b/tests/repository/backend/test_abstract.py @@ -3,7 +3,7 @@ """Tests for the :mod:`aiida.repository.backend.abstract` module.""" import io import tempfile -import typing +from typing import BinaryIO, Iterable, List, Optional import pytest @@ -13,11 +13,12 @@ class RepositoryBackend(AbstractRepositoryBackend): """Concrete implementation of ``AbstractRepositoryBackend``.""" - def has_object(self, key): - return True + @property + def uuid(self) -> Optional[str]: + return None @property - def uuid(self) -> typing.Optional[str]: + def key_format(self) -> Optional[str]: return None def initialise(self, **kwargs) -> None: @@ -30,7 +31,19 @@ def erase(self): def is_initialised(self) -> bool: return True - def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str: + def _put_object_from_filelike(self, handle: BinaryIO) -> str: + pass + + # pylint useless-super-delegation needs to be disabled here because it refuses to + # recognize that this is an abstract method and thus has to be overriden. See the + # following issue: https://github.com/PyCQA/pylint/issues/1594 + def delete_objects(self, keys: List[str]) -> None: # pylint: disable=useless-super-delegation + super().delete_objects(keys) + + def has_objects(self, keys: List[str]) -> List[bool]: + return [True] + + def list_objects(self) -> Iterable[str]: pass @@ -84,3 +97,35 @@ def test_put_object_from_file(repository, generate_directory): repository.put_object_from_file(directory / 'file_a') repository.put_object_from_file(str(directory / 'file_a')) + + +def test_passes_to_batch(repository, monkeypatch): + """Checks that the single object operations call the batch operations""" + + def mock_batch_operation(self, keys): + raise NotImplementedError('this method was intentionally not implemented') + + monkeypatch.setattr(RepositoryBackend, 'has_objects', mock_batch_operation) + with pytest.raises(NotImplementedError) as execinfo: + repository.has_object('object_key') + assert str(execinfo.value) == 'this method was intentionally not implemented' + + monkeypatch.undo() + + monkeypatch.setattr(RepositoryBackend, 'delete_objects', mock_batch_operation) + with pytest.raises(NotImplementedError) as execinfo: + repository.delete_object('object_key') + assert str(execinfo.value) == 'this method was intentionally not implemented' + + +def test_delete_objects_test(repository, monkeypatch): + """Checks that the super of delete_objects will check for existence of the files""" + + def has_objects_mock(self, keys): # pylint: disable=unused-argument + return [False for key in keys] + + monkeypatch.setattr(RepositoryBackend, 'has_objects', has_objects_mock) + with pytest.raises(FileNotFoundError) as execinfo: + repository.delete_objects(['object_key']) + assert 'exist' in str(execinfo.value) + assert 'object_key' in str(execinfo.value) diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py index 8d668fb0ce..4d3b943d4c 100644 --- a/tests/repository/backend/test_disk_object_store.py +++ b/tests/repository/backend/test_disk_object_store.py @@ -152,3 +152,25 @@ def test_get_object_hash(repository, generate_directory): key = repository.put_object_from_filelike(handle) assert repository.get_object_hash(key) == 'ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73' + + +def test_list_objects(repository, generate_directory): + """Test the ``Repository.delete_object`` method.""" + repository.initialise() + keylist = list() + + directory = generate_directory({'file_a': b'content a'}) + with open(directory / 'file_a', 'rb') as handle: + keylist.append(repository.put_object_from_filelike(handle)) + + directory = generate_directory({'file_b': b'content b'}) + with open(directory / 'file_b', 'rb') as handle: + keylist.append(repository.put_object_from_filelike(handle)) + + assert sorted(list(repository.list_objects())) == sorted(keylist) + + +def test_key_format(repository): + """Test the ``key_format`` property.""" + repository.initialise() + assert repository.key_format == repository.container.hash_type diff --git a/tests/repository/backend/test_sandbox.py b/tests/repository/backend/test_sandbox.py index 3ef0694c5b..6828c58549 100644 --- a/tests/repository/backend/test_sandbox.py +++ b/tests/repository/backend/test_sandbox.py @@ -152,3 +152,25 @@ def test_get_object_hash(repository, generate_directory): key = repository.put_object_from_filelike(handle) assert repository.get_object_hash(key) == 'ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73' + + +def test_list_objects(repository, generate_directory): + """Test the ``Repository.delete_object`` method.""" + repository.initialise() + keylist = list() + + directory = generate_directory({'file_a': b'content a'}) + with open(directory / 'file_a', 'rb') as handle: + keylist.append(repository.put_object_from_filelike(handle)) + + directory = generate_directory({'file_b': b'content b'}) + with open(directory / 'file_b', 'rb') as handle: + keylist.append(repository.put_object_from_filelike(handle)) + + assert sorted(list(repository.list_objects())) == sorted(keylist) + + +def test_key_format(repository): + """Test the ``key_format`` property.""" + repository.initialise() + assert repository.key_format == 'uuid4'