From a0543a0974f715c3a9fc1e847c64a142d0c4715d Mon Sep 17 00:00:00 2001 From: Francisco Ramirez Date: Wed, 1 Sep 2021 16:03:35 +0000 Subject: [PATCH 01/17] Add proposal for repository maintain functionality It would in principle consist of a single verdi command with one optional flag and one optional argument: > verdi repository maintain [--live] [--pass-down ] The base command will perform all the available and necessary maintainance operations but will require to lock down the profile in order to proceed (it will ask the user to confirm before). Optionally, the `--live` option can be passed so that only the maintainance operations that are safe to perform while actively using AiiDA will be applied. Finnally, the user can also `--pass-down` a backend specific string that will be interpreted by the backend method for even further customization of the process. --- aiida/cmdline/commands/cmd_storage.py | 62 +++++++- aiida/repository/backend/abstract.py | 17 ++ aiida/repository/backend/disk_object_store.py | 50 ++++++ aiida/repository/control.py | 81 ++++++++++ docs/source/reference/command_line.rst | 1 + tests/cmdline/commands/test_storage.py | 32 ++++ tests/repository/backend/test_abstract.py | 18 ++- .../backend/test_disk_object_store.py | 92 +++++++++++ tests/repository/test_control.py | 146 ++++++++++++++++++ 9 files changed, 493 insertions(+), 6 deletions(-) create mode 100644 aiida/repository/control.py create mode 100644 tests/repository/test_control.py diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 58b7c96163..e41400dbcf 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -85,7 +85,67 @@ def storage_integrity(): def storage_info(statistics): """Summarise the contents of the storage.""" from aiida.cmdline.utils.common import get_database_summary + from aiida.repository.control import repository_info from aiida.orm import QueryBuilder - data = get_database_summary(QueryBuilder, statistics) + data = {} + data['database'] = get_database_summary(QueryBuilder, statistics) + data['repository'] = repository_info(statistics=statistics) + echo.echo_dictionary(data, sort_keys=False, fmt='yaml') + + +@verdi_storage.command('maintain') +@click.option( + '--full', + is_flag=True, + help='Performs maintenance tasks that are safe to run while other AiiDA instances are working with the profile.' +) +@click.option( + '--dry-run', + is_flag=True, + help='Performs maintenance tasks that are safe to run while other AiiDA instances are working with the profile.' +) +def storage_maintain(full, dry_run): + """Performs maintenance tasks on the repository.""" + from aiida.repository.control import get_repository_report, repository_maintain + + if dry_run and full: + echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.') + + if dry_run: + maintainance_report = get_repository_report()['user_info'] + click.echo('Repository:') + click.echo(maintainance_report) + return + + if full: + + click.echo( + '\nIn order to safely perform the full maintenance operations on the internal storage, no other ' + 'process should be using the AiiDA profile being maintained. ' + 'This includes daemon workers, verdi shells, scripts with the profile loaded, etc). ' + 'Please make sure there is nothing like this currently running and that none is started until ' + 'these procedures conclude. ' + 'For performing maintanance operations that are safe to run while actively using AiiDA, just run ' + '`verdi storage maintain`, without the `--full` flag.\n' + ) + + else: + + click.echo( + '\nThis command will perform all maintenance operations on the internal storage that can be safely ' + 'executed while still running AiiDA. ' + 'However, not all operations that are required to fully optimize disk usage and future performance ' + 'can be done in this way. ' + 'Whenever you find the time or opportunity, please consider running `verdi repository maintenance ' + '--full` for a more complete optimization.\n' + ) + + if not click.confirm('Are you sure you want continue in this mode?'): + return + + maintainance_report = repository_maintain(full=full) + click.echo('\nMaintainance procedures finished:\n') + if 'user_info' in maintainance_report: + click.echo(maintainance_report['user_info']) diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py index 7903cc60c6..30028d87d7 100644 --- a/aiida/repository/backend/abstract.py +++ b/aiida/repository/backend/abstract.py @@ -119,6 +119,23 @@ def list_objects(self) -> Iterable[str]: :return: An iterable for all the available object keys. """ + def get_info(self, statistics=False, **kwargs) -> dict: # pylint: disable=unused-argument,no-self-use + """Returns relevant information about the content of the repository. + + :return: a dictionary with the information. + """ + return {} + + def maintain(self, full: bool = False, **kwargs) -> dict: # pylint: disable=unused-argument,no-self-use + """Performs maintenance operations. + + :param full: + a flag to perform operations that require to stop using the maintained profile. + :return: + a dictionary with information on the operations performed. + """ + return {} + @contextlib.contextmanager def open(self, key: str) -> Iterator[BinaryIO]: """Open a file handle to an object stored under the given key. diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 16b06d9dce..a71682cf48 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -118,3 +118,53 @@ def get_object_hash(self, key: str) -> str: if self.container.hash_type != 'sha256': return super().get_object_hash(key) return key + + def maintain(self, full: bool = False, **kwargs) -> dict: + # By default it will do only pack_loose if full=False or all operations + # if full=True. Then each can be overriden by the respective kwarg. + pack_loose = kwargs.get('override_pack_loose', True) + do_repack = kwargs.get('override_do_repack', full) + clean_storage = kwargs.get('override_clean_storage', full) + do_vacuum = kwargs.get('override_do_vacuum', full) + + operation_results = {} + + if pack_loose: + self.container.pack_all_loose() + operation_results['pack_loose'] = {'decription': 'Packed all loose files.'} + + if do_repack: + self.container.repack() + operation_results['repack'] = {'decription': 'Repacked all packages for optimal access.'} + + if clean_storage: + self.container.clean_storage(vacuum=do_vacuum) + operation_results['clean_storage'] = {'decription': 'Cleaned up the internal repo database.'} + + return operation_results + + def get_info(self, statistics=False, **kwargs) -> dict: + bytes_to_mb = 9.53674316E-7 + + output_info = {} + output_info['SHA-hash algorithm'] = self.container.hash_type + output_info['Compression algorithm'] = self.container.compression_algorithm + + if not statistics: + return output_info + + files_data = self.container.count_objects() + size_data = self.container.get_total_size() + + output_info['Packs'] = files_data['pack_files'] # type: ignore + + output_info['Objects'] = { # type: ignore + 'unpacked': files_data['loose'], + 'packed': files_data['packed'], + } + output_info['Size (MB)'] = { # type: ignore + 'unpacked': size_data['total_size_loose'] * bytes_to_mb, + 'packed': size_data['total_size_packfiles_on_disk'] * bytes_to_mb, + 'other': size_data['total_size_packindexes_on_disk'] * bytes_to_mb, + } + return output_info diff --git a/aiida/repository/control.py b/aiida/repository/control.py new file mode 100644 index 0000000000..eceefbc13b --- /dev/null +++ b/aiida/repository/control.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Module for overall repository control commands.""" +# Note: these functions are not methods of `AbstractRepositoryBackend` because they need access to the orm. +# This is because they have to go through all the nodes to gather the list of keys that AiiDA is keeping +# track of (since they are descentralized in each node entry). +# See the get_unreferenced_keyset function +from aiida.manage.manager import get_manager +from aiida.orm.implementation import Backend + + +def repository_maintain(full: bool = False, backend: Backend = None, **kwargs) -> dict: + """Performs maintenance tasks on the repository.""" + maintainance_report = {'database': {}, 'repository': {}, 'user_info': ''} + + if backend is None: + backend = get_manager().get_backend() + repository_backend = backend.get_repository() + + unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) + + maintainance_report['repository']['unreferenced'] = len(unreferenced_objects) # type: ignore + maintainance_report['repository']['info'] = repository_backend.get_info() # type: ignore + + repository_backend.delete_objects(list(unreferenced_objects)) + + # Perform the maintainance operations in the repository + maintainance_report['repository']['maintain'] = repository_backend.maintain(full=full, **kwargs) # type: ignore + + return maintainance_report + + +def repository_info(statistics: bool = False, backend: Backend = None, **kwargs) -> dict: + """Returns relevant information on the repository.""" + if backend is None: + backend = get_manager().get_backend() + repository_backend = backend.get_repository() + return repository_backend.get_info(statistics) + + +def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Backend = None, **kwargs) -> set: + """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA. + + This should be all the soft-deleted files. + """ + from aiida import orm + + if aiida_backend is None: + aiida_backend = get_manager().get_backend() + + repository_backend = aiida_backend.get_repository() + + keyset_backend = set(repository_backend.list_objects()) + keyset_aiidadb = set(orm.Node.objects(aiida_backend).iter_repo_keys()) + + if check_consistency: + if len(keyset_aiidadb - keyset_backend) > 0: + raise RuntimeError('Database seems corrupted (some tracked objects are not in the repository). Aborting!') + + return keyset_backend - keyset_aiidadb + + +def get_repository_report(backend: Backend = None, **kwargs) -> dict: + """Performs a report on the status of the repository.""" + report = {'user_info': ''} + + if backend is None: + backend = get_manager().get_backend() + unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) + + num_unref = len(unreferenced_objects) + report['user_info'] += f' > Unreferenced files to delete: {num_unref}' + + return report diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 9f2dc32d1a..4e3303a759 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -553,6 +553,7 @@ Below is a list with all available subcommands. Commands: info Summarise the contents of the storage. integrity Checks for the integrity of the data storage. + maintain Performs maintenance tasks on the repository. migrate Migrate the storage to the latest schema version. diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index da1371c18a..e130b9a4a5 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -103,3 +103,35 @@ def mocked_migrate(self): # pylint: disable=no-self-use assert result.exc_info[0] is SystemExit assert 'Critical:' in result.output assert 'passed error message' in result.output + + +def tests_storage_maintain_full_dry(run_cli_command): + """Test that ``verdi storage migrate`` detects the cancelling of the interactive prompt.""" + result = run_cli_command(cmd_storage.storage_maintain, options=['--full', '--dry-run'], raises=True) + assert '--dry-run' in result.output.lower() + assert '--full' in result.output.lower() + + +def tests_storage_maintain_text(run_cli_command, monkeypatch): + """Test all the information and cases of the storage maintain command.""" + from aiida.repository import control + + def reporter_mock(**kwargs): + return {'user_info': 'Underlying message'} + + monkeypatch.setattr(control, 'get_repository_report', reporter_mock) + monkeypatch.setattr(control, 'repository_maintain', reporter_mock) + + result = run_cli_command(cmd_storage.storage_maintain, options=['--dry-run']) + assert 'repository:' in result.output.lower() + assert 'Underlying message' in result.output + + result = run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y') + assert 'no other process should be using the aiida profile' in result.output.lower() + assert 'finished' in result.output.lower() + assert 'Underlying message' in result.output + + result = run_cli_command(cmd_storage.storage_maintain, user_input='Y') + assert 'can be safely executed while still running aiida' in result.output.lower() + assert 'finished' in result.output.lower() + assert 'Underlying message' in result.output diff --git a/tests/repository/backend/test_abstract.py b/tests/repository/backend/test_abstract.py index 9fb177c0e7..23acae6217 100644 --- a/tests/repository/backend/test_abstract.py +++ b/tests/repository/backend/test_abstract.py @@ -14,11 +14,11 @@ class RepositoryBackend(AbstractRepositoryBackend): """Concrete implementation of ``AbstractRepositoryBackend``.""" @property - def uuid(self) -> Optional[str]: + def key_format(self) -> Optional[str]: return None @property - def key_format(self) -> Optional[str]: + def uuid(self) -> Optional[str]: return None def initialise(self, **kwargs) -> None: @@ -35,7 +35,7 @@ def _put_object_from_filelike(self, handle: BinaryIO) -> str: pass # pylint useless-super-delegation needs to be disabled here because it refuses to - # recognize that this is an abstract method and thus has to be overriden. See the + # recognize that this is an abstract method and thus has to be overwritten. See the # following issue: https://github.com/PyCQA/pylint/issues/1594 def delete_objects(self, keys: List[str]) -> None: # pylint: disable=useless-super-delegation super().delete_objects(keys) @@ -103,7 +103,7 @@ def test_put_object_from_file(repository, generate_directory): def test_passes_to_batch(repository, monkeypatch): - """Checks that the single object operations call the batch operations""" + """Checks that the single object operations call the batch operations.""" def mock_batch_operation(self, keys): raise NotImplementedError('this method was intentionally not implemented') @@ -122,7 +122,7 @@ def mock_batch_operation(self, keys): def test_delete_objects_test(repository, monkeypatch): - """Checks that the super of delete_objects will check for existence of the files""" + """Checks that the super of delete_objects will check for existence of the files.""" def has_objects_mock(self, keys): # pylint: disable=unused-argument return [False for key in keys] @@ -132,3 +132,11 @@ def has_objects_mock(self, keys): # pylint: disable=unused-argument repository.delete_objects(['object_key']) assert 'exist' in str(execinfo.value) assert 'object_key' in str(execinfo.value) + + +def test_default_methods(repository): + """Checks the behaviour of the default methods.""" + assert repository.get_info() == {} + assert repository.get_info(statistics=True) == {} + assert repository.maintain() == {} + assert repository.maintain(full=True) == {} diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py index 113c55f4a9..4fa5c19a9e 100644 --- a/tests/repository/backend/test_disk_object_store.py +++ b/tests/repository/backend/test_disk_object_store.py @@ -21,6 +21,30 @@ def repository(tmp_path): yield DiskObjectStoreRepositoryBackend(container=container) +@pytest.fixture(scope='function') +def populated_repository(repository): + """Initializes the storage and database with minimal population.""" + from io import BytesIO + repository.initialise() + + content = BytesIO(b'Packed file number 1') + repository.put_object_from_filelike(content) + + content = BytesIO(b'Packed file number 2') + repository.put_object_from_filelike(content) + + repository.maintain(full=True) + + content = BytesIO(b'Packed file number 3 (also loose)') + repository.put_object_from_filelike(content) + + repository.maintain(full=False) + + content = BytesIO(b'Unpacked file') + repository.put_object_from_filelike(content) + yield repository + + def test_str(repository): """Test the ``__str__`` method.""" assert str(repository) @@ -184,3 +208,71 @@ def test_key_format(repository): """Test the ``key_format`` property.""" repository.initialise() assert repository.key_format == repository.container.hash_type + + +def test_get_info(populated_repository): + """Test the ``get_info`` method.""" + repository_info = populated_repository.get_info() + assert 'SHA-hash algorithm' in repository_info + assert 'Compression algorithm' in repository_info + assert repository_info['SHA-hash algorithm'] == 'sha256' + assert repository_info['Compression algorithm'] == 'zlib+1' + + repository_info = populated_repository.get_info(statistics=True) + assert 'SHA-hash algorithm' in repository_info + assert 'Compression algorithm' in repository_info + assert repository_info['SHA-hash algorithm'] == 'sha256' + assert repository_info['Compression algorithm'] == 'zlib+1' + + assert 'Packs' in repository_info + assert repository_info['Packs'] == 1 + + assert 'Objects' in repository_info + assert 'unpacked' in repository_info['Objects'] + assert 'packed' in repository_info['Objects'] + assert repository_info['Objects']['unpacked'] == 2 + assert repository_info['Objects']['packed'] == 3 + + assert 'Size (MB)' in repository_info + assert 'unpacked' in repository_info['Size (MB)'] + assert 'packed' in repository_info['Size (MB)'] + assert 'other' in repository_info['Size (MB)'] + + +#yapf: disable +@pytest.mark.parametrize(('kwargs', 'output_keys', 'output_info'), ( + ( + {'full': False}, + ['pack_loose'], + {'unpacked': 2, 'packed': 4} + ), + ( + {'full': True}, + ['pack_loose', 'repack', 'clean_storage'], + {'unpacked': 0, 'packed': 4} + ), + ( + { + 'full': True, + 'override_pack_loose': False, + 'override_do_repack': False, + 'override_clean_storage': False, + 'override_do_vacuum': False, + }, + [], + {'unpacked': 2, 'packed': 3} + ), +)) +# yapf: enable +def test_maintain(populated_repository, kwargs, output_keys, output_info): + """Test the ``maintain`` method.""" + maintainance_info = populated_repository.maintain(**kwargs) + repository_info = populated_repository.get_info(statistics=True) + + for output_key in output_keys: + assert output_key in maintainance_info + maintainance_info.pop(output_key) + assert len(maintainance_info) == 0 + + assert repository_info['Objects']['unpacked'] == output_info['unpacked'] + assert repository_info['Objects']['packed'] == output_info['packed'] diff --git a/tests/repository/test_control.py b/tests/repository/test_control.py new file mode 100644 index 0000000000..d46700bd7e --- /dev/null +++ b/tests/repository/test_control.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Tests for the :mod:`aiida.repository.control` module.""" +import pytest + +from aiida.manage.manager import get_manager + + +class MockRepositoryBackend(): + """Mock of the RepositoryBackend for testing purposes.""" + + # pylint: disable=no-self-use + + def get_info(self, *args, **kwargs): + """Method to return information.""" + return 'this is information about the repo' + + def delete_objects(self, *args, **kwargs): + """Method to delete objects.""" + + def maintain(self, full=False, **kwargs): + """Method to perform maintainance operations.""" + text = 'this is a report on the maintenance procedure' + if full: + text += ' with the full flag on' + return text + + +@pytest.fixture(scope='function') +@pytest.mark.usefixtures('clear_database_before_test') +def clear_storage_before_test(): + """Clears the storage before a test.""" + #-----------------------------------------------------------------# + # For some reason `clear_database_before_test` is not cleaning out + # the nodes so I have to do it manually. + + from aiida import orm + from aiida.tools import delete_nodes + query = orm.QueryBuilder().append(orm.Node, project=['id']) + delete_nodes(query.all(flat=True), dry_run=False) + + # Also would be good to disable the logging in delete_nodes + #-----------------------------------------------------------------# + + repository = get_manager().get_backend().get_repository() + object_keys = list(repository.list_objects()) + repository.delete_objects(object_keys) + repository.maintain(full=True) + + +@pytest.mark.usefixtures('clear_database_before_test', 'clear_storage_before_test') +def test_get_unreferenced_keyset(): + """Test the ``get_unreferenced_keyset`` method.""" + # NOTE: This tests needs to use the database because there is a call inside + # `get_unreferenced_keyset` that would require to mock a very complex class + # structure: + # + # keyset_aiidadb = set(orm.Node.objects(aiida_backend).iter_repo_keys()) + # + # Ideally this should eventually be replaced by a more direct call to the + # method of a single object that would be easier to mock and would allow + # to unit-test this in isolation. + from io import BytesIO + + from aiida import orm + from aiida.repository.control import get_unreferenced_keyset + + # Coverage code pass + unreferenced_keyset = get_unreferenced_keyset() + assert unreferenced_keyset == set() + + # Error catching: put a file, get the keys from the aiida db, manually delete the keys + # in the repository + datanode = orm.FolderData() + datanode.put_object_from_filelike(BytesIO(b'File content'), 'file.txt') + datanode.store() + + aiida_backend = get_manager().get_backend() + keys = list(orm.Node.objects(aiida_backend).iter_repo_keys()) + + repository_backend = aiida_backend.get_repository() + repository_backend.delete_objects(keys) + + with pytest.raises(RuntimeError, match='Database seems corrupted') as exc: + get_unreferenced_keyset() + assert 'not in the repository' in str(exc.value) + + +def test_repository_maintain(monkeypatch): + """Test the ``repository_maintain`` method.""" + from aiida.repository import control + monkeypatch.setattr(control, 'get_unreferenced_keyset', lambda **kwargs: set([1, 2, 3])) + + BackendClass = get_manager().get_backend().__class__ # pylint: disable=invalid-name + monkeypatch.setattr(BackendClass, 'get_repository', lambda *args, **kwargs: MockRepositoryBackend()) + + final_data = control.repository_maintain() + assert 'database' in final_data + assert 'repository' in final_data + assert 'user_info' in final_data + + assert final_data['repository']['info'] == 'this is information about the repo' + assert final_data['repository']['maintain'] == 'this is a report on the maintenance procedure' + assert final_data['repository']['unreferenced'] == 3 + + final_data = control.repository_maintain(full=True) + assert final_data['repository']['maintain'] == 'this is a report on the maintenance procedure with the full flag on' + + +def test_repository_info(monkeypatch): + """Test the ``repository_info`` method.""" + from aiida.repository.control import repository_info + + def mock_get_info(self, statistics=False, **kwargs): # pylint: disable=unused-argument + text = 'Info from repo' + if statistics: + text += ' with statistics' + return text + + RepositoryClass = get_manager().get_backend().get_repository().__class__ # pylint: disable=invalid-name + monkeypatch.setattr(RepositoryClass, 'get_info', mock_get_info) + + repository_info_out = repository_info() + assert 'Info from repo' in repository_info_out + + repository_info_out = repository_info(statistics=True) + assert 'with statistics' in repository_info_out + + +def test_get_repository_report(monkeypatch): + """Test the ``get_repository_report`` method.""" + from aiida.repository import control + + monkeypatch.setattr(control, 'get_unreferenced_keyset', lambda **kwargs: set()) + + repository_report = control.get_repository_report() + + assert 'user_info' in repository_report + assert 'Unreferenced files to delete' in repository_report['user_info'] From 3bfc85636314bb75cfcc9fef61be4188ae8485f9 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Tue, 30 Nov 2021 14:56:02 +0000 Subject: [PATCH 02/17] Apply PR corrections. --- aiida/cmdline/commands/cmd_storage.py | 17 +++--- aiida/repository/backend/disk_object_store.py | 57 +++++++++++++++---- aiida/repository/control.py | 23 ++++---- tests/repository/test_control.py | 6 +- 4 files changed, 72 insertions(+), 31 deletions(-) diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index e41400dbcf..10a5cdf2ec 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -99,12 +99,12 @@ def storage_info(statistics): @click.option( '--full', is_flag=True, - help='Performs maintenance tasks that are safe to run while other AiiDA instances are working with the profile.' + help='Perform all maintenance tasks, including the ones that should not be executed while the profile is in use.' ) @click.option( '--dry-run', is_flag=True, - help='Performs maintenance tasks that are safe to run while other AiiDA instances are working with the profile.' + help='Returns information that allows to estimate the impact of the maintenance operations.' ) def storage_maintain(full, dry_run): """Performs maintenance tasks on the repository.""" @@ -115,13 +115,13 @@ def storage_maintain(full, dry_run): if dry_run: maintainance_report = get_repository_report()['user_info'] - click.echo('Repository:') - click.echo(maintainance_report) + echo.echo('Repository:') + echo.echo(maintainance_report) return if full: - click.echo( + echo.echo_warning( '\nIn order to safely perform the full maintenance operations on the internal storage, no other ' 'process should be using the AiiDA profile being maintained. ' 'This includes daemon workers, verdi shells, scripts with the profile loaded, etc). ' @@ -133,7 +133,7 @@ def storage_maintain(full, dry_run): else: - click.echo( + echo.echo( '\nThis command will perform all maintenance operations on the internal storage that can be safely ' 'executed while still running AiiDA. ' 'However, not all operations that are required to fully optimize disk usage and future performance ' @@ -146,6 +146,5 @@ def storage_maintain(full, dry_run): return maintainance_report = repository_maintain(full=full) - click.echo('\nMaintainance procedures finished:\n') - if 'user_info' in maintainance_report: - click.echo(maintainance_report['user_info']) + echo.echo('\nMaintainance procedures finished:\n') + echo.echo(maintainance_report['user_info']) diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index a71682cf48..74574acb76 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -119,13 +119,45 @@ def get_object_hash(self, key: str) -> str: return super().get_object_hash(key) return key - def maintain(self, full: bool = False, **kwargs) -> dict: - # By default it will do only pack_loose if full=False or all operations - # if full=True. Then each can be overriden by the respective kwarg. - pack_loose = kwargs.get('override_pack_loose', True) - do_repack = kwargs.get('override_do_repack', full) - clean_storage = kwargs.get('override_clean_storage', full) - do_vacuum = kwargs.get('override_do_vacuum', full) + def maintain( # pylint: disable=arguments-differ + self, + full: bool = False, + override_pack_loose: bool = None, + override_do_repack: bool = None, + override_clean_storage: bool = None, + override_do_vacuum: bool = None, + ) -> dict: + """Performs maintenance operations. + + :param full: + a flag to perform operations that require to stop using the maintained profile. + :param override_pack_loose: + override flag for forcing the packing of loose files. + :param override_do_repack: + override flag for forcing the re-packing of already packed files. + :param override_clean_storage: + override flag for forcing the cleaning of soft-deleted files from the repository. + :param override_do_vacuum: + override flag for forcing the vacuuming of the internal database when cleaning the repository. + :return: + a dictionary with information on the operations performed. + """ + pack_loose = True + do_repack = full + clean_storage = full + do_vacuum = full + + if override_pack_loose is not None: + pack_loose = override_pack_loose + + if override_do_repack is not None: + do_repack = override_do_repack + + if override_clean_storage is not None: + clean_storage = override_clean_storage + + if override_do_vacuum is not None: + do_vacuum = override_do_vacuum operation_results = {} @@ -135,15 +167,20 @@ def maintain(self, full: bool = False, **kwargs) -> dict: if do_repack: self.container.repack() - operation_results['repack'] = {'decription': 'Repacked all packages for optimal access.'} + operation_results['repack'] = {'decription': 'Repacked all pack files to optimize file access.'} if clean_storage: self.container.clean_storage(vacuum=do_vacuum) - operation_results['clean_storage'] = {'decription': 'Cleaned up the internal repo database.'} + operation_results['clean_storage'] = { + 'decription': f'Cleaned up the internal repo database (with `vacuum={do_vacuum}`).' + } return operation_results - def get_info(self, statistics=False, **kwargs) -> dict: + def get_info( # pylint: disable=arguments-differ + self, + statistics=False, + ) -> dict: bytes_to_mb = 9.53674316E-7 output_info = {} diff --git a/aiida/repository/control.py b/aiida/repository/control.py index eceefbc13b..2c9d1fc9d8 100644 --- a/aiida/repository/control.py +++ b/aiida/repository/control.py @@ -22,17 +22,17 @@ def repository_maintain(full: bool = False, backend: Backend = None, **kwargs) - if backend is None: backend = get_manager().get_backend() - repository_backend = backend.get_repository() + repository = backend.get_repository() unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) maintainance_report['repository']['unreferenced'] = len(unreferenced_objects) # type: ignore - maintainance_report['repository']['info'] = repository_backend.get_info() # type: ignore + maintainance_report['repository']['info'] = repository.get_info() # type: ignore - repository_backend.delete_objects(list(unreferenced_objects)) + repository.delete_objects(list(unreferenced_objects)) # Perform the maintainance operations in the repository - maintainance_report['repository']['maintain'] = repository_backend.maintain(full=full, **kwargs) # type: ignore + maintainance_report['repository']['maintain'] = repository.maintain(full=full, **kwargs) # type: ignore return maintainance_report @@ -41,8 +41,8 @@ def repository_info(statistics: bool = False, backend: Backend = None, **kwargs) """Returns relevant information on the repository.""" if backend is None: backend = get_manager().get_backend() - repository_backend = backend.get_repository() - return repository_backend.get_info(statistics) + repository = backend.get_repository() + return repository.get_info(statistics) def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Backend = None, **kwargs) -> set: @@ -55,14 +55,17 @@ def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Backe if aiida_backend is None: aiida_backend = get_manager().get_backend() - repository_backend = aiida_backend.get_repository() + repository = aiida_backend.get_repository() - keyset_backend = set(repository_backend.list_objects()) + keyset_backend = set(repository.list_objects()) keyset_aiidadb = set(orm.Node.objects(aiida_backend).iter_repo_keys()) if check_consistency: - if len(keyset_aiidadb - keyset_backend) > 0: - raise RuntimeError('Database seems corrupted (some tracked objects are not in the repository). Aborting!') + keyset_missing = keyset_aiidadb - keyset_backend + if len(keyset_missing) > 0: + raise RuntimeError( + 'There are objects referenced in the database that are not present in the repository. Aborting!' + ) return keyset_backend - keyset_aiidadb diff --git a/tests/repository/test_control.py b/tests/repository/test_control.py index d46700bd7e..77ffacc614 100644 --- a/tests/repository/test_control.py +++ b/tests/repository/test_control.py @@ -88,9 +88,11 @@ def test_get_unreferenced_keyset(): repository_backend = aiida_backend.get_repository() repository_backend.delete_objects(keys) - with pytest.raises(RuntimeError, match='Database seems corrupted') as exc: + with pytest.raises( + RuntimeError, match='There are objects referenced in the database that are not present in the repository' + ) as exc: get_unreferenced_keyset() - assert 'not in the repository' in str(exc.value) + assert 'aborting' in str(exc.value).lower() def test_repository_maintain(monkeypatch): From 5ee2fa6bce869a095d6f21cf560fde1338964298 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Tue, 30 Nov 2021 15:56:30 +0000 Subject: [PATCH 03/17] Apply PR corrections - moving control from storage to backend --- aiida/{repository => backends}/control.py | 0 aiida/cmdline/commands/cmd_storage.py | 4 ++-- tests/{repository => backends}/test_control.py | 10 +++++----- tests/cmdline/commands/test_storage.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) rename aiida/{repository => backends}/control.py (100%) rename tests/{repository => backends}/test_control.py (95%) diff --git a/aiida/repository/control.py b/aiida/backends/control.py similarity index 100% rename from aiida/repository/control.py rename to aiida/backends/control.py diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 10a5cdf2ec..93c58bbbe5 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -85,7 +85,7 @@ def storage_integrity(): def storage_info(statistics): """Summarise the contents of the storage.""" from aiida.cmdline.utils.common import get_database_summary - from aiida.repository.control import repository_info + from aiida.backends.control import repository_info from aiida.orm import QueryBuilder data = {} @@ -108,7 +108,7 @@ def storage_info(statistics): ) def storage_maintain(full, dry_run): """Performs maintenance tasks on the repository.""" - from aiida.repository.control import get_repository_report, repository_maintain + from aiida.backends.control import get_repository_report, repository_maintain if dry_run and full: echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.') diff --git a/tests/repository/test_control.py b/tests/backends/test_control.py similarity index 95% rename from tests/repository/test_control.py rename to tests/backends/test_control.py index 77ffacc614..886ec66273 100644 --- a/tests/repository/test_control.py +++ b/tests/backends/test_control.py @@ -7,7 +7,7 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Tests for the :mod:`aiida.repository.control` module.""" +"""Tests for the :mod:`aiida.backends.control` module.""" import pytest from aiida.manage.manager import get_manager @@ -70,7 +70,7 @@ def test_get_unreferenced_keyset(): from io import BytesIO from aiida import orm - from aiida.repository.control import get_unreferenced_keyset + from aiida.backends.control import get_unreferenced_keyset # Coverage code pass unreferenced_keyset = get_unreferenced_keyset() @@ -97,7 +97,7 @@ def test_get_unreferenced_keyset(): def test_repository_maintain(monkeypatch): """Test the ``repository_maintain`` method.""" - from aiida.repository import control + from aiida.backends import control monkeypatch.setattr(control, 'get_unreferenced_keyset', lambda **kwargs: set([1, 2, 3])) BackendClass = get_manager().get_backend().__class__ # pylint: disable=invalid-name @@ -118,7 +118,7 @@ def test_repository_maintain(monkeypatch): def test_repository_info(monkeypatch): """Test the ``repository_info`` method.""" - from aiida.repository.control import repository_info + from aiida.backends.control import repository_info def mock_get_info(self, statistics=False, **kwargs): # pylint: disable=unused-argument text = 'Info from repo' @@ -138,7 +138,7 @@ def mock_get_info(self, statistics=False, **kwargs): # pylint: disable=unused-a def test_get_repository_report(monkeypatch): """Test the ``get_repository_report`` method.""" - from aiida.repository import control + from aiida.backends import control monkeypatch.setattr(control, 'get_unreferenced_keyset', lambda **kwargs: set()) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index e130b9a4a5..2b01738258 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -114,7 +114,7 @@ def tests_storage_maintain_full_dry(run_cli_command): def tests_storage_maintain_text(run_cli_command, monkeypatch): """Test all the information and cases of the storage maintain command.""" - from aiida.repository import control + from aiida.backends import control def reporter_mock(**kwargs): return {'user_info': 'Underlying message'} From 2784998797531e4910fa86ce8852f55cbb7934d6 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 1 Dec 2021 11:12:12 +0000 Subject: [PATCH 04/17] Apply PR corrections after rebase --- .pre-commit-config.yaml | 1 + aiida/backends/control.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e4d79f9b65..0f1de34b84 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,6 +70,7 @@ repos: pass_filenames: true files: >- (?x)^( + aiida/backends/control.py| aiida/common/progress_reporter.py| aiida/engine/.*py| aiida/manage/manager.py| diff --git a/aiida/backends/control.py b/aiida/backends/control.py index 2c9d1fc9d8..c3c716bbb6 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -12,11 +12,13 @@ # This is because they have to go through all the nodes to gather the list of keys that AiiDA is keeping # track of (since they are descentralized in each node entry). # See the get_unreferenced_keyset function +from typing import Optional, Set + from aiida.manage.manager import get_manager from aiida.orm.implementation import Backend -def repository_maintain(full: bool = False, backend: Backend = None, **kwargs) -> dict: +def repository_maintain(full: bool = False, backend: Optional[Backend] = None, **kwargs) -> dict: """Performs maintenance tasks on the repository.""" maintainance_report = {'database': {}, 'repository': {}, 'user_info': ''} @@ -37,7 +39,7 @@ def repository_maintain(full: bool = False, backend: Backend = None, **kwargs) - return maintainance_report -def repository_info(statistics: bool = False, backend: Backend = None, **kwargs) -> dict: +def repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: """Returns relevant information on the repository.""" if backend is None: backend = get_manager().get_backend() @@ -45,7 +47,7 @@ def repository_info(statistics: bool = False, backend: Backend = None, **kwargs) return repository.get_info(statistics) -def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Backend = None, **kwargs) -> set: +def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional[Backend] = None) -> Set[str]: """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA. This should be all the soft-deleted files. @@ -70,7 +72,7 @@ def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Backe return keyset_backend - keyset_aiidadb -def get_repository_report(backend: Backend = None, **kwargs) -> dict: +def get_repository_report(backend: Optional[Backend] = None) -> dict: """Performs a report on the status of the repository.""" report = {'user_info': ''} @@ -79,6 +81,6 @@ def get_repository_report(backend: Backend = None, **kwargs) -> dict: unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) num_unref = len(unreferenced_objects) - report['user_info'] += f' > Unreferenced files to delete: {num_unref}' + report['user_info'] += f'Unreferenced files to delete: {num_unref}' return report From 766709db662d8bd04b5098a4d8202bf0d550efaf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Dec 2021 11:15:36 +0000 Subject: [PATCH 05/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- aiida/cmdline/commands/cmd_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 93c58bbbe5..4873164a9a 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -84,8 +84,8 @@ def storage_integrity(): @click.option('--statistics', is_flag=True, help='Provides more in-detail statistically relevant data.') def storage_info(statistics): """Summarise the contents of the storage.""" - from aiida.cmdline.utils.common import get_database_summary from aiida.backends.control import repository_info + from aiida.cmdline.utils.common import get_database_summary from aiida.orm import QueryBuilder data = {} From df3bfd41149eee1acc8225e6540c06ec4ee0cb5b Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 1 Dec 2021 11:35:40 +0000 Subject: [PATCH 06/17] Add corrections from PR - method naming --- aiida/backends/control.py | 2 +- aiida/cmdline/commands/cmd_storage.py | 4 ++-- tests/backends/test_control.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index c3c716bbb6..a1a18ea6b9 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -39,7 +39,7 @@ def repository_maintain(full: bool = False, backend: Optional[Backend] = None, * return maintainance_report -def repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: +def get_repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: """Returns relevant information on the repository.""" if backend is None: backend = get_manager().get_backend() diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 4873164a9a..4fc25b5fc4 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -84,13 +84,13 @@ def storage_integrity(): @click.option('--statistics', is_flag=True, help='Provides more in-detail statistically relevant data.') def storage_info(statistics): """Summarise the contents of the storage.""" - from aiida.backends.control import repository_info + from aiida.backends.control import get_repository_info from aiida.cmdline.utils.common import get_database_summary from aiida.orm import QueryBuilder data = {} data['database'] = get_database_summary(QueryBuilder, statistics) - data['repository'] = repository_info(statistics=statistics) + data['repository'] = get_repository_info(statistics=statistics) echo.echo_dictionary(data, sort_keys=False, fmt='yaml') diff --git a/tests/backends/test_control.py b/tests/backends/test_control.py index 886ec66273..c6d6847324 100644 --- a/tests/backends/test_control.py +++ b/tests/backends/test_control.py @@ -118,7 +118,7 @@ def test_repository_maintain(monkeypatch): def test_repository_info(monkeypatch): """Test the ``repository_info`` method.""" - from aiida.backends.control import repository_info + from aiida.backends.control import get_repository_info def mock_get_info(self, statistics=False, **kwargs): # pylint: disable=unused-argument text = 'Info from repo' @@ -129,10 +129,10 @@ def mock_get_info(self, statistics=False, **kwargs): # pylint: disable=unused-a RepositoryClass = get_manager().get_backend().get_repository().__class__ # pylint: disable=invalid-name monkeypatch.setattr(RepositoryClass, 'get_info', mock_get_info) - repository_info_out = repository_info() + repository_info_out = get_repository_info() assert 'Info from repo' in repository_info_out - repository_info_out = repository_info(statistics=True) + repository_info_out = get_repository_info(statistics=True) assert 'with statistics' in repository_info_out From 5b5b38ed0404213bf13b61ba799a2e6b6613851d Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 1 Dec 2021 13:52:42 +0000 Subject: [PATCH 07/17] Apply PR corrections - Method defaults --- aiida/backends/control.py | 17 ++++++++++++++--- aiida/repository/backend/abstract.py | 4 ++-- tests/backends/test_control.py | 12 ------------ tests/repository/backend/test_abstract.py | 16 ++++++++++++---- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index a1a18ea6b9..980a8e957a 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -29,12 +29,19 @@ def repository_maintain(full: bool = False, backend: Optional[Backend] = None, * unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) maintainance_report['repository']['unreferenced'] = len(unreferenced_objects) # type: ignore - maintainance_report['repository']['info'] = repository.get_info() # type: ignore + + try: + maintainance_report['repository']['info'] = repository.get_info() # type: ignore + except NotImplementedError: + maintainance_report['repository']['info'] = {} # type: ignore repository.delete_objects(list(unreferenced_objects)) # Perform the maintainance operations in the repository - maintainance_report['repository']['maintain'] = repository.maintain(full=full, **kwargs) # type: ignore + try: + maintainance_report['repository']['maintain'] = repository.maintain(full=full, **kwargs) # type: ignore + except NotImplementedError: + maintainance_report['repository']['maintain'] = {} # type: ignore return maintainance_report @@ -44,7 +51,11 @@ def get_repository_info(statistics: bool = False, backend: Optional[Backend] = N if backend is None: backend = get_manager().get_backend() repository = backend.get_repository() - return repository.get_info(statistics) + try: + output_info = repository.get_info(statistics) + except NotImplementedError: + output_info = {} + return output_info def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional[Backend] = None) -> Set[str]: diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py index 30028d87d7..45845a41aa 100644 --- a/aiida/repository/backend/abstract.py +++ b/aiida/repository/backend/abstract.py @@ -124,7 +124,7 @@ def get_info(self, statistics=False, **kwargs) -> dict: # pylint: disable=unuse :return: a dictionary with the information. """ - return {} + raise NotImplementedError def maintain(self, full: bool = False, **kwargs) -> dict: # pylint: disable=unused-argument,no-self-use """Performs maintenance operations. @@ -134,7 +134,7 @@ def maintain(self, full: bool = False, **kwargs) -> dict: # pylint: disable=unu :return: a dictionary with information on the operations performed. """ - return {} + raise NotImplementedError @contextlib.contextmanager def open(self, key: str) -> Iterator[BinaryIO]: diff --git a/tests/backends/test_control.py b/tests/backends/test_control.py index c6d6847324..85e592fb7f 100644 --- a/tests/backends/test_control.py +++ b/tests/backends/test_control.py @@ -37,18 +37,6 @@ def maintain(self, full=False, **kwargs): @pytest.mark.usefixtures('clear_database_before_test') def clear_storage_before_test(): """Clears the storage before a test.""" - #-----------------------------------------------------------------# - # For some reason `clear_database_before_test` is not cleaning out - # the nodes so I have to do it manually. - - from aiida import orm - from aiida.tools import delete_nodes - query = orm.QueryBuilder().append(orm.Node, project=['id']) - delete_nodes(query.all(flat=True), dry_run=False) - - # Also would be good to disable the logging in delete_nodes - #-----------------------------------------------------------------# - repository = get_manager().get_backend().get_repository() object_keys = list(repository.list_objects()) repository.delete_objects(object_keys) diff --git a/tests/repository/backend/test_abstract.py b/tests/repository/backend/test_abstract.py index 23acae6217..f4ca8193f1 100644 --- a/tests/repository/backend/test_abstract.py +++ b/tests/repository/backend/test_abstract.py @@ -136,7 +136,15 @@ def has_objects_mock(self, keys): # pylint: disable=unused-argument def test_default_methods(repository): """Checks the behaviour of the default methods.""" - assert repository.get_info() == {} - assert repository.get_info(statistics=True) == {} - assert repository.maintain() == {} - assert repository.maintain(full=True) == {} + + with pytest.raises(NotImplementedError): + repository.get_info() + + with pytest.raises(NotImplementedError): + repository.get_info(statistics=True) + + with pytest.raises(NotImplementedError): + repository.maintain() + + with pytest.raises(NotImplementedError): + repository.maintain(full=True) From a4ec0d1e1f15ed46a8880f87172cfe82dc4195d9 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 1 Dec 2021 16:18:44 +0000 Subject: [PATCH 08/17] Fix test problems --- tests/backends/test_control.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/backends/test_control.py b/tests/backends/test_control.py index 85e592fb7f..643cb6ece4 100644 --- a/tests/backends/test_control.py +++ b/tests/backends/test_control.py @@ -34,8 +34,7 @@ def maintain(self, full=False, **kwargs): @pytest.fixture(scope='function') -@pytest.mark.usefixtures('clear_database_before_test') -def clear_storage_before_test(): +def clear_storage_before_test(clear_database_before_test): # pylint: disable=unused-argument """Clears the storage before a test.""" repository = get_manager().get_backend().get_repository() object_keys = list(repository.list_objects()) @@ -43,7 +42,7 @@ def clear_storage_before_test(): repository.maintain(full=True) -@pytest.mark.usefixtures('clear_database_before_test', 'clear_storage_before_test') +@pytest.mark.usefixtures('clear_storage_before_test') def test_get_unreferenced_keyset(): """Test the ``get_unreferenced_keyset`` method.""" # NOTE: This tests needs to use the database because there is a call inside From 9fa51eaf212fd48c905fe62685bf33a962fd6bee Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Mon, 6 Dec 2021 11:33:11 +0000 Subject: [PATCH 09/17] Modifications due to PR review - Added a logger for immediate info / feedback - Modified returned variables from the methods to be more "machine readable" (dicts). --- aiida/backends/control.py | 62 +++++++++---------- aiida/cmdline/commands/cmd_storage.py | 12 ++-- aiida/repository/backend/disk_object_store.py | 14 +++-- .../backend/test_disk_object_store.py | 47 +++++++++++--- 4 files changed, 86 insertions(+), 49 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index 980a8e957a..d8adf2dbc6 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -14,56 +14,44 @@ # See the get_unreferenced_keyset function from typing import Optional, Set +from aiida.common.log import AIIDA_LOGGER from aiida.manage.manager import get_manager from aiida.orm.implementation import Backend +__all__ = ('MAINTAIN_LOGGER',) + +MAINTAIN_LOGGER = AIIDA_LOGGER.getChild('maintain') + def repository_maintain(full: bool = False, backend: Optional[Backend] = None, **kwargs) -> dict: """Performs maintenance tasks on the repository.""" - maintainance_report = {'database': {}, 'repository': {}, 'user_info': ''} + maintainance_report = {'database': {}, 'repository': {}} # type: ignore if backend is None: backend = get_manager().get_backend() repository = backend.get_repository() unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) - - maintainance_report['repository']['unreferenced'] = len(unreferenced_objects) # type: ignore - - try: - maintainance_report['repository']['info'] = repository.get_info() # type: ignore - except NotImplementedError: - maintainance_report['repository']['info'] = {} # type: ignore - + MAINTAIN_LOGGER.info('Deleting unreferenced objects ...') repository.delete_objects(list(unreferenced_objects)) - + maintainance_report['repository']['unreferenced'] = {'files deleted': len(unreferenced_objects)} # Perform the maintainance operations in the repository + MAINTAIN_LOGGER.info('Performing repository-specific maintenance ...') try: - maintainance_report['repository']['maintain'] = repository.maintain(full=full, **kwargs) # type: ignore + maintainance_report['repository']['maintain'] = repository.maintain(full=full, **kwargs) except NotImplementedError: - maintainance_report['repository']['maintain'] = {} # type: ignore + maintainance_report['repository']['maintain'] = {} return maintainance_report -def get_repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: - """Returns relevant information on the repository.""" - if backend is None: - backend = get_manager().get_backend() - repository = backend.get_repository() - try: - output_info = repository.get_info(statistics) - except NotImplementedError: - output_info = {} - return output_info - - def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional[Backend] = None) -> Set[str]: """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA. This should be all the soft-deleted files. """ from aiida import orm + MAINTAIN_LOGGER.info('Obtaining unreferenced object keys ...') if aiida_backend is None: aiida_backend = get_manager().get_backend() @@ -83,15 +71,27 @@ def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optio return keyset_backend - keyset_aiidadb -def get_repository_report(backend: Optional[Backend] = None) -> dict: - """Performs a report on the status of the repository.""" - report = {'user_info': ''} +def get_repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: + """Returns general information on the repository.""" + if backend is None: + backend = get_manager().get_backend() + repository = backend.get_repository() + try: + output_info = repository.get_info(statistics) + except NotImplementedError: + output_info = {} + + return output_info + + +def get_repository_report(backend: Optional[Backend] = None) -> dict: + """Returns information specifically related to the maintenance needs of the repository.""" if backend is None: backend = get_manager().get_backend() - unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) - num_unref = len(unreferenced_objects) - report['user_info'] += f'Unreferenced files to delete: {num_unref}' + report_items = {} + unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) + report_items['Unreferenced files to delete'] = len(unreferenced_objects) - return report + return report_items diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 4fc25b5fc4..f78aca7fa7 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -114,9 +114,9 @@ def storage_maintain(full, dry_run): echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.') if dry_run: - maintainance_report = get_repository_report()['user_info'] - echo.echo('Repository:') - echo.echo(maintainance_report) + maintainance_report = get_repository_report() + echo.echo('\nReport on storage maintainance status:\n') + echo.echo_dictionary(maintainance_report, sort_keys=False, fmt='yaml') return if full: @@ -146,5 +146,7 @@ def storage_maintain(full, dry_run): return maintainance_report = repository_maintain(full=full) - echo.echo('\nMaintainance procedures finished:\n') - echo.echo(maintainance_report['user_info']) + echo.echo('\nMaintainance procedures finished.') + if len(maintainance_report) > 0: + echo.echo('\nMaintainance report:\n') + echo.echo_dictionary(maintainance_report, sort_keys=False, fmt='yaml') diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 74574acb76..f5d13f1b22 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -142,6 +142,9 @@ def maintain( # pylint: disable=arguments-differ :return: a dictionary with information on the operations performed. """ + from aiida.backends.control import MAINTAIN_LOGGER + DOSTORE_LOGGER = MAINTAIN_LOGGER.getChild('disk_object_store') # pylint: disable=invalid-name + pack_loose = True do_repack = full clean_storage = full @@ -162,18 +165,19 @@ def maintain( # pylint: disable=arguments-differ operation_results = {} if pack_loose: + DOSTORE_LOGGER.info('Packing all loose files ...') self.container.pack_all_loose() - operation_results['pack_loose'] = {'decription': 'Packed all loose files.'} + operation_results['packing'] = 'All loose files have been packed.' if do_repack: + DOSTORE_LOGGER.info('Re-packing all pack files ...') self.container.repack() - operation_results['repack'] = {'decription': 'Repacked all pack files to optimize file access.'} + operation_results['repacking'] = 'All pack files have been re-packed.' if clean_storage: + DOSTORE_LOGGER.info(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...') self.container.clean_storage(vacuum=do_vacuum) - operation_results['clean_storage'] = { - 'decription': f'Cleaned up the internal repo database (with `vacuum={do_vacuum}`).' - } + operation_results['cleaning'] = f'The repository database has been cleaned (with `vacuum={do_vacuum}`).' return operation_results diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py index 4fa5c19a9e..807e839ff3 100644 --- a/tests/repository/backend/test_disk_object_store.py +++ b/tests/repository/backend/test_disk_object_store.py @@ -243,12 +243,17 @@ def test_get_info(populated_repository): @pytest.mark.parametrize(('kwargs', 'output_keys', 'output_info'), ( ( {'full': False}, - ['pack_loose'], + ['packing'], {'unpacked': 2, 'packed': 4} ), ( {'full': True}, - ['pack_loose', 'repack', 'clean_storage'], + ['packing', 'repacking', 'cleaning'], + {'unpacked': 0, 'packed': 4} + ), + ( + {'full': True, 'override_do_vacuum': False}, + ['packing', 'repacking', 'cleaning'], {'unpacked': 0, 'packed': 4} ), ( @@ -267,12 +272,38 @@ def test_get_info(populated_repository): def test_maintain(populated_repository, kwargs, output_keys, output_info): """Test the ``maintain`` method.""" maintainance_info = populated_repository.maintain(**kwargs) - repository_info = populated_repository.get_info(statistics=True) - - for output_key in output_keys: - assert output_key in maintainance_info - maintainance_info.pop(output_key) - assert len(maintainance_info) == 0 + print(maintainance_info) + assert len(maintainance_info) == len(output_keys) + for keyword in output_keys: + assert keyword in maintainance_info + repository_info = populated_repository.get_info(statistics=True) assert repository_info['Objects']['unpacked'] == output_info['unpacked'] assert repository_info['Objects']['packed'] == output_info['packed'] + + +@pytest.mark.parametrize('do_vacuum', [True, False]) +def test_maintain_logging(caplog, populated_repository, do_vacuum): + """Test the logging of the ``maintain`` method.""" + import logging + + from aiida.backends.control import MAINTAIN_LOGGER + + MAINTAIN_LOGGER.setLevel(logging.INFO) + maintainance_info = populated_repository.maintain(full=True, override_do_vacuum=do_vacuum) + assert len(maintainance_info) == 3 + + list_of_logmsg = [] + for record in caplog.records: + assert record.levelname == 'INFO' + assert record.name == 'aiida.maintain.disk_object_store' + list_of_logmsg.append(record.msg) + + assert 'packing' in list_of_logmsg[0].lower() + assert 're-packing' in list_of_logmsg[1].lower() + assert 'cleaning' in list_of_logmsg[2].lower() + + if do_vacuum: + assert 'vacuum=true' in list_of_logmsg[2].lower() + else: + assert 'vacuum=false' in list_of_logmsg[2].lower() From fd071a996148cae6fe213f0c3bb8a802953403fd Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Tue, 7 Dec 2021 08:01:09 +0000 Subject: [PATCH 10/17] Fixing auto generation --- aiida/backends/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/aiida/backends/__init__.py b/aiida/backends/__init__.py index 81095dac98..e138a9beb7 100644 --- a/aiida/backends/__init__.py +++ b/aiida/backends/__init__.py @@ -9,6 +9,21 @@ ########################################################################### """Module for implementations of database backends.""" +# AUTO-GENERATED + +# yapf: disable +# pylint: disable=wildcard-import + +from .control import * + +__all__ = ( + 'MAINTAIN_LOGGER', +) + +# yapf: enable + +# END AUTO-GENERATED + BACKEND_DJANGO = 'django' BACKEND_SQLA = 'sqlalchemy' From aa5908959a6ddbde52aaf09a7073045fdabb1de3 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Tue, 7 Dec 2021 08:59:41 +0000 Subject: [PATCH 11/17] Re-adapt cli tests --- aiida/backends/control.py | 2 +- aiida/cmdline/commands/cmd_storage.py | 2 +- tests/backends/test_control.py | 18 ++++++++++-------- tests/cmdline/commands/test_storage.py | 11 +++++++---- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index d8adf2dbc6..03abd2e279 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -34,7 +34,7 @@ def repository_maintain(full: bool = False, backend: Optional[Backend] = None, * unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) MAINTAIN_LOGGER.info('Deleting unreferenced objects ...') repository.delete_objects(list(unreferenced_objects)) - maintainance_report['repository']['unreferenced'] = {'files deleted': len(unreferenced_objects)} + maintainance_report['repository']['unreferenced'] = {'deleted_files': len(unreferenced_objects)} # Perform the maintainance operations in the repository MAINTAIN_LOGGER.info('Performing repository-specific maintenance ...') try: diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index f78aca7fa7..069e85f168 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -114,7 +114,7 @@ def storage_maintain(full, dry_run): echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.') if dry_run: - maintainance_report = get_repository_report() + maintainance_report = {'repository': get_repository_report()} echo.echo('\nReport on storage maintainance status:\n') echo.echo_dictionary(maintainance_report, sort_keys=False, fmt='yaml') return diff --git a/tests/backends/test_control.py b/tests/backends/test_control.py index 643cb6ece4..ef3539f65c 100644 --- a/tests/backends/test_control.py +++ b/tests/backends/test_control.py @@ -30,7 +30,7 @@ def maintain(self, full=False, **kwargs): text = 'this is a report on the maintenance procedure' if full: text += ' with the full flag on' - return text + return {'procedure': text} @pytest.fixture(scope='function') @@ -93,14 +93,17 @@ def test_repository_maintain(monkeypatch): final_data = control.repository_maintain() assert 'database' in final_data assert 'repository' in final_data - assert 'user_info' in final_data - assert final_data['repository']['info'] == 'this is information about the repo' - assert final_data['repository']['maintain'] == 'this is a report on the maintenance procedure' - assert final_data['repository']['unreferenced'] == 3 + assert final_data['repository']['unreferenced']['deleted_files'] == 3 + assert 'procedure' in final_data['repository']['maintain'] + procedure_text = 'this is a report on the maintenance procedure' + assert final_data['repository']['maintain']['procedure'] == procedure_text final_data = control.repository_maintain(full=True) - assert final_data['repository']['maintain'] == 'this is a report on the maintenance procedure with the full flag on' + assert final_data['repository']['unreferenced']['deleted_files'] == 3 + assert 'procedure' in final_data['repository']['maintain'] + procedure_text = 'this is a report on the maintenance procedure with the full flag on' + assert final_data['repository']['maintain']['procedure'] == procedure_text def test_repository_info(monkeypatch): @@ -131,5 +134,4 @@ def test_get_repository_report(monkeypatch): repository_report = control.get_repository_report() - assert 'user_info' in repository_report - assert 'Unreferenced files to delete' in repository_report['user_info'] + assert 'Unreferenced files to delete' in repository_report diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 2b01738258..6a8939eefe 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -117,21 +117,24 @@ def tests_storage_maintain_text(run_cli_command, monkeypatch): from aiida.backends import control def reporter_mock(**kwargs): - return {'user_info': 'Underlying message'} + return {'piece_of_data': 'data_value'} monkeypatch.setattr(control, 'get_repository_report', reporter_mock) monkeypatch.setattr(control, 'repository_maintain', reporter_mock) result = run_cli_command(cmd_storage.storage_maintain, options=['--dry-run']) assert 'repository:' in result.output.lower() - assert 'Underlying message' in result.output + assert 'piece_of_data' in result.output + assert 'data_value' in result.output result = run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y') assert 'no other process should be using the aiida profile' in result.output.lower() assert 'finished' in result.output.lower() - assert 'Underlying message' in result.output + assert 'piece_of_data' in result.output + assert 'data_value' in result.output result = run_cli_command(cmd_storage.storage_maintain, user_input='Y') assert 'can be safely executed while still running aiida' in result.output.lower() assert 'finished' in result.output.lower() - assert 'Underlying message' in result.output + assert 'piece_of_data' in result.output + assert 'data_value' in result.output From 2501cde0b25683491e0f4f126f8d510fbe5a606d Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 8 Dec 2021 14:33:03 +0000 Subject: [PATCH 12/17] Major re-structure of the features --- aiida/backends/control.py | 45 ++++-------- aiida/backends/general/migrations/utils.py | 6 ++ aiida/cmdline/commands/cmd_storage.py | 24 ++----- aiida/repository/backend/abstract.py | 24 ++++--- aiida/repository/backend/disk_object_store.py | 68 +++++++++++++------ aiida/repository/backend/sandbox.py | 6 ++ .../archive/implementations/sqlite/backend.py | 6 ++ tests/repository/backend/test_abstract.py | 22 ++---- .../backend/test_disk_object_store.py | 57 ++++++++-------- 9 files changed, 135 insertions(+), 123 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index 03abd2e279..cc46dcdb94 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -23,26 +23,27 @@ MAINTAIN_LOGGER = AIIDA_LOGGER.getChild('maintain') -def repository_maintain(full: bool = False, backend: Optional[Backend] = None, **kwargs) -> dict: +def repository_maintain( + full: bool = False, + dry_run: bool = False, + backend: Optional[Backend] = None, + **kwargs, +) -> dict: """Performs maintenance tasks on the repository.""" - maintainance_report = {'database': {}, 'repository': {}} # type: ignore if backend is None: backend = get_manager().get_backend() repository = backend.get_repository() unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) - MAINTAIN_LOGGER.info('Deleting unreferenced objects ...') - repository.delete_objects(list(unreferenced_objects)) - maintainance_report['repository']['unreferenced'] = {'deleted_files': len(unreferenced_objects)} - # Perform the maintainance operations in the repository - MAINTAIN_LOGGER.info('Performing repository-specific maintenance ...') - try: - maintainance_report['repository']['maintain'] = repository.maintain(full=full, **kwargs) - except NotImplementedError: - maintainance_report['repository']['maintain'] = {} + if dry_run: + MAINTAIN_LOGGER.info(f'Would delete {len(unreferenced_objects)} unreferenced objects ...') + else: + MAINTAIN_LOGGER.info(f'Deleting {len(unreferenced_objects)} unreferenced objects ...') + repository.delete_objects(list(unreferenced_objects)) - return maintainance_report + MAINTAIN_LOGGER.info('Starting repository-specific operations ...') + repository.maintain(live=not full, dry_run=dry_run, **kwargs) def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional[Backend] = None) -> Set[str]: @@ -76,22 +77,4 @@ def get_repository_info(statistics: bool = False, backend: Optional[Backend] = N if backend is None: backend = get_manager().get_backend() repository = backend.get_repository() - - try: - output_info = repository.get_info(statistics) - except NotImplementedError: - output_info = {} - - return output_info - - -def get_repository_report(backend: Optional[Backend] = None) -> dict: - """Returns information specifically related to the maintenance needs of the repository.""" - if backend is None: - backend = get_manager().get_backend() - - report_items = {} - unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) - report_items['Unreferenced files to delete'] = len(unreferenced_objects) - - return report_items + return repository.get_info(statistics) diff --git a/aiida/backends/general/migrations/utils.py b/aiida/backends/general/migrations/utils.py index 139c522883..94c03075d7 100644 --- a/aiida/backends/general/migrations/utils.py +++ b/aiida/backends/general/migrations/utils.py @@ -122,6 +122,12 @@ def list_objects(self) -> Iterable[str]: def iter_object_streams(self, keys: List[str]): raise NotImplementedError() + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: + raise NotImplementedError + + def get_info(self, statistics: bool = False, **kwargs) -> dict: + raise NotImplementedError + def migrate_legacy_repository(shard=None): """Migrate the legacy file repository to the new disk object store and return mapping of repository metadata. diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 069e85f168..b31330ca12 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -108,19 +108,12 @@ def storage_info(statistics): ) def storage_maintain(full, dry_run): """Performs maintenance tasks on the repository.""" - from aiida.backends.control import get_repository_report, repository_maintain + from aiida.backends.control import repository_maintain if dry_run and full: echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.') - if dry_run: - maintainance_report = {'repository': get_repository_report()} - echo.echo('\nReport on storage maintainance status:\n') - echo.echo_dictionary(maintainance_report, sort_keys=False, fmt='yaml') - return - if full: - echo.echo_warning( '\nIn order to safely perform the full maintenance operations on the internal storage, no other ' 'process should be using the AiiDA profile being maintained. ' @@ -131,8 +124,7 @@ def storage_maintain(full, dry_run): '`verdi storage maintain`, without the `--full` flag.\n' ) - else: - + elif not dry_run: echo.echo( '\nThis command will perform all maintenance operations on the internal storage that can be safely ' 'executed while still running AiiDA. ' @@ -142,11 +134,9 @@ def storage_maintain(full, dry_run): '--full` for a more complete optimization.\n' ) - if not click.confirm('Are you sure you want continue in this mode?'): - return + if not dry_run: + if not click.confirm('Are you sure you want continue in this mode?'): + return - maintainance_report = repository_maintain(full=full) - echo.echo('\nMaintainance procedures finished.') - if len(maintainance_report) > 0: - echo.echo('\nMaintainance report:\n') - echo.echo_dictionary(maintainance_report, sort_keys=False, fmt='yaml') + repository_maintain(full=full, dry_run=dry_run) + echo.echo('\nRequested maintainance procedures finished.') diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py index 45845a41aa..628bb87edb 100644 --- a/aiida/repository/backend/abstract.py +++ b/aiida/repository/backend/abstract.py @@ -119,22 +119,30 @@ def list_objects(self) -> Iterable[str]: :return: An iterable for all the available object keys. """ - def get_info(self, statistics=False, **kwargs) -> dict: # pylint: disable=unused-argument,no-self-use + @abc.abstractmethod + def get_info(self, statistics: bool = False, **kwargs) -> dict: """Returns relevant information about the content of the repository. + :param statistics: + flag to enable extra information (statistics=False by default, only returns basic information). + :return: a dictionary with the information. """ - raise NotImplementedError - def maintain(self, full: bool = False, **kwargs) -> dict: # pylint: disable=unused-argument,no-self-use + @abc.abstractmethod + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: """Performs maintenance operations. - :param full: - a flag to perform operations that require to stop using the maintained profile. - :return: - a dictionary with information on the operations performed. + :param dry_run: + flag to only run + + :param live: + flag to indicate to the backend whether AiiDA is live or not (i.e. if the profile of the + backend is currently being used/accessed). The backend is expected then to only allow (and + thus set by default) the operations that are safe to perform in this state. + + :return: None """ - raise NotImplementedError @contextlib.contextmanager def open(self, key: str) -> Iterator[BinaryIO]: diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index f5d13f1b22..665ef80c63 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -12,6 +12,8 @@ __all__ = ('DiskObjectStoreRepositoryBackend',) +BYTES_TO_MB = 9.53674316E-7 + class DiskObjectStoreRepositoryBackend(AbstractRepositoryBackend): """Implementation of the ``AbstractRepositoryBackend`` using the ``disk-object-store`` as the backend.""" @@ -119,9 +121,10 @@ def get_object_hash(self, key: str) -> str: return super().get_object_hash(key) return key - def maintain( # pylint: disable=arguments-differ + def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branches self, - full: bool = False, + dry_run: bool = False, + live: bool = True, override_pack_loose: bool = None, override_do_repack: bool = None, override_clean_storage: bool = None, @@ -146,9 +149,20 @@ def maintain( # pylint: disable=arguments-differ DOSTORE_LOGGER = MAINTAIN_LOGGER.getChild('disk_object_store') # pylint: disable=invalid-name pack_loose = True - do_repack = full - clean_storage = full - do_vacuum = full + do_repack = not live + clean_storage = not live + do_vacuum = not live + + if live: + if override_do_repack or override_clean_storage or override_do_vacuum: + errmsg = 'a specifically resquest keyword cannot be applied while the profile is in use:\n' + if override_do_repack is not None: + errmsg = ' > override_do_repack = {override_do_repack}\n' + if override_clean_storage is not None: + errmsg = ' > override_clean_storage = {override_clean_storage}\n' + if override_do_vacuum is not None: + errmsg = ' > override_do_vacuum = {override_do_vacuum}\n' + raise ValueError(errmsg) if override_pack_loose is not None: pack_loose = override_pack_loose @@ -162,30 +176,40 @@ def maintain( # pylint: disable=arguments-differ if override_do_vacuum is not None: do_vacuum = override_do_vacuum - operation_results = {} - if pack_loose: - DOSTORE_LOGGER.info('Packing all loose files ...') - self.container.pack_all_loose() - operation_results['packing'] = 'All loose files have been packed.' + files_numb = self.container.count_objects()['loose'] + files_size = self.container.get_total_size()['total_size_loose'] * BYTES_TO_MB + if dry_run: + DOSTORE_LOGGER.report(f'Would pack all loose files ({files_numb} files occupying {files_size} MB) ...') + else: + DOSTORE_LOGGER.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...') + self.container.pack_all_loose() if do_repack: - DOSTORE_LOGGER.info('Re-packing all pack files ...') - self.container.repack() - operation_results['repacking'] = 'All pack files have been re-packed.' + files_numb = self.container.count_objects()['packed'] + files_size = self.container.get_total_size()['total_size_packfiles_on_disk'] * BYTES_TO_MB + if dry_run: + DOSTORE_LOGGER.report( + f'Would re-pack all pack files ({files_numb} files in packs, occupying {files_size} MB) ...' + ) + else: + DOSTORE_LOGGER.report( + f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...' + ) + self.container.repack() if clean_storage: - DOSTORE_LOGGER.info(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...') - self.container.clean_storage(vacuum=do_vacuum) - operation_results['cleaning'] = f'The repository database has been cleaned (with `vacuum={do_vacuum}`).' + if dry_run: + DOSTORE_LOGGER.report(f'Would clean the repository database (with `vacuum={do_vacuum}`) ...') + else: + DOSTORE_LOGGER.report(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...') + self.container.clean_storage(vacuum=do_vacuum) - return operation_results - def get_info( # pylint: disable=arguments-differ + def get_info( # type: ignore # pylint: disable=arguments-differ self, statistics=False, ) -> dict: - bytes_to_mb = 9.53674316E-7 output_info = {} output_info['SHA-hash algorithm'] = self.container.hash_type @@ -204,8 +228,8 @@ def get_info( # pylint: disable=arguments-differ 'packed': files_data['packed'], } output_info['Size (MB)'] = { # type: ignore - 'unpacked': size_data['total_size_loose'] * bytes_to_mb, - 'packed': size_data['total_size_packfiles_on_disk'] * bytes_to_mb, - 'other': size_data['total_size_packindexes_on_disk'] * bytes_to_mb, + 'unpacked': size_data['total_size_loose'] * BYTES_TO_MB, + 'packed': size_data['total_size_packfiles_on_disk'] * BYTES_TO_MB, + 'other': size_data['total_size_packindexes_on_disk'] * BYTES_TO_MB, } return output_info diff --git a/aiida/repository/backend/sandbox.py b/aiida/repository/backend/sandbox.py index 6d17ffc87d..ed4d46407f 100644 --- a/aiida/repository/backend/sandbox.py +++ b/aiida/repository/backend/sandbox.py @@ -115,3 +115,9 @@ def delete_objects(self, keys: List[str]) -> None: def list_objects(self) -> Iterable[str]: return self.sandbox.get_content_list() + + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: + raise NotImplementedError + + def get_info(self, statistics: bool = False, **kwargs) -> dict: + raise NotImplementedError diff --git a/aiida/tools/archive/implementations/sqlite/backend.py b/aiida/tools/archive/implementations/sqlite/backend.py index 52fdeb5f36..a089633bf0 100644 --- a/aiida/tools/archive/implementations/sqlite/backend.py +++ b/aiida/tools/archive/implementations/sqlite/backend.py @@ -193,6 +193,12 @@ def delete_objects(self, keys: List[str]) -> None: def get_object_hash(self, key: str) -> str: return key + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: + raise NotImplementedError + + def get_info(self, statistics: bool = False, **kwargs) -> dict: + raise NotImplementedError + class ArchiveBackendQueryBuilder(SqlaQueryBuilder): """Archive query builder""" diff --git a/tests/repository/backend/test_abstract.py b/tests/repository/backend/test_abstract.py index f4ca8193f1..a8c5aee836 100644 --- a/tests/repository/backend/test_abstract.py +++ b/tests/repository/backend/test_abstract.py @@ -49,6 +49,12 @@ def list_objects(self) -> Iterable[str]: def iter_object_streams(self, keys: List[str]): raise NotImplementedError + def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: + raise NotImplementedError + + def get_info(self, statistics: bool = False, **kwargs) -> dict: + raise NotImplementedError + @pytest.fixture(scope='function') def repository(): @@ -132,19 +138,3 @@ def has_objects_mock(self, keys): # pylint: disable=unused-argument repository.delete_objects(['object_key']) assert 'exist' in str(execinfo.value) assert 'object_key' in str(execinfo.value) - - -def test_default_methods(repository): - """Checks the behaviour of the default methods.""" - - with pytest.raises(NotImplementedError): - repository.get_info() - - with pytest.raises(NotImplementedError): - repository.get_info(statistics=True) - - with pytest.raises(NotImplementedError): - repository.maintain() - - with pytest.raises(NotImplementedError): - repository.maintain(full=True) diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py index 807e839ff3..ca456aa892 100644 --- a/tests/repository/backend/test_disk_object_store.py +++ b/tests/repository/backend/test_disk_object_store.py @@ -33,12 +33,12 @@ def populated_repository(repository): content = BytesIO(b'Packed file number 2') repository.put_object_from_filelike(content) - repository.maintain(full=True) + repository.maintain(live=False) content = BytesIO(b'Packed file number 3 (also loose)') repository.put_object_from_filelike(content) - repository.maintain(full=False) + repository.maintain(live=True) content = BytesIO(b'Unpacked file') repository.put_object_from_filelike(content) @@ -240,62 +240,47 @@ def test_get_info(populated_repository): #yapf: disable -@pytest.mark.parametrize(('kwargs', 'output_keys', 'output_info'), ( +@pytest.mark.parametrize(('kwargs', 'output_info'), ( ( - {'full': False}, - ['packing'], + {'live': True}, {'unpacked': 2, 'packed': 4} ), ( - {'full': True}, - ['packing', 'repacking', 'cleaning'], + {'live': False}, {'unpacked': 0, 'packed': 4} ), ( - {'full': True, 'override_do_vacuum': False}, - ['packing', 'repacking', 'cleaning'], + {'live': False, 'override_do_vacuum': False}, {'unpacked': 0, 'packed': 4} ), ( { - 'full': True, + 'live': False, 'override_pack_loose': False, 'override_do_repack': False, 'override_clean_storage': False, 'override_do_vacuum': False, }, - [], {'unpacked': 2, 'packed': 3} ), )) # yapf: enable -def test_maintain(populated_repository, kwargs, output_keys, output_info): +def test_maintain(populated_repository, kwargs, output_info): """Test the ``maintain`` method.""" - maintainance_info = populated_repository.maintain(**kwargs) - print(maintainance_info) - assert len(maintainance_info) == len(output_keys) - for keyword in output_keys: - assert keyword in maintainance_info - - repository_info = populated_repository.get_info(statistics=True) - assert repository_info['Objects']['unpacked'] == output_info['unpacked'] - assert repository_info['Objects']['packed'] == output_info['packed'] + populated_repository.maintain(**kwargs) + file_info = populated_repository.container.count_objects() + assert file_info['loose'] == output_info['unpacked'] + assert file_info['packed'] == output_info['packed'] @pytest.mark.parametrize('do_vacuum', [True, False]) def test_maintain_logging(caplog, populated_repository, do_vacuum): """Test the logging of the ``maintain`` method.""" - import logging - - from aiida.backends.control import MAINTAIN_LOGGER - - MAINTAIN_LOGGER.setLevel(logging.INFO) - maintainance_info = populated_repository.maintain(full=True, override_do_vacuum=do_vacuum) - assert len(maintainance_info) == 3 + populated_repository.maintain(live=False, override_do_vacuum=do_vacuum) list_of_logmsg = [] for record in caplog.records: - assert record.levelname == 'INFO' + assert record.levelname == 'REPORT' assert record.name == 'aiida.maintain.disk_object_store' list_of_logmsg.append(record.msg) @@ -307,3 +292,17 @@ def test_maintain_logging(caplog, populated_repository, do_vacuum): assert 'vacuum=true' in list_of_logmsg[2].lower() else: assert 'vacuum=false' in list_of_logmsg[2].lower() + + +#yapf: disable +@pytest.mark.parametrize('kwargs', [ + {'override_do_repack': True}, + {'override_clean_storage': True}, + {'override_do_vacuum': True} +]) +# yapf: enable +def test_maintain_live_overload(populated_repository, kwargs): + """Test the ``maintain`` method.""" + + with pytest.raises(ValueError): + populated_repository.maintain(live=True, **kwargs) From 8cb095109143cc708366f91e9359a2dfb567a6f8 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 8 Dec 2021 16:31:28 +0000 Subject: [PATCH 13/17] Fix control tests --- tests/backends/test_control.py | 102 +++++++++++++++---------- tests/cmdline/commands/test_storage.py | 52 ++++++++----- 2 files changed, 94 insertions(+), 60 deletions(-) diff --git a/tests/backends/test_control.py b/tests/backends/test_control.py index ef3539f65c..cc111b8266 100644 --- a/tests/backends/test_control.py +++ b/tests/backends/test_control.py @@ -25,12 +25,20 @@ def get_info(self, *args, **kwargs): def delete_objects(self, *args, **kwargs): """Method to delete objects.""" - def maintain(self, full=False, **kwargs): + def maintain(self, live=True, dry_run=False, **kwargs): """Method to perform maintainance operations.""" - text = 'this is a report on the maintenance procedure' - if full: - text += ' with the full flag on' - return {'procedure': text} + + if live: + raise ValueError('Signal that live=True') + + elif dry_run: + raise ValueError('Signal that dry_run=True') + + elif len(kwargs) > 0: + raise ValueError('Signal that kwargs were passed') + + else: + raise ValueError('Signal that live and dry_run are False') @pytest.fixture(scope='function') @@ -39,7 +47,7 @@ def clear_storage_before_test(clear_database_before_test): # pylint: disable=un repository = get_manager().get_backend().get_repository() object_keys = list(repository.list_objects()) repository.delete_objects(object_keys) - repository.maintain(full=True) + repository.maintain(live=False) @pytest.mark.usefixtures('clear_storage_before_test') @@ -82,28 +90,46 @@ def test_get_unreferenced_keyset(): assert 'aborting' in str(exc.value).lower() -def test_repository_maintain(monkeypatch): +#yapf: disable +@pytest.mark.parametrize(('kwargs', 'logged_texts'), ( + ( + {}, + [' > live: True', ' > dry_run: False'] + ), + ( + {'full': True, 'dry_run': True}, + [' > live: False', ' > dry_run: True'] + ), + ( + {'extra_kwarg': 'molly'}, + [' > live: True', ' > dry_run: False', ' > extra_kwarg: molly'] + ), +)) +# yapf: enable +@pytest.mark.usefixtures('clear_storage_before_test') +def test_repository_maintain(caplog, monkeypatch, kwargs, logged_texts): """Test the ``repository_maintain`` method.""" - from aiida.backends import control - monkeypatch.setattr(control, 'get_unreferenced_keyset', lambda **kwargs: set([1, 2, 3])) + import logging + + from aiida.backends.control import repository_maintain - BackendClass = get_manager().get_backend().__class__ # pylint: disable=invalid-name - monkeypatch.setattr(BackendClass, 'get_repository', lambda *args, **kwargs: MockRepositoryBackend()) + def mock_maintain(self, live=True, dry_run=False, **kwargs): # pylint: disable=unused-argument + logmsg = 'keywords provided:\n' + logmsg += f' > live: {live}\n' + logmsg += f' > dry_run: {dry_run}\n' + for key, val in kwargs.items(): + logmsg += f' > {key}: {val}\n' + logging.info(logmsg) - final_data = control.repository_maintain() - assert 'database' in final_data - assert 'repository' in final_data + RepoBackendClass = get_manager().get_backend().get_repository().__class__ # pylint: disable=invalid-name + monkeypatch.setattr(RepoBackendClass, 'maintain', mock_maintain) - assert final_data['repository']['unreferenced']['deleted_files'] == 3 - assert 'procedure' in final_data['repository']['maintain'] - procedure_text = 'this is a report on the maintenance procedure' - assert final_data['repository']['maintain']['procedure'] == procedure_text + with caplog.at_level(logging.INFO): + repository_maintain(**kwargs) - final_data = control.repository_maintain(full=True) - assert final_data['repository']['unreferenced']['deleted_files'] == 3 - assert 'procedure' in final_data['repository']['maintain'] - procedure_text = 'this is a report on the maintenance procedure with the full flag on' - assert final_data['repository']['maintain']['procedure'] == procedure_text + message_list = caplog.records[0].msg.splitlines() + for text in logged_texts: + assert text in message_list def test_repository_info(monkeypatch): @@ -111,27 +137,21 @@ def test_repository_info(monkeypatch): from aiida.backends.control import get_repository_info def mock_get_info(self, statistics=False, **kwargs): # pylint: disable=unused-argument - text = 'Info from repo' + output = {'value': 42} if statistics: - text += ' with statistics' - return text + output['extra_value'] = 0 + return output - RepositoryClass = get_manager().get_backend().get_repository().__class__ # pylint: disable=invalid-name - monkeypatch.setattr(RepositoryClass, 'get_info', mock_get_info) + RepoBackendClass = get_manager().get_backend().get_repository().__class__ # pylint: disable=invalid-name + monkeypatch.setattr(RepoBackendClass, 'get_info', mock_get_info) repository_info_out = get_repository_info() - assert 'Info from repo' in repository_info_out + assert 'value' in repository_info_out + assert 'extra_value' not in repository_info_out + assert repository_info_out['value'] == 42 repository_info_out = get_repository_info(statistics=True) - assert 'with statistics' in repository_info_out - - -def test_get_repository_report(monkeypatch): - """Test the ``get_repository_report`` method.""" - from aiida.backends import control - - monkeypatch.setattr(control, 'get_unreferenced_keyset', lambda **kwargs: set()) - - repository_report = control.get_repository_report() - - assert 'Unreferenced files to delete' in repository_report + assert 'value' in repository_info_out + assert 'extra_value' in repository_info_out + assert repository_info_out['value'] == 42 + assert repository_info_out['extra_value'] == 0 diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 6a8939eefe..3a3add5954 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -112,29 +112,43 @@ def tests_storage_maintain_full_dry(run_cli_command): assert '--full' in result.output.lower() -def tests_storage_maintain_text(run_cli_command, monkeypatch): +def tests_storage_maintain_logging(run_cli_command, monkeypatch, caplog): """Test all the information and cases of the storage maintain command.""" + import logging + from aiida.backends import control - def reporter_mock(**kwargs): - return {'piece_of_data': 'data_value'} + def mock_maintain(**kwargs): + logmsg = 'Provided kwargs:\n' + for key, val in kwargs.items(): + logmsg += f' > {key}: {val}\n' + logging.info(logmsg) + + monkeypatch.setattr(control, 'repository_maintain', mock_maintain) + + with caplog.at_level(logging.INFO): + _ = run_cli_command(cmd_storage.storage_maintain, user_input='Y') + + message_list = caplog.records[0].msg.splitlines() + assert ' > full: False' in message_list + assert ' > dry_run: False' in message_list + + with caplog.at_level(logging.INFO): + _ = run_cli_command(cmd_storage.storage_maintain, options=['--dry-run']) + + message_list = caplog.records[1].msg.splitlines() + assert ' > full: False' in message_list + assert ' > dry_run: True' in message_list - monkeypatch.setattr(control, 'get_repository_report', reporter_mock) - monkeypatch.setattr(control, 'repository_maintain', reporter_mock) + with caplog.at_level(logging.INFO): + _ = run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y') - result = run_cli_command(cmd_storage.storage_maintain, options=['--dry-run']) - assert 'repository:' in result.output.lower() - assert 'piece_of_data' in result.output - assert 'data_value' in result.output + message_list = caplog.records[2].msg.splitlines() + assert ' > full: True' in message_list + assert ' > dry_run: False' in message_list - result = run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y') - assert 'no other process should be using the aiida profile' in result.output.lower() - assert 'finished' in result.output.lower() - assert 'piece_of_data' in result.output - assert 'data_value' in result.output + with pytest.raises(AssertionError) as execinfo: + _ = run_cli_command(cmd_storage.storage_maintain, options=['--full', '--dry-run']) - result = run_cli_command(cmd_storage.storage_maintain, user_input='Y') - assert 'can be safely executed while still running aiida' in result.output.lower() - assert 'finished' in result.output.lower() - assert 'piece_of_data' in result.output - assert 'data_value' in result.output + error_message = str(execinfo.value) + assert 'cannot request both `--dry-run` and `--full` at the same time' in error_message From 7a6a939ba3a81b4c10c3a5b45a108f1f0d936519 Mon Sep 17 00:00:00 2001 From: Francisco Ramirez Date: Wed, 8 Dec 2021 22:42:42 +0100 Subject: [PATCH 14/17] Apply suggestions from code review Co-authored-by: Sebastiaan Huber --- aiida/cmdline/commands/cmd_storage.py | 11 ++++++----- aiida/repository/backend/disk_object_store.py | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index b31330ca12..7fd521fc82 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -88,9 +88,10 @@ def storage_info(statistics): from aiida.cmdline.utils.common import get_database_summary from aiida.orm import QueryBuilder - data = {} - data['database'] = get_database_summary(QueryBuilder, statistics) - data['repository'] = get_repository_info(statistics=statistics) + data = { + 'database': get_database_summary(QueryBuilder, statistics). + 'repository': get_repository_info(statistics=statistics) + } echo.echo_dictionary(data, sort_keys=False, fmt='yaml') @@ -104,7 +105,7 @@ def storage_info(statistics): @click.option( '--dry-run', is_flag=True, - help='Returns information that allows to estimate the impact of the maintenance operations.' + help='Run the maintenance in dry-run mode which will print actions that would be taken without actually executing them.' ) def storage_maintain(full, dry_run): """Performs maintenance tasks on the repository.""" @@ -139,4 +140,4 @@ def storage_maintain(full, dry_run): return repository_maintain(full=full, dry_run=dry_run) - echo.echo('\nRequested maintainance procedures finished.') + echo.echo_success('Requested maintenance procedures finished.') diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 665ef80c63..5e5fbe1a2a 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -12,7 +12,7 @@ __all__ = ('DiskObjectStoreRepositoryBackend',) -BYTES_TO_MB = 9.53674316E-7 +BYTES_TO_MB = 1 / 1024 ** 2 class DiskObjectStoreRepositoryBackend(AbstractRepositoryBackend): @@ -153,8 +153,7 @@ def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branch clean_storage = not live do_vacuum = not live - if live: - if override_do_repack or override_clean_storage or override_do_vacuum: + if live and (override_do_repack or override_clean_storage or override_do_vacuum): errmsg = 'a specifically resquest keyword cannot be applied while the profile is in use:\n' if override_do_repack is not None: errmsg = ' > override_do_repack = {override_do_repack}\n' From 2402bf7c217577335e7b97f698ba453388717ad4 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Wed, 8 Dec 2021 22:07:12 +0000 Subject: [PATCH 15/17] Apply latest corrections from PR --- aiida/backends/control.py | 38 +++++++--- aiida/cmdline/commands/cmd_storage.py | 12 ++- aiida/repository/backend/abstract.py | 4 +- aiida/repository/backend/disk_object_store.py | 73 +++++++------------ tests/cmdline/commands/test_storage.py | 15 +--- .../backend/test_disk_object_store.py | 20 ++--- 6 files changed, 69 insertions(+), 93 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index cc46dcdb94..42c9cb939b 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -28,18 +28,26 @@ def repository_maintain( dry_run: bool = False, backend: Optional[Backend] = None, **kwargs, -) -> dict: - """Performs maintenance tasks on the repository.""" +) -> None: + """Performs maintenance tasks on the repository. + + :param full: + flag to perform operations that require to stop using the maintained profile. + + :param dry_run: + flag to only print the actions that would be taken without actually executing them. + + :param backend: + specific backend in which to apply the maintenance (defaults to current profile). + """ if backend is None: backend = get_manager().get_backend() repository = backend.get_repository() unreferenced_objects = get_unreferenced_keyset(aiida_backend=backend) - if dry_run: - MAINTAIN_LOGGER.info(f'Would delete {len(unreferenced_objects)} unreferenced objects ...') - else: - MAINTAIN_LOGGER.info(f'Deleting {len(unreferenced_objects)} unreferenced objects ...') + MAINTAIN_LOGGER.info(f'Deleting {len(unreferenced_objects)} unreferenced objects ...') + if not dry_run: repository.delete_objects(list(unreferenced_objects)) MAINTAIN_LOGGER.info('Starting repository-specific operations ...') @@ -50,6 +58,16 @@ def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optio """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA. This should be all the soft-deleted files. + + :param check_consistency: + toggle for a check that raises if there are references in the database with no actual object in the + underlying repository. + + :param aiida_backend: + specific backend in which to apply the operation (defaults to current profile). + + :return: + a set with all the objects in the underlying repository that are not referenced in the database. """ from aiida import orm MAINTAIN_LOGGER.info('Obtaining unreferenced object keys ...') @@ -59,17 +77,17 @@ def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optio repository = aiida_backend.get_repository() - keyset_backend = set(repository.list_objects()) - keyset_aiidadb = set(orm.Node.objects(aiida_backend).iter_repo_keys()) + keyset_repository = set(repository.list_objects()) + keyset_database = set(orm.Node.objects(aiida_backend).iter_repo_keys()) if check_consistency: - keyset_missing = keyset_aiidadb - keyset_backend + keyset_missing = keyset_database - keyset_repository if len(keyset_missing) > 0: raise RuntimeError( 'There are objects referenced in the database that are not present in the repository. Aborting!' ) - return keyset_backend - keyset_aiidadb + return keyset_repository - keyset_database def get_repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: diff --git a/aiida/cmdline/commands/cmd_storage.py b/aiida/cmdline/commands/cmd_storage.py index 7fd521fc82..a11c3a128b 100644 --- a/aiida/cmdline/commands/cmd_storage.py +++ b/aiida/cmdline/commands/cmd_storage.py @@ -89,8 +89,8 @@ def storage_info(statistics): from aiida.orm import QueryBuilder data = { - 'database': get_database_summary(QueryBuilder, statistics). - 'repository': get_repository_info(statistics=statistics) + 'database': get_database_summary(QueryBuilder, statistics), + 'repository': get_repository_info(statistics=statistics), } echo.echo_dictionary(data, sort_keys=False, fmt='yaml') @@ -105,15 +105,13 @@ def storage_info(statistics): @click.option( '--dry-run', is_flag=True, - help='Run the maintenance in dry-run mode which will print actions that would be taken without actually executing them.' + help= + 'Run the maintenance in dry-run mode which will print actions that would be taken without actually executing them.' ) def storage_maintain(full, dry_run): """Performs maintenance tasks on the repository.""" from aiida.backends.control import repository_maintain - if dry_run and full: - echo.echo_critical('You cannot request both `--dry-run` and `--full` at the same time.') - if full: echo.echo_warning( '\nIn order to safely perform the full maintenance operations on the internal storage, no other ' @@ -125,7 +123,7 @@ def storage_maintain(full, dry_run): '`verdi storage maintain`, without the `--full` flag.\n' ) - elif not dry_run: + else: echo.echo( '\nThis command will perform all maintenance operations on the internal storage that can be safely ' 'executed while still running AiiDA. ' diff --git a/aiida/repository/backend/abstract.py b/aiida/repository/backend/abstract.py index 628bb87edb..135b901be8 100644 --- a/aiida/repository/backend/abstract.py +++ b/aiida/repository/backend/abstract.py @@ -134,14 +134,12 @@ def maintain(self, dry_run: bool = False, live: bool = True, **kwargs) -> None: """Performs maintenance operations. :param dry_run: - flag to only run + flag to only print the actions that would be taken without actually executing them. :param live: flag to indicate to the backend whether AiiDA is live or not (i.e. if the profile of the backend is currently being used/accessed). The backend is expected then to only allow (and thus set by default) the operations that are safe to perform in this state. - - :return: None """ @contextlib.contextmanager diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 5e5fbe1a2a..14ed0d8615 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -12,7 +12,7 @@ __all__ = ('DiskObjectStoreRepositoryBackend',) -BYTES_TO_MB = 1 / 1024 ** 2 +BYTES_TO_MB = 1 / 1024**2 class DiskObjectStoreRepositoryBackend(AbstractRepositoryBackend): @@ -125,15 +125,15 @@ def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branch self, dry_run: bool = False, live: bool = True, - override_pack_loose: bool = None, - override_do_repack: bool = None, - override_clean_storage: bool = None, - override_do_vacuum: bool = None, + pack_loose: bool = None, + do_repack: bool = None, + clean_storage: bool = None, + do_vacuum: bool = None, ) -> dict: """Performs maintenance operations. :param full: - a flag to perform operations that require to stop using the maintained profile. + flag to perform operations that require to stop using the maintained profile. :param override_pack_loose: override flag for forcing the packing of loose files. :param override_do_repack: @@ -148,60 +148,41 @@ def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branch from aiida.backends.control import MAINTAIN_LOGGER DOSTORE_LOGGER = MAINTAIN_LOGGER.getChild('disk_object_store') # pylint: disable=invalid-name - pack_loose = True - do_repack = not live - clean_storage = not live - do_vacuum = not live + if live and (do_repack or clean_storage or do_vacuum): + overrides = {'do_repack': do_repack, 'clean_storage': clean_storage, 'do_vacuum': do_vacuum} + keys = ', '.join([key for key, override in overrides if override is True]) # type: ignore + raise ValueError(f'The following overrides were enabled but cannot be if `live=True`: {keys}') - if live and (override_do_repack or override_clean_storage or override_do_vacuum): - errmsg = 'a specifically resquest keyword cannot be applied while the profile is in use:\n' - if override_do_repack is not None: - errmsg = ' > override_do_repack = {override_do_repack}\n' - if override_clean_storage is not None: - errmsg = ' > override_clean_storage = {override_clean_storage}\n' - if override_do_vacuum is not None: - errmsg = ' > override_do_vacuum = {override_do_vacuum}\n' - raise ValueError(errmsg) + pack_loose = True if pack_loose is None else pack_loose - if override_pack_loose is not None: - pack_loose = override_pack_loose - - if override_do_repack is not None: - do_repack = override_do_repack - - if override_clean_storage is not None: - clean_storage = override_clean_storage - - if override_do_vacuum is not None: - do_vacuum = override_do_vacuum + if live: + do_repack = False + clean_storage = False + do_vacuum = False + else: + do_repack = True if do_repack is None else do_repack + clean_storage = True if clean_storage is None else clean_storage + do_vacuum = True if do_vacuum is None else do_vacuum if pack_loose: files_numb = self.container.count_objects()['loose'] files_size = self.container.get_total_size()['total_size_loose'] * BYTES_TO_MB - if dry_run: - DOSTORE_LOGGER.report(f'Would pack all loose files ({files_numb} files occupying {files_size} MB) ...') - else: - DOSTORE_LOGGER.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...') + DOSTORE_LOGGER.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...') + if not dry_run: self.container.pack_all_loose() if do_repack: files_numb = self.container.count_objects()['packed'] files_size = self.container.get_total_size()['total_size_packfiles_on_disk'] * BYTES_TO_MB - if dry_run: - DOSTORE_LOGGER.report( - f'Would re-pack all pack files ({files_numb} files in packs, occupying {files_size} MB) ...' - ) - else: - DOSTORE_LOGGER.report( - f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...' - ) + DOSTORE_LOGGER.report( + f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...' + ) + if not dry_run: self.container.repack() if clean_storage: - if dry_run: - DOSTORE_LOGGER.report(f'Would clean the repository database (with `vacuum={do_vacuum}`) ...') - else: - DOSTORE_LOGGER.report(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...') + DOSTORE_LOGGER.report(f'Cleaning the repository database (with `vacuum={do_vacuum}`) ...') + if not dry_run: self.container.clean_storage(vacuum=do_vacuum) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 3a3add5954..e7737b3c18 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -105,13 +105,6 @@ def mocked_migrate(self): # pylint: disable=no-self-use assert 'passed error message' in result.output -def tests_storage_maintain_full_dry(run_cli_command): - """Test that ``verdi storage migrate`` detects the cancelling of the interactive prompt.""" - result = run_cli_command(cmd_storage.storage_maintain, options=['--full', '--dry-run'], raises=True) - assert '--dry-run' in result.output.lower() - assert '--full' in result.output.lower() - - def tests_storage_maintain_logging(run_cli_command, monkeypatch, caplog): """Test all the information and cases of the storage maintain command.""" import logging @@ -141,14 +134,8 @@ def mock_maintain(**kwargs): assert ' > dry_run: True' in message_list with caplog.at_level(logging.INFO): - _ = run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y') + run_cli_command(cmd_storage.storage_maintain, options=['--full'], user_input='Y') message_list = caplog.records[2].msg.splitlines() assert ' > full: True' in message_list assert ' > dry_run: False' in message_list - - with pytest.raises(AssertionError) as execinfo: - _ = run_cli_command(cmd_storage.storage_maintain, options=['--full', '--dry-run']) - - error_message = str(execinfo.value) - assert 'cannot request both `--dry-run` and `--full` at the same time' in error_message diff --git a/tests/repository/backend/test_disk_object_store.py b/tests/repository/backend/test_disk_object_store.py index ca456aa892..52e320b01f 100644 --- a/tests/repository/backend/test_disk_object_store.py +++ b/tests/repository/backend/test_disk_object_store.py @@ -250,16 +250,16 @@ def test_get_info(populated_repository): {'unpacked': 0, 'packed': 4} ), ( - {'live': False, 'override_do_vacuum': False}, + {'live': False, 'do_vacuum': False}, {'unpacked': 0, 'packed': 4} ), ( { 'live': False, - 'override_pack_loose': False, - 'override_do_repack': False, - 'override_clean_storage': False, - 'override_do_vacuum': False, + 'pack_loose': False, + 'do_repack': False, + 'clean_storage': False, + 'do_vacuum': False, }, {'unpacked': 2, 'packed': 3} ), @@ -276,7 +276,7 @@ def test_maintain(populated_repository, kwargs, output_info): @pytest.mark.parametrize('do_vacuum', [True, False]) def test_maintain_logging(caplog, populated_repository, do_vacuum): """Test the logging of the ``maintain`` method.""" - populated_repository.maintain(live=False, override_do_vacuum=do_vacuum) + populated_repository.maintain(live=False, do_vacuum=do_vacuum) list_of_logmsg = [] for record in caplog.records: @@ -294,13 +294,7 @@ def test_maintain_logging(caplog, populated_repository, do_vacuum): assert 'vacuum=false' in list_of_logmsg[2].lower() -#yapf: disable -@pytest.mark.parametrize('kwargs', [ - {'override_do_repack': True}, - {'override_clean_storage': True}, - {'override_do_vacuum': True} -]) -# yapf: enable +@pytest.mark.parametrize('kwargs', [{'do_repack': True}, {'clean_storage': True}, {'do_vacuum': True}]) def test_maintain_live_overload(populated_repository, kwargs): """Test the ``maintain`` method.""" From 1432dcf28c44331f523d8efecd6b9bf3b7b20f00 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Thu, 9 Dec 2021 10:44:34 +0000 Subject: [PATCH 16/17] Last PR corrections --- aiida/backends/control.py | 12 ++++++---- aiida/repository/backend/disk_object_store.py | 24 +++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/aiida/backends/control.py b/aiida/backends/control.py index 42c9cb939b..73496e80b1 100644 --- a/aiida/backends/control.py +++ b/aiida/backends/control.py @@ -12,11 +12,13 @@ # This is because they have to go through all the nodes to gather the list of keys that AiiDA is keeping # track of (since they are descentralized in each node entry). # See the get_unreferenced_keyset function -from typing import Optional, Set +from typing import TYPE_CHECKING, Optional, Set from aiida.common.log import AIIDA_LOGGER from aiida.manage.manager import get_manager -from aiida.orm.implementation import Backend + +if TYPE_CHECKING: + from aiida.orm.implementation import Backend __all__ = ('MAINTAIN_LOGGER',) @@ -26,7 +28,7 @@ def repository_maintain( full: bool = False, dry_run: bool = False, - backend: Optional[Backend] = None, + backend: Optional['Backend'] = None, **kwargs, ) -> None: """Performs maintenance tasks on the repository. @@ -54,7 +56,7 @@ def repository_maintain( repository.maintain(live=not full, dry_run=dry_run, **kwargs) -def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional[Backend] = None) -> Set[str]: +def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optional['Backend'] = None) -> Set[str]: """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA. This should be all the soft-deleted files. @@ -90,7 +92,7 @@ def get_unreferenced_keyset(check_consistency: bool = True, aiida_backend: Optio return keyset_repository - keyset_database -def get_repository_info(statistics: bool = False, backend: Optional[Backend] = None) -> dict: +def get_repository_info(statistics: bool = False, backend: Optional['Backend'] = None) -> dict: """Returns general information on the repository.""" if backend is None: backend = get_manager().get_backend() diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 14ed0d8615..05f4cb8c76 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -121,7 +121,7 @@ def get_object_hash(self, key: str) -> str: return super().get_object_hash(key) return key - def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branches + def maintain( # pylint: disable=arguments-differ,too-many-branches self, dry_run: bool = False, live: bool = True, @@ -132,16 +132,16 @@ def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branch ) -> dict: """Performs maintenance operations. - :param full: - flag to perform operations that require to stop using the maintained profile. - :param override_pack_loose: - override flag for forcing the packing of loose files. - :param override_do_repack: - override flag for forcing the re-packing of already packed files. - :param override_clean_storage: - override flag for forcing the cleaning of soft-deleted files from the repository. - :param override_do_vacuum: - override flag for forcing the vacuuming of the internal database when cleaning the repository. + :param live: + if True, will only perform operations that are safe to do while the repository is in use. + :param pack_loose: + flag for forcing the packing of loose files. + :param do_repack: + flag for forcing the re-packing of already packed files. + :param clean_storage: + flag for forcing the cleaning of soft-deleted files from the repository. + :param do_vacuum: + flag for forcing the vacuuming of the internal database when cleaning the repository. :return: a dictionary with information on the operations performed. """ @@ -186,7 +186,7 @@ def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branch self.container.clean_storage(vacuum=do_vacuum) - def get_info( # type: ignore # pylint: disable=arguments-differ + def get_info( # pylint: disable=arguments-differ self, statistics=False, ) -> dict: From f0aa923a0380dee1e45af5c04885300ac346a5a5 Mon Sep 17 00:00:00 2001 From: ramirezfranciscof Date: Thu, 9 Dec 2021 11:14:17 +0000 Subject: [PATCH 17/17] Add mypy ignores --- aiida/repository/backend/disk_object_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiida/repository/backend/disk_object_store.py b/aiida/repository/backend/disk_object_store.py index 05f4cb8c76..66e3e886c5 100644 --- a/aiida/repository/backend/disk_object_store.py +++ b/aiida/repository/backend/disk_object_store.py @@ -121,7 +121,7 @@ def get_object_hash(self, key: str) -> str: return super().get_object_hash(key) return key - def maintain( # pylint: disable=arguments-differ,too-many-branches + def maintain( # type: ignore # pylint: disable=arguments-differ,too-many-branches self, dry_run: bool = False, live: bool = True, @@ -186,7 +186,7 @@ def maintain( # pylint: disable=arguments-differ,too-many-branches self.container.clean_storage(vacuum=do_vacuum) - def get_info( # pylint: disable=arguments-differ + def get_info( # type: ignore # pylint: disable=arguments-differ self, statistics=False, ) -> dict: