Skip to content

Commit

Permalink
Caching: Remove core and plugin information from hash calculation
Browse files Browse the repository at this point in the history
When the caching mechanism was first introduced, precaution was taken to
reduce the chances for false positive cache hits to a miniumum as much
as possible. For that reason, the version of the package that provides
the node class was included in the calculation of the hash. This would
cover `Data` plugins, however, since the `ProcessNode` class is not
subclassable, there the version information was added slightly
differently. When a `Process` is run, the `version` attribute was added
to the attributes of its `ProcessNode` which includes the version of
`aiida-core` that is currently installed, as well as that of the plugin
package providing the process plugin.

Now that caching has been battle-tested and shown to be useful in
practice, this approach turns out to be too limiting. Now, whenever a
new version of `aiida-core` or a plugin is installed, the associated
nodes are invalidated as valid cache sources as future node instances
will have a different hash, merely because the version has changed.

Therefore, the version information is removed from the calculation of
the hash. The explicit version of the package providing the node class
that was explicitly added in `NodeCaching.get_objects_to_hash` is
removed and the `version` attribute is added to the list of
`_hash_ignored_attributes`.
  • Loading branch information
unkcpz authored and sphuber committed Apr 16, 2024
1 parent b544f7c commit 2222c65
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 13 deletions.
8 changes: 0 additions & 8 deletions src/aiida/orm/nodes/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import importlib
import typing as t

from aiida.common import exceptions
Expand Down Expand Up @@ -62,16 +61,9 @@ def _get_objects_to_hash(self) -> list[t.Any]:

def get_objects_to_hash(self) -> list[t.Any]:
"""Return a list of objects which should be included in the hash."""
top_level_module = self._node.__module__.split('.', 1)[0]

try:
version = importlib.import_module(top_level_module).__version__
except (ImportError, AttributeError) as exc:
raise exceptions.HashingError("The node's package version could not be determined") from exc

return {
'class': str(self._node.__class__),
'version': version,
'attributes': {
key: val
for key, val in self._node.base.attributes.items()
Expand Down
1 change: 1 addition & 0 deletions src/aiida/orm/nodes/process/calculation/calcjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def _hash_ignored_attributes(cls) -> Tuple[str, ...]: # noqa: N805
'priority',
'max_wallclock_seconds',
'max_memory_kb',
'version',
)

@property
Expand Down
24 changes: 24 additions & 0 deletions tests/engine/processes/calcjobs/test_calc_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,30 @@ def test_get_hash(self, get_calcjob_builder):
_, node = launch.run_get_node(builder)
assert node.base.extras.get(node.base.caching._HASH_EXTRA_KEY) == node.base.caching.get_hash()

def test_get_objects_attributes(self, get_calcjob_builder):
"""Test that :meth:`aiida.orm.CalcJobNode._get_objects_to_hash` returns the expected objects."""
builder = get_calcjob_builder()
_, node = launch.run_get_node(builder)
objects = node.base.caching.get_objects_to_hash()

assert 'version' not in objects
assert 'version' not in objects['attributes']

def test_compute_hash_version_independent(self, get_calcjob_builder, monkeypatch, manager):
"""Test that :meth:`aiida.orm.CalcJobNode.compute_hash` is independent of the version of ``aiida-core``."""
import aiida
_, node_a = launch.run_get_node(get_calcjob_builder())

monkeypatch.setattr(aiida, '__version__', '0.0.0')

# The global runner uses an instance of the ``PluginVersionProvider`` that caches versions so need to reset it
manager.reset_runner()

_, node_b = launch.run_get_node(get_calcjob_builder())
assert node_b.base.attributes.get('version')['core'] == '0.0.0'
assert node_b.base.caching.compute_hash() == node_a.base.caching.compute_hash()
assert node_b.base.caching.get_hash() == node_a.base.caching.get_hash()

def test_process_status(self):
"""Test that the process status is properly reset if calculation ends successfully."""
_, node = launch.run_get_node(ArithmeticAddCalculation, code=self.remote_code, **self.inputs)
Expand Down
2 changes: 1 addition & 1 deletion tests/engine/processes/calcjobs/test_monitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def define(cls, spec):
spec.output_namespace('messages', valid_type=Str)


def monitor_store_message(node, transport, **kwargs): # pylint: disable=unused-argument
def monitor_store_message(node, transport, **kwargs):
"""Test monitor that returns an output node."""
import datetime
import secrets
Expand Down
15 changes: 11 additions & 4 deletions tests/orm/nodes/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,15 +982,22 @@ def test_store_from_cache(self, tmp_path):
assert clone.base.caching.get_cache_source() == data.uuid
assert data.base.caching.get_hash() == clone.base.caching.get_hash()

def test_hashing_errors(self, aiida_caplog):
def test_hashing_errors(self, caplog, monkeypatch):
"""Tests that ``compute_hash`` fails in an expected manner."""
from aiida.orm.nodes.caching import NodeCaching
node = Data().store()
node.__module__ = 'unknown' # this will inhibit package version determination

# monkeypatch `get_objects_to_hash` to raise a fake error
monkeypatch.setattr(
NodeCaching,
'get_objects_to_hash',
lambda _: (_ for _ in ()).throw(exceptions.HashingError('fake hashing error')),
)
result = node.base.caching.compute_hash(ignore_errors=True)
assert result is None
assert aiida_caplog.record_tuples == [(node.logger.name, logging.ERROR, 'Node hashing failed')]
assert caplog.record_tuples == [(node.logger.name, logging.ERROR, 'Node hashing failed')]

with pytest.raises(exceptions.HashingError, match='package version could not be determined'):
with pytest.raises(exceptions.HashingError, match='fake hashing error'):
result = node.base.caching.compute_hash(ignore_errors=False)
assert result is None

Expand Down

0 comments on commit 2222c65

Please sign in to comment.