From a000899e8dd0e4df28ff39dfffcd971595b4d166 Mon Sep 17 00:00:00 2001 From: asle85 Date: Mon, 14 Jan 2019 17:01:54 +0100 Subject: [PATCH] Add data migrations for `TrajectoryData` nodes In the previous commit, the ORM for `TrajectoryData` was changed to start storing the `symbols` as a list in the attributes of the node instead of a `numpy` array in the repository, to make it possible to query for the symbols. This requires a data migration which is implemented in this commit. Since it involves the deletion of the data from the repository after it has been copied to the database, the migration is performed in two stages. The first will copy the data from the repository to the attributes on the node and the second will delete the data in the repository. In this way, if an exception occurs during the migration, no data will be lost. --- .../0026_trajectory_symbols_to_attribute.py | 77 ++++ .../0027_delete_trajectory_symbols_array.py | 83 ++++ .../backends/djsite/db/migrations/__init__.py | 375 +++++++++++++++++- .../backends/djsite/db/subtests/migrations.py | 71 ++++ aiida/backends/djsite/manage.py | 61 +++ ...798d4d3_trajectory_symbols_to_attribute.py | 56 +++ ...5c2ea1439d_move_data_within_node_module.py | 2 +- ...84bcc35_delete_trajectory_symbols_array.py | 63 +++ aiida/backends/sqlalchemy/tests/migrations.py | 103 ++++- aiida/orm/node/data/array/trajectory.py | 6 +- aiida/orm/node/data/cif.py | 4 +- aiida/orm/node/data/structure.py | 4 +- docs/source/concepts/workflows.rst | 2 +- .../core/modifying_the_schema.rst | 2 +- 14 files changed, 894 insertions(+), 15 deletions(-) create mode 100644 aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py create mode 100644 aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py create mode 100644 aiida/backends/djsite/manage.py create mode 100644 aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py create mode 100644 
aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py diff --git a/aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py b/aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py new file mode 100644 index 0000000000..9d8d4ff48a --- /dev/null +++ b/aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,too-few-public-methods +"""Data migration for `TrajectoryData` nodes where symbol lists are moved from repository array to attribute. + +This process has to be done in two separate consecutive migrations to prevent data loss in between. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-member,no-name-in-module,import-error +from django.db import migrations + +from aiida.backends.djsite.db.migrations import upgrade_schema_version +from . 
import ModelModifierV0025 + +REVISION = '1.0.26' +DOWN_REVISION = '1.0.25' + + +def create_trajectory_symbols_attribute(apps, _): + """Create the symbols attribute from the repository array for all `TrajectoryData` nodes.""" + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + symbols = trajectory.get_array('symbols').tolist() + modifier.set_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'symbols', symbols) + + +def delete_trajectory_symbols_attribute(apps, _): + """Delete the symbols attribute for all `TrajectoryData` nodes.""" + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + modifier.del_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'symbols') + + +class Migration(migrations.Migration): + """Storing symbols in TrajectoryData nodes as attributes, while keeping numpy arrays. + TrajectoryData symbols arrays are deleted in the next migration. 
+ We split the migration into two because every migration is wrapped in an atomic transaction and we want to avoid + to delete the data while it is written in the database""" + + dependencies = [ + ('db', '0025_move_data_within_node_module'), + ] + + operations = [ + migrations.RunPython(create_trajectory_symbols_attribute, reverse_code=delete_trajectory_symbols_attribute), + upgrade_schema_version(REVISION, DOWN_REVISION) + ] diff --git a/aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py b/aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py new file mode 100644 index 0000000000..8c84c9d8a3 --- /dev/null +++ b/aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,too-few-public-methods +"""Data migration for `TrajectoryData` nodes where symbol lists are moved from repository array to attribute. + +This process has to be done in two separate consecutive migrations to prevent data loss in between. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-name-in-module,import-error +from django.db import migrations + +from aiida.backends.djsite.db.migrations import upgrade_schema_version +from . 
import ModelModifierV0025 + +REVISION = '1.0.27' +DOWN_REVISION = '1.0.26' + + +def delete_trajectory_symbols_array(apps, _): + """Delete the symbols array from all `TrajectoryData` nodes.""" + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + modifier.del_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'array|symbols') + # Remove the .npy file (using delete_array raises ModificationNotAllowed error) + trajectory._get_folder_pathsubfolder.remove_path('symbols.npy') # pylint: disable=protected-access + + +def create_trajectory_symbols_array(apps, _): + """Create the symbols array for all `TrajectoryData` nodes.""" + import numpy + import tempfile + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + symbols = numpy.array(trajectory.get_attr('symbols')) + # Save the .npy file (using set_array raises ModificationNotAllowed error) + with tempfile.NamedTemporaryFile() as _file: + numpy.save(_file, symbols) + _file.flush() + trajectory._get_folder_pathsubfolder.insert_path(_file.name, 'symbols.npy') # pylint: disable=protected-access + modifier.set_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'array|symbols', list(symbols.shape)) + + +class Migration(migrations.Migration): + """Deleting duplicated information stored in TrajectoryData symbols numpy arrays""" + + dependencies = [ + ('db', '0026_trajectory_symbols_to_attribute'), + ] + + 
operations = [ + migrations.RunPython(delete_trajectory_symbols_array, reverse_code=create_trajectory_symbols_array), + upgrade_schema_version(REVISION, DOWN_REVISION) + ] diff --git a/aiida/backends/djsite/db/migrations/__init__.py b/aiida/backends/djsite/db/migrations/__init__.py index 060d3a888f..0e6f56d9d2 100644 --- a/aiida/backends/djsite/db/migrations/__init__.py +++ b/aiida/backends/djsite/db/migrations/__init__.py @@ -10,8 +10,11 @@ from __future__ import division from __future__ import print_function from __future__ import absolute_import +from django.apps import apps -LATEST_MIGRATION = '0025_move_data_within_node_module' +import six + +LATEST_MIGRATION = '0027_delete_trajectory_symbols_array' def _update_schema_version(version, apps, schema_editor): @@ -36,3 +39,373 @@ def current_schema_version(): fromlist=['REVISION'] ) return latest_migration.REVISION + + +# Here I copied the class method definitions from aiida.backends.djsite.db.models +# used to set and delete values for nodes. +# This was done because: +# 1) The DbAttribute object loaded with apps.get_model() does not provide the class methods +# 2) When the django model changes the migration will continue to work +# 3) If we defined in the migration a new class with these methods as an extension of the DbAttribute class, +# django detects a change in the model and creates a new migration + + +class ModelModifierV0025(object): + + from aiida.backends.utils import AIIDA_ATTRIBUTE_SEP + + _subspecifier_field_name = 'dbnode' + _sep = AIIDA_ATTRIBUTE_SEP + + def __init__(self, model_class): + self._model_class = model_class + + def validate_key(self, key): + """ + Validate the key string to check if it is valid (e.g., if it does not + contain the separator symbol.). 
+ + :return: None if the key is valid + :raise ValidationError: if the key is not valid + """ + from aiida.backends.utils import validate_attribute_key + return validate_attribute_key(key) + + def set_value_for_node(self, dbnode, key, value, with_transaction=False, + stop_if_existing=False): + """ + This is the raw-level method that accesses the DB. No checks are done + to prevent the user from (re)setting a valid key. + To be used only internally. + + :todo: there may be some error on concurrent write; + not checked in this unlucky case! + + :param dbnode: the dbnode for which the attribute should be stored; + if an integer is passed, this is used as the PK of the dbnode, + without any further check (for speed reasons) + :param key: the key of the attribute to store; must be a level-zero + attribute (i.e., no separators in the key) + :param value: the value of the attribute to store + :param with_transaction: if True, do this within a transaction, + so that nothing gets stored if a subitem cannot be created. + Otherwise, if this parameter is False (default), no transaction management + is performed. + :param stop_if_existing: if True, it will stop with an + UniquenessError exception if the key already exists + for the given node. Otherwise, it will + first delete the old value, if existent. The use with True is + useful if you want to use a given attribute as a "locking" value, + e.g. to avoid performing an action twice on the same node. + Note that, if you are using transactions, you may get the error + only when the transaction is committed. + + :raise ValueError: if the key contains the separator symbol used + internally to unpack dictionaries and lists (defined in cls._sep). 
+ """ + cls = self._model_class + DbNode = apps.get_model('db', 'DbNode') + + if isinstance(dbnode, six.integer_types): + dbnode_node = DbNode(id=dbnode) + else: + dbnode_node = dbnode + + self.set_value(key, value, with_transaction=with_transaction, + subspecifier_value=dbnode_node, + stop_if_existing=stop_if_existing) + + def del_value_for_node(self, dbnode, key): + """ + Delete an attribute from the database for the given dbnode. + + :note: no exception is raised if no attribute with the given key is + found in the DB. + + :param dbnode: the dbnode for which you want to delete the key. + :param key: the key to delete. + """ + self.del_value(key, subspecifier_value=dbnode) + + def del_value(self, key, only_children=False, subspecifier_value=None): + """ + Delete a value associated with the given key (if existing). + + :note: No exceptions are raised if no entry is found. + + :param key: the key to delete. Can contain the separator self._sep if + you want to delete a subkey. + :param only_children: if True, delete only children and not the + entry itself. + :param subspecifier_value: must be None if this class has no + subspecifier set (e.g., the DbSetting class). + Must be the value of the subspecifier (e.g., the dbnode) for classes + that define it (e.g. 
DbAttribute and DbExtra) + """ + cls = self._model_class + from django.db.models import Q + + if self._subspecifier_field_name is None: + if subspecifier_value is not None: + raise ValueError("You cannot specify a subspecifier value for " + "class {} because it has no subspecifiers" + "".format(cls.__name__)) + subspecifiers_dict = {} + else: + if subspecifier_value is None: + raise ValueError("You also have to specify a subspecifier value " + "for class {} (the {})".format(self.__name__, + self._subspecifier_field_name)) + subspecifiers_dict = {self._subspecifier_field_name: + subspecifier_value} + + query = Q(key__startswith="{parentkey}{sep}".format( + parentkey=key, sep=self._sep), + **subspecifiers_dict) + + if not only_children: + query.add(Q(key=key, **subspecifiers_dict), Q.OR) + + cls.objects.filter(query).delete() + + def set_value(self, key, value, with_transaction=False, + subspecifier_value=None, other_attribs={}, + stop_if_existing=False): + """ + Set a new value in the DB, possibly associated to the given subspecifier. + + :note: This method also stored directly in the DB. + + :param key: a string with the key to create (must be a level-0 + attribute, that is it cannot contain the separator cls._sep). + :param value: the value to store (a basic data type or a list or a dict) + :param subspecifier_value: must be None if this class has no + subspecifier set (e.g., the DbSetting class). + Must be the value of the subspecifier (e.g., the dbnode) for classes + that define it (e.g. DbAttribute and DbExtra) + :param with_transaction: True if you want this function to be managed + with transactions. Set to False if you already have a manual + management of transactions in the block where you are calling this + function (useful for speed improvements to avoid recursive + transactions) + :param other_attribs: a dictionary of other parameters, to store + only on the level-zero attribute (e.g. for description in DbSetting). 
+ :param stop_if_existing: if True, it will stop with an + UniquenessError exception if the new entry would violate an + uniqueness constraint in the DB (same key, or same key+node, + depending on the specific subclass). Otherwise, it will + first delete the old value, if existent. The use with True is + useful if you want to use a given attribute as a "locking" value, + e.g. to avoid to perform an action twice on the same node. + Note that, if you are using transactions, you may get the error + only when the transaction is committed. + """ + cls = self._model_class + from django.db import transaction + + self.validate_key(key) + + try: + if with_transaction: + sid = transaction.savepoint() + + # create_value returns a list of nodes to store + to_store = self.create_value(key, value, + subspecifier_value=subspecifier_value, + other_attribs=other_attribs) + + if to_store: + if not stop_if_existing: + # Delete the old values if stop_if_existing is False, + # otherwise don't delete them and hope they don't + # exist. If they exist, I'll get an UniquenessError + + # NOTE! Be careful in case the extra/attribute to + # store is not a simple attribute but a list or dict: + # like this, it should be ok because if we are + # overwriting an entry it will stop anyway to avoid + # to overwrite the main entry, but otherwise + # there is the risk that trailing pieces remain + # so in general it is good to recursively clean + # all sub-items. + self.del_value(key, + subspecifier_value=subspecifier_value) + cls.objects.bulk_create(to_store) + + if with_transaction: + transaction.savepoint_commit(sid) + except BaseException as exc: # All exceptions including CTRL+C, ... 
+ from django.db.utils import IntegrityError + from aiida.common.exceptions import UniquenessError + + if with_transaction: + transaction.savepoint_rollback(sid) + if isinstance(exc, IntegrityError) and stop_if_existing: + raise UniquenessError("Impossible to create the required " + "entry " + "in table '{}', " + "another entry already exists and the creation would " + "violate an uniqueness constraint.\nFurther details: " + "{}".format(cls.__name__, exc)) + raise + + def create_value(self, key, value, subspecifier_value=None, + other_attribs={}): + """ + Create a new list of attributes, without storing them, associated + with the current key/value pair (and to the given subspecifier, + e.g. the DbNode for DbAttributes and DbExtras). + + :note: No hits are done on the DB, in particular no check is done + on the existence of the given nodes. + + :param key: a string with the key to create (can contain the + separator self._sep if this is a sub-attribute: indeed, this + function calls itself recursively) + :param value: the value to store (a basic data type or a list or a dict) + :param subspecifier_value: must be None if this class has no + subspecifier set (e.g., the DbSetting class). + Must be the value of the subspecifier (e.g., the dbnode) for classes + that define it (e.g. DbAttribute and DbExtra) + :param other_attribs: a dictionary of other parameters, to store + only on the level-zero attribute (e.g. for description in DbSetting). + + :return: always a list of class instances; it is the user + responsibility to store such entries (typically with a Django + bulk_create() call). 
+ """ + cls = self._model_class + import datetime + + import aiida.common.json as json + from aiida.common.timezone import is_naive, make_aware, get_current_timezone + + if self._subspecifier_field_name is None: + if subspecifier_value is not None: + raise ValueError("You cannot specify a subspecifier value for " + "class {} because it has no subspecifiers" + "".format(cls.__name__)) + new_entry = cls(key=key, **other_attribs) + else: + if subspecifier_value is None: + raise ValueError("You also have to specify a subspecifier value " + "for class {} (the {})".format(cls.__name__, + self._subspecifier_field_name)) + further_params = other_attribs.copy() + further_params.update({self._subspecifier_field_name: + subspecifier_value}) + new_entry = cls(key=key, **further_params) + + list_to_return = [new_entry] + + if value is None: + new_entry.datatype = 'none' + new_entry.bval = None + new_entry.tval = '' + new_entry.ival = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, bool): + new_entry.datatype = 'bool' + new_entry.bval = value + new_entry.tval = '' + new_entry.ival = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, six.integer_types): + new_entry.datatype = 'int' + new_entry.ival = value + new_entry.tval = '' + new_entry.bval = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, float): + new_entry.datatype = 'float' + new_entry.fval = value + new_entry.tval = '' + new_entry.ival = None + new_entry.bval = None + new_entry.dval = None + + elif isinstance(value, six.string_types): + new_entry.datatype = 'txt' + new_entry.tval = value + new_entry.bval = None + new_entry.ival = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, datetime.datetime): + + # current timezone is taken from the settings file of django + if is_naive(value): + value_to_set = make_aware(value, get_current_timezone()) + else: + value_to_set = value + + new_entry.datatype = 
'date' + # TODO: time-aware and time-naive datetime objects, see + # https://docs.djangoproject.com/en/dev/topics/i18n/timezones/#naive-and-aware-datetime-objects + new_entry.dval = value_to_set + new_entry.tval = '' + new_entry.bval = None + new_entry.ival = None + new_entry.fval = None + + elif isinstance(value, (list, tuple)): + + new_entry.datatype = 'list' + new_entry.dval = None + new_entry.tval = '' + new_entry.bval = None + new_entry.ival = len(value) + new_entry.fval = None + + for i, subv in enumerate(value): + # I do not need get_or_create here, because + # above I deleted all children (and I + # expect no concurrency) + # NOTE: I do not pass other_attribs + list_to_return.extend(self.create_value( + key=("{}{}{:d}".format(key, self._sep, i)), + value=subv, + subspecifier_value=subspecifier_value)) + + elif isinstance(value, dict): + + new_entry.datatype = 'dict' + new_entry.dval = None + new_entry.tval = '' + new_entry.bval = None + new_entry.ival = len(value) + new_entry.fval = None + + for subk, subv in value.items(): + self.validate_key(subk) + + # I do not need get_or_create here, because + # above I deleted all children (and I + # expect no concurrency) + # NOTE: I do not pass other_attribs + list_to_return.extend(self.create_value( + key="{}{}{}".format(key, self._sep, subk), + value=subv, + subspecifier_value=subspecifier_value)) + else: + try: + jsondata = json.dumps(value) + except TypeError: + raise ValueError( + "Unable to store the value: it must be either a basic datatype, or json-serializable: {}".format( + value)) + + new_entry.datatype = 'json' + new_entry.tval = jsondata + new_entry.bval = None + new_entry.ival = None + new_entry.fval = None + + return list_to_return diff --git a/aiida/backends/djsite/db/subtests/migrations.py b/aiida/backends/djsite/db/subtests/migrations.py index bcbf5a541f..23345f62d6 100644 --- a/aiida/backends/djsite/db/subtests/migrations.py +++ b/aiida/backends/djsite/db/subtests/migrations.py @@ -10,8 +10,10 @@ 
from __future__ import division from __future__ import print_function from __future__ import absolute_import + from six.moves import range +import numpy import tempfile from django.apps import apps @@ -587,3 +589,72 @@ def test_data_node_type_string(self): self.assertEqual(node_data.type, 'node.data.int.Int.') self.assertEqual(node_calc.type, 'node.process.calculation.calcjob.CalcJobNode.') + + +class TestTrajectoryDataMigration(TestMigrations): + + migrate_from = '0025_move_data_within_node_module' + migrate_to = '0027_delete_trajectory_symbols_array' + + # I create sample data + stepids = numpy.array([60, 70]) + times = stepids * 0.01 + positions = numpy.array([[[0., 0., 0.], [0.5, 0.5, 0.5], [1.5, 1.5, 1.5]], [[0., 0., 0.], [0.5, 0.5, 0.5], + [1.5, 1.5, 1.5]]]) + velocities = numpy.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], + [-0.5, -0.5, -0.5]]]) + cells = numpy.array([[[ + 2., + 0., + 0., + ], [ + 0., + 2., + 0., + ], [ + 0., + 0., + 2., + ]], [[ + 3., + 0., + 0., + ], [ + 0., + 3., + 0., + ], [ + 0., + 0., + 3., + ]]]) + + def setUpBeforeMigration(self): + from aiida.orm.node.data.array.trajectory import TrajectoryData + + # Create a TrajectoryData node + node = TrajectoryData() + symbols = numpy.array(['H', 'O', 'C']) + + # I set the node + node.set_array('steps', self.stepids) + node.set_array('cells', self.cells) + node.set_array('symbols', symbols) + node.set_array('positions', self.positions) + node.set_array('times', self.times) + node.set_array('velocities', self.velocities) + + # Reset validate to avoid raising of validation error according to the new TrajectoryData definition + node._validate = lambda: True + node.store() + + self.trajectory_pk = node.pk + + def test_trajectory_symbols(self): + from aiida.orm import load_node + trajectory = load_node(self.trajectory_pk) + self.assertSequenceEqual(trajectory.get_attr('symbols'), ['H', 'O', 'C']) + 
self.assertSequenceEqual(trajectory.get_array('velocities').tolist(), self.velocities.tolist()) + self.assertSequenceEqual(trajectory.get_array('positions').tolist(), self.positions.tolist()) + with self.assertRaises(KeyError): + trajectory.get_array('symbols') diff --git a/aiida/backends/djsite/manage.py b/aiida/backends/djsite/manage.py new file mode 100644 index 0000000000..09857a799c --- /dev/null +++ b/aiida/backends/djsite/manage.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +import sys + + +if __name__ == "__main__": + from django.core.management import execute_from_command_line + + # Copy sys.argv + actual_argv = sys.argv[:] + + # Check if there is also a cmdline option is --aiida-profile=PROFILENAME + try: + first_cmdline_option = sys.argv[1] + except IndexError: + first_cmdline_option = None + + profile_name = None # Use the default profile if not specified + if first_cmdline_option is not None: + cmdprefix = "--aiida-profile=" + if first_cmdline_option.startswith(cmdprefix): + profile_name = first_cmdline_option[len(cmdprefix):] + # I remove the argument I just read + actual_argv = [actual_argv[0]] + actual_argv[2:] + + if actual_argv[1] == 'migrate': + # Perform the same loading procedure as the normal load_dbenv does + from aiida.backends import settings + settings.LOAD_DBENV_CALLED = True + # We load the needed profile. 
+ # This is going to set global variables in settings, including + # settings.BACKEND + from aiida.backends.profile import load_profile, BACKEND_DJANGO + load_profile(profile=profile_name) + if settings.BACKEND != BACKEND_DJANGO: + from aiida.common.exceptions import InvalidOperation + raise InvalidOperation("A Django migration procedure is initiated " + "but a different backend is used!") + # We load the Django specific _load_dbenv_noschemacheck + # When there will be a need for SQLAlchemy for a schema migration, + # we may abstract the _load_dbenv_noschemacheck and make a common + # one for both backends + from aiida.backends.djsite.utils import _load_dbenv_noschemacheck + _load_dbenv_noschemacheck(profile=profile_name) + else: + # Load the general load_dbenv. + from aiida.backends.utils import load_dbenv + load_dbenv(profile=profile_name) + + execute_from_command_line(actual_argv) diff --git a/aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py b/aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py new file mode 100644 index 0000000000..2240f0c56e --- /dev/null +++ b/aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py @@ -0,0 +1,56 @@ +"""trajectory symbols to attribute + +Revision ID: 12536798d4d3 +Revises: 37f3d4882837 +Create Date: 2019-01-21 10:15:02.451308 + +""" +# pylint: disable=invalid-name +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-member,no-name-in-module,import-error + +from alembic import op +from sqlalchemy.orm.session import Session + +from aiida.backends.sqlalchemy.utils import flag_modified +from aiida.backends.sqlalchemy.models.node import DbNode +from aiida.orm import load_node + +# revision identifiers, used by Alembic. 
+revision = '12536798d4d3' +down_revision = '37f3d4882837' +branch_labels = None +depends_on = None + +# Here we duplicate the data stored in a TrajectoryData symbols array, storing it as an attribute. +# We delete the duplicates in the following migration (ce56d84bcc35) to avoid to delete data + + +def upgrade(): + """Migrations for the upgrade.""" + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + symbols = load_node(pk=t.id).get_array('symbols').tolist() + t.attributes['symbols'] = symbols + flag_modified(t, 'attributes') + session.add(t) + session.commit() + session.close() + + +def downgrade(): + """Migrations for the downgrade.""" + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + t.del_attr('symbols') + flag_modified(t, 'attributes') + session.add(t) + session.commit() + session.close() diff --git a/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py b/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py index 8fa7a5fdb2..19f4f289d4 100644 --- a/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py +++ b/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py @@ -1,7 +1,7 @@ """Data migration for `Data` nodes after it was moved in the `aiida.orm.node` module changing the type string. 
Revision ID: 6a5c2ea1439d -Revises: 041a79fc615f +Revises: 375c2db70663 Create Date: 2019-01-18 19:44:32.156083 """ diff --git a/aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py b/aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py new file mode 100644 index 0000000000..714497b3e9 --- /dev/null +++ b/aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py @@ -0,0 +1,63 @@ +"""delete trajectory symbols array + +Revision ID: ce56d84bcc35 +Revises: 12536798d4d3 +Create Date: 2019-01-21 15:35:07.280805 + +""" +# pylint: disable=invalid-name +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-member,no-name-in-module,import-error + +import numpy + +from alembic import op +from sqlalchemy.orm.session import Session + +from aiida.backends.sqlalchemy.models.node import DbNode +from aiida.backends.sqlalchemy.utils import flag_modified +from aiida.orm import load_node + +# revision identifiers, used by Alembic. 
+revision = 'ce56d84bcc35' +down_revision = '12536798d4d3' +branch_labels = None +depends_on = None + + +def upgrade(): + """Migrations for the upgrade.""" + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + del t.attributes['array|symbols'] + flag_modified(t, 'attributes') + # Remove the .npy file (using delete_array raises ModificationNotAllowed error) + load_node(pk=t.id)._get_folder_pathsubfolder.remove_path('symbols.npy') # pylint: disable=protected-access + session.add(t) + session.commit() + session.close() + + +def downgrade(): + """Migrations for the downgrade.""" + import tempfile + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + symbols = numpy.array(t.get_attr('symbols')) + # Save the .npy file (using set_array raises ModificationNotAllowed error) + with tempfile.NamedTemporaryFile() as _file: + numpy.save(_file, symbols) + _file.flush() + load_node(pk=t.id)._get_folder_pathsubfolder.insert_path(_file.name, 'symbols.npy') # pylint: disable=protected-access + t.attributes['array|symbols'] = list(symbols.shape) + flag_modified(t, 'attributes') + session.add(t) + session.commit() + session.close() diff --git a/aiida/backends/sqlalchemy/tests/migrations.py b/aiida/backends/sqlalchemy/tests/migrations.py index c7104c1d30..f49cb2802e 100644 --- a/aiida/backends/sqlalchemy/tests/migrations.py +++ b/aiida/backends/sqlalchemy/tests/migrations.py @@ -7,10 +7,8 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -""" -Tests for the migration engine (Alembic) as well as for the AiiDA migrations -for SQLAlchemy. 
-""" +# pylint: disable=too-many-lines +"""Tests for the migration engine (Alembic) as well as for the AiiDA migrations for SQLAlchemy.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -28,6 +26,7 @@ from aiida.backends.sqlalchemy.models.base import Base from aiida.backends.sqlalchemy.tests.utils import new_database from aiida.backends.testbase import AiidaTestCase +from aiida.orm import load_node class TestMigrationsSQLA(AiidaTestCase): @@ -603,6 +602,7 @@ def setUpBeforeMigration(self): unknown_exp_logs_no) # Getting the serialized legacy workflow logs + # yapf: disable leg_wf = session.query(DbLog).filter(DbLog.objpk == leg_workf.id).filter( DbLog.objname == 'aiida.workflows.user.topologicalworkflows.topo.TopologicalWorkflow' ).with_entities(*cols_to_project).one() @@ -902,3 +902,98 @@ def test_data_node_type_string(self): self.assertEqual(node_calc.type, 'node.process.calculation.calcjob.CalcJobNode.') finally: session.close() + + +class TestTrajectoryDataMigration(TestMigrationsSQLA): + """Test the migration of the symbols from numpy array to attribute for TrajectoryData nodes.""" + import numpy + + migrate_from = '37f3d4882837' # 37f3d4882837_make_all_uuid_columns_unique + migrate_to = 'ce56d84bcc35' # ce56d84bcc35_delete_trajectory_symbols_array + + # I create sample data + stepids = numpy.array([60, 70]) + times = stepids * 0.01 + positions = numpy.array([[[0., 0., 0.], [0.5, 0.5, 0.5], [1.5, 1.5, 1.5]], [[0., 0., 0.], [0.5, 0.5, 0.5], + [1.5, 1.5, 1.5]]]) + velocities = numpy.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], + [-0.5, -0.5, -0.5]]]) + cells = numpy.array([[[ + 2., + 0., + 0., + ], [ + 0., + 2., + 0., + ], [ + 0., + 0., + 2., + ]], [[ + 3., + 0., + 0., + ], [ + 0., + 3., + 0., + ], [ + 0., + 0., + 3., + ]]]) + + def setUpBeforeMigration(self): + from sqlalchemy.orm import Session # pylint: disable=import-error,no-name-in-module + + with 
sa.engine.begin() as connection: + try: + session = Session(connection.engine) + import numpy + from aiida.plugins.factory import DataFactory + + TrajectoryData = DataFactory('array.trajectory') # pylint: disable=invalid-name + symbols = numpy.array(['H', 'O', 'C']) + + # Create a TrajectoryData node + node = TrajectoryData() + + # I set the node + node.set_array('steps', self.stepids) + node.set_array('cells', self.cells) + node.set_array('symbols', symbols) + node.set_array('positions', self.positions) + node.set_array('times', self.times) + node.set_array('velocities', self.velocities) + + # Reset validate to avoid raising of validation error according to the new TrajectoryData definition + node._validate = lambda: True # pylint: disable=protected-access + + node.store() + + self.trajectory_pk = node.pk + finally: + session.close() + + def test_trajectory_symbols(self): + """Verify that migration of symbols from repository array to attribute works properly.""" + from sqlalchemy.orm import Session # pylint: disable=import-error,no-name-in-module + from aiida.backends.sqlalchemy.models.node import DbNode + + with sa.engine.begin() as connection: + try: + session = Session(connection.engine) + + trajectory = session.query(DbNode).filter(DbNode.id == self.trajectory_pk).one() + + self.assertSequenceEqual(trajectory.attributes['symbols'], ['H', 'O', 'C']) + self.assertSequenceEqual( + load_node(pk=trajectory.id).get_array('velocities').tolist(), self.velocities.tolist()) + self.assertSequenceEqual( + load_node(pk=trajectory.id).get_array('positions').tolist(), self.positions.tolist()) + with self.assertRaises(KeyError): + load_node(pk=trajectory.id).get_array('symbols') + + finally: + session.close() diff --git a/aiida/orm/node/data/array/trajectory.py b/aiida/orm/node/data/array/trajectory.py index 43620fb53c..29799cd49f 100644 --- a/aiida/orm/node/data/array/trajectory.py +++ b/aiida/orm/node/data/array/trajectory.py @@ -499,7 +499,7 @@ def 
_get_aiida_structure(self, store=False, **kwargs): def get_structure(self, store=False, **kwargs): """ - Creates :py:class:`aiida.orm.data.structure.StructureData`. + Creates :py:class:`aiida.orm.node.data.structure.StructureData`. .. versionadded:: 1.0 Renamed from _get_aiida_structure @@ -507,7 +507,7 @@ def get_structure(self, store=False, **kwargs): :param converter: specify the converter. Default 'ase'. :param store: If True, intermediate calculation gets stored in the AiiDA database for record. Default False. - :return: :py:class:`aiida.orm.data.structure.StructureData` node. + :return: :py:class:`aiida.orm.node.data.structure.StructureData` node. """ from aiida.orm.node.data.parameter import ParameterData @@ -528,7 +528,7 @@ def _get_cif(self, index=None, **kwargs): def get_cif(self, index=None, **kwargs): """ - Creates :py:class:`aiida.orm.data.cif.CifData` + Creates :py:class:`aiida.orm.node.data.cif.CifData` .. versionadded:: 1.0 Renamed from _get_cif diff --git a/aiida/orm/node/data/cif.py b/aiida/orm/node/data/cif.py index 6029ece3f5..54aaaeed36 100644 --- a/aiida/orm/node/data/cif.py +++ b/aiida/orm/node/data/cif.py @@ -899,7 +899,7 @@ def _get_aiida_structure(self, converter='pymatgen', store=False, **kwargs): def get_structure(self, converter='pymatgen', store=False, **kwargs): """ - Creates :py:class:`aiida.orm.data.structure.StructureData`. + Creates :py:class:`aiida.orm.node.data.structure.StructureData`. .. versionadded:: 1.0 Renamed from _get_aiida_structure @@ -913,7 +913,7 @@ def get_structure(self, converter='pymatgen', store=False, **kwargs): the occupancies will be scaled down to 1. (pymatgen only) :param site_tolerance: This tolerance is used to determine if two sites are sitting in the same position, in which case they will be combined to a single disordered site. Defaults to 1e-4. (pymatgen only) - :return: :py:class:`aiida.orm.data.structure.StructureData` node. + :return: :py:class:`aiida.orm.node.data.structure.StructureData` node. 
""" from . import cif # pylint: disable=import-self from aiida.orm.node.data.parameter import ParameterData diff --git a/aiida/orm/node/data/structure.py b/aiida/orm/node/data/structure.py index db7b310ea4..d3b0c5765a 100644 --- a/aiida/orm/node/data/structure.py +++ b/aiida/orm/node/data/structure.py @@ -1811,7 +1811,7 @@ def _get_cif(self, converter='ase', store=False, **kwargs): def get_cif(self, converter='ase', store=False, **kwargs): """ - Creates :py:class:`aiida.orm.data.cif.CifData`. + Creates :py:class:`aiida.orm.node.data.cif.CifData`. .. versionadded:: 1.0 Renamed from _get_cif @@ -1819,7 +1819,7 @@ def get_cif(self, converter='ase', store=False, **kwargs): :param converter: specify the converter. Default 'ase'. :param store: If True, intermediate calculation gets stored in the AiiDA database for record. Default False. - :return: :py:class:`aiida.orm.data.cif.CifData` node. + :return: :py:class:`aiida.orm.node.data.cif.CifData` node. """ from .parameter import ParameterData from . import structure # This same module diff --git a/docs/source/concepts/workflows.rst b/docs/source/concepts/workflows.rst index 9b1fc64c07..3767f2c4e7 100644 --- a/docs/source/concepts/workflows.rst +++ b/docs/source/concepts/workflows.rst @@ -922,6 +922,6 @@ However, these workchains can be updated with just a few minor updates that we w * The method ``RemoteData.is_empty()`` has been changes and is now accessed through the ``RemoteData.is_empty``. * The method ``.is_alloy()`` for classes ``StructureData`` and ``Kind`` is now accessed through the ``.is_alloy`` property. * The method ``.has_vacancies()`` for classes ``StructureData`` and ``Kind`` is now accessed through the ``.has_vacancies`` property. 
-* The arguments ``stepids`` and ``cells`` of the :meth:`TrajectoryData.set_trajectory()` method are made optional +* The arguments ``stepids`` and ``cells`` of the :meth:`TrajectoryData.set_trajectory()` method are made optional which has implications on the ordering of the arguments passed to this method. * The list of atomic symbols for trajectories is no longer stored as array data but is now accessible through the ``TrajectoryData.symbols`` attribute. diff --git a/docs/source/developer_guide/core/modifying_the_schema.rst b/docs/source/developer_guide/core/modifying_the_schema.rst index 0e74913663..7417f0a804 100644 --- a/docs/source/developer_guide/core/modifying_the_schema.rst +++ b/docs/source/developer_guide/core/modifying_the_schema.rst @@ -1,4 +1,4 @@ -Mofidying the schema +Modifying the schema ++++++++++++++++++++ Django