diff --git a/aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py b/aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py new file mode 100644 index 0000000000..9d8d4ff48a --- /dev/null +++ b/aiida/backends/djsite/db/migrations/0026_trajectory_symbols_to_attribute.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,too-few-public-methods +"""Data migration for `TrajectoryData` nodes where symbol lists are moved from repository array to attribute. + +This process has to be done in two separate consecutive migrations to prevent data loss in between. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-member,no-name-in-module,import-error +from django.db import migrations + +from aiida.backends.djsite.db.migrations import upgrade_schema_version +from . import ModelModifierV0025 + +REVISION = '1.0.26' +DOWN_REVISION = '1.0.25' + + +def create_trajectory_symbols_attribute(apps, _): + """Create the symbols attribute from the repository array for all `TrajectoryData` nodes.""" + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + symbols = trajectory.get_array('symbols').tolist() + modifier.set_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'symbols', symbols) + + +def delete_trajectory_symbols_attribute(apps, _): + """Delete the symbols attribute for all `TrajectoryData` nodes.""" + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + modifier.del_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'symbols') + + +class Migration(migrations.Migration): + """Storing symbols in TrajectoryData nodes as attributes, while keeping numpy arrays. + TrajectoryData symbols arrays are deleted in the next migration. 
+ We split the migration into two because every migration is wrapped in an atomic transaction and we want to avoid + deleting the data while it is still being written to the database.""" + + dependencies = [ + ('db', '0025_move_data_within_node_module'), + ] + + operations = [ + migrations.RunPython(create_trajectory_symbols_attribute, reverse_code=delete_trajectory_symbols_attribute), + upgrade_schema_version(REVISION, DOWN_REVISION) + ] diff --git a/aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py b/aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py new file mode 100644 index 0000000000..8c84c9d8a3 --- /dev/null +++ b/aiida/backends/djsite/db/migrations/0027_delete_trajectory_symbols_array.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=invalid-name,too-few-public-methods +"""Data migration for `TrajectoryData` nodes where symbol lists are moved from repository array to attribute. + +This process has to be done in two separate consecutive migrations to prevent data loss in between. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-name-in-module,import-error +from django.db import migrations + +from aiida.backends.djsite.db.migrations import upgrade_schema_version +from . 
import ModelModifierV0025 + +REVISION = '1.0.27' +DOWN_REVISION = '1.0.26' + + +def delete_trajectory_symbols_array(apps, _): + """Delete the symbols array from all `TrajectoryData` nodes.""" + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + modifier.del_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'array|symbols') + # Remove the .npy file (using delete_array raises ModificationNotAllowed error) + trajectory._get_folder_pathsubfolder.remove_path('symbols.npy') # pylint: disable=protected-access + + +def create_trajectory_symbols_array(apps, _): + """Create the symbols array for all `TrajectoryData` nodes.""" + import numpy + import tempfile + from aiida.orm import load_node + + DbAttribute = apps.get_model('db', 'DbAttribute') + + modifier = ModelModifierV0025(DbAttribute) + + DbNode = apps.get_model('db', 'DbNode') + trajectories_pk = DbNode.objects.filter(type='node.data.array.trajectory.TrajectoryData.').values_list( + 'id', flat=True) + for t_pk in trajectories_pk: + trajectory = load_node(t_pk) + symbols = numpy.array(trajectory.get_attr('symbols')) + # Save the .npy file (using set_array raises ModificationNotAllowed error) + with tempfile.NamedTemporaryFile() as _file: + numpy.save(_file, symbols) + _file.flush() + trajectory._get_folder_pathsubfolder.insert_path(_file.name, 'symbols.npy') # pylint: disable=protected-access + modifier.set_value_for_node(DbNode.objects.get(pk=trajectory.pk), 'array|symbols', list(symbols.shape)) + + +class Migration(migrations.Migration): + """Deleting duplicated information stored in TrajectoryData symbols numpy arrays""" + + dependencies = [ + ('db', '0026_trajectory_symbols_to_attribute'), + ] + + operations = [ + migrations.RunPython(delete_trajectory_symbols_array, reverse_code=create_trajectory_symbols_array), + upgrade_schema_version(REVISION, DOWN_REVISION) + ] diff --git a/aiida/backends/djsite/db/migrations/__init__.py b/aiida/backends/djsite/db/migrations/__init__.py index 060d3a888f..0e6f56d9d2 100644 --- a/aiida/backends/djsite/db/migrations/__init__.py +++ b/aiida/backends/djsite/db/migrations/__init__.py @@ -10,8 +10,11 @@ from __future__ import division from __future__ import print_function from __future__ import absolute_import +from django.apps import apps -LATEST_MIGRATION = '0025_move_data_within_node_module' +import six + +LATEST_MIGRATION = '0027_delete_trajectory_symbols_array' def _update_schema_version(version, apps, schema_editor): @@ -36,3 +39,373 @@ def current_schema_version(): fromlist=['REVISION'] ) return latest_migration.REVISION + + +# Here I copied the class method definitions from aiida.backends.djsite.db.models +# used to set and delete values for nodes. 
+# This was done because: +# 1) The DbAttribute object loaded with apps.get_model() does not provide the class methods +# 2) When the django model changes, the migration will continue to work +# 3) If we defined in the migration a new class with these methods as an extension of the DbAttribute class, +# django detects a change in the model and creates a new migration + + +class ModelModifierV0025(object): + + from aiida.backends.utils import AIIDA_ATTRIBUTE_SEP + + _subspecifier_field_name = 'dbnode' + _sep = AIIDA_ATTRIBUTE_SEP + + def __init__(self, model_class): + self._model_class = model_class + + def validate_key(self, key): + """ + Validate the key string to check if it is valid (e.g., if it does not + contain the separator symbol). + + :return: None if the key is valid + :raise ValidationError: if the key is not valid + """ + from aiida.backends.utils import validate_attribute_key + return validate_attribute_key(key) + + def set_value_for_node(self, dbnode, key, value, with_transaction=False, + stop_if_existing=False): + """ + This is the raw-level method that accesses the DB. No checks are done + to prevent the user from (re)setting a valid key. + To be used only internally. + + :todo: there may be some error on concurrent write; + not checked in this unlucky case! + + :param dbnode: the dbnode for which the attribute should be stored; + if an integer is passed, this is used as the PK of the dbnode, + without any further check (for speed reasons) + :param key: the key of the attribute to store; must be a level-zero + attribute (i.e., no separators in the key) + :param value: the value of the attribute to store + :param with_transaction: if True, do this within a transaction, + so that nothing gets stored if a subitem cannot be created. + Otherwise, if this parameter is False, no transaction management + is performed. + :param stop_if_existing: if True, it will stop with a + UniquenessError exception if the key already exists + for the given node. Otherwise, it will + first delete the old value, if existent. Setting this to True is + useful if you want to use a given attribute as a "locking" value, + e.g. to avoid performing an action twice on the same node. + Note that, if you are using transactions, you may get the error + only when the transaction is committed. + + :raise ValueError: if the key contains the separator symbol used + internally to unpack dictionaries and lists (defined in cls._sep). + """ + cls = self._model_class + DbNode = apps.get_model('db', 'DbNode') + + if isinstance(dbnode, six.integer_types): + dbnode_node = DbNode(id=dbnode) + else: + dbnode_node = dbnode + + self.set_value(key, value, with_transaction=with_transaction, + subspecifier_value=dbnode_node, + stop_if_existing=stop_if_existing) + + def del_value_for_node(self, dbnode, key): + """ + Delete an attribute from the database for the given dbnode. + + :note: no exception is raised if no attribute with the given key is + found in the DB. + + :param dbnode: the dbnode for which you want to delete the key. + :param key: the key to delete. + """ + self.del_value(key, subspecifier_value=dbnode) + + def del_value(self, key, only_children=False, subspecifier_value=None): + """ + Delete a value associated with the given key (if existing). + + :note: No exceptions are raised if no entry is found. + + :param key: the key to delete. Can contain the separator self._sep if + you want to delete a subkey. + :param only_children: if True, delete only children and not the + entry itself.
+ :param subspecifier_value: must be None if this class has no + subspecifier set (e.g., the DbSetting class). + Must be the value of the subspecifier (e.g., the dbnode) for classes + that define it (e.g. DbAttribute and DbExtra) + """ + cls = self._model_class + from django.db.models import Q + + if self._subspecifier_field_name is None: + if subspecifier_value is not None: + raise ValueError("You cannot specify a subspecifier value for " + "class {} because it has no subspecifiers" + "".format(cls.__name__)) + subspecifiers_dict = {} + else: + if subspecifier_value is None: + raise ValueError("You also have to specify a subspecifier value " + "for class {} (the {})".format(cls.__name__, + self._subspecifier_field_name)) + subspecifiers_dict = {self._subspecifier_field_name: + subspecifier_value} + + query = Q(key__startswith="{parentkey}{sep}".format( + parentkey=key, sep=self._sep), + **subspecifiers_dict) + + if not only_children: + query.add(Q(key=key, **subspecifiers_dict), Q.OR) + + cls.objects.filter(query).delete() + + def set_value(self, key, value, with_transaction=False, + subspecifier_value=None, other_attribs={}, + stop_if_existing=False): + """ + Set a new value in the DB, possibly associated with the given subspecifier. + + :note: This method also stores directly in the DB. + + :param key: a string with the key to create (must be a level-0 + attribute, that is, it cannot contain the separator cls._sep). + :param value: the value to store (a basic data type or a list or a dict) + :param subspecifier_value: must be None if this class has no + subspecifier set (e.g., the DbSetting class). + Must be the value of the subspecifier (e.g., the dbnode) for classes + that define it (e.g. DbAttribute and DbExtra) + :param with_transaction: True if you want this function to be managed + with transactions. Set to False if you already have a manual + management of transactions in the block where you are calling this + function (useful for speed improvements to avoid recursive + transactions) + :param other_attribs: a dictionary of other parameters, to store + only on the level-zero attribute (e.g. for description in DbSetting). + :param stop_if_existing: if True, it will stop with a + UniquenessError exception if the new entry would violate a + uniqueness constraint in the DB (same key, or same key+node, + depending on the specific subclass). Otherwise, it will + first delete the old value, if existent. Setting this to True is + useful if you want to use a given attribute as a "locking" value, + e.g. to avoid performing an action twice on the same node. + Note that, if you are using transactions, you may get the error + only when the transaction is committed. + """ + cls = self._model_class + from django.db import transaction + + self.validate_key(key) + + try: + if with_transaction: + sid = transaction.savepoint() + + # create_value returns a list of nodes to store + to_store = self.create_value(key, value, + subspecifier_value=subspecifier_value, + other_attribs=other_attribs) + + if to_store: + if not stop_if_existing: + # Delete the old values if stop_if_existing is False, + # otherwise don't delete them and hope they don't + # exist. If they exist, I'll get a UniquenessError + + # NOTE!
Be careful in case the extra/attribute to + # store is not a simple attribute but a list or dict: + # in that case it should be ok, because if we are + # overwriting an entry it will stop anyway to avoid + # overwriting the main entry, but otherwise + # there is the risk that trailing pieces remain, + # so in general it is good to recursively clean + # all sub-items. + self.del_value(key, + subspecifier_value=subspecifier_value) + cls.objects.bulk_create(to_store) + + if with_transaction: + transaction.savepoint_commit(sid) + except BaseException as exc: # All exceptions including CTRL+C, ... + from django.db.utils import IntegrityError + from aiida.common.exceptions import UniquenessError + + if with_transaction: + transaction.savepoint_rollback(sid) + if isinstance(exc, IntegrityError) and stop_if_existing: + raise UniquenessError("Impossible to create the required " + "entry " + "in table '{}', " + "another entry already exists and the creation would " + "violate a uniqueness constraint.\nFurther details: " + "{}".format(cls.__name__, exc)) + raise + + def create_value(self, key, value, subspecifier_value=None, + other_attribs={}): + """ + Create a new list of attributes, without storing them, associated + with the current key/value pair (and to the given subspecifier, + e.g. the DbNode for DbAttributes and DbExtras). + + :note: No hits are done on the DB, in particular no check is done + on the existence of the given nodes. + + :param key: a string with the key to create (can contain the + separator self._sep if this is a sub-attribute: indeed, this + function calls itself recursively) + :param value: the value to store (a basic data type or a list or a dict) + :param subspecifier_value: must be None if this class has no + subspecifier set (e.g., the DbSetting class). + Must be the value of the subspecifier (e.g., the dbnode) for classes + that define it (e.g. DbAttribute and DbExtra) + :param other_attribs: a dictionary of other parameters, to store + only on the level-zero attribute (e.g. for description in DbSetting). + + :return: always a list of class instances; it is the user's + responsibility to store such entries (typically with a Django + bulk_create() call).
+ """ + cls = self._model_class + import datetime + + import aiida.common.json as json + from aiida.common.timezone import is_naive, make_aware, get_current_timezone + + if self._subspecifier_field_name is None: + if subspecifier_value is not None: + raise ValueError("You cannot specify a subspecifier value for " + "class {} because it has no subspecifiers" + "".format(cls.__name__)) + new_entry = cls(key=key, **other_attribs) + else: + if subspecifier_value is None: + raise ValueError("You also have to specify a subspecifier value " + "for class {} (the {})".format(cls.__name__, + self._subspecifier_field_name)) + further_params = other_attribs.copy() + further_params.update({self._subspecifier_field_name: + subspecifier_value}) + new_entry = cls(key=key, **further_params) + + list_to_return = [new_entry] + + if value is None: + new_entry.datatype = 'none' + new_entry.bval = None + new_entry.tval = '' + new_entry.ival = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, bool): + new_entry.datatype = 'bool' + new_entry.bval = value + new_entry.tval = '' + new_entry.ival = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, six.integer_types): + new_entry.datatype = 'int' + new_entry.ival = value + new_entry.tval = '' + new_entry.bval = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, float): + new_entry.datatype = 'float' + new_entry.fval = value + new_entry.tval = '' + new_entry.ival = None + new_entry.bval = None + new_entry.dval = None + + elif isinstance(value, six.string_types): + new_entry.datatype = 'txt' + new_entry.tval = value + new_entry.bval = None + new_entry.ival = None + new_entry.fval = None + new_entry.dval = None + + elif isinstance(value, datetime.datetime): + + # current timezone is taken from the settings file of django + if is_naive(value): + value_to_set = make_aware(value, get_current_timezone()) + else: + value_to_set = value + + new_entry.datatype = 'date' + # TODO: time-aware and time-naive datetime objects, see + # https://docs.djangoproject.com/en/dev/topics/i18n/timezones/#naive-and-aware-datetime-objects + new_entry.dval = value_to_set + new_entry.tval = '' + new_entry.bval = None + new_entry.ival = None + new_entry.fval = None + + elif isinstance(value, (list, tuple)): + + new_entry.datatype = 'list' + new_entry.dval = None + new_entry.tval = '' + new_entry.bval = None + new_entry.ival = len(value) + new_entry.fval = None + + for i, subv in enumerate(value): + # I do not need get_or_create here, because + # above I deleted all children (and I + # expect no concurrency) + # NOTE: I do not pass other_attribs + list_to_return.extend(self.create_value( + key=("{}{}{:d}".format(key, self._sep, i)), + value=subv, + subspecifier_value=subspecifier_value)) + + elif isinstance(value, dict): + + new_entry.datatype = 'dict' + new_entry.dval = None + new_entry.tval = '' + new_entry.bval = None + new_entry.ival = len(value) + new_entry.fval = None + + for subk, subv in value.items(): + self.validate_key(subk) + + # I do not need get_or_create here, because + # above I deleted all children (and I + # expect no concurrency) + # NOTE: I do not pass other_attribs + list_to_return.extend(self.create_value( + key="{}{}{}".format(key, self._sep, subk), + value=subv, + subspecifier_value=subspecifier_value)) + else: + try: + jsondata = json.dumps(value) + except TypeError: + raise ValueError( + "Unable to store the value: it must be either a basic datatype, or json-serializable: {}".format( + 
value)) + + new_entry.datatype = 'json' + new_entry.tval = jsondata + new_entry.bval = None + new_entry.ival = None + new_entry.fval = None + + return list_to_return diff --git a/aiida/backends/djsite/db/subtests/migrations.py b/aiida/backends/djsite/db/subtests/migrations.py index bcbf5a541f..23345f62d6 100644 --- a/aiida/backends/djsite/db/subtests/migrations.py +++ b/aiida/backends/djsite/db/subtests/migrations.py @@ -10,8 +10,10 @@ from __future__ import division from __future__ import print_function from __future__ import absolute_import + from six.moves import range +import numpy import tempfile from django.apps import apps @@ -587,3 +589,72 @@ def test_data_node_type_string(self): self.assertEqual(node_data.type, 'node.data.int.Int.') self.assertEqual(node_calc.type, 'node.process.calculation.calcjob.CalcJobNode.') + + +class TestTrajectoryDataMigration(TestMigrations): + + migrate_from = '0025_move_data_within_node_module' + migrate_to = '0027_delete_trajectory_symbols_array' + + # I create sample data + stepids = numpy.array([60, 70]) + times = stepids * 0.01 + positions = numpy.array([[[0., 0., 0.], [0.5, 0.5, 0.5], [1.5, 1.5, 1.5]], [[0., 0., 0.], [0.5, 0.5, 0.5], + [1.5, 1.5, 1.5]]]) + velocities = numpy.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], + [-0.5, -0.5, -0.5]]]) + cells = numpy.array([[[ + 2., + 0., + 0., + ], [ + 0., + 2., + 0., + ], [ + 0., + 0., + 2., + ]], [[ + 3., + 0., + 0., + ], [ + 0., + 3., + 0., + ], [ + 0., + 0., + 3., + ]]]) + + def setUpBeforeMigration(self): + from aiida.orm.node.data.array.trajectory import TrajectoryData + + # Create a TrajectoryData node + node = TrajectoryData() + symbols = numpy.array(['H', 'O', 'C']) + + # I set the node + node.set_array('steps', self.stepids) + node.set_array('cells', self.cells) + node.set_array('symbols', symbols) + node.set_array('positions', self.positions) + node.set_array('times', self.times) + node.set_array('velocities', self.velocities) + + # Reset validate to avoid raising of validation error according to the new TrajectoryData definition + node._validate = lambda: True + node.store() + + self.trajectory_pk = node.pk + + def test_trajectory_symbols(self): + from aiida.orm import load_node + trajectory = load_node(self.trajectory_pk) + self.assertSequenceEqual(trajectory.get_attr('symbols'), ['H', 'O', 'C']) + self.assertSequenceEqual(trajectory.get_array('velocities').tolist(), self.velocities.tolist()) + self.assertSequenceEqual(trajectory.get_array('positions').tolist(), self.positions.tolist()) + with self.assertRaises(KeyError): + trajectory.get_array('symbols') diff --git a/aiida/backends/djsite/manage.py b/aiida/backends/djsite/manage.py new file mode 100644 index 0000000000..09857a799c --- /dev/null +++ b/aiida/backends/djsite/manage.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. 
# +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +import sys + + +if __name__ == "__main__": + from django.core.management import execute_from_command_line + + # Copy sys.argv + actual_argv = sys.argv[:] + + # Check if there is a cmdline option --aiida-profile=PROFILENAME + try: + first_cmdline_option = sys.argv[1] + except IndexError: + first_cmdline_option = None + + profile_name = None # Use the default profile if not specified + if first_cmdline_option is not None: + cmdprefix = "--aiida-profile=" + if first_cmdline_option.startswith(cmdprefix): + profile_name = first_cmdline_option[len(cmdprefix):] + # I remove the argument I just read + actual_argv = [actual_argv[0]] + actual_argv[2:] + + if actual_argv[1] == 'migrate': + # Perform the same loading procedure as the normal load_dbenv does + from aiida.backends import settings + settings.LOAD_DBENV_CALLED = True + # We load the needed profile. + # This is going to set global variables in settings, including + # settings.BACKEND + from aiida.backends.profile import load_profile, BACKEND_DJANGO + load_profile(profile=profile_name) + if settings.BACKEND != BACKEND_DJANGO: + from aiida.common.exceptions import InvalidOperation + raise InvalidOperation("A Django migration procedure is initiated " + "but a different backend is used!") + # We load the Django-specific _load_dbenv_noschemacheck. + # When there will be a need for a SQLAlchemy schema migration, + # we may abstract the _load_dbenv_noschemacheck and make a common + # one for both backends + from aiida.backends.djsite.utils import _load_dbenv_noschemacheck + _load_dbenv_noschemacheck(profile=profile_name) + else: + # Load the general load_dbenv. + from aiida.backends.utils import load_dbenv + load_dbenv(profile=profile_name) + + execute_from_command_line(actual_argv) diff --git a/aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py b/aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py new file mode 100644 index 0000000000..2240f0c56e --- /dev/null +++ b/aiida/backends/sqlalchemy/migrations/versions/12536798d4d3_trajectory_symbols_to_attribute.py @@ -0,0 +1,56 @@ +"""trajectory symbols to attribute + +Revision ID: 12536798d4d3 +Revises: 37f3d4882837 +Create Date: 2019-01-21 10:15:02.451308 + +""" +# pylint: disable=invalid-name +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-member,no-name-in-module,import-error + +from alembic import op +from sqlalchemy.orm.session import Session + +from aiida.backends.sqlalchemy.utils import flag_modified +from aiida.backends.sqlalchemy.models.node import DbNode +from aiida.orm import load_node + +# revision identifiers, used by Alembic. +revision = '12536798d4d3' +down_revision = '37f3d4882837' +branch_labels = None +depends_on = None + +# Here we duplicate the data stored in a TrajectoryData symbols array, storing it as an attribute.
+# We delete the duplicates in the following migration (ce56d84bcc35) to avoid deleting the data while it is being written + + +def upgrade(): + """Migrations for the upgrade.""" + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + symbols = load_node(pk=t.id).get_array('symbols').tolist() + t.attributes['symbols'] = symbols + flag_modified(t, 'attributes') + session.add(t) + session.commit() + session.close() + + +def downgrade(): + """Migrations for the downgrade.""" + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + t.del_attr('symbols') + flag_modified(t, 'attributes') + session.add(t) + session.commit() + session.close() diff --git a/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py b/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py index 8fa7a5fdb2..19f4f289d4 100644 --- a/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py +++ b/aiida/backends/sqlalchemy/migrations/versions/6a5c2ea1439d_move_data_within_node_module.py @@ -1,7 +1,7 @@ """Data migration for `Data` nodes after it was moved in the `aiida.orm.node` module changing the type string. Revision ID: 6a5c2ea1439d -Revises: 041a79fc615f +Revises: 375c2db70663 Create Date: 2019-01-18 19:44:32.156083 """ diff --git a/aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py b/aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py new file mode 100644 index 0000000000..714497b3e9 --- /dev/null +++ b/aiida/backends/sqlalchemy/migrations/versions/ce56d84bcc35_delete_trajectory_symbols_array.py @@ -0,0 +1,63 @@ +"""delete trajectory symbols array + +Revision ID: ce56d84bcc35 +Revises: 12536798d4d3 +Create Date: 2019-01-21 15:35:07.280805 + +""" +# pylint: disable=invalid-name +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed +# pylint: disable=no-member,no-name-in-module,import-error + +import numpy + +from alembic import op +from sqlalchemy.orm.session import Session + +from aiida.backends.sqlalchemy.models.node import DbNode +from aiida.backends.sqlalchemy.utils import flag_modified +from aiida.orm import load_node + +# revision identifiers, used by Alembic.
+revision = 'ce56d84bcc35' +down_revision = '12536798d4d3' +branch_labels = None +depends_on = None + + +def upgrade(): + """Migrations for the upgrade.""" + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + del t.attributes['array|symbols'] + flag_modified(t, 'attributes') + # Remove the .npy file (using delete_array raises ModificationNotAllowed error) + load_node(pk=t.id)._get_folder_pathsubfolder.remove_path('symbols.npy') # pylint: disable=protected-access + session.add(t) + session.commit() + session.close() + + +def downgrade(): + """Migrations for the downgrade.""" + import tempfile + session = Session(bind=op.get_bind()) + trajectories = session.query(DbNode).filter_by(type='node.data.array.trajectory.TrajectoryData.').all() + for t in trajectories: + symbols = numpy.array(t.get_attr('symbols')) + # Save the .npy file (using set_array raises ModificationNotAllowed error) + with tempfile.NamedTemporaryFile() as _file: + numpy.save(_file, symbols) + _file.flush() + load_node(pk=t.id)._get_folder_pathsubfolder.insert_path(_file.name, 'symbols.npy') # pylint: disable=protected-access + t.attributes['array|symbols'] = list(symbols.shape) + flag_modified(t, 'attributes') + session.add(t) + session.commit() + session.close() diff --git a/aiida/backends/sqlalchemy/tests/migrations.py b/aiida/backends/sqlalchemy/tests/migrations.py index c7104c1d30..f49cb2802e 100644 --- a/aiida/backends/sqlalchemy/tests/migrations.py +++ b/aiida/backends/sqlalchemy/tests/migrations.py @@ -7,10 +7,8 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -""" -Tests for the migration engine (Alembic) as well as for the AiiDA migrations -for SQLAlchemy. 
-""" +# pylint: disable=too-many-lines +"""Tests for the migration engine (Alembic) as well as for the AiiDA migrations for SQLAlchemy.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -28,6 +26,7 @@ from aiida.backends.sqlalchemy.models.base import Base from aiida.backends.sqlalchemy.tests.utils import new_database from aiida.backends.testbase import AiidaTestCase +from aiida.orm import load_node class TestMigrationsSQLA(AiidaTestCase): @@ -603,6 +602,7 @@ def setUpBeforeMigration(self): unknown_exp_logs_no) # Getting the serialized legacy workflow logs + # yapf: disable leg_wf = session.query(DbLog).filter(DbLog.objpk == leg_workf.id).filter( DbLog.objname == 'aiida.workflows.user.topologicalworkflows.topo.TopologicalWorkflow' ).with_entities(*cols_to_project).one() @@ -902,3 +902,98 @@ def test_data_node_type_string(self): self.assertEqual(node_calc.type, 'node.process.calculation.calcjob.CalcJobNode.') finally: session.close() + + +class TestTrajectoryDataMigration(TestMigrationsSQLA): + """Test the migration of the symbols from numpy array to attribute for TrajectoryData nodes.""" + import numpy + + migrate_from = '37f3d4882837' # 37f3d4882837_make_all_uuid_columns_unique + migrate_to = 'ce56d84bcc35' # ce56d84bcc35_delete_trajectory_symbols_array + + # I create sample data + stepids = numpy.array([60, 70]) + times = stepids * 0.01 + positions = numpy.array([[[0., 0., 0.], [0.5, 0.5, 0.5], [1.5, 1.5, 1.5]], [[0., 0., 0.], [0.5, 0.5, 0.5], + [1.5, 1.5, 1.5]]]) + velocities = numpy.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], + [-0.5, -0.5, -0.5]]]) + cells = numpy.array([[[ + 2., + 0., + 0., + ], [ + 0., + 2., + 0., + ], [ + 0., + 0., + 2., + ]], [[ + 3., + 0., + 0., + ], [ + 0., + 3., + 0., + ], [ + 0., + 0., + 3., + ]]]) + + def setUpBeforeMigration(self): + from sqlalchemy.orm import Session # pylint: disable=import-error,no-name-in-module + + with sa.engine.begin() as connection: + try: + session = Session(connection.engine) + import numpy + from aiida.plugins.factory import DataFactory + + TrajectoryData = DataFactory('array.trajectory') # pylint: disable=invalid-name + symbols = numpy.array(['H', 'O', 'C']) + + # Create a TrajectoryData node + node = TrajectoryData() + + # I set the node + node.set_array('steps', self.stepids) + node.set_array('cells', self.cells) + node.set_array('symbols', symbols) + node.set_array('positions', self.positions) + node.set_array('times', self.times) + node.set_array('velocities', self.velocities) + + # Reset validate to avoid raising of validation error according to the new TrajectoryData definition + node._validate = lambda: True # pylint: disable=protected-access + + node.store() + + self.trajectory_pk = node.pk + finally: + session.close() + + def test_trajectory_symbols(self): + """Verify that migration of symbols from repository array to attribute works properly.""" + from sqlalchemy.orm import Session # pylint: disable=import-error,no-name-in-module + from aiida.backends.sqlalchemy.models.node import DbNode + + with sa.engine.begin() as connection: + try: + session = Session(connection.engine) + + trajectory = session.query(DbNode).filter(DbNode.id == self.trajectory_pk).one() + + self.assertSequenceEqual(trajectory.attributes['symbols'], ['H', 'O', 'C']) + self.assertSequenceEqual( + load_node(pk=trajectory.id).get_array('velocities').tolist(), self.velocities.tolist()) + self.assertSequenceEqual( + 
load_node(pk=trajectory.id).get_array('positions').tolist(), self.positions.tolist()) + with self.assertRaises(KeyError): + load_node(pk=trajectory.id).get_array('symbols') + + finally: + session.close() diff --git a/aiida/orm/node/data/array/trajectory.py b/aiida/orm/node/data/array/trajectory.py index 43620fb53c..29799cd49f 100644 --- a/aiida/orm/node/data/array/trajectory.py +++ b/aiida/orm/node/data/array/trajectory.py @@ -499,7 +499,7 @@ def _get_aiida_structure(self, store=False, **kwargs): def get_structure(self, store=False, **kwargs): """ - Creates :py:class:`aiida.orm.data.structure.StructureData`. + Creates :py:class:`aiida.orm.node.data.structure.StructureData`. .. versionadded:: 1.0 Renamed from _get_aiida_structure @@ -507,7 +507,7 @@ def get_structure(self, store=False, **kwargs): :param converter: specify the converter. Default 'ase'. :param store: If True, intermediate calculation gets stored in the AiiDA database for record. Default False. - :return: :py:class:`aiida.orm.data.structure.StructureData` node. + :return: :py:class:`aiida.orm.node.data.structure.StructureData` node. """ from aiida.orm.node.data.parameter import ParameterData @@ -528,7 +528,7 @@ def _get_cif(self, index=None, **kwargs): def get_cif(self, index=None, **kwargs): """ - Creates :py:class:`aiida.orm.data.cif.CifData` + Creates :py:class:`aiida.orm.node.data.cif.CifData` .. versionadded:: 1.0 Renamed from _get_cif diff --git a/aiida/orm/node/data/cif.py b/aiida/orm/node/data/cif.py index 6029ece3f5..54aaaeed36 100644 --- a/aiida/orm/node/data/cif.py +++ b/aiida/orm/node/data/cif.py @@ -899,7 +899,7 @@ def _get_aiida_structure(self, converter='pymatgen', store=False, **kwargs): def get_structure(self, converter='pymatgen', store=False, **kwargs): """ - Creates :py:class:`aiida.orm.data.structure.StructureData`. + Creates :py:class:`aiida.orm.node.data.structure.StructureData`. .. versionadded:: 1.0 Renamed from _get_aiida_structure @@ -913,7 +913,7 @@ def get_structure(self, converter='pymatgen', store=False, **kwargs): the occupancies will be scaled down to 1. (pymatgen only) :param site_tolerance: This tolerance is used to determine if two sites are sitting in the same position, in which case they will be combined to a single disordered site. Defaults to 1e-4. (pymatgen only) - :return: :py:class:`aiida.orm.data.structure.StructureData` node. + :return: :py:class:`aiida.orm.node.data.structure.StructureData` node. """ from . import cif # pylint: disable=import-self from aiida.orm.node.data.parameter import ParameterData diff --git a/aiida/orm/node/data/structure.py b/aiida/orm/node/data/structure.py index db7b310ea4..d3b0c5765a 100644 --- a/aiida/orm/node/data/structure.py +++ b/aiida/orm/node/data/structure.py @@ -1811,7 +1811,7 @@ def _get_cif(self, converter='ase', store=False, **kwargs): def get_cif(self, converter='ase', store=False, **kwargs): """ - Creates :py:class:`aiida.orm.data.cif.CifData`. + Creates :py:class:`aiida.orm.node.data.cif.CifData`. .. versionadded:: 1.0 Renamed from _get_cif @@ -1819,7 +1819,7 @@ def get_cif(self, converter='ase', store=False, **kwargs): :param converter: specify the converter. Default 'ase'. :param store: If True, intermediate calculation gets stored in the AiiDA database for record. Default False. - :return: :py:class:`aiida.orm.data.cif.CifData` node. + :return: :py:class:`aiida.orm.node.data.cif.CifData` node. """ from .parameter import ParameterData from . 
import structure # This same module diff --git a/docs/source/concepts/workflows.rst b/docs/source/concepts/workflows.rst index 9b1fc64c07..3767f2c4e7 100644 --- a/docs/source/concepts/workflows.rst +++ b/docs/source/concepts/workflows.rst @@ -922,6 +922,6 @@ However, these workchains can be updated with just a few minor updates that we w * The method ``RemoteData.is_empty()`` has been changes and is now accessed through the ``RemoteData.is_empty``. * The method ``.is_alloy()`` for classes ``StructureData`` and ``Kind`` is now accessed through the ``.is_alloy`` property. * The method ``.has_vacancies()`` for classes ``StructureData`` and ``Kind`` is now accessed through the ``.has_vacancies`` property. -* The arguments ``stepids`` and ``cells`` of the :meth:`TrajectoryData.set_trajectory()` method are made optional +* The arguments ``stepids`` and ``cells`` of the :meth:`TrajectoryData.set_trajectory()` method are made optional which has implications on the ordering of the arguments passed to this method. * The list of atomic symbols for trajectories is no longer stored as array data but is now accessible through the ``TrajectoryData.symbols`` attribute. diff --git a/docs/source/developer_guide/core/modifying_the_schema.rst b/docs/source/developer_guide/core/modifying_the_schema.rst index 0e74913663..7417f0a804 100644 --- a/docs/source/developer_guide/core/modifying_the_schema.rst +++ b/docs/source/developer_guide/core/modifying_the_schema.rst @@ -1,4 +1,4 @@ -Mofidying the schema +Modifying the schema ++++++++++++++++++++ Django
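
The documentation change above states that trajectory symbols are no longer stored as array data but are exposed as an attribute. A minimal sketch, not part of the patch, of how user code reads a stored TrajectoryData node before and after these migrations, assuming `node` is a stored and already migrated TrajectoryData node:

    # Before: symbols lived in a repository array
    # symbols = node.get_array('symbols').tolist()
    # After migrations 0026/0027 (Django) or 12536798d4d3/ce56d84bcc35 (SQLAlchemy):
    symbols = node.get_attr('symbols')         # e.g. ['H', 'O', 'C'], as asserted in the tests above
    positions = node.get_array('positions')    # the other arrays are unchanged
    # node.get_array('symbols') now raises KeyError, since the symbols.npy file has been removed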