Skip to content

Commit

Permalink
Update pyyaml to prevent arbitrary code execution (#3675)
Browse files Browse the repository at this point in the history
* Fix various deprecation warnings

 * Do not use `Test` as prefix for dummy classes
 * Replace `imp` with `importlib` in REST API
 * Use `identifier` instead of deprecated `node_class`

* Update `pyyaml` to prevent arbitrary code execution

Before `pyyaml==5.1` the `yaml.load` function was vulnerable to
arbitrary code execution, because by default it loaded the full YAML
language, including tags that can construct arbitrary Python objects.
There was an alternative `safe_load`, but this was not the default and
could only load a subset of the markup language. The new version of pyyaml
deprecates the old vulnerable code and provides the `FullLoader` that
can load the full set without being vulnerable.
  • Loading branch information
sphuber authored and ltalirz committed Dec 16, 2019
1 parent a73e4b6 commit 999ae3a
Show file tree
Hide file tree
Showing 12 changed files with 48 additions and 43 deletions.
42 changes: 21 additions & 21 deletions aiida/backends/tests/cmdline/commands/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from aiida.orm import Group, ArrayData, BandsData, KpointsData, CifData, Dict, RemoteData, StructureData, TrajectoryData


class TestVerdiDataExportable:
class DummyVerdiDataExportable:
"""Test exportable data objects."""

NODE_ID_STR = 'node_id'
Expand Down Expand Up @@ -98,7 +98,7 @@ def data_export_test(self, datatype, ids, supported_formats):
shutil.rmtree(tmpd)


class TestVerdiDataListable:
class DummyVerdiDataListable:
"""Test listable data objects."""

NODE_ID_STR = 'node_id'
Expand Down Expand Up @@ -240,7 +240,7 @@ def test_arrayshow(self):
self.assertEqual(res.exit_code, 0, 'The command did not finish correctly')


class TestVerdiDataBands(AiidaTestCase, TestVerdiDataListable):
class TestVerdiDataBands(AiidaTestCase, DummyVerdiDataListable):
"""Testing verdi data bands."""

@staticmethod
Expand Down Expand Up @@ -298,9 +298,9 @@ def connect_structure_bands(strct): # pylint: disable=unused-argument
g_e.store()

return {
TestVerdiDataListable.NODE_ID_STR: bands.id,
TestVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
TestVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
DummyVerdiDataListable.NODE_ID_STR: bands.id,
DummyVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
DummyVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
}

@classmethod
Expand All @@ -327,7 +327,7 @@ def test_bandexporthelp(self):
self.assertIn(b'Usage:', output, 'Sub-command verdi data bands export --help failed.')

def test_bandsexport(self):
options = [str(self.ids[TestVerdiDataListable.NODE_ID_STR])]
options = [str(self.ids[DummyVerdiDataListable.NODE_ID_STR])]
res = self.cli_runner.invoke(cmd_bands.bands_export, options, catch_exceptions=False)
self.assertEqual(res.exit_code, 0, 'The command did not finish correctly')
self.assertIn(b'[1.0, 3.0]', res.stdout_bytes, 'The string [1.0, 3.0] was not found in the bands' 'export')
Expand Down Expand Up @@ -428,7 +428,7 @@ def test_remotecat(self):
)


class TestVerdiDataTrajectory(AiidaTestCase, TestVerdiDataListable, TestVerdiDataExportable):
class TestVerdiDataTrajectory(AiidaTestCase, DummyVerdiDataListable, DummyVerdiDataExportable):
"""Test verdi data trajectory."""

@staticmethod
Expand Down Expand Up @@ -487,9 +487,9 @@ def create_trajectory_data():
g_e.store()

return {
TestVerdiDataListable.NODE_ID_STR: traj.id,
TestVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
TestVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
DummyVerdiDataListable.NODE_ID_STR: traj.id,
DummyVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
DummyVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
}

@classmethod
Expand All @@ -516,15 +516,15 @@ def test_showhelp(self):
)

def test_list(self):
self.data_listing_test(TrajectoryData, str(self.ids[TestVerdiDataListable.NODE_ID_STR]), self.ids)
self.data_listing_test(TrajectoryData, str(self.ids[DummyVerdiDataListable.NODE_ID_STR]), self.ids)

@unittest.skipUnless(has_pycifrw(), 'Unable to import PyCifRW')
def test_export(self):
new_supported_formats = list(cmd_trajectory.EXPORT_FORMATS)
self.data_export_test(TrajectoryData, self.ids, new_supported_formats)


class TestVerdiDataStructure(AiidaTestCase, TestVerdiDataListable, TestVerdiDataExportable):
class TestVerdiDataStructure(AiidaTestCase, DummyVerdiDataListable, DummyVerdiDataExportable):
"""Test verdi data structure."""
from aiida.orm.nodes.data.structure import has_ase

Expand Down Expand Up @@ -568,9 +568,9 @@ def create_structure_data():
g_e.store()

return {
TestVerdiDataListable.NODE_ID_STR: struc.id,
TestVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
TestVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
DummyVerdiDataListable.NODE_ID_STR: struc.id,
DummyVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
DummyVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
}

@classmethod
Expand Down Expand Up @@ -707,7 +707,7 @@ def test_export(self):


@unittest.skipUnless(has_pycifrw(), 'Unable to import PyCifRW')
class TestVerdiDataCif(AiidaTestCase, TestVerdiDataListable, TestVerdiDataExportable):
class TestVerdiDataCif(AiidaTestCase, DummyVerdiDataListable, DummyVerdiDataExportable):
"""Test verdi data cif."""
valid_sample_cif_str = '''
data_test
Expand Down Expand Up @@ -749,9 +749,9 @@ def create_cif_data(cls):
cls.cif = a_cif

return {
TestVerdiDataListable.NODE_ID_STR: a_cif.id,
TestVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
TestVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
DummyVerdiDataListable.NODE_ID_STR: a_cif.id,
DummyVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
DummyVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
}

@classmethod
Expand Down Expand Up @@ -822,7 +822,7 @@ def test_export(self):
self.data_export_test(CifData, self.ids, cmd_cif.EXPORT_FORMATS)


class TestVerdiDataSinglefile(AiidaTestCase, TestVerdiDataListable, TestVerdiDataExportable):
class TestVerdiDataSinglefile(AiidaTestCase, DummyVerdiDataListable, DummyVerdiDataExportable):
"""Test verdi data singlefile."""
sample_str = '''
data_test
Expand Down
2 changes: 1 addition & 1 deletion aiida/backends/tests/engine/test_process_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_process_builder_set_attributes(self):
self.assertEqual(self.builder.metadata.description, description)

def test_dynamic_setters(self):
"""Verify that the attributes of the TestWorkChain can be set but defaults are not there."""
"""Verify that the attributes of the DummyWorkChain can be set but defaults are not there."""
self.builder_workchain.dynamic.namespace = self.inputs['dynamic']['namespace']
self.builder_workchain.name.spaced = self.inputs['name']['spaced']
self.builder_workchain.name_spaced = self.inputs['name_spaced']
Expand Down
2 changes: 1 addition & 1 deletion aiida/backends/tests/engine/test_workfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_workfunction_caching(self):
_ = self.test_workfunction(self.default_int)

# Caching should always be disabled for a WorkFunctionNode
with enable_caching(WorkFunctionNode):
with enable_caching(identifier=WorkFunctionNode):
_, cached = self.test_workfunction.run_get_node(self.default_int)
self.assertFalse(cached.is_created_from_cache)

Expand Down
5 changes: 2 additions & 3 deletions aiida/cmdline/params/options/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,16 @@
.. py:module::config
:synopsis: Convenience class for configuration file option
"""

import yaml
import click_config_file
import yaml

from .overridable import OverridableOption


def yaml_config_file_provider(file_path, cmd_name): # pylint: disable=unused-argument
"""Read yaml config file."""
with open(file_path, 'r') as handle:
return yaml.load(handle)
return yaml.safe_load(handle)


class ConfigFileOption(OverridableOption):
Expand Down
3 changes: 1 addition & 2 deletions aiida/manage/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Definition of caching mechanism and configuration for calculations."""

import os
import copy
import warnings
Expand Down Expand Up @@ -55,7 +54,7 @@ def _get_config(config_file):

try:
with open(config_file, 'r', encoding='utf8') as handle:
config = yaml.load(handle)[profile.name]
config = yaml.safe_load(handle)[profile.name]
except (OSError, IOError, KeyError):
# No config file, or no config for this profile
return DEFAULT_CONFIG
Expand Down
13 changes: 10 additions & 3 deletions aiida/orm/utils/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
checkpoints and messages in the RabbitMQ queue so do so with caution. It is fine to add representers
for new types though.
"""

from functools import partial
import yaml

Expand Down Expand Up @@ -177,8 +176,14 @@ def represent_data(self, data):
return super().represent_data(data)


class AiiDALoader(yaml.Loader):
"""AiiDA specific yaml loader"""
class AiiDALoader(yaml.FullLoader):
"""AiiDA specific yaml loader
.. note:: we subclass the `FullLoader`, which since `pyyaml>=5.1` is the loader that prevents
arbitrary code execution. Even though this is in principle only used internally, one could imagine someone
sharing a database with a maliciously crafted process instance dump, which when reloaded could execute arbitrary
code. This loader prevents this: https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation
"""


yaml.add_representer(Bundle, represent_bundle, Dumper=AiiDADumper)
Expand Down Expand Up @@ -217,6 +222,8 @@ def serialize(data, encoding=None):
def deserialize(serialized):
"""Deserialize a yaml dump that represents a serialized data structure.
.. note:: no need to use `yaml.safe_load` here because the `Loader` will ensure that loading is safe.
:param serialized: a yaml serialized string representation
:return: the deserialized data structure
"""
Expand Down
7 changes: 4 additions & 3 deletions aiida/restapi/run_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
"""
It defines the method with all required parameters to run restapi locally.
"""

import imp
import importlib
import os

from flask_cors import CORS
Expand Down Expand Up @@ -53,7 +52,9 @@ def run_api(flask_app, flask_api, **kwargs):
hookup = kwargs['hookup']

# Import the right configuration file
confs = imp.load_source(os.path.join(config, 'config'), os.path.join(config, 'config.py'))
spec = importlib.util.spec_from_file_location(os.path.join(config, 'config'), os.path.join(config, 'config.py'))
confs = importlib.util.module_from_spec(spec)
spec.loader.exec_module(confs)

# Instantiate an app
app_kwargs = dict(catch_internal_server=catch_internal_server)
Expand Down
9 changes: 3 additions & 6 deletions aiida/restapi/translator/nodes/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Translator for node"""


from aiida.common.exceptions import InputValidationError, ValidationError, \
InvalidOperation
from aiida.restapi.translator.base import BaseTranslator
from aiida.manage.manager import get_manager
from aiida import orm
from aiida.common.exceptions import InputValidationError, ValidationError, InvalidOperation
from aiida.manage.manager import get_manager
from aiida.restapi.translator.base import BaseTranslator


class NodeTranslator(BaseTranslator):
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements_for_rtd.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pytest~=5.3
python-dateutil~=2.8
python-memcached~=1.59
pytz~=2019.3
pyyaml~=3.13
pyyaml~=5.1
reentry~=1.3
seekpath~=1.9,>=1.9.3
simplejson~=3.16
Expand Down
2 changes: 2 additions & 0 deletions docs/source/nitpick-exceptions
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ py:class yaml.Dumper
py:class yaml.Loader
py:class yaml.dumper.Dumper
py:class yaml.loader.Loader
py:class yaml.FullLoader
py:class yaml.loader.FullLoader

py:class uuid.UUID

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- psycopg2~=2.8,>=2.8.3
- python-dateutil~=2.8
- pytz~=2019.3
- pyyaml~=3.13
- pyyaml~=5.1
- reentry~=1.3
- simplejson~=3.16
- sqlalchemy-utils~=0.34.2
Expand Down
2 changes: 1 addition & 1 deletion setup.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"pyblake2~=1.1; python_version<'3.6'",
"python-dateutil~=2.8",
"pytz~=2019.3",
"pyyaml~=3.13",
"pyyaml~=5.1",
"reentry~=1.3",
"simplejson~=3.16",
"sqlalchemy-utils~=0.34.2",
Expand Down

0 comments on commit 999ae3a

Please sign in to comment.