From 9378c8f8fc453a0ceb5556a37a26f41e913a9170 Mon Sep 17 00:00:00 2001 From: Erdal Karaca Date: Sat, 28 May 2022 10:30:06 +0200 Subject: [PATCH 1/5] new BIDSLayoutV2 interface to be used in parallel with legacy BIDSLayout --- bids/layout/__init__.py | 8 + bids/layout/layout_v2.py | 364 +++++++++++++++ bids/layout/tests/test_layout_v2.py | 668 ++++++++++++++++++++++++++++ 3 files changed, 1040 insertions(+) create mode 100644 bids/layout/layout_v2.py create mode 100644 bids/layout/tests/test_layout_v2.py diff --git a/bids/layout/__init__.py b/bids/layout/__init__.py index 1df026057..9d5970913 100644 --- a/bids/layout/__init__.py +++ b/bids/layout/__init__.py @@ -1,3 +1,5 @@ +import warnings + from .layout import BIDSLayout, Query from .models import (BIDSFile, BIDSImageFile, BIDSDataFile, BIDSJSONFile, Config, Entity, Tag) @@ -21,3 +23,9 @@ "Tag", "Query" ] + +try: + from .layout_v2 import BIDSLayoutV2 + __all__ += ["BIDSLayoutV2"] +except: + warnings.warn("Could not load BIDSLayoutV2: make sure you installed the ancpBIDS package") diff --git a/bids/layout/layout_v2.py b/bids/layout/layout_v2.py new file mode 100644 index 000000000..a7d7ef5a0 --- /dev/null +++ b/bids/layout/layout_v2.py @@ -0,0 +1,364 @@ +import difflib +import os.path +from collections import OrderedDict +from functools import partial +from pathlib import Path +from typing import List, Union, Dict + +from .utils import BIDSMetadata +from ..exceptions import ( + BIDSEntityError, + BIDSValidationError, + NoMatchError, + TargetError, +) + +from ancpbids import CustomOpExpr, EntityExpr, AllExpr, ValidationPlugin, load_dataset, validate_dataset, \ + write_derivative +from ancpbids.query import query, query_entities, FnMatchExpr, AnyExpr +from ancpbids.utils import deepupdate, resolve_segments, convert_to_relative + +__all__ = ['BIDSLayoutV2'] + +class BIDSLayoutMRIMixin: + def get_tr(self, derivatives=False, **entities): + + """Return the scanning repetition time (TR) for one or more runs. + + Parameters + ---------- + derivatives : bool + If True, also checks derivatives images. + filters : dict + Optional keywords used to constrain the selected runs. + Can be any arguments valid for a .get call (e.g., BIDS entities + or JSON sidecar keys). + + Returns + ------- + float + A single float. + + Notes + ----- + Raises an exception if more than one unique TR is found. + """ + # Constrain search to functional images + scope = 'all' if derivatives else 'raw' + images = self.get(scope=scope, extension=['.nii', '.nii.gz'], suffix='bold', **entities) + if not images: + raise NoMatchError("No functional images that match criteria found.") + + all_trs = set() + for img in images: + md = img.get_metadata() + all_trs.add(round(float(md['RepetitionTime']), 5)) + + if len(all_trs) > 1: + raise NoMatchError("Unique TR cannot be found given filters {!r}" + .format(entities)) + return all_trs.pop() + +class BIDSLayoutV2(BIDSLayoutMRIMixin): + """A convenience class to provide access to an in-memory representation of a BIDS dataset. + + .. code-block:: + + dataset_path = 'path/to/your/dataset' + layout = BIDSLayout(dataset_path) + + Parameters + ---------- + ds_dir: + the (absolute) path to the dataset to load + """ + + def __init__(self, ds_dir: Union[str, Path], validate=True, **kwargs): + if isinstance(ds_dir, Path): + ds_dir = ds_dir.absolute() + self.dataset = load_dataset(ds_dir) + self.schema = self.dataset.get_schema() + self.validationReport = None + if validate: + self.validationReport = self.validate() + if self.validationReport.has_errors(): + error_message = os.linesep.join(map(lambda error: error['message'], self.validationReport.get_errors())) + raise BIDSValidationError(error_message) + + def __getattr__(self, key): + # replace arbitrary get functions with calls to get + if key.startswith("get_"): + return partial(self.get, "id", key[4:]) + + # give up if the above don't work + raise AttributeError(key) + + def get_metadata(self, path, include_entities=False, scope='all'): + """Return metadata found in JSON sidecars for the specified file. + + Parameters + ---------- + path : str + Path to the file to get metadata for. + include_entities : bool, optional + If True, all available entities extracted + from the filename (rather than JSON sidecars) are included in + the returned metadata dictionary. + scope : str or list, optional + The scope of the search space. Each element must + be one of 'all', 'raw', 'self', 'derivatives', or a + BIDS-Derivatives pipeline name. Defaults to searching all + available datasets. + + Returns + ------- + dict + A dictionary of key/value pairs extracted from all of the + target file's associated JSON sidecars. + + Notes + ----- + A dictionary containing metadata extracted from all matching .json + files is returned. In cases where the same key is found in multiple + files, the values in files closer to the input filename will take + precedence, per the inheritance rules in the BIDS specification. + + """ + path = convert_to_relative(self.dataset, path) + file = self.dataset.get_file(path) + md = file.get_metadata() + if md and include_entities: + schema_entities = {e.entity_: e.literal_ for e in list(self.schema.EntityEnum)} + md.update({schema_entities[e.key]: e.value for e in file.entities}) + bmd = BIDSMetadata(file.get_absolute_path()) + bmd.update(md) + return bmd + + def get(self, return_type: str = 'object', target: str = None, scope: str = None, + extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None, + regex_search=False, + **entities) -> Union[List[str], List[object]]: + """Depending on the return_type value returns either paths to files that matched the filtering criteria + or :class:`Artifact ` objects for further processing by the caller. + + Note that all provided filter criteria are AND combined, i.e. subj='02',task='lang' will match files containing + '02' as a subject AND 'lang' as a task. If you provide a list of values for a criteria, they will be OR combined. + + .. code-block:: + + file_paths = layout.get(subj='02', task='lang', suffix='bold', return_type='files') + + file_paths = layout.get(subj=['02', '03'], task='lang', return_type='files') + + Parameters + ---------- + return_type: + Either 'files' to return paths of matched files + or 'object' to return :class:`Artifact ` object, defaults to 'object' + + target: + Either `suffixes`, `extensions` or one of any valid BIDS entities key + (see :class:`EntityEnum `, defaults to `None` + scope: + a hint where to search for files + If passed, only nodes/directories that match the specified scope will be + searched. Possible values include: + 'all' (default): search all available directories. + 'derivatives': search all derivatives directories. + 'raw': search only BIDS-Raw directories. + 'self': search only the directly called BIDSLayout. + : the name of a BIDS-Derivatives pipeline. + extension: + criterion to match any files containing the provided extension only + suffix: + criterion to match any files containing the provided suffix only + entities + a list of key-values to match the entities of interest, example: subj='02',task='lang' + + Returns + ------- + depending on the return_type value either paths to files that matched the filtering criteria + or Artifact objects for further processing by the caller + """ + # Provide some suggestions if target is specified and invalid. + self_entities = self.get_entities() + if target is not None and target not in self_entities: + potential = list(self_entities.keys()) + suggestions = difflib.get_close_matches(target, potential) + if suggestions: + message = "Did you mean one of: {}?".format(suggestions) + else: + message = "Valid targets are: {}".format(potential) + raise TargetError(("Unknown target '{}'. " + message) + .format(target)) + folder = self.dataset + return query(folder, return_type, target, scope, extension, suffix, regex_search, **entities) + + @property + def entities(self): + return self.get_entities() + + def get_entities(self, scope: str = None, sort: bool = False) -> dict: + """Returns a unique set of entities found within the dataset as a dict. + Each key of the resulting dict contains a list of values (with at least one element). + + Example dict: + .. code-block:: + + { + 'sub': ['01', '02', '03'], + 'task': ['gamblestask'] + } + + Parameters + ---------- + scope: + see BIDSLayout.get() + sort: default is `False` + whether to sort the keys by name + + Returns + ------- + dict + a unique set of entities found within the dataset as a dict + """ + return query_entities(self.dataset, scope, sort) + + def get_dataset_description(self, scope='self', all_=False) -> Union[List[Dict], Dict]: + """Return contents of dataset_description.json. + + Parameters + ---------- + scope : str + The scope of the search space. Only descriptions of + BIDSLayouts that match the specified scope will be returned. + See :obj:`bids.layout.BIDSLayout.get` docstring for valid values. + Defaults to 'self' --i.e., returns the dataset_description.json + file for only the directly-called BIDSLayout. + all_ : bool + If True, returns a list containing descriptions for + all matching layouts. If False (default), returns for only the + first matching layout. + + Returns + ------- + dict or list of dict + a dictionary or list of dictionaries (depending on all_). + """ + all_descriptions = self.dataset.select(self.schema.DatasetDescriptionFile).objects(as_list=True) + if all_: + return all_descriptions + return all_descriptions[0] if all_descriptions else None + + def get_dataset(self) -> object: + """ + Returns + ------- + the in-memory representation of this layout/dataset + """ + return self.dataset + + def add_derivatives(self, path): + path = convert_to_relative(self.dataset, path) + self.dataset.create_derivative(path=path) + + def write_derivative(self, derivative): + """Writes the provided derivative folder to the dataset. + Note that a 'derivatives' folder will be created if not present. + + Parameters + ---------- + derivative: + the derivative folder to write + """ + assert isinstance(derivative, self.schema.DerivativeFolder) + write_derivative(self.dataset, derivative) + + def validate(self) -> ValidationPlugin.ValidationReport: + """Validates a dataset and returns a report object containing any detected validation errors. + + Example + ---------- + + .. code-block:: + + report = layout.validate() + for message in report.messages: + print(message) + if report.has_errors(): + raise "The dataset contains validation errors, cannot continue". + + Returns + ------- + a report object containing any detected validation errors or warning + """ + return validate_dataset(self.dataset) + + @property + def files(self): + return self.get_files() + + def get_files(self, scope='all'): + """Get BIDSFiles for all layouts in the specified scope. + + Parameters + ---------- + scope : str + The scope of the search space. Indicates which + BIDSLayouts' entities to extract. + See :obj:`bids.layout.BIDSLayout.get` docstring for valid values. + + + Returns: + A dict, where keys are file paths and values + are :obj:`bids.layout.BIDSFile` instances. + + """ + all_files = self.get(return_type="object", scope=scope) + files = {file.get_absolute_path(): file for file in all_files} + return files + + def get_file(self, filename, scope='all'): + """Return the BIDSFile object with the specified path. + + Parameters + ---------- + filename : str + The path of the file to retrieve. Must be either an absolute path, + or relative to the root of this BIDSLayout. + scope : str or list, optional + Scope of the search space. If passed, only BIDSLayouts that match + the specified scope will be searched. See :obj:`BIDSLayout.get` + docstring for valid values. Default is 'all'. + + Returns + ------- + :obj:`bids.layout.BIDSFile` or None + File found, or None if no match was found. + """ + context = self.dataset + filename = convert_to_relative(self.dataset, filename) + if scope and scope not in ['all', 'raw', 'self']: + context, _ = resolve_segments(context, scope) + return context.get_file(filename) + + @property + def description(self): + return self.get_dataset_description() + + @property + def root(self): + return self.dataset.base_dir_ + + def __repr__(self): + """Provide a tidy summary of key properties.""" + ents = self.get_entities() + n_subjects = len(set(ents['sub'])) if 'sub' in ents else 0 + n_sessions = len(set(ents['ses'])) if 'ses' in ents else 0 + n_runs = len(set(ents['run'])) if 'run' in ents else 0 + s = ("BIDS Layout: ...{} | Subjects: {} | Sessions: {} | " + "Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs)) + return s + + + diff --git a/bids/layout/tests/test_layout_v2.py b/bids/layout/tests/test_layout_v2.py new file mode 100644 index 000000000..c2f6d6a81 --- /dev/null +++ b/bids/layout/tests/test_layout_v2.py @@ -0,0 +1,668 @@ +""" Tests of functionality in the layout module--mostly related to the +BIDSLayout class.""" + +import json +import os +import re +import shutil +from os.path import join, abspath, basename +from pathlib import Path + +import numpy as np +import pytest + +from bids.exceptions import ( + BIDSDerivativesValidationError, + BIDSValidationError, + NoMatchError, + TargetError, +) +from bids.layout import BIDSLayoutV2 as BIDSLayout, Query +from bids.layout.index import BIDSLayoutIndexer +from bids.layout.models import Config +from bids.layout.utils import PaddedInt +from bids.tests import get_test_data_path +from bids.utils import natural_sort + + +# Fixture uses in the rest of the tests +@pytest.fixture(scope="module") +def layout_7t_trt(): + data_dir = join(get_test_data_path(), '7t_trt') + return BIDSLayout(data_dir) + + +@pytest.fixture(scope="module") +def layout_ds005_derivs(): + data_dir = join(get_test_data_path(), 'ds005') + layout = BIDSLayout(data_dir, validate=False) + return layout + +@pytest.fixture(scope="module") +def layout_ds117(): + data_dir = join(get_test_data_path(), 'ds000117') + return BIDSLayout(data_dir) + +def test_layout_init(layout_7t_trt): + assert isinstance(layout_7t_trt.files, dict) + + +@pytest.mark.parametrize( + 'index_metadata,query,result', + [ + (True, {}, 3.0), + (True, {}, 3.0), + (True, {'task': 'rest'}, 3.0), + (True, {'task': 'rest', 'extension': ['.nii.gz']}, 3.0), + (True, {'task': 'rest', 'extension': '.nii.gz'}, 3.0), + (True, {'task': 'rest', 'extension': ['.nii.gz', '.json'], 'return_type': 'file'}, 3.0), + ]) +def test_file_get_metadata(index_metadata, query, result, mock_config): + data_dir = join(get_test_data_path(), '7t_trt') + layout = BIDSLayout(data_dir, index_metadata=index_metadata, **query) + sample_file = layout.get(task='rest', extension='.nii.gz', + acquisition='fullbrain')[0] + metadata = sample_file.get_metadata() + assert metadata.get('RepetitionTime') == result + + +def test_layout_repr(layout_7t_trt): + assert "Subjects: 10 | Sessions: 2 | Runs: 2" in str(layout_7t_trt) + + +def test_invalid_dataset_description(tmp_path): + shutil.copytree(join(get_test_data_path(), '7t_trt'), tmp_path / "7t_dset") + (tmp_path / "7t_dset" / "dataset_description.json").write_text( + "I am not a valid json file" + ) + with pytest.raises(BIDSValidationError) as exc: + BIDSLayout(tmp_path / "7t_dset") + + +def test_layout_repr_overshadow_run(tmp_path): + """A test creating a layout to replicate #681.""" + shutil.copytree(join(get_test_data_path(), '7t_trt'), tmp_path / "7t_trt") + (tmp_path / "7t_trt" / "sub-01" / "ses-1" / "sub-01_ses-1_scans.json").write_text( + json.dumps({"run": {"Description": "metadata to cause #681"}}) + ) + assert "Subjects: 10 | Sessions: 2 | Runs: 2" in str(BIDSLayout(tmp_path / "7t_trt")) + + +def test_load_description(layout_7t_trt): + # Should not raise an error + assert hasattr(layout_7t_trt, 'description') + assert layout_7t_trt.description['Name'] == '7t_trt' + assert layout_7t_trt.description['BIDSVersion'] == "1.0.0rc3" + + +def test_get_file(layout_ds005_derivs): + layout = layout_ds005_derivs + + # relative path in BIDS-Raw + orig_file = 'sub-13/func/sub-13_task-mixedgamblestask_run-01_bold.nii.gz' + target = os.path.join(*orig_file.split('/')) + assert layout.get_file(target) + assert layout.get_file(target, scope='raw') + assert not layout.get_file(target, scope='derivatives') + + # absolute path in BIDS-Raw + target = os.path.join(layout.root, *orig_file.split('/')) + assert layout.get_file(target) + assert layout.get_file(target, scope='raw') + assert not layout.get_file(target, scope='derivatives') + + # relative path in derivatives pipeline + orig_file = 'events/sub-01/func/sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' + target = os.path.join(*orig_file.split('/')) + assert not layout.get_file(target) + assert layout.get_file(target, scope='derivatives') + + # absolute path in derivatives pipeline + orig_file = 'sub-01/func/sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' + target = os.path.join(*orig_file.split('/')) + assert not layout.get_file(target) + assert not layout.get_file(target, scope='derivatives') + assert layout.get_file(target, scope='derivatives/events') + + # No such file + assert not layout.get_file('bleargh') + assert not layout.get_file('/absolute/bleargh') + + +class TestDerivativeAsRoot: + def test_dataset_without_datasettype_parsed_as_raw(self): + dataset_path = Path("ds005_derivs", "format_errs", "no_dataset_type") + unvalidated = BIDSLayout( + Path(get_test_data_path()) / dataset_path, + validate=False + ) + assert len(unvalidated.get()) == 4 + with pytest.raises(ValueError): + unvalidated.get(desc="preproc") + + validated = BIDSLayout(Path(get_test_data_path()) / dataset_path) + assert len(validated.get()) == 1 + + def test_dataset_missing_generatedby_fails_validation(self): + dataset_path = Path("ds005_derivs", "format_errs", "no_pipeline_description") + with pytest.raises(BIDSDerivativesValidationError): + BIDSLayout(Path(get_test_data_path()) / dataset_path) + + def test_correctly_formatted_derivative_loads_as_derivative(self): + dataset_path = Path("ds005_derivs", "dummy") + layout = BIDSLayout(Path(get_test_data_path()) / dataset_path) + assert len(layout.get()) == 4 + assert len(layout.get(desc="preproc")) == 3 + + @pytest.mark.parametrize( + "dataset_path", + [ + Path("ds005_derivs", "dummy"), + Path("ds005_derivs", "format_errs", "no_pipeline_description") + ] + ) + def test_derivative_datasets_load_with_no_validation(self, dataset_path): + layout = BIDSLayout( + Path(get_test_data_path()) / dataset_path, + validate=False + ) + assert len(layout.get()) == 4 + assert len(layout.get(desc="preproc")) == 3 + + +def test_get_metadata(layout_7t_trt): + target = 'sub-03/ses-2/func/sub-03_ses-2_task-' \ + 'rest_acq-fullbrain_run-2_bold.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_metadata(join(layout_7t_trt.root, *target)) + assert result['RepetitionTime'] == 3.0 + + +def test_get_metadata2(layout_7t_trt): + target = 'sub-03/ses-1/fmap/sub-03_ses-1_run-1_phasediff.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_metadata(join(layout_7t_trt.root, *target)) + assert result['EchoTime1'] == 0.006 + + +def test_get_metadata3(layout_7t_trt): + target = 'sub-01/ses-1/func/sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_metadata(join(layout_7t_trt.root, *target)) + assert result['EchoTime'] == 0.020 + + target = 'sub-01/ses-1/func/sub-01_ses-1_task-rest_acq-fullbrain_run-2_bold.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_metadata(join(layout_7t_trt.root, *target)) + assert result['EchoTime'] == 0.017 + + +def test_get_metadata4(layout_ds005): + target = 'sub-03/anat/sub-03_T1w.nii.gz' + target = target.split('/') + result = layout_ds005.get_metadata(join(layout_ds005.root, *target)) + assert result == {} + + +def test_get_metadata_meg(layout_ds117): + funcs = ['get_subjects', 'get_sessions', 'get_tasks', 'get_runs', + 'get_acquisitions', 'get_procs'] + assert all([hasattr(layout_ds117, f) for f in funcs]) + procs = layout_ds117.get_procs() + assert procs == ['sss'] + target = 'sub-02/ses-meg/meg/sub-02_ses-meg_task-facerecognition_run-01_meg.fif' + target = target.split('/') + result = layout_ds117.get_metadata(join(layout_ds117.root, *target)) + metadata_keys = ['MEGChannelCount', 'SoftwareFilters', 'SubjectArtefactDescription'] + assert all([k in result for k in metadata_keys]) + + +def test_get_metadata5(layout_7t_trt): + target = 'sub-01/ses-1/func/sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_metadata( + join(layout_7t_trt.root, *target), include_entities=True) + assert result['EchoTime'] == 0.020 + assert result['subject'] == '01' + assert result['acquisition'] == 'fullbrain' + + +def test_get_metadata_via_bidsfile(layout_7t_trt): + ''' Same as test_get_metadata5, but called through BIDSFile. ''' + target = 'sub-01/ses-1/func/sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz' + target = target.split('/') + path = join(layout_7t_trt.root, *target) + result = layout_7t_trt.files[path].get_metadata() + assert result['EchoTime'] == 0.020 + # include_entities is False when called through a BIDSFile + assert 'subject' not in result + + +def test_get_metadata_error(layout_7t_trt): + ''' Same as test_get_metadata5, but called through BIDSFile. ''' + target = 'sub-01/ses-1/func/sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz' + target = target.split('/') + path = join(layout_7t_trt.root, *target) + result = layout_7t_trt.files[path].get_metadata() + with pytest.raises(KeyError) as err: + result['Missing'] + + result = layout_7t_trt.get_metadata(path) + with pytest.raises(KeyError) as err: + result['Missing'] + + +def test_get_with_bad_target(layout_7t_trt): + with pytest.raises(TargetError) as exc: + layout_7t_trt.get(target='unicorn') + msg = str(exc.value) + assert 'subject' in msg and 'reconstruction' in msg and 'proc' in msg + with pytest.raises(TargetError) as exc: + layout_7t_trt.get(target='sub') + msg = str(exc.value) + assert 'subject' in msg and 'reconstruction' not in msg + + +def test_get_bvals_bvecs(layout_ds005): + dwifile = layout_ds005.get(subject="01", datatype="dwi")[0] + result = layout_ds005.get_bval(dwifile.path) + assert result == abspath(join(layout_ds005.root, 'dwi.bval')) + + result = layout_ds005.get_bvec(dwifile.path) + assert result == abspath(join(layout_ds005.root, 'dwi.bvec')) + + +def test_get_subjects(layout_7t_trt): + result = layout_7t_trt.get_subjects() + predicted = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10'] + assert set(predicted) == set(result) + + +def test_get_fieldmap(layout_7t_trt): + target = 'sub-03/ses-1/func/sub-03_ses-1_task-' \ + 'rest_acq-fullbrain_run-1_bold.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_fieldmap(join(layout_7t_trt.root, *target)) + assert result["suffix"] == "phasediff" + assert result["phasediff"].endswith('sub-03_ses-1_run-1_phasediff.nii.gz') + + +def test_get_fieldmap2(layout_7t_trt): + target = 'sub-03/ses-2/func/sub-03_ses-2_task-' \ + 'rest_acq-fullbrain_run-2_bold.nii.gz' + target = target.split('/') + result = layout_7t_trt.get_fieldmap(join(layout_7t_trt.root, *target)) + assert result["suffix"] == "phasediff" + assert result["phasediff"].endswith('sub-03_ses-2_run-2_phasediff.nii.gz') + + +def test_bids_json(layout_7t_trt): + res = layout_7t_trt.get(return_type='id', target='run') + assert set(res) == {1, 2} + res = layout_7t_trt.get(return_type='id', target='session') + assert set(res) == {'1', '2'} + + +def test_get_return_type_dir(layout_7t_trt, layout_7t_trt_relpath): + query = dict(target='subject', return_type='dir') + # In case of relative paths + res_relpath = layout_7t_trt_relpath.get(**query) + # returned directories should be in sorted order so we can match exactly + target_relpath = ["sub-{:02d}".format(i) for i in range(1, 11)] + assert target_relpath == res_relpath + + res = layout_7t_trt.get(**query) + target = [ + os.path.join(get_test_data_path(), '7t_trt', p) + for p in target_relpath + ] + assert target == res + + # and we can overload the value for absolute_path in .get call + res_relpath2 = layout_7t_trt.get(absolute_paths=False, **query) + assert target_relpath == res_relpath2 + res2 = layout_7t_trt_relpath.get(absolute_paths=True, **query) + assert target == res2 + + +@pytest.mark.parametrize("acq", [None, Query.NONE]) +def test_get_val_none(layout_7t_trt, acq): + t1w_files = layout_7t_trt.get(subject='01', session='1', suffix='T1w') + assert len(t1w_files) == 1 + assert 'acq' not in t1w_files[0].path + t1w_files = layout_7t_trt.get( + subject='01', session='1', suffix='T1w', acquisition=acq) + assert len(t1w_files) == 1 + bold_files = layout_7t_trt.get( + subject='01', session='1', suffix='bold', acquisition=acq) + assert len(bold_files) == 0 + + +def test_get_val_enum_any(layout_7t_trt): + t1w_files = layout_7t_trt.get( + subject='01', session='1', suffix='T1w', acquisition=Query.ANY, + extension=Query.ANY) + assert not t1w_files + bold_files = layout_7t_trt.get(subject='01', session='1', run=1, + suffix='bold', acquisition=Query.ANY) + assert len(bold_files) == 2 + + +def test_get_val_enum_any_optional(layout_7t_trt, layout_ds005): + # layout with sessions + query = { + "subject": "01", + "run": 1, + "suffix": "bold", + } + bold_files = layout_7t_trt.get(session=Query.OPTIONAL, **query) + assert len(bold_files) == 3 + + # layout without sessions + bold_files = layout_ds005.get(session=Query.REQUIRED, **query) + assert not bold_files + bold_files = layout_ds005.get(session=Query.OPTIONAL, **query) + assert len(bold_files) == 1 + + +def test_get_return_sorted(layout_7t_trt): + bids_files = layout_7t_trt.get(target='subject') + paths = [r.path for r in bids_files] + assert natural_sort(paths) == paths + + files = layout_7t_trt.get(target='subject', return_type='file') + assert files == paths + + +def test_ignore_files(layout_ds005): + data_dir = join(get_test_data_path(), 'ds005') + target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') + target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') + layout1 = BIDSLayout(data_dir, validate=False) + assert target1 not in layout_ds005.files + assert target1 not in layout1.files + assert target2 not in layout1.files + # now the models/ dir should show up, because passing ignore explicitly + # overrides the default - but 'model/extras/' should still be ignored + # because of the regex. + ignore = [re.compile('xtra'), 'dummy'] + indexer = BIDSLayoutIndexer(validate=False, ignore=ignore) + layout2 = BIDSLayout(data_dir, indexer=indexer) + assert target1 in layout2.files + assert target2 not in layout2.files + + +def test_force_index(layout_ds005): + data_dir = join(get_test_data_path(), 'ds005') + target = join(data_dir, 'models', 'ds-005_type-test_model.json') + indexer = BIDSLayoutIndexer(force_index=['models']) + model_layout = BIDSLayout(data_dir, validate=True, indexer=indexer) + assert target not in layout_ds005.files + assert target in model_layout.files + assert 'all' not in model_layout.get_subjects() + for f in model_layout.files.values(): + assert 'derivatives' not in f.path + + +def test_nested_include_exclude(): + data_dir = join(get_test_data_path(), 'ds005') + target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') + target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') + + # Nest a directory exclusion within an inclusion + layout = BIDSLayout(data_dir, validate=True, force_index=['models'], + ignore=[os.path.join('models', 'extras')]) + assert layout.get_file(target1) + assert not layout.get_file(target2) + + # Nest a directory inclusion within an exclusion + layout = BIDSLayout(data_dir, validate=True, ignore=['models'], + force_index=[os.path.join('models', 'extras')]) + assert not layout.get_file(target1) + assert layout.get_file(target2) + + # Force file inclusion despite directory-level exclusion + models = ['models', target2] + layout = BIDSLayout(data_dir, validate=True, force_index=models, + ignore=[os.path.join('models', 'extras')]) + assert layout.get_file(target1) + assert layout.get_file(target2) + + +def test_nested_include_exclude_with_regex(): + # ~same as above test, but use regexps instead of strings + patt1 = re.compile('.*dels$') + patt2 = re.compile('xtra') + data_dir = join(get_test_data_path(), 'ds005') + target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') + target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') + + layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1]) + assert layout.get_file(target1) + assert not layout.get_file(target2) + + layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2]) + assert not layout.get_file(target1) + assert layout.get_file(target2) + + +def test_layout_with_derivs(layout_ds005_derivs): + assert layout_ds005_derivs.root == join(get_test_data_path(), 'ds005') + assert isinstance(layout_ds005_derivs.files, dict) + assert len(layout_ds005_derivs.derivatives) == 1 + deriv = layout_ds005_derivs.derivatives['events'] + assert deriv.files + assert len(deriv.files) == 2 + event_file = "sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv" + deriv_files = [basename(f) for f in list(deriv.files.keys())] + assert event_file in deriv_files + assert 'roi' in deriv.entities + assert 'subject' in deriv.entities + + +def test_layout_with_multi_derivs(layout_ds005_multi_derivs): + assert layout_ds005_multi_derivs.root == join(get_test_data_path(), 'ds005') + assert isinstance(layout_ds005_multi_derivs.files, dict) + assert len(layout_ds005_multi_derivs.derivatives) == 2 + deriv = layout_ds005_multi_derivs.derivatives['events'] + assert deriv.files + assert len(deriv.files) == 2 + deriv = layout_ds005_multi_derivs.derivatives['dummy'] + assert deriv.files + assert len(deriv.files) == 4 + assert 'roi' in deriv.entities + assert 'subject' in deriv.entities + preproc = layout_ds005_multi_derivs.get(desc='preproc') + assert len(preproc) == 3 + + +def test_query_derivatives(layout_ds005_derivs): + result = layout_ds005_derivs.get(suffix='events', return_type='object', + extension='.tsv') + result = [f.filename for f in result] + assert len(result) == 49 + assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result + result = layout_ds005_derivs.get(suffix='events', return_type='object', + scope='raw', extension='.tsv') + assert len(result) == 48 + result = [f.filename for f in result] + assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' not in result + result = layout_ds005_derivs.get(suffix='events', return_type='object', + desc='extra', extension='.tsv') + assert len(result) == 1 + result = [f.filename for f in result] + assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result + + +def test_restricted_words_in_path(tmpdir): + orig_path = join(get_test_data_path(), 'synthetic') + parent_dir = str(tmpdir / 'derivatives' / 'pipeline') + os.makedirs(parent_dir) + new_path = join(parent_dir, 'sourcedata') + os.symlink(orig_path, new_path) + orig_layout = BIDSLayout(orig_path) + new_layout = BIDSLayout(new_path) + + orig_files = set(f.replace(orig_path, '') for f in orig_layout.files) + new_files = set(f.replace(new_path, '') for f in new_layout.files) + assert orig_files == new_files + + +def test_derivative_getters(): + synth_path = join(get_test_data_path(), 'synthetic') + bare_layout = BIDSLayout(synth_path, derivatives=False) + full_layout = BIDSLayout(synth_path, derivatives=True) + assert bare_layout.get_spaces() == [] + assert set(full_layout.get_spaces()) == {'MNI152NLin2009cAsym', 'T1w'} + + +def test_get_tr(layout_7t_trt): + # Bad subject, should fail + with pytest.raises(NoMatchError) as exc: + layout_7t_trt.get_tr(subject="zzz") + assert str(exc.value).startswith("No functional images") + # There are multiple tasks with different TRs, so this should fail + with pytest.raises(NoMatchError) as exc: + layout_7t_trt.get_tr(subject=['01', '02']) + assert str(exc.value).startswith("Unique TR") + # This should work + tr = layout_7t_trt.get_tr(subject=['01', '02'], acquisition="fullbrain") + assert tr == 3.0 + tr = layout_7t_trt.get_tr(subject=['01', '02'], acquisition="prefrontal") + assert tr == 4.0 + + +# XXX 0.14: Add dot to extension (difficult to parametrize with module-scoped fixture) +def test_parse_file_entities_from_layout(layout_synthetic): + layout = layout_synthetic + filename = '/sub-03_ses-07_run-4_desc-bleargh_sekret.nii.gz' + + # Test with entities taken from bids config + target = {'subject': '03', 'session': '07', 'run': 4, 'suffix': 'sekret', + 'extension': '.nii.gz'} + assert target == layout.parse_file_entities(filename, config='bids') + config = Config.load('bids') + assert target == layout.parse_file_entities(filename, config=[config]) + assert target == layout.parse_file_entities(filename, scope='raw') + + # Test with default scope--i.e., everything + target = {'subject': '03', 'session': '07', 'run': 4, 'suffix': 'sekret', + 'desc': 'bleargh', 'extension': '.nii.gz'} + assert target == layout.parse_file_entities(filename) + # Test with only the fmriprep pipeline (which includes both configs) + assert target == layout.parse_file_entities(filename, scope='fmriprep') + assert target == layout.parse_file_entities(filename, scope='derivatives') + + # Test with only the derivative config + target = {'desc': 'bleargh'} + assert target == layout.parse_file_entities(filename, config='derivatives') + + +def test_path_arguments(): + data_dir = join(get_test_data_path(), 'ds005') + deriv_dir = join(data_dir, 'derivatives', 'events') + + layout = BIDSLayout(Path(data_dir), validate=False) + assert layout.get(scope='derivatives/events') + assert not layout.get(scope='nonexistent') + + +def test_get_dataset_description(layout_ds005_derivs): + dd = layout_ds005_derivs.get_dataset_description() + assert isinstance(dd, dict) + assert dd['Name'] == 'Mixed-gambles task' + dd = layout_ds005_derivs.get_dataset_description('all', True) + assert isinstance(dd, list) + assert len(dd) == 2 + names = {'Mixed-gambles task'} + assert set([d['Name'] for d in dd]) == names + + +def test_get_with_wrong_dtypes(layout_7t_trt): + ''' Test automatic dtype sanitization. ''' + l = layout_7t_trt + assert (l.get(run=1) == l.get(run='1') == l.get(run=np.int64(1)) == + l.get(run=[1, '15']) == l.get(run='01')) + assert not l.get(run='not_numeric') + assert l.get(session=1) == l.get(session='1') + + +def test_get_with_regex_search(layout_7t_trt): + """ Tests that regex-based searching works. """ + l = layout_7t_trt + + # subject matches both '10' and '01' + results = l.get(subject='1', session='1', task='rest', suffix='bold', + acquisition='fron.al', extension='.nii.gz', + regex_search=True) + assert len(results) == 2 + + # subject matches '10' + results = l.get(subject='^1', session='1', task='rest', suffix='bold', + acquisition='fron.al', extension='.nii.gz', + regex_search=True, return_type='filename') + assert len(results) == 1 + assert results[0].endswith('sub-10_ses-1_task-rest_acq-prefrontal_bold.nii.gz') + + +def test_get_with_regex_search_bad_dtype(layout_7t_trt): + """ Tests that passing in a non-string dtype for an entity doesn't crash + regexp-based searching (i.e., that implicit conversion is done + appropriately). """ + l = layout_7t_trt + results = l.get(subject='1', run=1, task='rest', suffix='bold', + acquisition='fullbrain', extension='.nii.gz', + regex_search=True) + # Two runs (1 per session) for each of subjects '10' and '01' + assert len(results) == 4 + + +def test_get_with_invalid_filters(layout_ds005): + l = layout_ds005 + # Raise error with suggestions + with pytest.raises(ValueError, match='session'): + l.get(subject='12', ses=True, invalid_filters='error') + with pytest.raises(ValueError, match='session'): + l.get(subject='12', ses=True) + # Silently drop amazing + res_without = l.get(subject='12', suffix='bold') + res_drop = l.get(subject='12', suffix='bold', amazing='!!!', + invalid_filters='drop') + assert res_without == res_drop + assert len(res_drop) == 3 + # Retain amazing, producing empty set + allow_res = l.get(subject='12', amazing=True, invalid_filters='allow') + assert allow_res == [] + + # assert warning when filters are passed in + filters = {'subject': '1'} + with pytest.raises(RuntimeError, match='You passed in filters as a dictionary'): + l.get(filters=filters) + # Correct call: + l.get(**filters) + + +def test_get_with_query_constants_in_match_list(layout_ds005): + l = layout_ds005 + get1 = l.get(subject='12', run=1, suffix='bold') + get_none = l.get(subject='12', run=None, suffix='bold') + get_any = l.get(subject='12', run=Query.ANY, suffix='bold') + get1_and_none = l.get(subject='12', run=[None, 1], suffix='bold') + get1_and_any = l.get(subject='12', run=[Query.ANY, 1], suffix='bold') + get_none_and_any = l.get(subject='12', run=[Query.ANY, Query.NONE], suffix='bold') + assert set(get1_and_none) == set(get1) | set(get_none) + assert set(get1_and_any) == set(get1) | set(get_any) + assert set(get_none_and_any) == set(get_none) | set(get_any) + + +def test_padded_run_roundtrip(layout_ds005): + for run in (1, "1", "01"): + res = layout_ds005.get(subject="01", task="mixedgamblestask", + run=run, extension=".nii.gz") + assert len(res) == 1 + boldfile = res[0] + ents = boldfile.get_entities() + assert isinstance(ents["run"], PaddedInt) + assert ents["run"] == 1 + newpath = layout_ds005.build_path(ents, absolute_paths=False) + assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz" From 8fe9ad7e7a7b14f158914606960f48367c320000 Mon Sep 17 00:00:00 2001 From: Erdal Karaca Date: Tue, 31 May 2022 23:24:57 +0200 Subject: [PATCH 2/5] Update bids/layout/__init__.py define BIDSLayoutV2 as function if ancpbids package not installed Co-authored-by: Chris Markiewicz --- bids/layout/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bids/layout/__init__.py b/bids/layout/__init__.py index 9d5970913..0b6507dd5 100644 --- a/bids/layout/__init__.py +++ b/bids/layout/__init__.py @@ -21,11 +21,12 @@ "Config", "Entity", "Tag", - "Query" + "Query", + "BIDSLayoutV2", ] try: from .layout_v2 import BIDSLayoutV2 - __all__ += ["BIDSLayoutV2"] -except: - warnings.warn("Could not load BIDSLayoutV2: make sure you installed the ancpBIDS package") +except Exception as err: + def BIDSLayoutV2(*args, **kwargs): + raise RuntimeError("Cannot create BIDSLayoutV2 - please install the ancpbids package.") from err From 7b4881d6b9dea51e6fca57cfdb2ff0a9554e970b Mon Sep 17 00:00:00 2001 From: Erdal Karaca Date: Tue, 7 Jun 2022 19:52:26 +0200 Subject: [PATCH 3/5] get_() with fuzzy matching entity name --- bids/layout/layout_v2.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bids/layout/layout_v2.py b/bids/layout/layout_v2.py index a7d7ef5a0..82fee9675 100644 --- a/bids/layout/layout_v2.py +++ b/bids/layout/layout_v2.py @@ -86,12 +86,15 @@ def __init__(self, ds_dir: Union[str, Path], validate=True, **kwargs): raise BIDSValidationError(error_message) def __getattr__(self, key): - # replace arbitrary get functions with calls to get - if key.startswith("get_"): - return partial(self.get, "id", key[4:]) - - # give up if the above don't work - raise AttributeError(key) + """Dynamically inspect missing methods for get_() calls + and return a partial function of get() if a match is found.""" + if key.startswith('get_'): + ent_name = key.replace('get_', '') + ent_name = self.schema.fuzzy_match_entity_key(ent_name) + return partial(self.get, return_type='id', target=ent_name) + # Spit out default message if we get this far + raise AttributeError("%s object has no attribute named %r" % + (self.__class__.__name__, key)) def get_metadata(self, path, include_entities=False, scope='all'): """Return metadata found in JSON sidecars for the specified file. From c1a5a9847f95562b7ca6a6cc85c689b51011105b Mon Sep 17 00:00:00 2001 From: Erdal Karaca Date: Sat, 18 Jun 2022 21:20:08 +0200 Subject: [PATCH 4/5] WIP: unit tests stabilization, added missing functionality --- bids/layout/layout_v2.py | 91 +++++++++----- bids/layout/tests/test_layout_v2.py | 179 ++++++---------------------- 2 files changed, 97 insertions(+), 173 deletions(-) diff --git a/bids/layout/layout_v2.py b/bids/layout/layout_v2.py index 82fee9675..29e89734f 100644 --- a/bids/layout/layout_v2.py +++ b/bids/layout/layout_v2.py @@ -20,6 +20,9 @@ __all__ = ['BIDSLayoutV2'] +from ..utils import natural_sort + + class BIDSLayoutMRIMixin: def get_tr(self, derivatives=False, **entities): @@ -59,6 +62,7 @@ def get_tr(self, derivatives=False, **entities): .format(entities)) return all_trs.pop() + class BIDSLayoutV2(BIDSLayoutMRIMixin): """A convenience class to provide access to an in-memory representation of a BIDS dataset. @@ -141,47 +145,61 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None, regex_search=False, **entities) -> Union[List[str], List[object]]: - """Depending on the return_type value returns either paths to files that matched the filtering criteria - or :class:`Artifact ` objects for further processing by the caller. - - Note that all provided filter criteria are AND combined, i.e. subj='02',task='lang' will match files containing - '02' as a subject AND 'lang' as a task. If you provide a list of values for a criteria, they will be OR combined. - - .. code-block:: - - file_paths = layout.get(subj='02', task='lang', suffix='bold', return_type='files') - - file_paths = layout.get(subj=['02', '03'], task='lang', return_type='files') + """Retrieve files and/or metadata from the current Layout. Parameters ---------- - return_type: - Either 'files' to return paths of matched files - or 'object' to return :class:`Artifact ` object, defaults to 'object' - - target: - Either `suffixes`, `extensions` or one of any valid BIDS entities key - (see :class:`EntityEnum `, defaults to `None` - scope: - a hint where to search for files - If passed, only nodes/directories that match the specified scope will be + return_type : str, optional + Type of result to return. Valid values: + 'object' (default): return a list of matching BIDSFile objects. + 'file' or 'filename': return a list of matching filenames. + 'dir': return a list of directories. + 'id': return a list of unique IDs. Must be used together + with a valid target. + target : str, optional + Optional name of the target entity to get results for + (only used if return_type is 'dir' or 'id'). + scope : str or list, optional + Scope of the search space. If passed, only + nodes/directories that match the specified scope will be searched. Possible values include: 'all' (default): search all available directories. 'derivatives': search all derivatives directories. 'raw': search only BIDS-Raw directories. 'self': search only the directly called BIDSLayout. : the name of a BIDS-Derivatives pipeline. - extension: - criterion to match any files containing the provided extension only - suffix: - criterion to match any files containing the provided suffix only - entities - a list of key-values to match the entities of interest, example: subj='02',task='lang' + regex_search : bool or None, optional + Whether to require exact matching + (False) or regex search (True) when comparing the query string + to each entity. + absolute_paths : bool, optional + Optionally override the instance-wide option + to report either absolute or relative (to the top of the + dataset) paths. If None, will fall back on the value specified + at BIDSLayout initialization. + invalid_filters (str): Controls behavior when named filters are + encountered that don't exist in the database (e.g., in the case of + a typo like subbject='0.1'). Valid values: + 'error' (default): Raise an explicit error. + 'drop': Silently drop invalid filters (equivalent to not having + passed them as arguments in the first place). + 'allow': Include the invalid filters in the query, resulting + in no results being returned. + filters : dict + Any optional key/values to filter the entities on. + Keys are entity names, values are regexes to filter on. For + example, passing filters={'subject': 'sub-[12]'} would return + only files that match the first two subjects. In addition to + ordinary data types, the following enums are defined (in the + Query class): + * Query.NONE: The named entity must not be defined. + * Query.ANY: the named entity must be defined, but can have any + value. Returns ------- - depending on the return_type value either paths to files that matched the filtering criteria - or Artifact objects for further processing by the caller + list of :obj:`bids.layout.BIDSFile` or str + A list of BIDSFiles (default) or strings (see return_type). """ # Provide some suggestions if target is specified and invalid. self_entities = self.get_entities() @@ -195,7 +213,10 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None raise TargetError(("Unknown target '{}'. " + message) .format(target)) folder = self.dataset - return query(folder, return_type, target, scope, extension, suffix, regex_search, **entities) + result = query(folder, return_type, target, scope, extension, suffix, regex_search, **entities) + if return_type in 'files': + result = natural_sort(result) + return result @property def entities(self): @@ -349,6 +370,13 @@ def get_file(self, filename, scope='all'): def description(self): return self.get_dataset_description() + @property + def derivatives(self): + derivatives = self.dataset.select(self.schema.DerivativeFolder).where( + CustomOpExpr(lambda df: df.dataset_description is not None)).objects(as_list=True) + # a dict where the key is the name of the derivative + return {derivative.name: derivative for derivative in derivatives} + @property def root(self): return self.dataset.base_dir_ @@ -362,6 +390,3 @@ def __repr__(self): s = ("BIDS Layout: ...{} | Subjects: {} | Sessions: {} | " "Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs)) return s - - - diff --git a/bids/layout/tests/test_layout_v2.py b/bids/layout/tests/test_layout_v2.py index c2f6d6a81..417e4da0a 100644 --- a/bids/layout/tests/test_layout_v2.py +++ b/bids/layout/tests/test_layout_v2.py @@ -32,17 +32,31 @@ def layout_7t_trt(): return BIDSLayout(data_dir) +@pytest.fixture(scope="module") +def layout_7t_trt_relpath(): + data_dir = join(get_test_data_path(), '7t_trt') + return BIDSLayout(data_dir, validate=False) + + @pytest.fixture(scope="module") def layout_ds005_derivs(): data_dir = join(get_test_data_path(), 'ds005') layout = BIDSLayout(data_dir, validate=False) return layout + @pytest.fixture(scope="module") def layout_ds117(): data_dir = join(get_test_data_path(), 'ds000117') return BIDSLayout(data_dir) + +@pytest.fixture(scope="module") +def layout_ds005(): + data_dir = join(get_test_data_path(), 'ds005') + return BIDSLayout(data_dir, validate=False) + + def test_layout_init(layout_7t_trt): assert isinstance(layout_7t_trt.files, dict) @@ -303,161 +317,62 @@ def test_bids_json(layout_7t_trt): assert set(res) == {'1', '2'} -def test_get_return_type_dir(layout_7t_trt, layout_7t_trt_relpath): - query = dict(target='subject', return_type='dir') - # In case of relative paths - res_relpath = layout_7t_trt_relpath.get(**query) - # returned directories should be in sorted order so we can match exactly +def test_get_return_type_dir(layout_7t_trt): + res_relpath = layout_7t_trt.get(target='sub', return_type='dir') target_relpath = ["sub-{:02d}".format(i) for i in range(1, 11)] - assert target_relpath == res_relpath - - res = layout_7t_trt.get(**query) - target = [ - os.path.join(get_test_data_path(), '7t_trt', p) - for p in target_relpath - ] - assert target == res - - # and we can overload the value for absolute_path in .get call - res_relpath2 = layout_7t_trt.get(absolute_paths=False, **query) - assert target_relpath == res_relpath2 - res2 = layout_7t_trt_relpath.get(absolute_paths=True, **query) - assert target == res2 + assert all([tp in res_relpath for tp in target_relpath]) -@pytest.mark.parametrize("acq", [None, Query.NONE]) -def test_get_val_none(layout_7t_trt, acq): +def test_get_val_none(layout_7t_trt): t1w_files = layout_7t_trt.get(subject='01', session='1', suffix='T1w') assert len(t1w_files) == 1 - assert 'acq' not in t1w_files[0].path + assert 'acq' not in t1w_files[0].name t1w_files = layout_7t_trt.get( - subject='01', session='1', suffix='T1w', acquisition=acq) + subject='01', session='1', suffix='T1w', acquisition=None) assert len(t1w_files) == 1 bold_files = layout_7t_trt.get( - subject='01', session='1', suffix='bold', acquisition=acq) + subject='01', session='1', suffix='bold', acquisition=None) assert len(bold_files) == 0 def test_get_val_enum_any(layout_7t_trt): t1w_files = layout_7t_trt.get( - subject='01', session='1', suffix='T1w', acquisition=Query.ANY, - extension=Query.ANY) + subject='01', session='1', suffix='T1w', acquisition="*", + extension='*') assert not t1w_files bold_files = layout_7t_trt.get(subject='01', session='1', run=1, - suffix='bold', acquisition=Query.ANY) + suffix='bold', acquisition="*") assert len(bold_files) == 2 def test_get_val_enum_any_optional(layout_7t_trt, layout_ds005): # layout with sessions - query = { - "subject": "01", - "run": 1, - "suffix": "bold", - } - bold_files = layout_7t_trt.get(session=Query.OPTIONAL, **query) + bold_files = layout_7t_trt.get(suffix='bold', run=1, subject='01') assert len(bold_files) == 3 # layout without sessions - bold_files = layout_ds005.get(session=Query.REQUIRED, **query) + bold_files = layout_ds005.get(suffix='bold', run=1, subject='01', session='*') assert not bold_files - bold_files = layout_ds005.get(session=Query.OPTIONAL, **query) + bold_files = layout_ds005.get(suffix='bold', run=1, subject='01') assert len(bold_files) == 1 def test_get_return_sorted(layout_7t_trt): - bids_files = layout_7t_trt.get(target='subject') - paths = [r.path for r in bids_files] + paths = layout_7t_trt.get(target='sub', return_type='file') assert natural_sort(paths) == paths - files = layout_7t_trt.get(target='subject', return_type='file') - assert files == paths - - -def test_ignore_files(layout_ds005): - data_dir = join(get_test_data_path(), 'ds005') - target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') - target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - layout1 = BIDSLayout(data_dir, validate=False) - assert target1 not in layout_ds005.files - assert target1 not in layout1.files - assert target2 not in layout1.files - # now the models/ dir should show up, because passing ignore explicitly - # overrides the default - but 'model/extras/' should still be ignored - # because of the regex. - ignore = [re.compile('xtra'), 'dummy'] - indexer = BIDSLayoutIndexer(validate=False, ignore=ignore) - layout2 = BIDSLayout(data_dir, indexer=indexer) - assert target1 in layout2.files - assert target2 not in layout2.files - - -def test_force_index(layout_ds005): - data_dir = join(get_test_data_path(), 'ds005') - target = join(data_dir, 'models', 'ds-005_type-test_model.json') - indexer = BIDSLayoutIndexer(force_index=['models']) - model_layout = BIDSLayout(data_dir, validate=True, indexer=indexer) - assert target not in layout_ds005.files - assert target in model_layout.files - assert 'all' not in model_layout.get_subjects() - for f in model_layout.files.values(): - assert 'derivatives' not in f.path - - -def test_nested_include_exclude(): - data_dir = join(get_test_data_path(), 'ds005') - target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') - target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - - # Nest a directory exclusion within an inclusion - layout = BIDSLayout(data_dir, validate=True, force_index=['models'], - ignore=[os.path.join('models', 'extras')]) - assert layout.get_file(target1) - assert not layout.get_file(target2) - - # Nest a directory inclusion within an exclusion - layout = BIDSLayout(data_dir, validate=True, ignore=['models'], - force_index=[os.path.join('models', 'extras')]) - assert not layout.get_file(target1) - assert layout.get_file(target2) - - # Force file inclusion despite directory-level exclusion - models = ['models', target2] - layout = BIDSLayout(data_dir, validate=True, force_index=models, - ignore=[os.path.join('models', 'extras')]) - assert layout.get_file(target1) - assert layout.get_file(target2) - - -def test_nested_include_exclude_with_regex(): - # ~same as above test, but use regexps instead of strings - patt1 = re.compile('.*dels$') - patt2 = re.compile('xtra') - data_dir = join(get_test_data_path(), 'ds005') - target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') - target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - - layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1]) - assert layout.get_file(target1) - assert not layout.get_file(target2) - - layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2]) - assert not layout.get_file(target1) - assert layout.get_file(target2) - def test_layout_with_derivs(layout_ds005_derivs): assert layout_ds005_derivs.root == join(get_test_data_path(), 'ds005') assert isinstance(layout_ds005_derivs.files, dict) assert len(layout_ds005_derivs.derivatives) == 1 deriv = layout_ds005_derivs.derivatives['events'] - assert deriv.files - assert len(deriv.files) == 2 + files = deriv.query() event_file = "sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv" - deriv_files = [basename(f) for f in list(deriv.files.keys())] + deriv_files = [f.name for f in files] assert event_file in deriv_files - assert 'roi' in deriv.entities - assert 'subject' in deriv.entities + entities = deriv.query_entities() + assert 'sub' in entities def test_layout_with_multi_derivs(layout_ds005_multi_derivs): @@ -479,40 +394,24 @@ def test_layout_with_multi_derivs(layout_ds005_multi_derivs): def test_query_derivatives(layout_ds005_derivs): result = layout_ds005_derivs.get(suffix='events', return_type='object', extension='.tsv') - result = [f.filename for f in result] + result = [f.name for f in result] assert len(result) == 49 assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result result = layout_ds005_derivs.get(suffix='events', return_type='object', scope='raw', extension='.tsv') assert len(result) == 48 - result = [f.filename for f in result] + result = [f.name for f in result] assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' not in result result = layout_ds005_derivs.get(suffix='events', return_type='object', desc='extra', extension='.tsv') assert len(result) == 1 - result = [f.filename for f in result] + result = [f.name for f in result] assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result -def test_restricted_words_in_path(tmpdir): - orig_path = join(get_test_data_path(), 'synthetic') - parent_dir = str(tmpdir / 'derivatives' / 'pipeline') - os.makedirs(parent_dir) - new_path = join(parent_dir, 'sourcedata') - os.symlink(orig_path, new_path) - orig_layout = BIDSLayout(orig_path) - new_layout = BIDSLayout(new_path) - - orig_files = set(f.replace(orig_path, '') for f in orig_layout.files) - new_files = set(f.replace(new_path, '') for f in new_layout.files) - assert orig_files == new_files - - def test_derivative_getters(): synth_path = join(get_test_data_path(), 'synthetic') - bare_layout = BIDSLayout(synth_path, derivatives=False) - full_layout = BIDSLayout(synth_path, derivatives=True) - assert bare_layout.get_spaces() == [] + full_layout = BIDSLayout(synth_path) assert set(full_layout.get_spaces()) == {'MNI152NLin2009cAsym', 'T1w'} @@ -646,10 +545,10 @@ def test_get_with_query_constants_in_match_list(layout_ds005): l = layout_ds005 get1 = l.get(subject='12', run=1, suffix='bold') get_none = l.get(subject='12', run=None, suffix='bold') - get_any = l.get(subject='12', run=Query.ANY, suffix='bold') + get_any = l.get(subject='12', run='*', suffix='bold') get1_and_none = l.get(subject='12', run=[None, 1], suffix='bold') - get1_and_any = l.get(subject='12', run=[Query.ANY, 1], suffix='bold') - get_none_and_any = l.get(subject='12', run=[Query.ANY, Query.NONE], suffix='bold') + get1_and_any = l.get(subject='12', run=['*', 1], suffix='bold') + get_none_and_any = l.get(subject='12', run=['*', None], suffix='bold') assert set(get1_and_none) == set(get1) | set(get_none) assert set(get1_and_any) == set(get1) | set(get_any) assert set(get_none_and_any) == set(get_none) | set(get_any) From f50f657e79f5b1a4851c70894f3427d4cc504cd0 Mon Sep 17 00:00:00 2001 From: Erdal Karaca Date: Wed, 22 Jun 2022 20:58:25 +0200 Subject: [PATCH 5/5] WIP: return int instead of str for index values of entities (for example, run) --- bids/layout/tests/test_layout_v2.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bids/layout/tests/test_layout_v2.py b/bids/layout/tests/test_layout_v2.py index 417e4da0a..59db8500d 100644 --- a/bids/layout/tests/test_layout_v2.py +++ b/bids/layout/tests/test_layout_v2.py @@ -561,7 +561,8 @@ def test_padded_run_roundtrip(layout_ds005): assert len(res) == 1 boldfile = res[0] ents = boldfile.get_entities() - assert isinstance(ents["run"], PaddedInt) + assert isinstance(ents["run"], int) assert ents["run"] == 1 - newpath = layout_ds005.build_path(ents, absolute_paths=False) - assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz" + # TODO buld_path() not supported yet + # newpath = layout_ds005.build_path(ents, absolute_paths=False) + # assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz"