Skip to content

Commit

Permalink
RF: populate a single scans.json at top level, and not one per sub/ses
Browse files Browse the repository at this point in the history
Breeding those identical files is not useful at all:

- causes some pain for datalad (git-annex): the files are committed
  under annex, and if identical copies are published online there might
  be thousands of URLs associated with that file's annex key

- BIDS is not explicit (yet) about the possibility of having scans.json
  at the top level, but it seems to follow nicely from the inheritance
  principle. See bids-standard/bids-specification#789
  and references therein

- so why waste inodes and clutter the file tree?
  • Loading branch information
yarikoptic committed Apr 30, 2021
1 parent 753e6b2 commit f7905ed
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
10 changes: 4 additions & 6 deletions heudiconv/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
load_json,
save_json,
create_file_if_missing,
json_dumps_pretty,
json_dumps,
set_readonly,
is_readonly,
get_datetime,
Expand Down Expand Up @@ -120,6 +120,9 @@ def populate_bids_templates(path, defaults={}):
create_file_if_missing(op.join(path, 'README'),
"TODO: Provide description for the dataset -- basic details about the "
"study, possibly pointing to pre-registration (if public or embargoed)")
create_file_if_missing(op.join(path, 'scans.json'),
json_dumps(SCANS_FILE_FIELDS, sort_keys=False)
)

populate_aggregated_jsons(path)

Expand Down Expand Up @@ -404,11 +407,6 @@ def add_rows_to_scans_keys_file(fn, newrows):
os.unlink(fn)
else:
fnames2info = newrows
# Populate _scans.json (an optional file to describe column names in
# _scans.tsv). This auto generation will make BIDS-validator happy.
scans_json = '.'.join(fn.split('.')[:-1] + ['json'])
if not op.lexists(scans_json):
save_json(scans_json, SCANS_FILE_FIELDS, sort_keys=False)

header = SCANS_FILE_FIELDS
# prepare all the data rows
Expand Down
12 changes: 10 additions & 2 deletions heudiconv/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
from heudiconv.main import workflow
from heudiconv import __version__
from heudiconv.utils import (create_file_if_missing,
load_json,
set_readonly,
is_readonly)
from heudiconv.bids import (populate_bids_templates,
add_participant_record,
get_formatted_scans_key_row,
add_rows_to_scans_keys_file,
find_subj_ses)
find_subj_ses,
SCANS_FILE_FIELDS,
)
from heudiconv.external.dlad import MIN_VERSION, add_to_datalad

from .utils import TESTS_DATA_PATH
Expand Down Expand Up @@ -81,6 +84,8 @@ def test_populate_bids_templates(tmpdir):
assert "something" not in description_file.read()
assert "TODO" in description_file.read()

assert load_json(tmpdir / "scans.json") == SCANS_FILE_FIELDS


def test_add_participant_record(tmpdir):
tf = tmpdir.join('participants.tsv')
Expand Down Expand Up @@ -127,6 +132,7 @@ def test_prepare_for_datalad(tmpdir):
'.gitattributes',
'.datalad/config', '.datalad/.gitattributes',
'dataset_description.json',
'scans.json',
'CHANGES', 'README'}
assert set(ds.repo.get_indexed_files()) == target_files
# and all are under git
Expand Down Expand Up @@ -217,7 +223,9 @@ def _check_rows(fn, rows):
assert dates == sorted(dates)

_check_rows(fn, rows)
assert op.exists(opj(tmpdir.strpath, 'file.json'))
# we no longer produce a sidecar .json file there and only generate
# it while populating templates for BIDS
assert not op.exists(opj(tmpdir.strpath, 'file.json'))
# add a new one
extra_rows = {
'a_new_file.nii.gz': ['2016adsfasd23', '', 'fasadfasdf'],
Expand Down

0 comments on commit f7905ed

Please sign in to comment.