RF: populate a single scans.json at top level instead of one per sub/ses

Creating many identical copies of this file is not useful at all:
- it causes pain for datalad (git-annex): the files are committed under annex, and if they are published online and are identical, there might be thousands of URLs associated with that file's annex key
- BIDS is not (yet) explicit about the possibility of having scans.json at the top level, but it seems to follow nicely from the inheritance principle. See bids-standard/bids-specification#789 and references therein
- so why waste inodes and clutter the file tree?
dbic · Apr 30, 2021 · f7905ed · f7905ed
1 parent 753e6b2
commit f7905ed
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 8 deletions.
diff --git a/heudiconv/bids.py b/heudiconv/bids.py
@@ -18,7 +18,7 @@
     load_json,
     save_json,
     create_file_if_missing,
-    json_dumps_pretty,
+    json_dumps,
     set_readonly,
     is_readonly,
     get_datetime,
@@ -120,6 +120,9 @@ def populate_bids_templates(path, defaults={}):
     create_file_if_missing(op.join(path, 'README'),
         "TODO: Provide description for the dataset -- basic details about the "
         "study, possibly pointing to pre-registration (if public or embargoed)")
+    create_file_if_missing(op.join(path, 'scans.json'),
+        json_dumps(SCANS_FILE_FIELDS, sort_keys=False)
+    )
 
     populate_aggregated_jsons(path)
 
@@ -404,11 +407,6 @@ def add_rows_to_scans_keys_file(fn, newrows):
         os.unlink(fn)
     else:
         fnames2info = newrows
-        # Populate _scans.json (an optional file to describe column names in
-        # _scans.tsv). This auto generation will make BIDS-validator happy.
-        scans_json = '.'.join(fn.split('.')[:-1] + ['json'])
-        if not op.lexists(scans_json):
-            save_json(scans_json, SCANS_FILE_FIELDS, sort_keys=False)
 
     header = SCANS_FILE_FIELDS
     # prepare all the data rows

diff --git a/heudiconv/tests/test_main.py b/heudiconv/tests/test_main.py
@@ -4,13 +4,16 @@
 from heudiconv.main import workflow
 from heudiconv import __version__
 from heudiconv.utils import (create_file_if_missing,
+                             load_json,
                              set_readonly,
                              is_readonly)
 from heudiconv.bids import (populate_bids_templates,
                             add_participant_record,
                             get_formatted_scans_key_row,
                             add_rows_to_scans_keys_file,
-                            find_subj_ses)
+                            find_subj_ses,
+                            SCANS_FILE_FIELDS,
+                            )
 from heudiconv.external.dlad import MIN_VERSION, add_to_datalad
 
 from .utils import TESTS_DATA_PATH
@@ -81,6 +84,8 @@ def test_populate_bids_templates(tmpdir):
     assert "something" not in description_file.read()
     assert "TODO" in description_file.read()
 
+    assert load_json(tmpdir / "scans.json") == SCANS_FILE_FIELDS
+
 
 def test_add_participant_record(tmpdir):
     tf = tmpdir.join('participants.tsv')
@@ -127,6 +132,7 @@ def test_prepare_for_datalad(tmpdir):
         '.gitattributes',
         '.datalad/config', '.datalad/.gitattributes',
         'dataset_description.json',
+        'scans.json',
         'CHANGES', 'README'}
     assert set(ds.repo.get_indexed_files()) == target_files
     # and all are under git
@@ -217,7 +223,9 @@ def _check_rows(fn, rows):
         assert dates == sorted(dates)
 
     _check_rows(fn, rows)
-    assert op.exists(opj(tmpdir.strpath, 'file.json'))
+    # we no longer produce a sidecar .json file there and only generate
+    # it while populating templates for BIDS
+    assert not op.exists(opj(tmpdir.strpath, 'file.json'))
     # add a new one
     extra_rows = {
         'a_new_file.nii.gz': ['2016adsfasd23', '', 'fasadfasdf'],