diff --git a/pdf_build_src/process_markdowns.py b/pdf_build_src/process_markdowns.py index 4175ad228f..6145d33b99 100644 --- a/pdf_build_src/process_markdowns.py +++ b/pdf_build_src/process_markdowns.py @@ -7,13 +7,13 @@ well. """ -from datetime import datetime import json import os import posixpath import re import subprocess import sys +from datetime import datetime import numpy as np @@ -626,9 +626,14 @@ def process_macros(duplicated_src_dir_path): # switch "use_pipe" flag OFF to render examples if "make_filetree_example" in function_string: function_string = function_string.replace( - ")", - ", False)" + ")", + ", False)", ) + + # switch "pdf_format" ON to render filename templates + if "make_filename_template" in function_string: + function_string = function_string.replace(")", ", pdf_format=True)") + # Run the function to get the output new = eval(function_string) # Replace the code snippet with the function output diff --git a/tools/mkdocs_macros_bids/macros.py b/tools/mkdocs_macros_bids/macros.py index 823ce14bd9..c2e6b3b638 100644 --- a/tools/mkdocs_macros_bids/macros.py +++ b/tools/mkdocs_macros_bids/macros.py @@ -61,12 +61,17 @@ def _get_source_path(level=1): return caller.f_locals["_Context__self"]["page"].file.src_path -def make_filename_template(**kwargs): - """Generate a filename template snippet from the schema, based on specific - filters. +def make_filename_template(pdf_format=False, **kwargs): + """Generate a filename template snippet from the schema, based on specific filters. Parameters ---------- + pdf_format : bool, optional + If True, the filename template will be compiled as a standard markdown code block, + without any hyperlinks, so that the specification's PDF build will look right. + If False, the filename template will use HTML and include hyperlinks. + This works on the website. + Default is False. kwargs : dict Keyword arguments used to filter the schema. Example kwargs that may be used include: "suffixes", "datatypes", @@ -79,7 +84,11 @@ def make_filename_template(**kwargs): in the schema, after filtering. """ schema_obj = schema.load_schema() - codeblock = render.make_filename_template(schema_obj, **kwargs) + codeblock = render.make_filename_template( + schema_obj, + pdf_format=pdf_format, + **kwargs, + ) return codeblock diff --git a/tools/schemacode/bidsschematools/render.py b/tools/schemacode/bidsschematools/render.py index ee68e2bfa9..812c38d1b2 100644 --- a/tools/schemacode/bidsschematools/render.py +++ b/tools/schemacode/bidsschematools/render.py @@ -10,7 +10,7 @@ from tabulate import tabulate from . import utils -from .schema import BIDSSchemaError, Namespace, filter_schema +from .schema import BIDSSchemaError, Namespace, filter_schema, load_schema lgr = utils.get_logger() # Basic settings for output, for now just basic @@ -239,17 +239,35 @@ def _add_entity(filename_template, entity_pattern, requirement_level): return filename_template -def make_filename_template(schema, n_dupes_to_combine=6, **kwargs): +def make_filename_template( + schema=None, + src_path=None, + n_dupes_to_combine=6, + pdf_format=False, + **kwargs, +): """Create codeblocks containing example filename patterns for a given datatype. + By default, this function uses HTML, instead of direct Markdown codeblocks, + so that it can embed hyperlinks within the filenames. + Parameters ---------- schema : dict The schema object, which is a dictionary with nested dictionaries and lists stored within it. + src_path : str | None + The file where this macro is called, which may be explicitly provided + by the "page.file.src_path" variable. n_dupes_to_combine : int The minimum number of suffixes/extensions to combine in the template as /. + pdf_format : bool, optional + If True, the filename template will be compiled as a standard markdown code block, + without any hyperlinks, so that the specification's PDF build will look right. + If False, the filename template will use HTML and include hyperlinks. + This works on the website. + Default is False. kwargs : dict Keyword arguments used to filter the schema. Example kwargs that may be used include: "suffixes", "datatypes", @@ -260,26 +278,62 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs): codeblock : str A multiline string containing the filename templates for file types in the schema, after filtering. + + Notes + ----- + This function doesn't use src_path, because the hyperlinks use absolute paths to HTML files. + It would be nice, at some point, to use src_path in conjunction with paths to markdown files, + like other functions do, instead. """ + if not schema: + schema = load_schema() + schema = Namespace(filter_schema(schema.to_dict(), **kwargs)) entity_order = schema["rules"]["entities"] + entities_path = "/99-appendices/09-entities.html" + glossary_path = "/99-appendices/14-glossary.html" paragraph = "" # Parent directories - paragraph += "{}-<{}>/\n\t[{}-<{}>/]\n".format( - schema["objects"]["entities"]["subject"]["name"], - schema["objects"]["entities"]["subject"]["format"], - schema["objects"]["entities"]["session"]["name"], - schema["objects"]["entities"]["session"]["format"], + sub_string = ( + f'{schema["objects"]["entities"]["subject"]["name"]}-' + f'<{schema["objects"]["entities"]["subject"]["format"]}>' ) + paragraph += utils._link_with_html( + sub_string, + html_path=entities_path, + heading="sub", + pdf_format=pdf_format, + ) + paragraph += "/\n\t[" + ses_string = ( + f'{schema["objects"]["entities"]["session"]["name"]}-' + f'<{schema["objects"]["entities"]["session"]["format"]}>' + ) + paragraph += utils._link_with_html( + ses_string, + html_path=entities_path, + heading="ses", + pdf_format=pdf_format, + ) + paragraph += "/]\n" datatypes = schema.rules.datatypes for datatype in datatypes: - # XXX We should have a full rethink of the schema hierarchy... + # NOTE: We should have a full rethink of the schema hierarchy + # so that derivatives aren't treated like a "datatype" if datatype == "derivatives": continue - paragraph += "\t\t{}/\n".format(datatype) + + paragraph += "\t\t" + paragraph += utils._link_with_html( + datatype, + html_path=glossary_path, + heading=f"{datatype.lower()}-datatypes", + pdf_format=pdf_format, + ) + paragraph += "/\n" # Unique filename patterns for group in datatypes[datatype].values(): @@ -287,22 +341,41 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs): for ent in entity_order: if "enum" in schema["objects"]["entities"][ent].keys(): # Entity key-value pattern with specific allowed values - ent_format = "{}-<{}>".format( - schema["objects"]["entities"][ent]["name"], - "|".join(schema["objects"]["entities"][ent]["enum"]), + ent_format = ( + f'{schema["objects"]["entities"][ent]["name"]}-' + f'<{"|".join(schema["objects"]["entities"][ent]["enum"])}>' + ) + ent_format = utils._link_with_html( + ent_format, + html_path=entities_path, + heading=schema["objects"]["entities"][ent]["name"], + pdf_format=pdf_format, ) else: # Standard entity key-value pattern with simple label/index - ent_format = "{}-<{}>".format( + ent_format = utils._link_with_html( schema["objects"]["entities"][ent]["name"], + html_path=entities_path, + heading=schema["objects"]["entities"][ent]["name"], + pdf_format=pdf_format, + ) + ent_format += "-" + ent_format += "<" if pdf_format else "<" + ent_format += utils._link_with_html( schema["objects"]["entities"][ent].get("format", "label"), + html_path=glossary_path, + heading=( + f'{schema["objects"]["entities"][ent].get("format", "label")}-formats' + ), + pdf_format=pdf_format, ) + ent_format += ">" if pdf_format else ">" if ent in group["entities"]: if isinstance(group["entities"][ent], dict): if "enum" in group["entities"][ent].keys(): - # Overwrite the filename pattern based on the valid values - ent_format = "{}-<{}>".format( + # Overwrite the filename pattern using valid values + ent_format = "{}-<{}>".format( schema["objects"]["entities"][ent]["name"], "|".join(group["entities"][ent]["enum"]), ) @@ -318,17 +391,38 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs): # In cases of large numbers of suffixes, # we use the "suffix" variable and expect a table later in the spec if len(group["suffixes"]) >= n_dupes_to_combine: - suffix = "_" - string += suffix + string += "_" + string += "<" if pdf_format else "<" + string += utils._link_with_html( + "suffix", + html_path=glossary_path, + heading="suffix-common_principles", + pdf_format=pdf_format, + ) + string += ">" if pdf_format else ">" strings = [string] else: - strings = [string + "_" + suffix for suffix in group["suffixes"]] + strings = [] + for suffix in group["suffixes"]: + # The glossary indexes by the suffix identifier (TwoPE instead of 2PE), + # but the rules reference the actual suffix string (2PE instead of TwoPE), + # so we need to look it up. + suffix_id = [ + k for k, v in schema["objects"]["suffixes"].items() if v["value"] == suffix + ][0] + + suffix_string = utils._link_with_html( + suffix, + html_path=glossary_path, + heading=f"{suffix_id.lower()}-suffixes", + pdf_format=pdf_format, + ) + strings.append(f"{string}_{suffix_string}") # Add extensions full_strings = [] extensions = group["extensions"] extensions = [ext if ext != "*" else "." for ext in extensions] - extensions = utils.combine_extensions(extensions) if len(extensions) >= n_dupes_to_combine: # Combine exts when there are many, but keep JSON separate if ".json" in extensions: @@ -336,9 +430,32 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs): else: extensions = ["."] + ext_headings = [] + for extension in extensions: + # The glossary indexes by the extension identifier (niigz instead of .nii.gz), + # but the rules reference the actual suffix string (.nii.gz instead of niigz), + # so we need to look it up. + ext_id = [ + k + for k, v in schema["objects"]["extensions"].items() + if v["value"] == extension + ] + if ext_id: + ext_id = ext_id[0] + ext_headings.append(f"{ext_id.lower()}-extensions") + else: + ext_headings.append("extension-common_principles") + + extensions = utils.combine_extensions( + extensions, + html_path=glossary_path, + heading_lst=ext_headings, + pdf_format=pdf_format, + ) + for extension in extensions: for string in strings: - new_string = string + extension + new_string = f"{string}{extension}" full_strings.append(new_string) full_strings = sorted(full_strings) @@ -346,8 +463,16 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs): paragraph += "\n".join(full_strings) + "\n" paragraph = paragraph.rstrip() - codeblock = "Template:\n```Text\n" + paragraph + "\n```" + if pdf_format: + codeblock = f"Template:\n```Text\n{paragraph}\n```" + else: + codeblock = ( + f'Template:\n
{paragraph}\n
' + ) + codeblock = codeblock.expandtabs(4) + codeblock = codeblock.replace("SPEC_ROOT", get_relpath(src_path)) + return codeblock diff --git a/tools/schemacode/bidsschematools/tests/test_render.py b/tools/schemacode/bidsschematools/tests/test_render.py index 02d09f4fd6..cab6113f5b 100644 --- a/tools/schemacode/bidsschematools/tests/test_render.py +++ b/tools/schemacode/bidsschematools/tests/test_render.py @@ -72,7 +72,7 @@ def test_make_filename_template(schema_obj, schema_dir): * all files under the datatype rules subdirectory have corresponding entries. This may need to be updated for schema hierarchy changes. """ - filename_template = render.make_filename_template(schema_obj) + filename_template = render.make_filename_template(schema_obj, pdf_format=True) # Test predefined substrings expected_template_part = """ diff --git a/tools/schemacode/bidsschematools/tests/test_utils.py b/tools/schemacode/bidsschematools/tests/test_utils.py index c2d1c1be55..2261e57178 100644 --- a/tools/schemacode/bidsschematools/tests/test_utils.py +++ b/tools/schemacode/bidsschematools/tests/test_utils.py @@ -5,7 +5,7 @@ def test_combine_extensions(): """A unit test for utils.combine_extensions.""" test_extensions = ["nii.gz", "nii", "json"] target_combined = ["nii[.gz]", "json"] - test_combined = utils.combine_extensions(test_extensions) + test_combined = utils.combine_extensions(test_extensions, pdf_format=True) assert test_combined == target_combined diff --git a/tools/schemacode/bidsschematools/utils.py b/tools/schemacode/bidsschematools/utils.py index 7099c5e820..3eaefcd8f8 100644 --- a/tools/schemacode/bidsschematools/utils.py +++ b/tools/schemacode/bidsschematools/utils.py @@ -16,7 +16,36 @@ def get_schema_path(): return op.abspath(op.join(op.dirname(__file__), "data", "schema")) -def combine_extensions(lst): +def _link_with_html(string, html_path=None, heading=None, pdf_format=False): + """Wrap a string in an HTML hyperlink. + + Parameters + ---------- + string : str + The string to wrap a hyperlink around. + html_path : None or str, optional + Path to the HTML file that the string should link to. + heading : None or str, optional + The heading on the HTML page the string should link to. + pdf_format : bool, optional + If True, the string will be returned unmodified. + If False, a hyperlink will be generated around the string, + linking to the ``heading`` heading in the ``html_path`` page. + Default is False. + + Returns + ------- + string : str + The modified (or unmodified) string. + """ + if not pdf_format: + string = string.replace("<", "<").replace(">", ">") + string = f'{string}' + + return string + + +def combine_extensions(lst, html_path=None, heading_lst=None, pdf_format=True): """Combine extensions with their compressed versions in a list. Valid combinations are hardcoded in the function, @@ -26,6 +55,22 @@ def combine_extensions(lst): ---------- lst : list of str Raw list of extensions. + html_path : None or str + Path to the HTML file that each extension should link to. + Only used if pdf_format is False. + Default is None. + heading_lst : None or list of str + List of headings in the HTML page to link to. + Should be one heading for each extension in lst. + Only used if pdf_format is False. + Default is None. + pdf_format : bool, optional + If True, the extensions will be compiled as markdown strings, + without any hyperlinks, so that the specification's PDF build will look right. + If False, the extensions will use HTML and include hyperlinks to the their + associated glossary entries. + This works on the website. + Default is True. Returns ------- @@ -34,20 +79,48 @@ def combine_extensions(lst): combined. """ COMPRESSION_EXTENSIONS = [".gz"] + if pdf_format and not heading_lst: + heading_lst = lst[:] new_lst = [] items_to_remove = [] - for item in lst: + for i_item, item in enumerate(lst): for ext in COMPRESSION_EXTENSIONS: if item.endswith(ext) and item.replace(ext, "") in lst: - temp_item = item.replace(ext, "") + "[" + ext + "]" + base_item_idx = lst.index(item.replace(ext, "")) + temp_item = _link_with_html( + lst[base_item_idx], + html_path=html_path, + heading=heading_lst[base_item_idx].lower(), + pdf_format=pdf_format, + ) + ext_string = _link_with_html( + ext, + html_path=html_path, + heading=heading_lst[i_item].lower(), + pdf_format=pdf_format, + ) + + temp_item = temp_item + "[" + ext_string + "]" new_lst.append(temp_item) items_to_remove.append(item) items_to_remove.append(item.replace(ext, "")) continue + heading_lst = [head for i, head in enumerate(heading_lst) if lst[i] not in items_to_remove] items_to_add = [item for item in lst if item not in items_to_remove] - new_lst += items_to_add + item_strings_to_add = [] + for i_item, item in enumerate(items_to_add): + item_strings_to_add.append( + _link_with_html( + item, + html_path=html_path, + heading=heading_lst[i_item], + pdf_format=pdf_format, + ) + ) + + new_lst += item_strings_to_add return new_lst