diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 0efea13ec..3f1722105 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -1231,11 +1231,14 @@ def command_modules_test(ctx, tool, directory, no_prompts, update, once, profile is_flag=True, help="Fix the module version if a newer version is available", ) -def command_modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version): +@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.") +def command_modules_lint( + ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix +): """ Lint one or more modules in a directory. """ - modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version) + modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix) # nf-core modules info diff --git a/nf_core/commands_modules.py b/nf_core/commands_modules.py index 57c8e9777..33b1f7516 100644 --- a/nf_core/commands_modules.py +++ b/nf_core/commands_modules.py @@ -261,7 +261,7 @@ def modules_test(ctx, tool, directory, no_prompts, update, once, profile, migrat sys.exit(1) -def modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version): +def modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix): """ Lint one or more modules in a directory. 
@@ -278,6 +278,7 @@ def modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, p module_lint = ModuleLint( directory, fail_warned=fail_warned, + fix=fix, registry=ctx.params["registry"], remote_url=ctx.obj["modules_repo_url"], branch=ctx.obj["modules_repo_branch"], diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index c37de84f6..67e05e0ce 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Union import questionary +import requests import rich.prompt if TYPE_CHECKING: @@ -162,3 +163,29 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s elif link.startswith("../"): subworkflows.append(name.lower()) return modules, subworkflows + + +def get_biotools_id(tool_name) -> str: + """ + Find the bio.tools ID (CURIE) of 'tool_name', matched case-insensitively; returns an empty string if not found + """ + url = "https://bio.tools/api/t/" + try: + # Send a GET request to the API (query is URL-encoded; timeout avoids hanging on a dead server) + response = requests.get(url, params={"q": tool_name, "format": "json"}, timeout=10) + response.raise_for_status() # Raise an error for bad status codes + # Parse the JSON response + data = response.json() + + # Iterate through the tools in the response to find the tool name + for tool in data.get("list", []): + if tool.get("name", "").lower() == tool_name.lower(): + return tool.get("biotoolsCURIE", "") + + # If the tool name was not found in the response + log.warning(f"Could not find a bio.tools ID for '{tool_name}'") + return "" + + except requests.exceptions.RequestException as e: + log.warning(f"Could not find a bio.tools ID for '{tool_name}': {e}") + return "" diff --git a/nf_core/components/create.py b/nf_core/components/create.py index c71b12841..c0095da23 100644 --- a/nf_core/components/create.py +++ b/nf_core/components/create.py @@ -21,6 +21,7 @@ import nf_core import nf_core.utils from nf_core.components.components_command import ComponentCommand +from nf_core.components.components_utils import get_biotools_id 
from nf_core.pipelines.lint_utils import run_prettier_on_file log = logging.getLogger(__name__) @@ -61,6 +62,7 @@ def __init__( self.file_paths: Dict[str, Path] = {} self.not_empty_template = not empty_template self.migrate_pytest = migrate_pytest + self.tool_identifier = "" def create(self) -> bool: """ @@ -149,6 +151,8 @@ def create(self) -> bool: if self.component_type == "modules": # Try to find a bioconda package for 'component' self._get_bioconda_tool() + # Try to find a biotools entry for 'component' + self.tool_identifier = get_biotools_id(self.component) # Prompt for GitHub username self._get_username() diff --git a/nf_core/components/lint/__init__.py b/nf_core/components/lint/__init__.py index c1b1f24cb..fcc3b414d 100644 --- a/nf_core/components/lint/__init__.py +++ b/nf_core/components/lint/__init__.py @@ -57,6 +57,7 @@ def __init__( component_type: str, directory: Union[str, Path], fail_warned: bool = False, + fix: bool = False, remote_url: Optional[str] = None, branch: Optional[str] = None, no_pull: bool = False, @@ -73,6 +74,7 @@ def __init__( ) self.fail_warned = fail_warned + self.fix = fix self.passed: List[LintResult] = [] self.warned: List[LintResult] = [] self.failed: List[LintResult] = [] diff --git a/nf_core/components/nfcore_component.py b/nf_core/components/nfcore_component.py index 0f3cdcdfb..84c9a651e 100644 --- a/nf_core/components/nfcore_component.py +++ b/nf_core/components/nfcore_component.py @@ -49,7 +49,7 @@ def __init__( self.passed: List[Tuple[str, str, Path]] = [] self.warned: List[Tuple[str, str, Path]] = [] self.failed: List[Tuple[str, str, Path]] = [] - self.inputs: List[str] = [] + self.inputs: List[list[dict[str, dict[str, str]]]] = [] self.outputs: List[str] = [] self.has_meta: bool = False self.git_sha: Optional[str] = None @@ -170,7 +170,7 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st def get_inputs_from_main_nf(self) -> None: """Collect all inputs from the main.nf file.""" - inputs: 
List[str] = [] + inputs: list[list[dict[str, dict[str, str]]]] = [] with open(self.main_nf) as f: data = f.read() # get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo @@ -184,16 +184,22 @@ def get_inputs_from_main_nf(self) -> None: # don't match anything inside comments or after "output:" if "input:" not in data: log.debug(f"Could not find any inputs in {self.main_nf}") + return input_data = data.split("input:")[1].split("output:")[0] - regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))" - matches = re.finditer(regex, input_data, re.MULTILINE) - for _, match in enumerate(matches, start=1): - if match.group(3): - input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases - inputs.append(input_val) - elif match.group(4): - input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases - inputs.append(input_val) + for line in input_data.split("\n"): + channel_elements: list[dict[str, dict[str, str]]] = [] + regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))" + matches = re.finditer(regex, line) + for _, match in enumerate(matches, start=1): + input_val = None + if match.group(3): + input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases + elif match.group(4): + input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases + if input_val: + channel_elements.append({input_val: {}}) + if len(channel_elements) > 0: + inputs.append(channel_elements) log.debug(f"Found {len(inputs)} inputs in {self.main_nf}") self.inputs = inputs @@ -206,9 +212,23 @@ def get_outputs_from_main_nf(self): log.debug(f"Could not find any outputs in {self.main_nf}") return outputs output_data = data.split("output:")[1].split("when:")[0] - regex = r"emit:\s*([^)\s,]+)" - matches = re.finditer(regex, output_data, re.MULTILINE) - for _, match in enumerate(matches, start=1): - outputs.append(match.group(1)) + 
regex_emit = r"emit:\s*([^)\s,]+)" + regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))" + for line in output_data.split("\n"): + match_emit = re.search(regex_emit, line) + matches_elements = re.finditer(regex_elements, line) + if not match_emit: + continue + output_channel = {match_emit.group(1): []} + for _, match_element in enumerate(matches_elements, start=1): + output_val = None + if match_element.group(3): + output_val = match_element.group(3) + elif match_element.group(4): + output_val = match_element.group(4) + if output_val: + output_val = output_val.strip("'").strip('"') # remove quotes + output_channel[match_emit.group(1)].append({output_val: {}}) + outputs.append(output_channel) log.debug(f"Found {len(outputs)} outputs in {self.main_nf}") self.outputs = outputs diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index 9d3f3c1c1..c7c16dcb3 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -20,48 +20,67 @@ tools: tool_dev_url: "{{ tool_dev_url }}" doi: "" licence: {{ tool_licence }} + identifier: {{ tool_identifier }} {% if not_empty_template -%} ## TODO nf-core: Add a description of all of the variables used as input {% endif -%} input: #{% if has_meta %} Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` {% endif %} {% if not_empty_template -%} ## TODO nf-core: Delete / customise this example input {%- endif %} - - {{ 'bam:' if not_empty_template else "input:" }} - type: file - description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }} - pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }} + - {{ 'bam:' if not_empty_template else "input:" }} + type: file + description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }} + pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }} + ontologies: + {% if not_empty_template -%} + - edam: "http://edamontology.org/format_2572" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + {% else %} + - edam: "" + {%- endif %} {% if not_empty_template -%} ## TODO nf-core: Add a description of all of the variables used as output {% endif -%} output: - #{% if has_meta -%} Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - {% endif %} - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + - {{ 'bam:' if not_empty_template else "output:" }} + #{% if has_meta -%} Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + {%- endif %} {% if not_empty_template -%} - ## TODO nf-core: Delete / customise this example output + ## TODO nf-core: Delete / customise this example output {%- endif %} - - {{ 'bam:' if not_empty_template else "output:" }} - type: file - description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }} - pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }} + - {{ '"*.bam":' if not_empty_template else '"*":' }} + type: file + description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }} + pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }} + ontologies: + {% if not_empty_template -%} + - edam: "http://edamontology.org/format_2572" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + {% else -%} + - edam: "" + {%- endif %} authors: - "{{ author }}" diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 017b3965b..49012cff4 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -14,20 +14,22 @@ import questionary import rich import rich.progress +import ruamel.yaml import nf_core.components import nf_core.components.nfcore_component import nf_core.modules.modules_utils import nf_core.utils +from nf_core.components.components_utils import get_biotools_id from nf_core.components.lint import ComponentLint, LintExceptionError, LintResult from nf_core.components.nfcore_component import NFCoreComponent -from nf_core.pipelines.lint_utils import console +from nf_core.pipelines.lint_utils import console, run_prettier_on_file log = logging.getLogger(__name__) from .environment_yml import environment_yml from .main_nf import main_nf -from .meta_yml import meta_yml +from .meta_yml import meta_yml, obtain_correct_and_specified_inputs, obtain_correct_and_specified_outputs, read_meta_yml from .module_changes import module_changes from .module_deprecations import module_deprecations 
from .module_patch import module_patch @@ -46,6 +48,9 @@ class ModuleLint(ComponentLint): environment_yml = environment_yml main_nf = main_nf meta_yml = meta_yml + obtain_correct_and_specified_inputs = obtain_correct_and_specified_inputs + obtain_correct_and_specified_outputs = obtain_correct_and_specified_outputs + read_meta_yml = read_meta_yml module_changes = module_changes module_deprecations = module_deprecations module_patch = module_patch @@ -57,6 +62,7 @@ def __init__( self, directory: Union[str, Path], fail_warned: bool = False, + fix: bool = False, remote_url: Optional[str] = None, branch: Optional[str] = None, no_pull: bool = False, @@ -67,6 +73,7 @@ def __init__( component_type="modules", directory=directory, fail_warned=fail_warned, + fix=fix, remote_url=remote_url, branch=branch, no_pull=no_pull, @@ -237,6 +244,12 @@ def lint_module( # Otherwise run all the lint tests else: + mod.get_inputs_from_main_nf() + mod.get_outputs_from_main_nf() + # Update meta.yml file if requested + if self.fix: + self.update_meta_yml_file(mod) + if self.repo_type == "pipeline" and self.modules_json and mod.repo_url: # Set correct sha version = self.modules_json.get_module_version(mod.component_name, mod.repo_url, mod.org) @@ -256,3 +269,107 @@ def lint_module( self.failed += warned self.failed += [LintResult(mod, *m) for m in mod.failed] + + def update_meta_yml_file(self, mod): + """ + Update the meta.yml file with the inputs/outputs found in main.nf and add a missing bio.tools identifier + """ + meta_yml = self.read_meta_yml(mod) + if meta_yml is None: + log.warning(f"Could not read `meta.yml` of '{mod.component_name}', skipping the update") + return + corrected_meta_yml = meta_yml.copy() + yaml = ruamel.yaml.YAML() + yaml.preserve_quotes = True + yaml.indent(mapping=2, sequence=2, offset=0) + + # Obtain inputs and outputs from main.nf and meta.yml + # Used to compare only the structure of channels and elements + # Do not compare features to allow for custom features in meta.yml (e.g. 
pattern) + if "input" in meta_yml: + correct_inputs, meta_inputs = self.obtain_correct_and_specified_inputs(mod, meta_yml) + if "output" in meta_yml: + correct_outputs, meta_outputs = self.obtain_correct_and_specified_outputs(mod, meta_yml) + + if "input" in meta_yml and correct_inputs != meta_inputs: + log.debug( + f"Correct inputs: '{correct_inputs}' differ from current inputs: '{meta_inputs}' in '{mod.meta_yml}'" + ) + corrected_meta_yml["input"] = mod.inputs.copy() # list of lists (channels) of dicts (elements) + for i, channel in enumerate(corrected_meta_yml["input"]): + for j, element in enumerate(channel): + element_name = list(element.keys())[0] + for meta_element in meta_yml["input"] or []: + try: + # Handle old format of meta.yml: list of dicts (channels) + if element_name in meta_element.keys(): + # Copy current features of that input element from meta.yml + for feature in meta_element[element_name].keys(): + if feature not in element[element_name].keys(): + corrected_meta_yml["input"][i][j][element_name][feature] = meta_element[ + element_name + ][feature] + break + except AttributeError: + # Handle new format of meta.yml: list of lists (channels) of elements (dicts) + for x, meta_ch_element in enumerate(meta_element): + if element_name in meta_ch_element.keys(): + # Copy current features of that input element from meta.yml + for feature in meta_element[x][element_name].keys(): + if feature not in element[element_name].keys(): + corrected_meta_yml["input"][i][j][element_name][feature] = meta_element[x][ + element_name + ][feature] + break + + if "output" in meta_yml and correct_outputs != meta_outputs: + log.debug( + f"Correct outputs: '{correct_outputs}' differ from current outputs: '{meta_outputs}' in '{mod.meta_yml}'" + ) + corrected_meta_yml["output"] = mod.outputs.copy() # list of dicts (channels) with list of dicts (elements) + for i, channel in enumerate(corrected_meta_yml["output"]): + ch_name = list(channel.keys())[0] + for j, element 
in enumerate(channel[ch_name]): + element_name = list(element.keys())[0] + for meta_element in meta_yml["output"] or []: + if element_name in meta_element.keys(): + # Copy current features of that output element from meta.yml + for feature in meta_element[element_name].keys(): + if feature not in element[element_name].keys(): + corrected_meta_yml["output"][i][ch_name][j][element_name][feature] = meta_element[ + element_name + ][feature] + break + elif ch_name in meta_element.keys(): + # When the previous output element was using the name of the channel + # Copy current features of that output element from meta.yml + try: + # Handle old format of meta.yml + for feature in meta_element[ch_name].keys(): + if feature not in element[element_name].keys(): + corrected_meta_yml["output"][i][ch_name][j][element_name][feature] = ( + meta_element[ch_name][feature] + ) + except AttributeError: + # Handle new format of meta.yml + for meta_ch_element in meta_element[ch_name]: + for meta_ch_element_name in meta_ch_element.keys(): + for feature in meta_ch_element[meta_ch_element_name].keys(): + if feature not in element[element_name].keys(): + corrected_meta_yml["output"][i][ch_name][j][element_name][feature] = ( + meta_ch_element[meta_ch_element_name][feature] + ) + break + + # Add bio.tools identifier + for i, tool in enumerate(corrected_meta_yml.get("tools", [])): + tool_name = list(tool.keys())[0] + if "identifier" not in tool[tool_name]: + corrected_meta_yml["tools"][i][tool_name]["identifier"] = get_biotools_id( + mod.component_name.split("/")[0] + ) + + with open(mod.meta_yml, "w") as fh: + log.info(f"Updating {mod.meta_yml}") + yaml.dump(corrected_meta_yml, fh) + run_prettier_on_file(fh.name) diff --git a/nf_core/modules/lint/meta_yml.py b/nf_core/modules/lint/meta_yml.py index 4a0ef6e01..4f7ffd073 100644 --- a/nf_core/modules/lint/meta_yml.py +++ b/nf_core/modules/lint/meta_yml.py @@ -1,13 +1,17 @@ 
import json +import logging from pathlib import Path +from typing import Union -import yaml +import ruamel.yaml from jsonschema import exceptions, validators from nf_core.components.lint import ComponentLint, LintExceptionError from nf_core.components.nfcore_component import NFCoreComponent from nf_core.modules.modules_differ import ModulesDiffer +log = logging.getLogger(__name__) + def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None: """ @@ -39,10 +43,8 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None """ - module.get_inputs_from_main_nf() - module.get_outputs_from_main_nf() # Check if we have a patch file, get original file in that case - meta_yaml = None + meta_yaml = read_meta_yml(module_lint_object, module) if module.is_patched and module_lint_object.modules_repo.repo_path is not None: lines = ModulesDiffer.try_apply_patch( module.component_name, @@ -52,17 +54,15 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None reverse=True, ).get("meta.yml") if lines is not None: - meta_yaml = yaml.safe_load("".join(lines)) + yaml = ruamel.yaml.YAML() + meta_yaml = yaml.load("".join(lines)) if module.meta_yml is None: raise LintExceptionError("Module does not have a `meta.yml` file") if meta_yaml is None: - try: - with open(module.meta_yml) as fh: - meta_yaml = yaml.safe_load(fh) - module.passed.append(("meta_yml_exists", "Module `meta.yml` exists", module.meta_yml)) - except FileNotFoundError: - module.failed.append(("meta_yml_exists", "Module `meta.yml` does not exist", module.meta_yml)) - return + module.failed.append(("meta_yml_exists", "Module `meta.yml` does not exist", module.meta_yml)) + return + else: + module.passed.append(("meta_yml_exists", "Module `meta.yml` exists", module.meta_yml)) # Confirm that the meta.yml file is valid according to the JSON schema valid_meta_yml = False @@ -93,93 +93,181 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None ) ) - # Confirm that all input 
and output channels are specified + # Confirm that all input and output channels are correctly specified if valid_meta_yml: + # Check that inputs are specified in meta.yml + if len(module.inputs) > 0 and "input" not in meta_yaml: + module.failed.append( + ( + "meta_input", + "Inputs not specified in module `meta.yml`", + module.meta_yml, + ) + ) + elif len(module.inputs) > 0: + module.passed.append( + ( + "meta_input", + "Inputs specified in module `meta.yml`", + module.meta_yml, + ) + ) + else: + log.debug(f"No inputs specified in module `main.nf`: {module.component_name}") + # Check that all inputs are correctly specified if "input" in meta_yaml: - meta_input = [list(x.keys())[0] for x in meta_yaml["input"]] - for input in module.inputs: - if input in meta_input: - module.passed.append(("meta_input_main_only", f"`{input}` specified", module.meta_yml)) - else: - module.warned.append( - ( - "meta_input_main_only", - f"`{input}` is present as an input in the `main.nf`, but missing in `meta.yml`", - module.meta_yml, - ) - ) - # check if there are any inputs in meta.yml that are not in main.nf - for input in meta_input: - if input in module.inputs: - module.passed.append( - ( - "meta_input_meta_only", - f"`{input}` is present as an input in `meta.yml` and `main.nf`", - module.meta_yml, - ) - ) - else: - module.warned.append( - ( - "meta_input_meta_only", - f"`{input}` is present as an input in `meta.yml` but not in `main.nf`", - module.meta_yml, - ) - ) + correct_inputs, meta_inputs = obtain_correct_and_specified_inputs(module_lint_object, module, meta_yaml) - if "output" in meta_yaml and meta_yaml["output"] is not None: - meta_output = [list(x.keys())[0] for x in meta_yaml["output"]] - for output in module.outputs: - if output in meta_output: - module.passed.append(("meta_output_main_only", f"`{output}` specified", module.meta_yml)) - else: - module.warned.append( - ( - "meta_output_main_only", - f"`{output}` is present as an output in the `main.nf`, but missing in 
`meta.yml`", - module.meta_yml, - ) - ) - # check if there are any outputs in meta.yml that are not in main.nf - for output in meta_output: - if output in module.outputs: - module.passed.append( - ( - "meta_output_meta_only", - f"`{output}` is present as an output in `meta.yml` and `main.nf`", - module.meta_yml, - ) + if correct_inputs == meta_inputs: + module.passed.append( + ( + "correct_meta_inputs", + "Correct inputs specified in module `meta.yml`", + module.meta_yml, ) - elif output == "meta": - module.passed.append( - ( - "meta_output_meta_only", - f"`{output}` is skipped for `meta.yml` outputs", - module.meta_yml, - ) - ) - else: - module.warned.append( - ( - "meta_output_meta_only", - f"`{output}` is present as an output in `meta.yml` but not in `main.nf`", - module.meta_yml, - ) + ) + else: + module.failed.append( + ( + "correct_meta_inputs", + f"Module `meta.yml` does not match `main.nf`. Inputs should contain: {correct_inputs}\nRun `nf-core modules lint --fix` to update the `meta.yml` file.", + module.meta_yml, ) - # confirm that the name matches the process name in main.nf - if meta_yaml["name"].upper() == module.process_name: - module.passed.append( + ) + + # Check that outputs are specified in meta.yml + if len(module.outputs) > 0 and "output" not in meta_yaml: + module.failed.append( ( - "meta_name", - "Correct name specified in `meta.yml`.", + "meta_output", + "Outputs not specified in module `meta.yml`", module.meta_yml, ) ) - else: - module.failed.append( + elif len(module.outputs) > 0: + module.passed.append( ( - "meta_name", - f"Conflicting `process` name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)", + "meta_output", + "Outputs specified in module `meta.yml`", module.meta_yml, ) ) + # Check that all outputs are correctly specified + if "output" in meta_yaml: + correct_outputs, meta_outputs = obtain_correct_and_specified_outputs(module_lint_object, module, meta_yaml) + + if correct_outputs == meta_outputs: + 
module.passed.append( + ( + "correct_meta_outputs", + "Correct outputs specified in module `meta.yml`", + module.meta_yml, + ) + ) + else: + module.failed.append( + ( + "correct_meta_outputs", + f"Module `meta.yml` does not match `main.nf`. Outputs should contain: {correct_outputs}\nRun `nf-core modules lint --fix` to update the `meta.yml` file.", + module.meta_yml, + ) + ) + + +def read_meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> Union[dict, None]: + """ + Read a `meta.yml` file and return it as a dictionary + + Args: + module_lint_object (ComponentLint): The lint object for the module + module (NFCoreComponent): The module to read + + Returns: + dict | None: The `meta.yml` file as a dictionary, or None if the module has no `meta.yml` file + """ + meta_yaml = None + yaml = ruamel.yaml.YAML() + yaml.preserve_quotes = True + # Check if we have a patch file, get original file in that case + if module.is_patched and module_lint_object.modules_repo.repo_path is not None: + lines = ModulesDiffer.try_apply_patch( + module.component_name, + module_lint_object.modules_repo.repo_path, + module.patch_path, + Path(module.component_dir).relative_to(module.base_dir), + reverse=True, + ).get("meta.yml") + if lines is not None: + meta_yaml = yaml.load("".join(lines)) + # Fall back to the file on disk. A missing file yields None so that the + # caller can report `meta_yml_exists` as failed instead of crashing + if meta_yaml is None and module.meta_yml is not None and Path(module.meta_yml).is_file(): + with open(module.meta_yml) as fh: + meta_yaml = yaml.load(fh) + return meta_yaml + + +def obtain_correct_and_specified_inputs(_, module, meta_yaml): + """ + Obtain the list of correct inputs and the elements of each input channel. + + Args: + module (object): The module object. + meta_yaml (dict): The meta.yml dictionary. + + Returns: + tuple: A tuple containing two lists. The first list contains the correct inputs, + and the second list contains the inputs specified in meta.yml. 
+ """ + correct_inputs = [] + for input_channel in module.inputs: + channel_elements = [] + for element in input_channel: + channel_elements.append(list(element.keys())[0]) + correct_inputs.append(channel_elements) + + meta_inputs = [] + for input_channel in meta_yaml["input"] or []: + if isinstance(input_channel, list): # Correct format + channel_elements = [] + for element in input_channel: + channel_elements.append(list(element.keys())[0]) + meta_inputs.append(channel_elements) + elif isinstance(input_channel, dict): # Old format + meta_inputs.append(list(input_channel.keys())[0]) + + return correct_inputs, meta_inputs + + +def obtain_correct_and_specified_outputs(_, module, meta_yaml): + """ + Obtain the dictionary of correct outputs and elements of each output channel. + + Args: + module (object): The module object. + meta_yaml (dict): The meta.yml dictionary. + + Returns: + correct_outputs (dict): A dictionary containing the correct outputs and their elements. + meta_outputs (dict): A dictionary containing the outputs specified in meta.yml. 
+ """ + correct_outputs = {} + for output_channel in module.outputs: + channel_name = list(output_channel.keys())[0] + channel_elements = [] + for element in output_channel[channel_name]: + channel_elements.append(list(element.keys())[0]) + correct_outputs[channel_name] = channel_elements + + meta_outputs = {} + for output_channel in meta_yaml["output"] or []: + channel_name = list(output_channel.keys())[0] + if isinstance(output_channel[channel_name], list): # Correct format + channel_elements = [] + for element in output_channel[channel_name]: + channel_elements.append(list(element.keys())[0]) + meta_outputs[channel_name] = channel_elements + elif isinstance(output_channel[channel_name], dict): # Old format + meta_outputs[channel_name] = [] + + return correct_outputs, meta_outputs diff --git a/requirements.txt b/requirements.txt index eba6460f0..f167a5580 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ tabulate textual==0.71.0 trogon pdiff +ruamel.yaml diff --git a/tests/modules/test_lint.py b/tests/modules/test_lint.py index 07e12924c..f4adedfcb 100644 --- a/tests/modules/test_lint.py +++ b/tests/modules/test_lint.py @@ -210,6 +210,14 @@ def test_modules_lint_new_modules(self): assert len(module_lint.passed) > 0 assert len(module_lint.warned) >= 0 + def test_modules_lint_update_meta_yml(self): + """update the meta.yml of a module""" + module_lint = nf_core.modules.ModuleLint(directory=self.nfcore_modules, fix=True) + module_lint.lint(print_results=False, module="fastqc") + assert len(module_lint.failed) == 0, f"Linting failed with {[x.__dict__ for x in module_lint.failed]}" + assert len(module_lint.passed) > 0 + assert len(module_lint.warned) >= 0 + def test_modules_lint_no_gitlab(self): """Test linting a pipeline with no modules installed""" self.mods_remove.remove("fastqc", force=True)