Skip to content

Commit

Permalink
Merge pull request #3032 from mirpedrol/modules-yml-ontology
Browse files Browse the repository at this point in the history
Modules meta.yml ontology
  • Loading branch information
mirpedrol committed Sep 20, 2024
2 parents b2e6397 + c8c4fbe commit 282e8fe
Show file tree
Hide file tree
Showing 11 changed files with 419 additions and 132 deletions.
7 changes: 5 additions & 2 deletions nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,11 +1231,14 @@ def command_modules_test(ctx, tool, directory, no_prompts, update, once, profile
is_flag=True,
help="Fix the module version if a newer version is available",
)
def command_modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version):
@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.")
def command_modules_lint(
ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix
):
"""
Lint one or more modules in a directory.
"""
modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version)
modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix)


# nf-core modules info
Expand Down
3 changes: 2 additions & 1 deletion nf_core/commands_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def modules_test(ctx, tool, directory, no_prompts, update, once, profile, migrat
sys.exit(1)


def modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version):
def modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix):
"""
Lint one or more modules in a directory.
Expand All @@ -278,6 +278,7 @@ def modules_lint(ctx, tool, directory, registry, key, all, fail_warned, local, p
module_lint = ModuleLint(
directory,
fail_warned=fail_warned,
fix=fix,
registry=ctx.params["registry"],
remote_url=ctx.obj["modules_repo_url"],
branch=ctx.obj["modules_repo_branch"],
Expand Down
27 changes: 27 additions & 0 deletions nf_core/components/components_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

import questionary
import requests
import rich.prompt

if TYPE_CHECKING:
Expand Down Expand Up @@ -162,3 +163,29 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s
elif link.startswith("../"):
subworkflows.append(name.lower())
return modules, subworkflows


def get_biotools_id(tool_name) -> str:
    """
    Try to find a bio.tools ID for 'tool_name'.

    Queries the bio.tools API for ``tool_name`` and returns the
    ``biotoolsCURIE`` of the first entry whose name equals ``tool_name``,
    compared case-insensitively.

    Args:
        tool_name: Name of the tool to look up.

    Returns:
        The matching ``biotoolsCURIE``, or an empty string if no entry
        matches or the request fails. A warning is logged in both cases.
    """
    url = "https://bio.tools/api/t/"
    # Let requests build the query string so special characters in the tool name are URL-encoded
    params = {"q": tool_name, "format": "json"}
    try:
        # A timeout keeps the command from hanging forever if the API is unresponsive
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()  # Raise an error for bad status codes
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON
        log.warning(f"Could not find a bio.tools ID for '{tool_name}': {e}")
        return ""

    # Lowercase both sides: the original compared a lowercased API name with the raw tool name
    wanted = str(tool_name).lower()
    tools = data.get("list", []) if isinstance(data, dict) else []
    for tool in tools:
        if not isinstance(tool, dict):
            continue
        if str(tool.get("name", "")).lower() == wanted:
            curie = tool.get("biotoolsCURIE")
            if curie:
                return curie

    # If the tool name was not found in the response
    log.warning(f"Could not find a bio.tools ID for '{tool_name}'")
    return ""
4 changes: 4 additions & 0 deletions nf_core/components/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import nf_core
import nf_core.utils
from nf_core.components.components_command import ComponentCommand
from nf_core.components.components_utils import get_biotools_id
from nf_core.pipelines.lint_utils import run_prettier_on_file

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -61,6 +62,7 @@ def __init__(
self.file_paths: Dict[str, Path] = {}
self.not_empty_template = not empty_template
self.migrate_pytest = migrate_pytest
self.tool_identifier = ""

def create(self) -> bool:
"""
Expand Down Expand Up @@ -149,6 +151,8 @@ def create(self) -> bool:
if self.component_type == "modules":
# Try to find a bioconda package for 'component'
self._get_bioconda_tool()
# Try to find a biotools entry for 'component'
self.tool_identifier = get_biotools_id(self.component)

# Prompt for GitHub username
self._get_username()
Expand Down
2 changes: 2 additions & 0 deletions nf_core/components/lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
component_type: str,
directory: Union[str, Path],
fail_warned: bool = False,
fix: bool = False,
remote_url: Optional[str] = None,
branch: Optional[str] = None,
no_pull: bool = False,
Expand All @@ -73,6 +74,7 @@ def __init__(
)

self.fail_warned = fail_warned
self.fix = fix
self.passed: List[LintResult] = []
self.warned: List[LintResult] = []
self.failed: List[LintResult] = []
Expand Down
50 changes: 35 additions & 15 deletions nf_core/components/nfcore_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(
self.passed: List[Tuple[str, str, Path]] = []
self.warned: List[Tuple[str, str, Path]] = []
self.failed: List[Tuple[str, str, Path]] = []
self.inputs: List[str] = []
self.inputs: List[list[dict[str, dict[str, str]]]] = []
self.outputs: List[str] = []
self.has_meta: bool = False
self.git_sha: Optional[str] = None
Expand Down Expand Up @@ -170,7 +170,7 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st

def get_inputs_from_main_nf(self) -> None:
"""Collect all inputs from the main.nf file."""
inputs: List[str] = []
inputs: list[list[dict[str, dict[str, str]]]] = []
with open(self.main_nf) as f:
data = f.read()
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
Expand All @@ -184,16 +184,22 @@ def get_inputs_from_main_nf(self) -> None:
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, input_data, re.MULTILINE)
for _, match in enumerate(matches, start=1):
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
inputs.append(input_val)
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
inputs.append(input_val)
for line in input_data.split("\n"):
channel_elements: list[dict[str, dict[str, str]]] = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs

Expand All @@ -206,9 +212,23 @@ def get_outputs_from_main_nf(self):
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex = r"emit:\s*([^)\s,]+)"
matches = re.finditer(regex, output_data, re.MULTILINE)
for _, match in enumerate(matches, start=1):
outputs.append(match.group(1))
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
67 changes: 43 additions & 24 deletions nf_core/module-template/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,48 +20,67 @@ tools:
tool_dev_url: "{{ tool_dev_url }}"
doi: ""
licence: {{ tool_licence }}
identifier: {{ tool_identifier }}

{% if not_empty_template -%}
## TODO nf-core: Add a description of all of the variables used as input
{% endif -%}
input:
#{% if has_meta %} Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{% endif %}
{% if not_empty_template -%}
## TODO nf-core: Delete / customise this example input
{%- endif %}
- {{ 'bam:' if not_empty_template else "input:" }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}
- {{ 'bam:' if not_empty_template else "input:" }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}
ontologies:
{% if not_empty_template -%}
- edam: "http://edamontology.org/format_2572"
- edam: "http://edamontology.org/format_2573"
- edam: "http://edamontology.org/format_3462"
{% else %}
- edam: ""
{%- endif %}

{% if not_empty_template -%}
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
#{% if has_meta -%} Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{% endif %}
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{%- endif %}
{% if not_empty_template -%}
## TODO nf-core: Delete / customise this example output
## TODO nf-core: Delete / customise this example output
{%- endif %}
- {{ 'bam:' if not_empty_template else "output:" }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}
- {{ '"*.bam":' if not_empty_template else '"*":' }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}
ontologies:
{% if not_empty_template -%}
- edam: "http://edamontology.org/format_2572"
- edam: "http://edamontology.org/format_2573"
- edam: "http://edamontology.org/format_3462"
{% else -%}
- edam: ""
{%- endif %}

authors:
- "{{ author }}"
Expand Down
Loading

0 comments on commit 282e8fe

Please sign in to comment.