From 91004b184daaad0e97bf21c770751db3655eec6f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Jan 2024 17:30:18 +0000 Subject: [PATCH 1/4] Be more flexible on attribute values in GTFs --- bin/tx2gene.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bin/tx2gene.py b/bin/tx2gene.py index 8e0c1c6a5..20f57524c 100755 --- a/bin/tx2gene.py +++ b/bin/tx2gene.py @@ -6,6 +6,7 @@ import argparse import glob import os +import re from collections import Counter, defaultdict, OrderedDict from collections.abc import Set from typing import Dict @@ -50,14 +51,18 @@ def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str: Returns: str: The attribute name that corresponds to transcripts in the GTF file. """ + votes = Counter() with open(gtf_file) as inh: - # Read GTF file, skipping header lines + # Read GTF file, skipping header lines for line in filter(lambda x: not x.startswith("#"), inh): cols = line.split("\t") - # Parse attribute column and update votes for each attribute found - attributes = dict(item.strip().split(" ", 1) for item in cols[8].split(";") if item.strip()) - votes.update(key for key, value in attributes.items() if value.strip('"') in transcripts) + + # Use regular expression to correctly split the attributes string + attributes_str = cols[8] + attributes = dict(re.findall(r'(\S+) "(.*?)(? Date: Wed, 3 Jan 2024 17:32:31 +0000 Subject: [PATCH 2/4] Update tx2gene.py --- bin/tx2gene.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/tx2gene.py b/bin/tx2gene.py index 20f57524c..963676ef1 100755 --- a/bin/tx2gene.py +++ b/bin/tx2gene.py @@ -54,7 +54,7 @@ def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str: votes = Counter() with open(gtf_file) as inh: - # Read GTF file, skipping header lines + # Read GTF file, skipping header lines for line in filter(lambda x: not x.startswith("#"), inh): cols = line.split("\t") From 745696ad1015ec3b23a205729e2c79288f9a51f2 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 3 Jan 2024 17:33:52 +0000 Subject: [PATCH 3/4] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index de4b47e4d..2dfb7c48b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Special thanks to the following for their contributions to the release: - [PR #1141](https://github.com/nf-core/rnaseq/pull/1141) - Important! Template update for nf-core/tools v2.11 - [PR #1149](https://github.com/nf-core/rnaseq/pull/1149) - Fix and patch version commands for Fastp, FastQC and UMI-tools modules ([#1103](https://github.com/nf-core/rnaseq/issues/1103)) - [PR #1144](https://github.com/nf-core/rnaseq/pull/1144) - Interface to kmer size for pseudoaligners +- [PR #1150](https://github.com/nf-core/rnaseq/pull/1150) - Be more flexible on attribute values in GTFs ## [[3.13.2](https://github.com/nf-core/rnaseq/releases/tag/3.13.2)] - 2023-11-21 From c08e204925a34abefba854cdfa0c29c4facb794d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 3 Jan 2024 19:37:47 +0100 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dfb7c48b..a0e997952 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,7 @@ Special thanks to the following for their contributions to the release: - [PR #1141](https://github.com/nf-core/rnaseq/pull/1141) - Important! Template update for nf-core/tools v2.11 - [PR #1149](https://github.com/nf-core/rnaseq/pull/1149) - Fix and patch version commands for Fastp, FastQC and UMI-tools modules ([#1103](https://github.com/nf-core/rnaseq/issues/1103)) - [PR #1144](https://github.com/nf-core/rnaseq/pull/1144) - Interface to kmer size for pseudoaligners -- [PR #1150](https://github.com/nf-core/rnaseq/pull/1150) - Be more flexible on attribute values in GTFs +- [PR #1150](https://github.com/nf-core/rnaseq/pull/1150) - Be more flexible on attribute values in GTFs ([#1132](https://github.com/nf-core/rnaseq/issues/1132)) ## [[3.13.2](https://github.com/nf-core/rnaseq/releases/tag/3.13.2)] - 2023-11-21