From 4dc0c43d4e2e3afb410e86c5df91b42071be39e9 Mon Sep 17 00:00:00 2001
From: Laura Luebbert <56094636+lauraluebbert@users.noreply.github.com>
Date: Mon, 29 May 2023 19:12:46 -0700
Subject: [PATCH 1/4] Update biotables.py

---
 dnachisel/biotools/biotables.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dnachisel/biotools/biotables.py b/dnachisel/biotools/biotables.py
index 29f08a4..3a435e8 100644
--- a/dnachisel/biotools/biotables.py
+++ b/dnachisel/biotools/biotables.py
@@ -59,4 +59,8 @@ def get_backtranslation_table(table_name="Standard"):
         back_translation_table[amino_acid].append(codon)
     back_translation_table["*"] = table.stop_codons
     back_translation_table["START"] = table.start_codons
+    back_translation_table["X"] = ["NNN"]
+    back_translation_table["B"] = back_translation_table["N"] + back_translation_table["D"]
+    back_translation_table["J"] = back_translation_table["L"] + back_translation_table["I"]
+    back_translation_table["Z"] = back_translation_table["E"] + back_translation_table["Q"]
     return back_translation_table

From 8f1a900befa73fb7255b24c5ca7ab59c2fa8535c Mon Sep 17 00:00:00 2001
From: Laura Luebbert <56094636+lauraluebbert@users.noreply.github.com>
Date: Mon, 29 May 2023 19:44:09 -0700
Subject: [PATCH 2/4] Change 'X' to the union of all other amino-acid codon sets

---
 dnachisel/biotools/biotables.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dnachisel/biotools/biotables.py b/dnachisel/biotools/biotables.py
index 3a435e8..f81aa94 100644
--- a/dnachisel/biotools/biotables.py
+++ b/dnachisel/biotools/biotables.py
@@ -49,6 +49,8 @@ def dict_from_csv(filepath, sep=";"):
 iupac_file = os.path.join(data_dir, "iupac_notation.csv")
 IUPAC_NOTATION = {k: set(v) for k, v in dict_from_csv(iupac_file).items()}
 
+def flatten(l):
+    return [item for sublist in l for item in sublist]
 
 def get_backtranslation_table(table_name="Standard"):
     table = CodonTable.unambiguous_dna_by_name[table_name]
@@ -59,7 +61,7 @@ def get_backtranslation_table(table_name="Standard"):
         back_translation_table[amino_acid].append(codon)
     back_translation_table["*"] = table.stop_codons
     back_translation_table["START"] = table.start_codons
-    back_translation_table["X"] = ["NNN"]
+    back_translation_table["X"] = flatten(backtranslation_table.values())
     back_translation_table["B"] = back_translation_table["N"] + back_translation_table["D"]
     back_translation_table["J"] = back_translation_table["L"] + back_translation_table["I"]
     back_translation_table["Z"] = back_translation_table["E"] + back_translation_table["Q"]

From beb093f94912f4f4e1d827a661bac8013577553c Mon Sep 17 00:00:00 2001
From: Laura Luebbert <56094636+lauraluebbert@users.noreply.github.com>
Date: Tue, 30 May 2023 14:12:33 -0700
Subject: [PATCH 3/4] Bug: `backtranslation_table` should be `back_translation_table`

---
 dnachisel/biotools/biotables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dnachisel/biotools/biotables.py b/dnachisel/biotools/biotables.py
index f81aa94..633362c 100644
--- a/dnachisel/biotools/biotables.py
+++ b/dnachisel/biotools/biotables.py
@@ -61,7 +61,7 @@ def get_backtranslation_table(table_name="Standard"):
         back_translation_table[amino_acid].append(codon)
     back_translation_table["*"] = table.stop_codons
     back_translation_table["START"] = table.start_codons
-    back_translation_table["X"] = flatten(backtranslation_table.values())
+    back_translation_table["X"] = flatten(back_translation_table.values())
     back_translation_table["B"] = back_translation_table["N"] + back_translation_table["D"]
     back_translation_table["J"] = back_translation_table["L"] + back_translation_table["I"]
     back_translation_table["Z"] = back_translation_table["E"] + back_translation_table["Q"]

From 6188a9772ad67402ebf963d5c1c784875fb2929e Mon Sep 17 00:00:00 2001
From: Laura Luebbert <56094636+lauraluebbert@users.noreply.github.com>
Date: Wed, 31 May 2023 13:42:35 -0700
Subject: [PATCH 4/4] Exclude stop codons from rev translation for 'X'

---
 dnachisel/biotools/biotables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dnachisel/biotools/biotables.py b/dnachisel/biotools/biotables.py
index 633362c..f3bb712 100644
--- a/dnachisel/biotools/biotables.py
+++ b/dnachisel/biotools/biotables.py
@@ -61,7 +61,7 @@ def get_backtranslation_table(table_name="Standard"):
         back_translation_table[amino_acid].append(codon)
     back_translation_table["*"] = table.stop_codons
     back_translation_table["START"] = table.start_codons
-    back_translation_table["X"] = flatten(back_translation_table.values())
+    back_translation_table["X"] = list(set(flatten(back_translation_table.values())) - set(back_translation_table["*"]))
     back_translation_table["B"] = back_translation_table["N"] + back_translation_table["D"]
     back_translation_table["J"] = back_translation_table["L"] + back_translation_table["I"]
     back_translation_table["Z"] = back_translation_table["E"] + back_translation_table["Q"]