From 8cead0bfef49a67ffa755facab1a58b2fda41c4f Mon Sep 17 00:00:00 2001 From: Matthijs Pon <63122826+MatthijsPon@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:41:53 +0200 Subject: [PATCH] Add published mutational signatures to pancan_pcawg_2020 (#1824) * add published mutational signatures pcawg * rename matrix to counts * filter DBS counts * fix letter casing in profile name --------- Co-authored-by: Ramya Madupuri <34350829+rmadupuri@users.noreply.github.com> Co-authored-by: rmadupuri --- public/pancan_pcawg_2020/README.md | 22 +++++++++++++++++++ ...mutational_signatures_contribution_DBS.txt | 3 +++ ..._mutational_signatures_contribution_ID.txt | 3 +++ ...mutational_signatures_contribution_SBS.txt | 3 +++ .../data_mutational_signatures_counts_DBS.txt | 3 +++ .../data_mutational_signatures_counts_ID.txt | 3 +++ .../data_mutational_signatures_counts_SBS.txt | 3 +++ ...mutational_signatures_contribution_DBS.txt | 12 ++++++++++ ..._mutational_signatures_contribution_ID.txt | 12 ++++++++++ ...mutational_signatures_contribution_SBS.txt | 12 ++++++++++ .../meta_mutational_signatures_counts_DBS.txt | 10 +++++++++ .../meta_mutational_signatures_counts_ID.txt | 10 +++++++++ .../meta_mutational_signatures_counts_SBS.txt | 10 +++++++++ 13 files changed, 106 insertions(+) create mode 100644 public/pancan_pcawg_2020/README.md create mode 100644 public/pancan_pcawg_2020/data_mutational_signatures_contribution_DBS.txt create mode 100644 public/pancan_pcawg_2020/data_mutational_signatures_contribution_ID.txt create mode 100644 public/pancan_pcawg_2020/data_mutational_signatures_contribution_SBS.txt create mode 100644 public/pancan_pcawg_2020/data_mutational_signatures_counts_DBS.txt create mode 100644 public/pancan_pcawg_2020/data_mutational_signatures_counts_ID.txt create mode 100644 public/pancan_pcawg_2020/data_mutational_signatures_counts_SBS.txt create mode 100644 public/pancan_pcawg_2020/meta_mutational_signatures_contribution_DBS.txt create mode 100644 
public/pancan_pcawg_2020/meta_mutational_signatures_contribution_ID.txt create mode 100644 public/pancan_pcawg_2020/meta_mutational_signatures_contribution_SBS.txt create mode 100644 public/pancan_pcawg_2020/meta_mutational_signatures_counts_DBS.txt create mode 100644 public/pancan_pcawg_2020/meta_mutational_signatures_counts_ID.txt create mode 100644 public/pancan_pcawg_2020/meta_mutational_signatures_counts_SBS.txt diff --git a/public/pancan_pcawg_2020/README.md b/public/pancan_pcawg_2020/README.md new file mode 100644 index 0000000000..215f1418e4 --- /dev/null +++ b/public/pancan_pcawg_2020/README.md @@ -0,0 +1,22 @@ +# Mutational Signatures inclusion +The original publication contains [mutational signature activity scores](https://www.synapse.org/#!Synapse:syn11804065) +extracted by SigProfiler. + +## Input files +- [single-base substitution signature activity](https://www.synapse.org/#!Synapse:syn11738669) - PCAWG_sigProfiler_SBS_signatures_in_samples.csv +- [double-base substitution signature activity](https://www.synapse.org/#!Synapse:syn11738667) - PCAWG_sigProfiler_DBS_signatures_in_samples.csv +- [insertion-deletion signature activity](https://www.synapse.org/#!Synapse:syn11738668) - PCAWG_SigProfiler_ID_signatures_in_samples.csv +- [mutations](data_mutations.txt) - data_mutations.txt + +## Contribution files +The original files contain activity scores. Activity and contribution scores can be calculated from each other. +For each signature type (SBS/DBS/ID) the contribution scores were calculated per sample as follows: +``` +contribution (signature x) = activity (signature x) / total activity of all signatures in sample +``` +Only samples present in the cBioPortal study were included. + +## Mutational matrix +The mutational matrix was extracted from the `data_mutations.txt` file using the SigProfilerMatrixGenerator Python package +(v1.2.15) and the included reference genome GRCh37 (`SigProfilerMatrixGenerator.install.install('GRCh37')`). 
+Only samples with contribution scores were included. diff --git a/public/pancan_pcawg_2020/data_mutational_signatures_contribution_DBS.txt b/public/pancan_pcawg_2020/data_mutational_signatures_contribution_DBS.txt new file mode 100644 index 0000000000..915961c850 --- /dev/null +++ b/public/pancan_pcawg_2020/data_mutational_signatures_contribution_DBS.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5792082048c163b0c95dc92fe66b766287057890608feabf904e546d64717bda +size 192105 diff --git a/public/pancan_pcawg_2020/data_mutational_signatures_contribution_ID.txt b/public/pancan_pcawg_2020/data_mutational_signatures_contribution_ID.txt new file mode 100644 index 0000000000..8adaffd821 --- /dev/null +++ b/public/pancan_pcawg_2020/data_mutational_signatures_contribution_ID.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc3cf24afc423e1bbd1805f455a8e6f26ec5475d8624838c084180d5bd16ab8 +size 318896 diff --git a/public/pancan_pcawg_2020/data_mutational_signatures_contribution_SBS.txt b/public/pancan_pcawg_2020/data_mutational_signatures_contribution_SBS.txt new file mode 100644 index 0000000000..dba9d118aa --- /dev/null +++ b/public/pancan_pcawg_2020/data_mutational_signatures_contribution_SBS.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6569a5d6fb2a18239c46bb0dca5ecb5aaa563158d503fd3d1b1e36dba63f5a +size 650496 diff --git a/public/pancan_pcawg_2020/data_mutational_signatures_counts_DBS.txt b/public/pancan_pcawg_2020/data_mutational_signatures_counts_DBS.txt new file mode 100644 index 0000000000..0910434da5 --- /dev/null +++ b/public/pancan_pcawg_2020/data_mutational_signatures_counts_DBS.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf00c7b3d06cf3fae64fd482437d047038fa71af55070fa0086758582bc5be14 +size 429721 diff --git a/public/pancan_pcawg_2020/data_mutational_signatures_counts_ID.txt b/public/pancan_pcawg_2020/data_mutational_signatures_counts_ID.txt new 
file mode 100644 index 0000000000..76f5f8a5eb --- /dev/null +++ b/public/pancan_pcawg_2020/data_mutational_signatures_counts_ID.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f8bfec7e1f13386dbeca118624d25034bc7ecbd3629b810492b287bfb6a60d +size 472394 diff --git a/public/pancan_pcawg_2020/data_mutational_signatures_counts_SBS.txt b/public/pancan_pcawg_2020/data_mutational_signatures_counts_SBS.txt new file mode 100644 index 0000000000..dec53c55cb --- /dev/null +++ b/public/pancan_pcawg_2020/data_mutational_signatures_counts_SBS.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7628ac03322200fb726b1080fead62dfcbfbd360ca2cc5f06a4f9524409234 +size 546845 diff --git a/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_DBS.txt b/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_DBS.txt new file mode 100644 index 0000000000..aea34aea06 --- /dev/null +++ b/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_DBS.txt @@ -0,0 +1,12 @@ +cancer_study_identifier: pancan_pcawg_2020 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: MUTATIONAL_SIGNATURE +datatype: LIMIT-VALUE +stable_id: mutational_signatures_contribution_DBS +profile_name: Mutational signatures contribution DBS +profile_description: Mutational signature contribution, calculated from PCAWG DBS activity +data_filename: data_mutational_signatures_contribution_DBS.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME,DESCRIPTION,URL +value_sort_order: DESC +pivot_threshold_value: 0.0 \ No newline at end of file diff --git a/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_ID.txt b/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_ID.txt new file mode 100644 index 0000000000..09c701fa94 --- /dev/null +++ b/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_ID.txt @@ -0,0 +1,12 @@ +cancer_study_identifier: pancan_pcawg_2020 
+genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: MUTATIONAL_SIGNATURE +datatype: LIMIT-VALUE +stable_id: mutational_signatures_contribution_ID +profile_name: Mutational signatures contribution ID +profile_description: Mutational signature contribution, calculated from PCAWG ID activity +data_filename: data_mutational_signatures_contribution_ID.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME,DESCRIPTION,URL +value_sort_order: DESC +pivot_threshold_value: 0.0 \ No newline at end of file diff --git a/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_SBS.txt b/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_SBS.txt new file mode 100644 index 0000000000..f8ef6b87df --- /dev/null +++ b/public/pancan_pcawg_2020/meta_mutational_signatures_contribution_SBS.txt @@ -0,0 +1,12 @@ +cancer_study_identifier: pancan_pcawg_2020 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: MUTATIONAL_SIGNATURE +datatype: LIMIT-VALUE +stable_id: mutational_signatures_contribution_SBS +profile_name: Mutational signatures contribution SBS +profile_description: Mutational signature contribution, calculated from PCAWG SBS activity +data_filename: data_mutational_signatures_contribution_SBS.txt +show_profile_in_analysis_tab: true +generic_entity_meta_properties: NAME,DESCRIPTION,URL +value_sort_order: DESC +pivot_threshold_value: 0.0 \ No newline at end of file diff --git a/public/pancan_pcawg_2020/meta_mutational_signatures_counts_DBS.txt b/public/pancan_pcawg_2020/meta_mutational_signatures_counts_DBS.txt new file mode 100644 index 0000000000..e0a97e8b53 --- /dev/null +++ b/public/pancan_pcawg_2020/meta_mutational_signatures_counts_DBS.txt @@ -0,0 +1,10 @@ +cancer_study_identifier: pancan_pcawg_2020 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: MUTATIONAL_SIGNATURE +datatype: LIMIT-VALUE +stable_id: mutational_signatures_counts_DBS +profile_name: Mutational signatures counts DBS +profile_description: 
Mutational count matrix, extracted from data_mutations.txt using SigProfilerMatrixGenerator v1.2.15 +data_filename: data_mutational_signatures_counts_DBS.txt +show_profile_in_analysis_tab: false +generic_entity_meta_properties: NAME diff --git a/public/pancan_pcawg_2020/meta_mutational_signatures_counts_ID.txt b/public/pancan_pcawg_2020/meta_mutational_signatures_counts_ID.txt new file mode 100644 index 0000000000..29a32d6f9c --- /dev/null +++ b/public/pancan_pcawg_2020/meta_mutational_signatures_counts_ID.txt @@ -0,0 +1,10 @@ +cancer_study_identifier: pancan_pcawg_2020 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: MUTATIONAL_SIGNATURE +datatype: LIMIT-VALUE +stable_id: mutational_signatures_counts_ID +profile_name: Mutational signatures counts ID +profile_description: Mutational count matrix, extracted from data_mutations.txt using SigProfilerMatrixGenerator v1.2.15 +data_filename: data_mutational_signatures_counts_ID.txt +show_profile_in_analysis_tab: false +generic_entity_meta_properties: NAME diff --git a/public/pancan_pcawg_2020/meta_mutational_signatures_counts_SBS.txt b/public/pancan_pcawg_2020/meta_mutational_signatures_counts_SBS.txt new file mode 100644 index 0000000000..a043929350 --- /dev/null +++ b/public/pancan_pcawg_2020/meta_mutational_signatures_counts_SBS.txt @@ -0,0 +1,10 @@ +cancer_study_identifier: pancan_pcawg_2020 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: MUTATIONAL_SIGNATURE +datatype: LIMIT-VALUE +stable_id: mutational_signatures_counts_SBS +profile_name: Mutational signatures counts SBS +profile_description: Mutational count matrix, extracted from data_mutations.txt using SigProfilerMatrixGenerator v1.2.15 +data_filename: data_mutational_signatures_counts_SBS.txt +show_profile_in_analysis_tab: false +generic_entity_meta_properties: NAME