Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge external FRASER counts #169

Open
wants to merge 11 commits into
base: dev
Choose a base branch
from
33 changes: 23 additions & 10 deletions drop/config/SampleAnnotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class SampleAnnotation:
FILE_TYPES = ["RNA_BAM_FILE", "DNA_VCF_FILE", "GENE_COUNTS_FILE"]
FILE_TYPES = ["RNA_BAM_FILE", "DNA_VCF_FILE", "GENE_COUNTS_FILE", "SPLICE_COUNTS_DIR"]
SAMPLE_ANNOTATION_COLUMNS = FILE_TYPES + [
"RNA_ID", "DNA_ID", "DROP_GROUP", "GENE_ANNOTATION",
"PAIRED_END", "COUNT_MODE", "COUNT_OVERLAPS", "STRAND", "GENOME"
Expand All @@ -32,6 +32,7 @@ def __init__(self, file, root, genome):
self.dnaIDs = self.createGroupIds(file_type="DNA_VCF_FILE", sep=',')
# external counts
self.extGeneCountIDs = self.createGroupIds(file_type="GENE_COUNTS_FILE", sep=',')
self.extSpliceCountIDs = self.createGroupIds(file_type="SPLICE_COUNTS_DIR", sep=',')

def parse(self, sep='\t'):
"""
Expand Down Expand Up @@ -80,10 +81,12 @@ def createSampleFileMapping(self):
columns: [ID | ASSAY | FILE_TYPE | FILE_PATH ]
"""

assay_mapping = {'RNA_ID': ['RNA_BAM_FILE', 'GENE_COUNTS_FILE'], 'DNA_ID': ['DNA_VCF_FILE']}
assay_mapping = {'RNA_ID': ['RNA_BAM_FILE', 'GENE_COUNTS_FILE', 'SPLICE_COUNTS_DIR'], 'DNA_ID': ['DNA_VCF_FILE']}
assay_subsets = []
for id_, file_types in assay_mapping.items():
for file_type in file_types:
if file_type not in self.sa.columns:
continue
df = self.annotationTable[[id_, file_type]].dropna().drop_duplicates().copy()
df.rename(columns={id_: 'ID', file_type: 'FILE_PATH'}, inplace=True)
df['ASSAY'] = id_
Expand Down Expand Up @@ -249,17 +252,24 @@ def getGenomes(self, value, group, file_type="RNA_ID",

return {sample_id: value for sample_id in subset[file_type].tolist()}

def getImportCountFiles(self, annotation, group, file_type="GENE_COUNTS_FILE",
annotation_key="GENE_ANNOTATION", group_key="DROP_GROUP",exact_match = True):
def getImportCountFiles(self, annotation, group, file_type: str = "GENE_COUNTS_FILE",
annotation_key: str = "GENE_ANNOTATION", group_key: str = "DROP_GROUP",
exact_match = True, asSet: bool = True):
"""
:param annotation: annotation name as specified in config and GENE_ANNOTATION column
:param group: a group of the DROP_GROUP column. exact match is passed to subsetter, false allows for substring matching
:param annotation: annotation name as specified in config and GENE_ANNOTATION column. Can be None
:param group: a group of the DROP_GROUP column.
:param exact_match: exact_match is passed to subsetter, False allows for substring matching
:param asSet: if True, default, a set is returned else a list.
:return: set of unique external count file names
"""

#subset for the annotation_key in the annotation group and the group_key in the group
subset = self.subsetSampleAnnotation(annotation_key, annotation,exact_match=exact_match)
subset = self.subsetSampleAnnotation(group_key, group, subset,exact_match=exact_match)
return set(subset[file_type].tolist())
subset = self.subsetSampleAnnotation(annotation_key, annotation, exact_match=exact_match)
subset = self.subsetSampleAnnotation(group_key, group, subset, exact_match=exact_match)
ans = subset[file_type].tolist()
if asSet:
ans = set(ans)
return ans

def getRow(self, column, value):
sa = self.annotationTable
Expand All @@ -277,7 +287,8 @@ def getGroupedIDs(self, assays):
Get group to IDs mapping
:param assays: list of or single assay the IDs should be from. Can be file_type or 'RNA'/'DNA'
"""
assays = [assays] if isinstance(assays, str) else assays
if isinstance(assays, str):
assays = [assays]
groupedIDs = defaultdict(list)
for assay in assays:
if "RNA" in assay:
Expand All @@ -286,6 +297,8 @@ def getGroupedIDs(self, assays):
groupedIDs.update(self.dnaIDs)
elif "GENE_COUNT" in assay:
groupedIDs.update(self.extGeneCountIDs)
elif "SPLICE_COUNT" in assay:
groupedIDs.update(self.extSpliceCountIDs)
else:
raise ValueError(f"'{assay}' is not a valid assay name")
return groupedIDs
Expand Down
35 changes: 28 additions & 7 deletions drop/config/submodules/AberrantSplicing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import numpy as np
import pandas as pd

from snakemake.io import expand

from drop import utils
Expand Down Expand Up @@ -43,10 +46,10 @@ def getSplitCountFiles(self, dataset):
:return: list of files
"""
ids = self.sampleAnnotation.getIDsByGroup(dataset, assay="RNA")
file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache" / f"raw-{dataset}" / \
"sample_tmp" / "splitCounts"
done_files = str(file_stump / "sample_{sample_id}.done")
return expand(done_files, sample_id=ids)
file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache"
count_file = str(file_stump / "splitCounts-{sample_id}.RDS")
return expand(count_file, sample_id=ids)


def getNonSplitCountFiles(self, dataset):
"""
Expand All @@ -55,7 +58,25 @@ def getNonSplitCountFiles(self, dataset):
:return: list of files
"""
ids = self.sampleAnnotation.getIDsByGroup(dataset, assay="RNA")
file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache" / f"raw-{dataset}" / \
"sample_tmp" / "nonSplitCounts"
done_files = str(file_stump / "sample_{sample_id}.done")
file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / \
"cache" / "nonSplicedCounts" / f"raw-{dataset}"
done_files = str(file_stump / "nonSplicedCounts-{sample_id}.h5")
return expand(done_files, sample_id=ids)


def getExternalCounts(self, group: str, fileType: str = "k_j_counts"):
"""
Get externally provided splice count data dir based on the given group.
If a file type is given the corresponding file within the folder is returned.
:param group: DROP group name from wildcard
:param fileType: name of the file without extension which is to be returned
:return: list of directories or files
"""
ids = self.sa.getIDsByGroup(group, assay="SPLICE_COUNT")
extCountFiles = self.sa.getImportCountFiles(annotation=None, group=group,
file_type="SPLICE_COUNTS_DIR", asSet=False)
if fileType is not None:
extCountFiles = np.asarray(extCountFiles)[pd.isna(extCountFiles) == False].tolist()
extCountFiles = [x + "/" + fileType + ".tsv.gz" for x in extCountFiles]
return extCountFiles

2 changes: 2 additions & 0 deletions drop/demo/config_relative.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ exportCounts:
excludeGroups:
- mae
- import_exp
- fraser_ex

aberrantExpression:
run: true
Expand All @@ -36,6 +37,7 @@ aberrantSplicing:
run: true
groups:
- fraser
- fraser_ex
recount: true
longRead: false
keepNonStandardChrs: true
Expand Down
48 changes: 25 additions & 23 deletions drop/demo/sample_annotation_relative.tsv
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
RNA_ID RNA_BAM_FILE DNA_VCF_FILE DNA_ID DROP_GROUP PAIRED_END COUNT_MODE COUNT_OVERLAPS STRAND HPO_TERMS GENE_COUNTS_FILE GENE_ANNOTATION GENOME
HG00096.1.M_111124_6 Data/rna_bam/HG00096.1.M_111124_6_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00096 outrider,mae TRUE IntersectionStrict TRUE no HP:0009802,HP:0010896
HG00103.4.M_120208_3 Data/rna_bam/HG00103.4.M_120208_3_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00103 outrider,mae TRUE IntersectionStrict TRUE no HP:0004582,HP:0031959
HG00106.4.M_120208_5 Data/rna_bam/HG00106.4.M_120208_5_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00106 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0002895,HP:0006731
HG00111.2.M_111215_4 Data/rna_bam/HG00111.2.M_111215_4_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00111 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871
HG00116.2.M_120131_1 Data/rna_bam/HG00116.2.M_120131_1_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00116 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767
HG00126.1.M_111124_8 Data/rna_bam/HG00126.1.M_111124_8_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00126 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293
HG00132.2.M_111215_4 Data/rna_bam/HG00132.2.M_111215_4_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00132 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490
HG00149.1.M_111124_6 Data/rna_bam/HG00149.1.M_111124_6_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00149 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663
HG00150.4.M_120208_7 Data/rna_bam/HG00150.4.M_120208_7_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00150 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144
HG00176.4.M_120208_2 Data/rna_bam/HG00176.4.M_120208_2_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00176 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234
HG00096.1.M_111124_6_trunc Data/rna_bam/HG00096.1.M_111124_6_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00096 fraser TRUE IntersectionStrict TRUE no HP:0009802,HP:0010896
HG00103.4.M_120208_3_trunc Data/rna_bam/HG00103.4.M_120208_3_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00103 fraser TRUE IntersectionStrict TRUE no HP:0004582,HP:0031959
HG00106.4.M_120208_5_trunc Data/rna_bam/HG00106.4.M_120208_5_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00106 fraser TRUE IntersectionStrict TRUE no HP:0002895,HP:0006731
HG00111.2.M_111215_4_trunc Data/rna_bam/HG00111.2.M_111215_4_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00111 fraser TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871
HG00116.2.M_120131_1_trunc Data/rna_bam/HG00116.2.M_120131_1_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00116 fraser TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767
HG00126.1.M_111124_8_trunc Data/rna_bam/HG00126.1.M_111124_8_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00126 fraser TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293
HG00132.2.M_111215_4_trunc Data/rna_bam/HG00132.2.M_111215_4_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00132 fraser TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490
HG00149.1.M_111124_6_trunc Data/rna_bam/HG00149.1.M_111124_6_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00149 fraser TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663
HG00150.4.M_120208_7_trunc Data/rna_bam/HG00150.4.M_120208_7_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00150 fraser TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144
HG00176.4.M_120208_2_trunc Data/rna_bam/HG00176.4.M_120208_2_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00176 fraser TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234
HG00178.4.M_120208_8 import_exp Data/external_geneCounts.tsv.gz v29
HG00181.4.M_120208_4 import_exp Data/external_geneCounts.tsv.gz v29
RNA_ID RNA_BAM_FILE DNA_VCF_FILE DNA_ID DROP_GROUP PAIRED_END COUNT_MODE COUNT_OVERLAPS STRAND HPO_TERMS GENE_COUNTS_FILE GENE_ANNOTATION GENOME SPLICE_COUNTS_DIR
HG00096.1.M_111124_6 Data/rna_bam/HG00096.1.M_111124_6_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00096 outrider,mae TRUE IntersectionStrict TRUE no HP:0009802,HP:0010896
HG00103.4.M_120208_3 Data/rna_bam/HG00103.4.M_120208_3_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00103 outrider,mae TRUE IntersectionStrict TRUE no HP:0004582,HP:0031959
HG00106.4.M_120208_5 Data/rna_bam/HG00106.4.M_120208_5_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00106 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0002895,HP:0006731
HG00111.2.M_111215_4 Data/rna_bam/HG00111.2.M_111215_4_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00111 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871
HG00116.2.M_120131_1 Data/rna_bam/HG00116.2.M_120131_1_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00116 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767
HG00126.1.M_111124_8 Data/rna_bam/HG00126.1.M_111124_8_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00126 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293
HG00132.2.M_111215_4 Data/rna_bam/HG00132.2.M_111215_4_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00132 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490
HG00149.1.M_111124_6 Data/rna_bam/HG00149.1.M_111124_6_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00149 outrider,import_exp TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663
HG00150.4.M_120208_7 Data/rna_bam/HG00150.4.M_120208_7_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00150 fraser_ex,outrider,import_exp TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144
HG00176.4.M_120208_2 Data/rna_bam/HG00176.4.M_120208_2_chr21.bam Data/dna_vcf/demo_chr21.vcf.gz HG00176 fraser_ex,outrider,import_exp TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234
HG00096.1.M_111124_6_trunc Data/rna_bam/HG00096.1.M_111124_6_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00096 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0009802,HP:0010896
HG00103.4.M_120208_3_trunc Data/rna_bam/HG00103.4.M_120208_3_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00103 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0004582,HP:0031959
HG00106.4.M_120208_5_trunc Data/rna_bam/HG00106.4.M_120208_5_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00106 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0002895,HP:0006731
HG00111.2.M_111215_4_trunc Data/rna_bam/HG00111.2.M_111215_4_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00111 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871
HG00116.2.M_120131_1_trunc Data/rna_bam/HG00116.2.M_120131_1_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00116 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767
HG00126.1.M_111124_8_trunc Data/rna_bam/HG00126.1.M_111124_8_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00126 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293
HG00132.2.M_111215_4_trunc Data/rna_bam/HG00132.2.M_111215_4_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00132 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490
HG00149.1.M_111124_6_trunc Data/rna_bam/HG00149.1.M_111124_6_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00149 fraser,fraser_ex TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663
HG00150.4.M_120208_7_trunc Data/rna_bam/HG00150.4.M_120208_7_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00150 fraser TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144
HG00176.4.M_120208_2_trunc Data/rna_bam/HG00176.4.M_120208_2_chr21.bam_trunc.bam Data/dna_vcf/demo_chr21.vcf.gz HG00176 fraser TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234
HG00178.4.M_120208_8 import_exp Data/external_count_data/geneCounts.tsv.gz v29
HG00181.4.M_120208_4 fraser_ex,import_exp Data/external_count_data/geneCounts.tsv.gz v29 Data/external_count_data
HG00191.3.M_120208_3 fraser_ex Data/external_count_data
HG00201.1.M_120208_6 fraser_ex Data/external_count_data
7 changes: 4 additions & 3 deletions drop/download_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
set -e

# get data
resource_url="https://www.cmm.in.tum.de/public/paper/drop_analysis/resource.tar.gz"
tmpdir="$(dirname "$(mktemp)")"
wget -nc -P $tmpdir $resource_url
resource_url="https://www.cmm.in.tum.de/public/paper/drop_analysis/resource_splice_merge.tar.gz"
tmpdir=$(dirname "$(mktemp -u)")
wget -nc -O "$tmpdir/resource.tar.gz" "$resource_url"

mkdir -p Data
if [ -z "$(ls Data)" ]; then
tar -zxvf "$tmpdir/resource.tar.gz" -C .
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
#' log:
#' - snakemake: '`sm str(tmp_dir / "AS" / "{dataset}" / "00_defineDataset.Rds")`'
#' params:
#' - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
#' - ids: '`sm lambda w: sa.getIDsByGroup(w.dataset, assay="RNA")`'
#' - fileMappingFile: '`sm cfg.getRoot() + "/file_mapping.csv"`'
#' input:
#' - sampleAnnoFile: '`sm config["sampleAnnotation"]`'
#' output:
Expand All @@ -23,30 +21,30 @@
#'---

saveRDS(snakemake, snakemake@log$snakemake)
source(snakemake@params$setup, echo=FALSE)
suppressPackageStartupMessages(library(data.table))

#+ input
outFile <- snakemake@output$colData
annoFile <- snakemake@input$sampleAnnoFile
fileMapFile <- snakemake@params$fileMapping

#+ dataset name

name <- snakemake@wildcards$dataset
anno <- fread(annoFile)
mapping <- fread(fileMapFile)

subset_ids <- snakemake@params$ids
annoSub <- anno[RNA_ID %in% subset_ids]
colData <- merge(
annoSub[,.(sampleID = RNA_ID, STRAND, PAIRED_END)],
mapping[FILE_TYPE == "RNA_BAM_FILE", .(sampleID=ID, bamFile=FILE_PATH)])
setnames(annoSub, "RNA_ID", "sampleID")
setnames(annoSub, "RNA_BAM_FILE", "bamFile")
setnames(annoSub, "PAIRED_END", "pairedEnd")
setcolorder(annoSub, unique(c("sampleID", "STRAND", "pairedEnd", "bamFile"),
colnames(annoSub)))

#'
#' ## Dataset: `r name`
#'
#+ echo=FALSE
finalTable <- colData
finalTable <- annoSub

#'
#' ## Final sample table `r name`
Expand All @@ -55,4 +53,4 @@ finalTable <- colData
DT::datatable(finalTable, options=list(scrollX=TRUE))

dim(finalTable)
write_tsv(finalTable, file=outFile)
write.table(x=finalTable, file=outFile, quote=FALSE, sep='\t', row.names=FALSE)
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,32 @@
#' wb:
#' log:
#' - snakemake: '`sm str(tmp_dir / "AS" / "{dataset}" / "01_0_init.Rds")`'
#' params:
#' - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
#' - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets"`'
#' input:
#' - colData: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/annotations/{dataset}.tsv"`'
#' output:
#' - fdsobj: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/fds-object.RDS"`'
#' - done_fds: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/cache/raw-{dataset}/fds.done" `'
#' - fds_init: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/fds-init.done"`'
#' type: script
#'---

saveRDS(snakemake, snakemake@log$snakemake)
source(snakemake@params$setup, echo=FALSE)
suppressPackageStartupMessages(library(FRASER))

dataset <- snakemake@wildcards$dataset
# input
dataset <- snakemake@wildcards$dataset
colDataFile <- snakemake@input$colData
workingDir <- snakemake@params$workingDir
params <- snakemake@config$aberrantSplicing
fds_init <- snakemake@output$fds_init
params <- snakemake@config$aberrantSplicing

# Create initial FRASER object
col_data <- fread(colDataFile)

fds <- FraserDataSet(colData = col_data,
workingDir = workingDir,
workingDir = dirname(dirname(dirname(fds_init))),
name = paste0("raw-", dataset))

# Add paired end and strand specificity to the fds
pairedEnd(fds) <- colData(fds)$PAIRED_END
# Add strand specificity to the fds
strandSpecific(fds) <- 'no'
if(uniqueN(colData(fds)$STRAND) == 1){
strandSpecific(fds) <- unique(colData(fds)$STRAND)
Expand All @@ -45,4 +40,4 @@ fds <- saveFraserDataSet(fds)

message(date(), ": FRASER object initialized for ", dataset)

file.create(snakemake@output$done_fds)
file.create(fds_init)
Loading