-
Notifications
You must be signed in to change notification settings - Fork 1
/
make_panel_of_normals.r
84 lines (70 loc) · 2.33 KB
/
make_panel_of_normals.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#' make_panel_of_normals.r
#'
#' Construct the GATK Panel of Normals of MDA-MB-231 cell lineage from the GSE181410 data.
import::from("src/sra.r", "get_srr_samples", "srr_download_sample")
import::from("phyloRNA",
"cellranger_mkref", "cellranger_count",
"gatk_prepare", "gatk_make_pon",
"mkdir"
)
import::from("parallel", "mcMap")
main = function(){
outdir = "pon/MDA-MB-231"
fastqdir = file.path(outdir, "fastq")
mapdir = file.path(outdir, "map")
refdir = "reference/ref"
gse = "GSE181410"
reference = "reference/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
annotation = "reference/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gtf"
vcf = "reference/00-common_all.vcf.gz"
# create annotation:
cellranger_mkref(reference, annotation, refdir, nthreads=8)
# get samples
mkdir(outdir)
samples = get_srr_samples(gse, save=file.path(outdir, paste0(gse, ".rds")))
# construct names from samples
samples$name = make_sample_names(samples$name)
ncores = nrow(samples)
# download samples
mkdir(fastqdir)
mcMap(srr_download_sample, srr=samples$srr, prefix=samples$name, outdir=fastqdir,
mc.cores=ncores)
# map and demultiplex
mkdir(mapdir)
outputs = mcMap(
cellranger_count,
id = samples$name, sample = samples$name,
fastqdir = fastqdir, refdir = refdir,
outdir = file.path(mapdir, samples$name),
nthreads = 8,
mc.cores = ncores
)
aligned = sapply(outputs, getElement, "bam")
cleaned = file.path(mapdir, paste0(samples$name, ".cleaned.bam"))
mcMap(gatk_prepare,
input = aligned, output = cleaned,
reference = reference, vcf = vcf,
mc.cores=ncores
)
gatk_make_pon(
bam = cleaned,
reference = reference,
vcf = file.path(outdir, paste0("pon.vcf")),
outdir = file.path(outdir, "mutect2")
)
}
make_sample_names = function(x){
CancerMitotransfer = grepl("mito-mEmerald+", x, fixed=TRUE)
MacrophageMitotransfer = grepl("mitoRFP+", x, fixed=TRUE)
rep = sub(".*(rep[1-9])", "\\1", x)
names = paste0(
"MDAMB231",
ifelse(CancerMitotransfer, "mt", ""), "-",
ifelse(MacrophageMitotransfer, "MPmt", "MP"), "-",
rep
)
names
}
if(sys.nframe() == 0){
main()
}