-
Notifications
You must be signed in to change notification settings - Fork 0
/
process_STARR.r
69 lines (52 loc) · 1.61 KB
/
process_STARR.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/Rscript
suppressPackageStartupMessages({
library(Rsamtools)
library(GenomicAlignments)
library(GenomicFiles)
library(optparse)
library(rtracklayer)
library(GenomicRanges)
});
# Shorthand for "pretty number" formatting
pn = function(value) {
prettyNum(value, big.mark=",")
}
# Shorthand to print the full argument list
msgout = function(...) {
write(paste(...), stdout());
}
option_list = list(
make_option(c("-i", "--input"),
type="character",
help="Input BAM file to process")
)
opt = parse_args(OptionParser(option_list=option_list));
bfilters = ScanBamParam(mapqFilter=10, flag=scanBamFlag(isSecondaryAlignment = F));
collapse_reads = function(reads) {
uniq = unique(reads);
# sum over duplicates to get a count for each unique 5'/3' end
uniq$count = countOverlaps( uniq, reads, type="equal" );
return( uniq );
}
yield.bam = function(X) {
y = GRanges( readGAlignmentPairs(X, use.names=F, param=bfilters ));
return(y);
}
map.bam = function(X) {
return(X);
}
reduce.bam = function(x, y) {
x = append(x, y);
# print the number of readpairs processed
msgout(pn(length(x)), 'mapped human reads');
return(x);
}
ctFile = opt$input;
msgout( "Processing ", ctFile );
infile = BamFile(ctFile, yieldSize=1 * 10^6, asMates=T );
aligned = reduceByYield( infile, yield.bam, map.bam, reduce.bam, parallel=F );
#msgout(pn(length(aligned)), 'mapped reads');
# compute coverage from identical reads => 'count' column
seqlib = collapse_reads(aligned);
froot = gsub(".bam", "", opt$input);
save( seqlib, file=paste0(froot, ".Rdata") );