-
Notifications
You must be signed in to change notification settings - Fork 0
/
scoring.py
executable file
·65 lines (59 loc) · 2.53 KB
/
scoring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
from collections import defaultdict
from operator import itemgetter
import sys
import numpy as np
from util import read_fields, parse_dat, overlap_coords, index_coords, parse_extra_data
from peakdetect import peakdetect, smooth
def overlap(cne_dict, extra):
window_len = sum([len(d['hg_seq']) for d in extra.itervalues()])/float(len(extra))
results = defaultdict(dict)
for cne, scores in cne_dict.iteritems():
scores = sorted(enumerate(scores), key=itemgetter(1), reverse=True)
for hit_rank, (i, score) in enumerate(scores):
if overlap_coords(index_coords(extra[cne]['dr_co'], i, l=window_len),
extra[cne]['dr_valid_co']):
results[cne]['rank'] = hit_rank + 1
results[cne]['places'] = int(len(scores) - window_len)
break
return results
def ranked_peaks(cne_dict, extra):
def dist(cne, i):
v_avg = (extra[cne]['dr_valid_co']['start'] + extra[cne]['dr_valid_co']['end']) / 2.0
v_i = v_avg - extra[cne]['dr_co']['start']
return abs(i - v_i)
results = defaultdict(dict)
for cne, scores in cne_dict.iteritems():
lookahead = int(len(scores) / 50) + 1
scores = np.array([float(x) for x in scores])
signal = smooth(scores, window_len=40, window='bartlett')
maxima = peakdetect(signal, lookahead=lookahead)[0]
maxima.sort(key=itemgetter(1), reverse=True)
if not maxima:
print cne
continue
hit_rank = np.argmin(np.array([dist(cne, i) for i, score in maxima]))
results[cne]['rank'] = hit_rank + 1
results[cne]['places'] = len(maxima)
return results
def main():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--file', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
help='Input file e.g. d2z.dat.')
parser.add_argument('--extra_data', type=argparse.FileType('r'), default='hg18.toDanRer5.seqs.txt',
help='Extra data file e.g. hg18.toDanRer5.seqs.txt.')
parser.add_argument('scoring', choices=['ranked_peaks', 'overlap'])
OPTS = parser.parse_args()
line_tups = read_fields(f=OPTS.file)
cne_dict = parse_dat(line_tups)
extra = parse_extra_data(read_fields(f=OPTS.extra_data))
if OPTS.scoring == 'ranked_peaks':
results = ranked_peaks(cne_dict, extra)
elif OPTS.scoring == 'overlap':
results = overlap(cne_dict, extra)
for cne, result in sorted(results.iteritems()):
sys.stdout.write('\t'.join([cne, str(result['rank']),
str(result['places'])]) + '\n')
if __name__ == '__main__':
main()