-
Notifications
You must be signed in to change notification settings - Fork 1
/
20080912a.py
128 lines (118 loc) · 4.84 KB
/
20080912a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""Characterize the efficiency of a distance matrix sampler.
Each distance matrix is sampled in three steps.
First, a nucleotide alignment is sampled from the distribution
implied by the tree using a Jukes-Cantor model.
Second, the maximum likelihood distance
between each sequence pair is calculated.
Third, the sampled matrix may be rejected
if it has elements that are zero or infinity.
"""
from StringIO import StringIO
import time
from SnippetUtil import HandlingError
import NewickIO
import FelTree
import DMSampler
import Form
import FormOut
import const
g_default_string = const.read('20100730q')
def get_form():
"""
@return: the body of a form
"""
# define the default tree string
tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
# define the form objects
form_objects = [
Form.MultiLine('tree', 'newick tree',
formatted_tree_string),
Form.Integer('length', 'use sequences that are this long',
100, low=1),
Form.RadioGroup('assumption', 'distance matrix sampling model', [
Form.RadioItem('infinite_alleles', 'infinite alleles', True),
Form.RadioItem('jukes_cantor',
'Jukes-Cantor model (4 alleles)')]),
Form.RadioGroup('infinity', 'infinite distance estimates', [
Form.RadioItem('reject_infinity', 'reject these matrices'),
Form.RadioItem('replace_infinity',
'replace inf with 20', True)]),
Form.RadioGroup('zero', 'distance estimates of zero', [
Form.RadioItem('reject_zero', 'reject these matrices'),
Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
Form.RadioItem('remain_zero', 'use 0 unmodified', True)])]
return form_objects
def get_form_out():
return FormOut.Report()
def get_response_content(fs):
# read the tree
tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
# get arbitrarily ordered leaf names
ordered_names = list(node.name for node in tree.gen_tips())
# define the distance matrix sampler
if fs.infinite_alleles:
sampler = DMSampler.InfiniteAllelesSampler(
tree, ordered_names, fs.length)
elif fs.jukes_cantor:
sampler = DMSampler.DMSampler(
tree, ordered_names, fs.length)
if fs.reject_infinity:
sampler.set_inf_replacement(None)
elif fs.replace_infinity:
sampler.set_inf_replacement(20)
if fs.reject_zero:
sampler.set_zero_replacement(None)
elif fs.replace_zero:
sampler.set_zero_replacement(0.00001)
elif fs.remain_zero:
sampler.set_zero_replacement(0.0)
# define the amount of time allotted to the sampler
allocated_seconds = 2
# do some sampling, saving a summary but discarding the samples
start_time = time.clock()
run_seconds = 0
for result in sampler.gen_samples_or_none():
run_seconds = time.clock() - start_time
if run_seconds > allocated_seconds:
break
# define the response
out = StringIO()
print >> out, 'these are the results for a', run_seconds, 'second run:'
print >> out, sampler.proposed, 'samples were proposed'
print >> out, sampler.accepted, 'samples were accepted'
print >> out, sampler.proposals_with_zero, 'proposals had a distance estimate of zero'
print >> out, sampler.proposals_with_inf, 'proposals had a distance estimate of infinity'
# write the response
return out.getvalue()
def main():
# use the default sequence length
sequence_length = 100
# use the default tree
tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
tree = NewickIO.parse(tree_string, FelTree.NewickTree)
# get arbitrarily ordered leaf names
ordered_names = list(node.name for node in tree.gen_tips())
# create the sampler
sampler = DMSampler.InfiniteAllelesSampler(
tree, ordered_names, sequence_length)
sampler.set_inf_replacement(20)
sampler.set_zero_replacement(0.0)
# do some sampling, saving a summary but discarding the samples
allocated_seconds = 2
start_time = time.clock()
run_seconds = 0
for result in sampler.gen_samples_or_none():
run_seconds = time.clock() - start_time
if run_seconds > allocated_seconds:
break
# define the response
print 'these are the results for a', run_seconds, 'second run:'
print sampler.proposed, 'samples were proposed'
print sampler.accepted, 'samples were accepted'
msg = 'proposals had a distance estimate of zero'
print sampler.proposals_with_zero, msg
msg = 'proposals had a distance estimate of infinity'
print sampler.proposals_with_inf, msg
if __name__ == '__main__':
main()