-
Notifications
You must be signed in to change notification settings - Fork 11
/
cointerfere.cc
255 lines (217 loc) · 8 KB
/
cointerfere.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
// ped-sim: pedigree simulation tool
//
// This program is distributed under the terms of the GNU General Public License
// This code was ported from the xoi R package written by Karl Broman and
// implemented in C (https://github.com/kbroman/xoi)
#include <string.h>
#include <math.h>
#include <random>
#include <algorithm>
#include "cointerfere.h"
// Note: we don't use GSL version of gamma cdf to reduce library dependencies,
// but it is almost 2x faster than boost
#ifndef USEGSL
#include <boost/math/special_functions/gamma.hpp>
#else // USEGSL:
#include <gsl/gsl_sf_gamma.h>
#endif // USEGSL
// Uniform real distribution over [0, 1), defined at file scope and shared by
// the samplers below: it supplies the draw compared against the start
// probabilities and, scaled by the chromosome length, uniform positions.
uniform_real_distribution<double> unif_prob(0.0, 1.0);
// Integer distribution defined in another translation unit. The code below
// treats a non-zero draw as "this crossover is on the sampled chromatid".
// NOTE(review): presumably defined over {0, 1} (a fair coin) -- confirm at the
// definition site.
extern uniform_int_distribution<int> coinFlip;
// Precomputes, separately for each sex, a discretized cumulative distribution
// for the position of the first (start) crossover, using <N_BINS4START>
// equal-width bins across the chromosome. The table is stored in <startProb>
// so that sampling the first crossover is a lookup rather than a fresh
// incomplete-gamma evaluation.
void COInterfere::initStartProb() {
  for (int sex = 0; sex < 2; sex++) {
    // The original xoi code is parameterized by scale = 1 / rate; the gamma
    // routines used here are more convenient with the rate itself.
    const double rate = (2.0 * nu[sex] * (1.0 - p[sex]));
    const double step = length[sex] / N_BINS4START;
    const double weight = 2.0 * (1.0 - p[sex]);

    // Probability mass assigned to one bin: the upper tail of the gamma
    // distribution (shape nu, the rate above) evaluated at the bin midpoint,
    // times the bin width. R equivalent:
    //   2.0 * (1.0 - p) * pgamma((bin + 0.5) * step, nu, 1/rate, 0, 0) * step
    auto binMass = [&](int bin) -> double {
      const double scaledMid = (bin + 0.5) * step * rate;
#ifndef USEGSL
      const double upperTail = boost::math::gamma_q(/*shape=*/nu[sex], scaledMid);
#else // USEGSL:
      const double upperTail = gsl_sf_gamma_inc_Q(/*shape=*/nu[sex], scaledMid);
#endif // USEGSL
      return weight * upperTail * step;
    };

    // Running sum over the bins: entry i holds the total mass of bins 0..i
    startProb[sex][0] = binMass(0);
    for (int bin = 1; bin < N_BINS4START; bin++)
      startProb[sex][bin] = startProb[sex][bin - 1] + binMass(bin);
  }
}
// Reads the interference parameters nu and p for males and females from
// <interfereFile> and appends one COInterfere object per chromosome to
// <coIntf>.
//
// File format: one line per chromosome with five whitespace-delimited fields,
//   <chrom> <male nu> <male p> <female nu> <female p>
// Lines beginning with '#' are comments and blank lines are skipped. The
// chromosomes must appear in the same order, and with the same names, as in
// <map>, and every chromosome in <map> must be listed.
//
// coIntf:          receives the per-chromosome interference models
// interfereFile:   path of the file to read
// map:             genetic map; supplies the chromosome names and the
//                  sex-specific genetic lengths
// sexSpecificMaps: must be true; interference simulation requires
//                  sex-specific maps
//
// Any error is reported on stderr and terminates the program via exit().
void COInterfere::read(vector<COInterfere> &coIntf, char *interfereFile,
                       GeneticMap &map, bool &sexSpecificMaps) {
  if (!sexSpecificMaps) {
    fprintf(stderr, "ERROR: Must use sex specific genetic maps in order to simulate with interference\n");
    exit(6);
  }

  size_t bufSize = 1024;
  char *buffer = (char *) malloc(bufSize + 1);
  if (buffer == NULL) {
    fprintf(stderr, "ERROR: out of memory\n");
    exit(5);
  }

  // '\r' is a delimiter so that files with CRLF line endings parse cleanly
  const char *delim = " \t\r\n";

  FILE *in = fopen(interfereFile, "r");
  if (!in) {
    fprintf(stderr, "ERROR: could not open interference file %s!\n",
            interfereFile);
    exit(1);
  }

  // Which chromosome index (into <map>) are we on? This allows us to ensure
  // the names of the chromosomes listed in the interference file match those
  // in <map>
  unsigned int chrIdx = 0;

  // getline() grows <buffer> (and updates <bufSize>) as needed for long lines
  while (getline(&buffer, &bufSize, in) >= 0) {
    char *chrom, *nuStr[2], *pStr[2];
    char *saveptr, *endptr;
    double nu[2], p[2];

    if (buffer[0] == '#')
      continue; // comment

    // get chromosome token:
    chrom = strtok_r(buffer, delim, &saveptr);
    if (chrom == NULL)
      continue; // blank line: nothing to parse

    if (chrIdx >= map.size()) {
      if (chrIdx == 0) {
        // no previous chromosome exists to name in the message
        fprintf(stderr, "ERROR: read chrom %s from interference file, but genetic map has no chromosomes\n",
                chrom);
      }
      else {
        fprintf(stderr, "ERROR: read chrom %s from interference file, but last genetic map chromosome\n",
                chrom);
        fprintf(stderr, " is %s\n", map.chromName(chrIdx - 1));
      }
      exit(5);
    }

    // read remaining tokens: nu then p, first for males, then for females
    for(int i = 0; i < 2; i++) {
      const char *sexName = (i == 0) ? "male" : "female";

      nuStr[i] = strtok_r(NULL, delim, &saveptr);
      pStr[i] = strtok_r(NULL, delim, &saveptr);
      if (nuStr[i] == NULL || pStr[i] == NULL) {
        // truncated line: strtod() must not be handed a NULL pointer
        fprintf(stderr, "ERROR: chrom %s, missing %s interference nu and/or p parameter\n",
                chrom, sexName);
        exit(5);
      }

      errno = 0; // so a non-zero value afterwards is attributable to strtod
      nu[i] = strtod(nuStr[i], &endptr);
      if (errno != 0 || *endptr != '\0') {
        fprintf(stderr, "ERROR: chrom %s, could not parse %s interference nu parameter\n",
                chrom, sexName);
        if (errno != 0)
          perror("strtod");
        exit(5);
      }

      errno = 0;
      p[i] = strtod(pStr[i], &endptr);
      if (errno != 0 || *endptr != '\0') {
        fprintf(stderr, "ERROR: chrom %s, could not parse %s interference p parameter\n",
                chrom, sexName);
        if (errno != 0)
          perror("strtod");
        exit(5);
      }
    }

    // anything left on the line is unexpected
    char *tok;
    if ((tok = strtok_r(NULL, delim, &saveptr)) != NULL) {
      fprintf(stderr, "ERROR: read extra token %s in interference file (chrom %s)\n",
              tok, chrom);
      exit(5);
    }

    if (strcmp(chrom, map.chromName(chrIdx)) != 0) {
      fprintf(stderr, "ERROR: order of interference chromosomes different from genetic map:\n");
      fprintf(stderr, " expected chromosome %s in interference file, read %s\n",
              map.chromName(chrIdx), chrom);
      exit(10);
    }

    // Get the genetic lengths of the male and female maps for this chromosome
    double len[2];
    for(int i = 0; i < 2; i++)
      len[i] = map.chromGenetLength(chrIdx, /*sex=*/ i) / 100; // in Morgans

    coIntf.emplace_back(nu, p, len);
    chrIdx++;
  }

  if (chrIdx != map.size()) {
    fprintf(stderr, "ERROR: read %u chromosomes from interference file, but genetic map has %lu\n",
            chrIdx, map.size());
    exit(5);
  }

  free(buffer);
  fclose(in);
}
// Samples the crossover locations for one meiosis on this chromosome and
// appends them, sorted, to <locations>. Crossovers are drawn from a mixture of
// an interfering process (gamma-distributed distances between chiasmata, shape
// nu) and, when p > 0, a non-interfering process (uniform positions with a
// Poisson count).
//
// locations: stores sampled crossover locations (assumed empty initially);
//            values lie in [0, length[sex]) and are in the units of <length>
// sex: 0 or 1 for male or female meiosis, respectively
// randomGen: random number generator
void COInterfere::simStahl(vector<double> &locations, int sex,
                           mt19937 &randomGen) {
  if (fabs(nu[sex] - 1.0) < 1e-8) { // looks like a Poisson model
    poisson_distribution<int> standard(length[sex]);
    int nxo = standard(randomGen);
    for(int j = 0; j < nxo; j++)
      locations.push_back( unif_prob(randomGen) * length[sex] );
    sort(locations.begin(), locations.end());
    return; // done; below code is for mixture model
  }

  /////////////////////////////////////////////////////////////////////////////
  // using mixture model

  // original code used scale = 1 / rate, but rate is more convenient for the
  // libraries we use
  double rate = (2.0 * nu[sex] * (1.0 - p[sex]));
  double step = length[sex] / N_BINS4START;

  // sample location of current crossover -- initially none: so chr start
  double curloc = 0.0;

  // locations of chiasmata from the gamma model
  // shape = nu, rate = 2*nu*(1-p) [scale = 1/{2*nu*(1-p)}]
  double u = unif_prob(randomGen);
  if ( u > startProb[sex][ N_BINS4START - 1 ] )
    curloc = length[sex]+1; // no crossovers: at end of chromosome
  else {
    // Locate the first bin whose cumulative start probability is >= u, i.e.
    // the smallest j with u <= startProb[sex][j] (what a linear scan from bin
    // 0 would find). The branch condition guarantees such a bin exists, so the
    // result is never one-past-the-end.
    const double *cumBegin = &startProb[sex][0];
    const double *cumEnd = cumBegin + N_BINS4START;
    const double *firstBin = std::lower_bound(cumBegin, cumEnd, u);

    // place the first chiasma at the midpoint of the selected bin
    curloc = ((double) (firstBin - cumBegin) + 0.5) * step;

    if(coinFlip(randomGen)) // on this chromatid? coin toss
      locations.push_back(curloc);
  }

  gamma_distribution<double> gammaRand(nu[sex], /*scale=*/ 1 / rate);
  while(curloc < length[sex]) {
    curloc += gammaRand(randomGen); // location of next chiasmata
    // is it before the end of the chromosome and on this chromatid?
    // (the coin is only tossed when the chiasma is on the chromosome)
    if(curloc < length[sex] && coinFlip(randomGen))
      // coin toss decides chromatid
      locations.push_back(curloc);
  }

  // locations of crossovers from the no interference mechanism
  if(p[sex] > 0) {
    poisson_distribution<int> no_interfere(length[sex] * p[sex]);
    int n_nixo = no_interfere(randomGen);

    for(int j = 0; j < n_nixo; j++)
      // sample position of the non-interference derived crossovers uniformly
      locations.push_back( unif_prob(randomGen) * length[sex] );
  }

  // the two mechanisms were appended independently: merge into sorted order
  sort(locations.begin(), locations.end());
}