Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

edits for DFE-alpha #119

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions workflows/dfe.snake
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ rule dadi_infer_dfe:
opts = 100,
prefix = output_dir + "/inference/{demog}/dadi/{dfes}/{annots}/{seeds}/pop{ids}/pop{ids}.two_epoch.gamma"
resources:
time=4000
time=10080
threads: 8
shell:
"""
Expand Down Expand Up @@ -290,7 +290,7 @@ rule generate_dfe_alpha_fs:
ts_dict[chrm] = fp
index = int(wildcards.ids)
ts2fs.generate_fs(ts_dict, index, mask_file, wildcards.annots,
species, output, format='DFE-alpha', is_folded=True,
species, output, format='DFE-alpha', is_folded=False,
data_path_1=dfe_alpha_data_path_1, data_path_2=dfe_alpha_data_path_2,
sfs_input_file=output[2], est_dfe_results_dir=params.output,
est_dfe_demography_results_file=params.output+"/neu/est_dfe.out")
Expand Down
95 changes: 90 additions & 5 deletions workflows/ts2fs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,57 @@
import dadi
import numpy as np
from numpy.random import default_rng
import tskit
import masks
import msprime

def mask_to_ratemap(annot_intervals, sequence_length, mutation_rate, proportion):
"""
Convert an interval mask [a, b) to a mutation rate map

Args:
annot_intervals: np.array of shape (n_intervals, 2)
sequence_length: int
mutation_rate: float
proportion: float -- proportion of mutations in interval that contribute to count
"""
assert annot_intervals.shape[1] == 2
bitmask = np.full(sequence_length, False)
for (a, b) in annot_intervals:
assert a >= 0, "Mask has negative coordinates"
assert b <= sequence_length, "Mask exceeds sequence boundary"
bitmask[int(a):int(b)] = True
switch = np.logical_xor(bitmask[:-1], bitmask[1:])
coords = np.append(0, np.flatnonzero(switch) + 1)
values = bitmask[coords].astype(np.float64)
coords = np.append(coords, sequence_length).astype(np.float64)
values[values > 0] = mutation_rate * proportion
return msprime.RateMap(position = coords, rate = values)

def get_expected_netural_subs(ts, mutation_rate):
"""
Extract the expected number of neutral substitutions from a tree sequence

Args:
ts: tskit.TreeSequence
mutation_rate: float
"""
sequence_length = int(ts.sequence_length)
exp_neutral_subs = 0.0
for dfe in ts.metadata['stdpopsim']['DFEs']:
for mtypes in dfe['mutation_types']:
if mtypes['is_neutral']:
annot_intervals = np.array(dfe['intervals'])
#assumes netural is first proportion in list
proportion = dfe['proportions'][0]
ratemap = mask_to_ratemap(annot_intervals, sequence_length, mutation_rate, proportion)
slim_simulation_time = ts.metadata['SLiM']['tick']
for t in ts.trees():
if t.num_edges > 0:
exp_neutral_subs += max(0, (slim_simulation_time - t.root) *
silastittes marked this conversation as resolved.
Show resolved Hide resolved
(ratemap.get_cumulative_mass(t.interval.right) -
ratemap.get_cumulative_mass(t.interval.left)))
return int(exp_neutral_subs)

def _generate_fs_from_ts(ts_dict, pop_idx, mask_file, annot, species, max_haplos):
"""
Expand All @@ -27,15 +77,24 @@ def f(x):

mut_afs = {"neutral":[], "non_neutral":[]}
seq_len = 0
neutral_anc_count = 0
non_neutral_anc_count = 0
neutral_prop = 0.3
nonneu_prop = 0.7
total_neutral_subs = 0
for chrm in ts_dict:
ts = tskit.load(ts_dict[chrm])
contig = species.get_contig(chrm)
chrom_length = contig.recombination_map.sequence_length
sample_sets = ts.samples(population=pop_idx)
if max_haplos:
sample_sets = sample_sets[:max_haplos]

# apply masks
if mask_file is not None:
mask_intervals = masks.get_mask_from_file_dfe(mask_file, chrm)
assert mask_intervals.shape[1] == 2
mask_length = sum([interval[1] - interval[0] for interval in mask_intervals])
ts = ts.delete_intervals(mask_intervals)

# grab coding regions
Expand All @@ -45,9 +104,16 @@ def f(x):
ts = ts.keep_intervals(annot_intervals)
exon_len = np.sum(annot_intervals[:,1]-annot_intervals[:,0])
seq_len += exon_len
non_neutral_anc_count += exon_len * nonneu_prop
neutral_anc_count += exon_len * neutral_prop
else:
contig = species.get_contig(chrm)
seq_len += contig.recombination_map.sequence_length
seq_len += chrom_length - mask_length
non_neutral_anc_count += chrom_length * nonneu_prop
neutral_anc_count += chrom_length * neutral_prop

# Get count of substitutions for neutral SFS
mutation_rate = species.genome.mean_mutation_rate
total_neutral_subs += get_expected_netural_subs(ts, mutation_rate)

# Mapping mutation type IDs to class of mutation (e.g., neutral, non-neutral)
mut_types = {}
Expand Down Expand Up @@ -85,6 +151,22 @@ def f(x):
mut_afs["neutral"] = np.sum(mut_afs["neutral"], axis=0)
mut_afs["non_neutral"] = np.sum(mut_afs["non_neutral"], axis=0)

#random draw of neutral subs from expected
rng = default_rng(42)
neutral_subs_draw = rng.poisson(total_neutral_subs)

#add neutral subs to last bin
mut_afs["neutral"][-1] = neutral_subs_draw

#add in ancestral counts
mut_afs["neutral"][0] = neutral_anc_count
mut_afs["non_neutral"][0] = non_neutral_anc_count

# Remove mutated sites from first bin
mut_afs["neutral"][0] = mut_afs["neutral"][0] - sum(mut_afs["neutral"][1:])
mut_afs["non_neutral"][0] = mut_afs["non_neutral"][0] - sum(mut_afs["non_neutral"][1:])

print(f"AFSs\nNeutral: {mut_afs['neutral']}\nNon-neutral: {mut_afs['non_neutral']}")
Comment on lines +157 to +171
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

poisson draw and removing mutations from ancestral monomorphic counts

return mut_afs,seq_len,len(sample_sets)


Expand Down Expand Up @@ -189,6 +271,7 @@ def _generate_dfe_alpha_fs(neu_fs, nonneu_fs, output, is_folded, **kwargs):
Arguments:
neu_fs numpy.ndarray: Frequency spectrum for neutral mutations.
nonneu_fs numpy.ndarray: Frequency spectrum for non-neutral mutations.
seq_len int: Length of the genomic sequence generates mutations.
output list: Names of output files.
is_folded bool: True, generates a folded frequency spectrum; False, generates an unfolded frequency spectrum.

Expand Down Expand Up @@ -248,13 +331,14 @@ def generate_config(data_path_1, data_path_2, sfs_input_file, est_dfe_results_di
## DFE-alpha 2.16 manual Page 3
## If folded SFSs are provided by the user, then their length must be the number of alleles sampled +1,
## and the upper half of the SFS vectors should be zeros

if is_folded:
if len(neu_fs) % 2 == 1: upper_half = np.zeros(len(neu_fs))
else: upper_half = np.zeros(len(neu_fs)-1)
else: upper_half = np.zeros(len(neu_fs)-1)
neu_fs = np.append(neu_fs, upper_half)

if len(nonneu_fs) % 2 == 1: upper_half = np.zeros(len(nonneu_fs))
else: upper_half = np.zeros(len(nonneu_fs)-1)
else: upper_half = np.zeros(len(nonneu_fs)-1)
nonneu_fs = np.append(nonneu_fs, upper_half)

with open(neu_config_out, 'w') as o:
Expand All @@ -267,7 +351,6 @@ def generate_config(data_path_1, data_path_2, sfs_input_file, est_dfe_results_di
o.write(" ".join([str(round(f)) for f in nonneu_fs]) + "\n")
o.write(" ".join([str(round(f)) for f in neu_fs]) + "\n")


def _generate_grapes_fs(neu_fs, nonneu_fs, output, is_folded, seq_len, sample_size, **kwargs):
"""
Description:
Expand All @@ -288,7 +371,9 @@ def _generate_grapes_fs(neu_fs, nonneu_fs, output, is_folded, seq_len, sample_si
nonneu_prop float: Proportion of non-neutral mutations
"""
neu_len = round(seq_len * kwargs['neu_prop'])
neu_fs[0] = neu_len - sum(neu_fs[1:])
nonneu_len = round(seq_len * kwargs['nonneu_prop'])
nonneu_fs[0] = nonneu_len - sum(nonneu_fs[1:])

with open(output[0], 'w') as o:
o.write(kwargs['header']+"\n")
Expand Down
Loading