popsim-consortium · RyanGutenkunst · Sep 28, 2024
diff --git a/analysis2_manuscript.tex b/analysis2_manuscript.tex
@@ -498,26 +498,31 @@ \section*{Estimation of the Distribution of Fitness effects}
     \begin{figure}
         \centering
         \includegraphics[width=\textwidth]{figures/PhoSin/Vaquita2Epoch_1R22/PhoSin_Vaquita2Epoch_1R22_Gamma_R22_Phocoena_sinus.mPhoSin1.pri.110_exons_DFE_plot.pdf}
+\includegraphics{figures/PhoSin/Vaquita2Epoch_1R22/hs_hist.pdf}
         \caption{
         \label{fig:vaquita-dfe}
         A comparison of methods for inferring the distribution of fitness effects (DFE) from genetic data.
         Simulations were performed using a model of vaquita porpoise demography \citep{robinson2022critically} with a Gamma distributed DFE
-        acting on exons parameterized by a mean selection coefficient and a shape parameter. Estimates of the 
+        acting on exons parameterized by a mean selection coefficient, a shape parameter, and a relationship between selection and domiance. Estimates of the 
         parameters of the DFE are shown in the left ($\lvert E(s) \rvert $) and right hand panels (shape) respectively.
         This vaquita model has a single extant population, labelled pop 0, and parameter estimates from each
         of the three different methods are shown: 1) GRAPES \cite{galtier2016adaptive}, 2) polyDFE \citep{tataru2020polydfe},
-        and 3) dadi \citep{gutenkunst2009inferring}.}
+        and 3) dadi \citep{gutenkunst2009inferring,kim2017inference}.
+        C) Shown is the simulated distribution of $2 h s$, where $h$ is the dominance coefficient, along with the simulated distribution of $s$ and the median inferred DFEs. The distribution of $2 h s$ is multimodal because the simulated relationship between $h$ and $s$ is not continuous.
+        % RNG: For C, need to extract inferred values from pipeline.
+        }
     \end{figure}
 
     In Figures \ref{fig:vaquita-dfe.constant} and \ref{fig:vaquita-dfe} we show a comparison of estimates
     from the same three software packages for inferring the DFE from genetic data, but for simulations of the
     vaquita porpoise genome. In the constant size model (fig \ref{fig:vaquita-dfe.constant}) we see that all methods
     perform uniformly worse that in the human genome simulations, which consistent underestimation of the mean selection coefficient    
     and overestimation of the shape parameter. 
-    In context of our model of vaquita porpoise demography \citep{robinson2022critically}, which models a population
-    decline towards the present, we see that methods to infer the DFE perform no better-- again each method 
-    signficiantly underestimates the mean selection coefficient and overestimates the shape parameter.
-    %ADK: add why we think this is the case-- is this a bug? or a feature of the genome?
+A critical distinction is that the previous human simulations employed a DFE that assumed all selected mutations had additive effects (no dominance, $h = 1/2$), whereas the vaquita simulations use a DFE in which more deleterious mutations are more recessive ($h < 1/2$).
+For all three software packages, the inferred mean and shape of the distribution of selection coefficients is consistent with the mean and shape of the distribution of $2 h s$, the product of the dominance coefficient and the selection coefficient (Fig.~\ref{fig:vaquita-dfe}C).
+This is likely because deleterious alleles are typically at low frequency and thus heterozygous, where their selective effect is $h s$, and all these tools assume additivity  ($h = 1/2$).
+Strongly deleterious alleles are often observed to be recessive in many species \citep{}, so these results suggest caution in interpreting DFE inferences.
+%RNG: Could use Kirk's help with the best references here
 
 \section*{Detection of Selective Sweeps}
     \label{sweeps}

diff --git a/figures/PhoSin/Vaquita2Epoch_1R22/hs_fig.py b/figures/PhoSin/Vaquita2Epoch_1R22/hs_fig.py
@@ -0,0 +1,70 @@
+import numpy as np
+import scipy.stats.distributions as ssd
+
+# From https://github.com/popsim-consortium/stdpopsim/blob/main/stdpopsim/catalog/PhoSin/dfes.py
+gamma_shape = 0.131
+gamma_mean = -0.0257
+dominance_coeff_list=[0.0, 0.01, 0.1, 0.4]
+dominance_coeff_breaks=[-0.1, -0.01, -0.001]
+
+# Construct the corresponding gamma distribution
+gamma_scale = -gamma_mean / gamma_shape
+dfe = ssd.gamma(gamma_shape, scale=gamma_scale)
+assert(dfe.mean() == abs(gamma_mean))
+
+# Simulate draws from the DFE
+N = 100000
+s_array = -dfe.rvs(N)
+
+# Sanity check, using method of moments to recover simulated shape and scale.
+# (Fixing location parameter to zero.)
+# mean = shape * scale, var = shape * scale**2
+# So scale = var/mean, and shape = mean/scale
+fit_s_scale = s_array.var()/s_array.mean()
+fit_s_shape = s_array.mean()/fit_s_scale
+# Or just use fit_s_shape, _, fit_s_scale = ssd.gamma.fit(s_array, method='MM', floc=0) 
+print('Sanity check: Inferred s shape and mean: {0:.4f}, {1:.5f}'.format(fit_s_shape, fit_s_shape*fit_s_scale))
+
+# Convert to 2*hs
+breaks = [-np.inf] + list(dominance_coeff_breaks) + [0]
+hs_arrays = []
+for h, s_start, s_end in zip(dominance_coeff_list, breaks[:-1], breaks[1:]):
+    hs_arrays.append(2*h*s_array[np.logical_and(s_start < s_array, s_array < s_end)])
+hs_array = np.concatenate(hs_arrays)
+
+# Calculate mean hs
+mean_hs = hs_array.mean()
+print('Simulated mean 2hs: {0:.6f}'.format(mean_hs))
+
+#fit_hs_shape, _, fit_hs_scale = ssd.gamma.fit(hs_array, method='MM', floc=0)
+#dfe_hs = ssd.gamma(fit_hs_shape, scale=-fit_hs_scale)
+#
+#print('Inferred 2hs shape and mean: {0:.4f}, {1:.5f}'.format(fit_hs_shape, fit_hs_shape*fit_hs_scale))
+
+# Results from inferences
+dfe_dadi= ssd.gamma(0.33, scale=0.002)
+dfe_polydfe = ssd.gamma(0.33, scale=0.002)
+dfe_grapes = ssd.gamma(0.35, scale=0.0035)
+
+# Plotting
+import matplotlib.pyplot as plt
+plt.rc('font', size=10)
+
+fig = plt.figure(2132, figsize=(3.5,2.5))
+fig.clear()
+ax = fig.add_subplot(1,1,1)
+# Histogram of 2*hs, scaled by 10^3
+ax.hist(hs_array*1e3, bins=101, density=True, label='Simulated 2hs distribution')
+# DFE inferred for 2hs
+xx = np.linspace(-2.2, 0, 1001)
+ax.plot(xx, dfe.pdf(-xx/1e3)/1e3, '-k', label='Original s DFE')
+ax.plot(xx, dfe_dadi.pdf(-xx/1e3)/1e3, '-', label='Inferred DFE (GRAPES)')
+ax.plot(xx, dfe_polydfe.pdf(-xx/1e3)/1e3, '-', label='Inferred DFE (polyDFE)')
+ax.plot(xx, dfe_grapes.pdf(-xx/1e3)/1e3, '-', label='Inferred DFE (dadi)')
+ax.set_xlabel(r'$2hs\,\,(\times 10^{-3})$')
+ax.set_ylabel('Probability density')
+ax.legend(loc='upper left')
+ax.set_ylim(0, 2)
+ax.set_xlim(-2.2,0)
+fig.tight_layout(pad=0)
+fig.savefig('hs_hist.pdf')
diff --git a/figures/PhoSin/Vaquita2Epoch_1R22/hs_hist.pdf b/figures/PhoSin/Vaquita2Epoch_1R22/hs_hist.pdf