diff --git a/DESCRIPTION b/DESCRIPTION index 9c73dd86..5b1be56c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: qtl2 -Version: 0.15-22 -Date: 2018-07-18 +Version: 0.15-23 +Date: 2018-07-20 Title: Quantitative Trait Locus Mapping in Experimental Crosses Description: R/qtl2 provides a set of tools to perform quantitative trait locus (QTL) analysis in experimental crosses. It is a diff --git a/NEWS.md b/NEWS.md index e409ddcd..3f3e75ec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -## qtl2 0.15-22 (2018-07-18) +## qtl2 0.15-23 (2018-07-20) ### New features @@ -78,6 +78,9 @@ - Fix a bug in `scan1snps()` where it didn't check that the `genoprobs` and `map` conform. +- Revised underlying binary trait regression function to avoid some of + the tendency towards NAs. + ## qtl2 0.14 (2018-03-09) diff --git a/src/binreg_eigen.cpp b/src/binreg_eigen.cpp index 471f99bf..360e6f82 100644 --- a/src/binreg_eigen.cpp +++ b/src/binreg_eigen.cpp @@ -18,6 +18,8 @@ using namespace Eigen; double calc_ll_binreg_eigenchol(const NumericMatrix& X, const NumericVector& y, const int maxit=100, const double tol=1e-6) { + const double nu_tol = 100.0; // maximum allowed value on linear predictor + const int n_ind = y.size(); #ifndef RQTL2_NODEBUG if(n_ind != X.rows()) @@ -49,7 +51,13 @@ double calc_ll_binreg_eigenchol(const NumericMatrix& X, const NumericVector& y, llik = 0.0; for(int ind=0; ind nu_tol) nu[ind] = nu_tol; + pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind])); + wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])); z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind]; llik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]); @@ -78,6 +86,8 @@ double calc_ll_binreg_eigenqr(const NumericMatrix& X, const NumericVector& y, const int maxit=100, const double tol=1e-6, const double qr_tol=1e-12) { + const double nu_tol = 100.0; // maximum allowed value on linear predictor + const int n_ind = y.size(); #ifndef RQTL2_NODEBUG if(n_ind != X.rows()) @@ -109,7 +119,13 @@ double calc_ll_binreg_eigenqr(const NumericMatrix& X, const NumericVector& y, llik = 0.0; for(int ind=0; ind nu_tol) nu[ind] = nu_tol; + pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind])); + wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])); z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind]; llik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]); @@ -174,6 +190,8 @@ List fit_binreg_eigenqr(const NumericMatrix& X, const double tol=1e-6, const double qr_tol=1e-12) { + const double nu_tol = 100.0; // maximum allowed value on linear predictor + const int n_ind = y.size(); #ifndef RQTL2_NODEBUG if(n_ind != X.rows()) @@ -184,10 +202,11 @@ List fit_binreg_eigenqr(const NumericMatrix& X, NumericVector pi(n_ind), wt(n_ind), nu(n_ind), z(n_ind); for(int ind=0; ind nu_tol) nu[ind] = nu_tol; + pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind])); + wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])); z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind]; + llik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]); } diff --git a/src/binreg_weighted_eigen.cpp b/src/binreg_weighted_eigen.cpp index ba0a17af..1937d0aa 100644 --- a/src/binreg_weighted_eigen.cpp +++ b/src/binreg_weighted_eigen.cpp @@ -20,6 +20,8 @@ double calc_ll_binreg_weighted_eigenchol(const NumericMatrix& X, const NumericVe const NumericVector &weights, const int maxit=100, const double tol=1e-6) { + const double nu_tol = 100.0; // maximum allowed value on linear predictor + const int n_ind = y.size(); #ifndef RQTL2_NODEBUG if(n_ind != X.rows()) @@ -53,7 +55,13 @@ double calc_ll_binreg_weighted_eigenchol(const NumericMatrix& X, const NumericVe llik = 0.0; for(int ind=0; ind nu_tol) nu[ind] = nu_tol; + pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind])); + wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])*weights[ind]); z[ind] = wt[ind]*(nu[ind] + (y[ind] - pi[ind])/(pi[ind]*(1.0-pi[ind]))); llik += (y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]))*weights[ind]; @@ -84,6 +92,8 @@ double calc_ll_binreg_weighted_eigenqr(const NumericMatrix& X, const NumericVect const int maxit=100, const double tol=1e-6, const double qr_tol=1e-12) { + const double nu_tol = 100.0; // maximum allowed value on linear predictor + const int n_ind = y.size(); #ifndef RQTL2_NODEBUG if(n_ind != X.rows()) @@ -117,7 +127,13 @@ double calc_ll_binreg_weighted_eigenqr(const NumericMatrix& X, const NumericVect llik = 0.0; for(int ind=0; ind nu_tol) nu[ind] = nu_tol; + pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind])); + wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])*weights[ind]); z[ind] = wt[ind]*(nu[ind] + (y[ind] - pi[ind])/(pi[ind]*(1.0-pi[ind]))); llik += (y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]))*weights[ind]; @@ -187,6 +203,8 @@ List fit_binreg_weighted_eigenqr(const NumericMatrix& X, const double tol=1e-6, const double qr_tol=1e-12) { + const double nu_tol = 100.0; // maximum allowed value on linear predictor + const int n_ind = y.size(); #ifndef RQTL2_NODEBUG if(n_ind != X.rows()) @@ -220,7 +238,13 @@ List fit_binreg_weighted_eigenqr(const NumericMatrix& X, llik = 0.0; for(int ind=0; ind nu_tol) nu[ind] = nu_tol; + pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind])); + wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])*weights[ind]); z[ind] = wt[ind]*(nu[ind] + (y[ind] - pi[ind])/(pi[ind]*(1.0-pi[ind]))); llik += (y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]))*weights[ind]; diff --git a/tests/testthat/test-fit1_binary.R b/tests/testthat/test-fit1_binary.R index 4ce49094..f4700d0d 100644 --- a/tests/testthat/test-fit1_binary.R +++ b/tests/testthat/test-fit1_binary.R @@ -314,3 +314,81 @@ test_that("fit1 works for binary traits with weights", { expect_equivalent(out_fit1_1$SE, sum_glm_1$coef[,2]) }) + +test_that("fit one for binary traits handles NA case", { + + iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2")) + iron <- iron[,c("2", "X")] + map <- insert_pseudomarkers(iron$gmap, step=1) + probs <- calc_genoprob(iron, map, err=0.002) + + phe <- iron$pheno[,1] + phe <- setNames(as.numeric(phe > quantile(phe, 0.95)), + ind_ids(iron)) + + suppressWarnings(out <- scan1(probs, phe, model="binary")) + + mar <- c("D2Mit304", "DXMit186") + p1 <- pull_genoprobpos(probs, mar[1]) + p2 <- pull_genoprobpos(probs, mar[2]) + + suppressWarnings(ll1a <- glm(phe ~ -1 + p1, family=binomial(link=logit))$deviance) + suppressWarnings(ll1b <- glm(phe ~ -1 + p2, family=binomial(link=logit))$deviance) + ll0 <- glm(phe ~ 1, family=binomial(link=logit))$deviance + + expect_equal(out[mar[1],1], (ll0-ll1a)/2/log(10)) + expect_equal(out[mar[2],1], (ll0-ll1b)/2/log(10), tol=1e-7) + + # repeat with weights + set.seed(20180720) + w <- setNames( runif(length(phe), 1, 5), names(phe)) + suppressWarnings(outw <- scan1(probs, phe, model="binary", weights=w)) + + suppressWarnings(ll1a_w <- glm(phe ~ -1 + p1, family=binomial(link=logit), weights=w)$deviance) + suppressWarnings(ll1b_w <- glm(phe ~ -1 + p2, family=binomial(link=logit), weights=w)$deviance) + suppressWarnings(ll0_w <- glm(phe ~ 1, family=binomial(link=logit), weights=w)$deviance) + + expect_equal(outw[mar[1],1], (ll0_w-ll1a_w)/2/log(10)) + expect_equal(outw[mar[2],1], (ll0_w-ll1b_w)/2/log(10), tol=1e-7) + +}) + +test_that("fit one for binary traits handles NA case with DO data", { + + if(isnt_karl()) skip("this test only run locally") + + file <- paste0("https://raw.githubusercontent.com/rqtl/", + "qtl2data/master/DOex/DOex.zip") + DOex <- read_cross2(file) + probs <- calc_genoprob(DOex, error_prob=0.002, cores=0) + apr <- genoprob_to_alleleprob(probs) + + phe <- setNames((DOex$pheno[,1] > quantile(DOex$pheno[,1], 0.95, na.rm=TRUE))*1, rownames(DOex$pheno)) + + suppressWarnings(out <- scan1(apr, phe, model="binary")) + + mar <- c("backupUNC020117657", "UNC020459944") + p1 <- pull_genoprobpos(apr, mar[1]) + p2 <- pull_genoprobpos(apr, mar[2]) + + suppressWarnings(ll1a <- glm(phe ~ -1 + p1, family=binomial(link=logit))$deviance) + suppressWarnings(ll1b <- glm(phe ~ -1 + p2, family=binomial(link=logit))$deviance) + ll0 <- glm(phe ~ 1, family=binomial(link=logit))$deviance + + expect_equal(out[mar[1],1], (ll0-ll1a)/2/log(10)) + expect_equal(out[mar[2],1], (ll0-ll1b)/2/log(10)) + + + # repeat with weights + set.seed(20180720) + w <- setNames( runif(length(phe), 1, 5), names(phe)) + suppressWarnings(outw <- scan1(apr, phe, model="binary", weights=w)) + + suppressWarnings(ll1a_w <- glm(phe ~ -1 + p1, family=binomial(link=logit), weights=w)$deviance) + suppressWarnings(ll1b_w <- glm(phe ~ -1 + p2, family=binomial(link=logit), weights=w)$deviance) + suppressWarnings(ll0_w <- glm(phe ~ 1, family=binomial(link=logit), weights=w)$deviance) + + expect_equal(outw[mar[1],1], (ll0_w-ll1a_w)/2/log(10)) + expect_equal(outw[mar[2],1], (ll0_w-ll1b_w)/2/log(10)) + +})