Skip to content

Commit

Permalink
Merge pull request #74 from kbroman/master
Browse files Browse the repository at this point in the history
Fix problem with NAs in binary trait regression (Issue #55)
  • Loading branch information
kbroman authored Jul 20, 2018
2 parents a4a9542 + 1d1dbc5 commit 8dafba1
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 6 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: qtl2
Version: 0.15-22
Date: 2018-07-18
Version: 0.15-23
Date: 2018-07-20
Title: Quantitative Trait Locus Mapping in Experimental Crosses
Description: R/qtl2 provides a set of tools to perform quantitative
trait locus (QTL) analysis in experimental crosses. It is a
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## qtl2 0.15-22 (2018-07-18)
## qtl2 0.15-23 (2018-07-20)

### New features

Expand Down Expand Up @@ -78,6 +78,9 @@
- Fix a bug in `scan1snps()` where it didn't check that the `genoprobs`
and `map` conform.

- Revised underlying binary trait regression function to avoid some of
the tendency towards NAs.


## qtl2 0.14 (2018-03-09)

Expand Down
32 changes: 29 additions & 3 deletions src/binreg_eigen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ using namespace Eigen;
double calc_ll_binreg_eigenchol(const NumericMatrix& X, const NumericVector& y,
const int maxit=100, const double tol=1e-6)
{
const double nu_tol = 100.0; // maximum allowed value on linear predictor

const int n_ind = y.size();
#ifndef RQTL2_NODEBUG
if(n_ind != X.rows())
Expand Down Expand Up @@ -49,7 +51,13 @@ double calc_ll_binreg_eigenchol(const NumericMatrix& X, const NumericVector& y,
llik = 0.0;
for(int ind=0; ind<n_ind; ind++) {
nu[ind] /= wt[ind]; // need to divide by previous weights

// don't let nu get too large or too small
if(nu[ind] < -nu_tol) nu[ind] = -nu_tol;
else if(nu[ind] > nu_tol) nu[ind] = nu_tol;

pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind]));

wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind]));
z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind];
llik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]);
Expand Down Expand Up @@ -78,6 +86,8 @@ double calc_ll_binreg_eigenqr(const NumericMatrix& X, const NumericVector& y,
const int maxit=100, const double tol=1e-6,
const double qr_tol=1e-12)
{
const double nu_tol = 100.0; // maximum allowed value on linear predictor

const int n_ind = y.size();
#ifndef RQTL2_NODEBUG
if(n_ind != X.rows())
Expand Down Expand Up @@ -109,7 +119,13 @@ double calc_ll_binreg_eigenqr(const NumericMatrix& X, const NumericVector& y,
llik = 0.0;
for(int ind=0; ind<n_ind; ind++) {
nu[ind] /= wt[ind]; // need to divide by previous weights

// don't let nu get too large or too small
if(nu[ind] < -nu_tol) nu[ind] = -nu_tol;
else if(nu[ind] > nu_tol) nu[ind] = nu_tol;

pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind]));

wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind]));
z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind];
llik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]);
Expand Down Expand Up @@ -174,6 +190,8 @@ List fit_binreg_eigenqr(const NumericMatrix& X,
const double tol=1e-6,
const double qr_tol=1e-12)
{
const double nu_tol = 100.0; // maximum allowed value on linear predictor

const int n_ind = y.size();
#ifndef RQTL2_NODEBUG
if(n_ind != X.rows())
Expand All @@ -184,10 +202,11 @@ List fit_binreg_eigenqr(const NumericMatrix& X,
NumericVector pi(n_ind), wt(n_ind), nu(n_ind), z(n_ind);

for(int ind=0; ind<n_ind; ind++) {
pi[ind] = (y[ind] + 0.5)/2;
wt[ind] = sqrt(pi[ind] * (1-pi[ind]));
nu[ind] = log(pi[ind]) - log(1-pi[ind]);
pi[ind] = (y[ind] + 0.5)/2.0;
wt[ind] = sqrt(pi[ind] * (1.0-pi[ind]));
nu[ind] = log(pi[ind]) - log(1.0-pi[ind]);
z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind];

curllik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]);
}

Expand All @@ -205,9 +224,16 @@ List fit_binreg_eigenqr(const NumericMatrix& X,
llik = 0.0;
for(int ind=0; ind<n_ind; ind++) {
nu[ind] /= wt[ind]; // need to divide by previous weights

// don't let nu get too large or too small
if(nu[ind] < -nu_tol) nu[ind] = -nu_tol;
else if(nu[ind] > nu_tol) nu[ind] = nu_tol;

pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind]));

wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind]));
z[ind] = nu[ind]*wt[ind] + (y[ind] - pi[ind])/wt[ind];

llik += y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]);
}

Expand Down
24 changes: 24 additions & 0 deletions src/binreg_weighted_eigen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ double calc_ll_binreg_weighted_eigenchol(const NumericMatrix& X, const NumericVe
const NumericVector &weights,
const int maxit=100, const double tol=1e-6)
{
const double nu_tol = 100.0; // maximum allowed value on linear predictor

const int n_ind = y.size();
#ifndef RQTL2_NODEBUG
if(n_ind != X.rows())
Expand Down Expand Up @@ -53,7 +55,13 @@ double calc_ll_binreg_weighted_eigenchol(const NumericMatrix& X, const NumericVe
llik = 0.0;
for(int ind=0; ind<n_ind; ind++) {
nu[ind] /= wt[ind]; // need to divide by previous weights

// don't let nu get too large or too small
if(nu[ind] < -nu_tol) nu[ind] = -nu_tol;
else if(nu[ind] > nu_tol) nu[ind] = nu_tol;

pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind]));

wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])*weights[ind]);
z[ind] = wt[ind]*(nu[ind] + (y[ind] - pi[ind])/(pi[ind]*(1.0-pi[ind])));
llik += (y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]))*weights[ind];
Expand Down Expand Up @@ -84,6 +92,8 @@ double calc_ll_binreg_weighted_eigenqr(const NumericMatrix& X, const NumericVect
const int maxit=100, const double tol=1e-6,
const double qr_tol=1e-12)
{
const double nu_tol = 100.0; // maximum allowed value on linear predictor

const int n_ind = y.size();
#ifndef RQTL2_NODEBUG
if(n_ind != X.rows())
Expand Down Expand Up @@ -117,7 +127,13 @@ double calc_ll_binreg_weighted_eigenqr(const NumericMatrix& X, const NumericVect
llik = 0.0;
for(int ind=0; ind<n_ind; ind++) {
nu[ind] /= wt[ind]; // need to divide by previous weights

// don't let nu get too large or too small
if(nu[ind] < -nu_tol) nu[ind] = -nu_tol;
else if(nu[ind] > nu_tol) nu[ind] = nu_tol;

pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind]));

wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])*weights[ind]);
z[ind] = wt[ind]*(nu[ind] + (y[ind] - pi[ind])/(pi[ind]*(1.0-pi[ind])));
llik += (y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]))*weights[ind];
Expand Down Expand Up @@ -187,6 +203,8 @@ List fit_binreg_weighted_eigenqr(const NumericMatrix& X,
const double tol=1e-6,
const double qr_tol=1e-12)
{
const double nu_tol = 100.0; // maximum allowed value on linear predictor

const int n_ind = y.size();
#ifndef RQTL2_NODEBUG
if(n_ind != X.rows())
Expand Down Expand Up @@ -220,7 +238,13 @@ List fit_binreg_weighted_eigenqr(const NumericMatrix& X,
llik = 0.0;
for(int ind=0; ind<n_ind; ind++) {
nu[ind] /= wt[ind]; // need to divide by previous weights

// don't let nu get too large or too small
if(nu[ind] < -nu_tol) nu[ind] = -nu_tol;
else if(nu[ind] > nu_tol) nu[ind] = nu_tol;

pi[ind] = exp(nu[ind])/(1.0 + exp(nu[ind]));

wt[ind] = sqrt(pi[ind] * (1.0 - pi[ind])*weights[ind]);
z[ind] = wt[ind]*(nu[ind] + (y[ind] - pi[ind])/(pi[ind]*(1.0-pi[ind])));
llik += (y[ind] * log10(pi[ind]) + (1.0-y[ind])*log10(1.0-pi[ind]))*weights[ind];
Expand Down
78 changes: 78 additions & 0 deletions tests/testthat/test-fit1_binary.R
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,81 @@ test_that("fit1 works for binary traits with weights", {
expect_equivalent(out_fit1_1$SE, sum_glm_1$coef[,2])

})

test_that("fit one for binary traits handles NA case", {

iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2"))
iron <- iron[,c("2", "X")]
map <- insert_pseudomarkers(iron$gmap, step=1)
probs <- calc_genoprob(iron, map, err=0.002)

phe <- iron$pheno[,1]
phe <- setNames(as.numeric(phe > quantile(phe, 0.95)),
ind_ids(iron))

suppressWarnings(out <- scan1(probs, phe, model="binary"))

mar <- c("D2Mit304", "DXMit186")
p1 <- pull_genoprobpos(probs, mar[1])
p2 <- pull_genoprobpos(probs, mar[2])

suppressWarnings(ll1a <- glm(phe ~ -1 + p1, family=binomial(link=logit))$deviance)
suppressWarnings(ll1b <- glm(phe ~ -1 + p2, family=binomial(link=logit))$deviance)
ll0 <- glm(phe ~ 1, family=binomial(link=logit))$deviance

expect_equal(out[mar[1],1], (ll0-ll1a)/2/log(10))
expect_equal(out[mar[2],1], (ll0-ll1b)/2/log(10), tol=1e-7)

# repeat with weights
set.seed(20180720)
w <- setNames( runif(length(phe), 1, 5), names(phe))
suppressWarnings(outw <- scan1(probs, phe, model="binary", weights=w))

suppressWarnings(ll1a_w <- glm(phe ~ -1 + p1, family=binomial(link=logit), weights=w)$deviance)
suppressWarnings(ll1b_w <- glm(phe ~ -1 + p2, family=binomial(link=logit), weights=w)$deviance)
suppressWarnings(ll0_w <- glm(phe ~ 1, family=binomial(link=logit), weights=w)$deviance)

expect_equal(outw[mar[1],1], (ll0_w-ll1a_w)/2/log(10))
expect_equal(outw[mar[2],1], (ll0_w-ll1b_w)/2/log(10), tol=1e-7)

})

test_that("fit one for binary traits handles NA case with DO data", {

if(isnt_karl()) skip("this test only run locally")

file <- paste0("https://raw.githubusercontent.com/rqtl/",
"qtl2data/master/DOex/DOex.zip")
DOex <- read_cross2(file)
probs <- calc_genoprob(DOex, error_prob=0.002, cores=0)
apr <- genoprob_to_alleleprob(probs)

phe <- setNames((DOex$pheno[,1] > quantile(DOex$pheno[,1], 0.95, na.rm=TRUE))*1, rownames(DOex$pheno))

suppressWarnings(out <- scan1(apr, phe, model="binary"))

mar <- c("backupUNC020117657", "UNC020459944")
p1 <- pull_genoprobpos(apr, mar[1])
p2 <- pull_genoprobpos(apr, mar[2])

suppressWarnings(ll1a <- glm(phe ~ -1 + p1, family=binomial(link=logit))$deviance)
suppressWarnings(ll1b <- glm(phe ~ -1 + p2, family=binomial(link=logit))$deviance)
ll0 <- glm(phe ~ 1, family=binomial(link=logit))$deviance

expect_equal(out[mar[1],1], (ll0-ll1a)/2/log(10))
expect_equal(out[mar[2],1], (ll0-ll1b)/2/log(10))


# repeat with weights
set.seed(20180720)
w <- setNames( runif(length(phe), 1, 5), names(phe))
suppressWarnings(outw <- scan1(apr, phe, model="binary", weights=w))

suppressWarnings(ll1a_w <- glm(phe ~ -1 + p1, family=binomial(link=logit), weights=w)$deviance)
suppressWarnings(ll1b_w <- glm(phe ~ -1 + p2, family=binomial(link=logit), weights=w)$deviance)
suppressWarnings(ll0_w <- glm(phe ~ 1, family=binomial(link=logit), weights=w)$deviance)

expect_equal(outw[mar[1],1], (ll0_w-ll1a_w)/2/log(10))
expect_equal(outw[mar[2],1], (ll0_w-ll1b_w)/2/log(10))

})

0 comments on commit 8dafba1

Please sign in to comment.