Skip to content

Commit

Permalink
use multiple cores in scan1_pg.R routine scan1_pg_clean (#234)
Browse files Browse the repository at this point in the history
* modify scan1_pg_clean to branch by chr, pheno and pos

* modified scan1_pg_clean to better divide positions relative to phenos when using cores
  • Loading branch information
byandell authored Jun 20, 2024
1 parent 2d39f6f commit f5d2fae
Showing 1 changed file with 47 additions and 19 deletions.
66 changes: 47 additions & 19 deletions R/scan1_pg.R
Original file line number Diff line number Diff line change
Expand Up @@ -314,14 +314,53 @@ scan1_pg_clean <-
else no_x <- FALSE
} else loco <- TRUE

batches <- list(chr=rep(seq_len(length(genoprobs)), ncol(pheno)),
phecol=rep(seq_len(ncol(pheno)), each=length(genoprobs)))

# This creates a batch for every chr, pheno and set of positions.
# If cores == 1, use all positions on each chr (original code).
# if cores != 1, then
# for each chr, divide positions into contiguous intervals
# number of intervals across chr and phenos =~ cores
npos_by_chr <- dim(genoprobs)[3,]
# Genoprob names not retained for qtl2fst
names(npos_by_chr) <- names(genoprobs)
if(n_cores(cores) == 1) {
# Add beginning and end pos of each chr to original batches list.
batches <- list(chr=rep(seq_len(length(genoprobs)), ncol(pheno)),
pos1 = rep(1, length(genoprobs) * ncol(pheno)),
pos2 = rep(npos_by_chr, ncol(pheno)),
phecol=rep(seq_len(ncol(pheno)), each=length(genoprobs)))
} else {
# Divide each chr into intervals pos1:pos2 contiguous from 1 to length of chr.
tmpfn <- function(x) {
npos <- ceiling(x * ncol(pheno) * length(npos_by_chr) / n_cores(cores))
pos1 <- seq(1, x, by = npos)
pos2 <- c(pos1[-1] - 1, x)
list(pos1 = pos1, pos2 = pos2)
}
npos <- lapply(npos_by_chr, tmpfn)

# Collapse pos1 and pos2 into vectors.
pos1 <- pos2 <- NULL
for(i in names(npos)) {
pos1 <- c(pos1, npos[[i]]$pos1)
pos2 <- c(pos2, npos[[i]]$pos2)
}
# npos = number of intervals by chr
npos <- sapply(npos, function(x) length(x$pos1))

# Batches list
batches <- list(chr=rep(rep(seq_len(length(genoprobs)),npos), ncol(pheno)),
pos1=rep(pos1, ncol(pheno)),
pos2=rep(pos2, ncol(pheno)),
phecol=rep(seq_len(ncol(pheno)), each=sum(npos)))
}

# function that does the work
by_batch_func <-
function(batch)
{
chr <- batches$chr[batch]
pos1 <- batches$pos1[batch]
pos2 <- batches$pos2[batch]
phecol <- batches$phecol[batch]

if(loco) {
Expand All @@ -342,11 +381,12 @@ scan1_pg_clean <-
# subset the genotype probabilities: drop cols with all 0s, plus the first column
Xcol2drop <- genoprob_Xcol2drop[[chr]]
if(length(Xcol2drop) > 0) {
pr <- genoprobs[[chr]][ind2keep,-Xcol2drop,,drop=FALSE]
pr <- genoprobs[[chr]][ind2keep,-Xcol2drop,pos1:pos2,drop=FALSE]
pr <- pr[,-1,,drop=FALSE]
}
else
pr <- genoprobs[[chr]][ind2keep,-1,,drop=FALSE]
else {
pr <- genoprobs[[chr]][ind2keep,-1,pos1:pos2,drop=FALSE]
}
# weight the probabilities
pr <- weight_array(pr, weights)

Expand Down Expand Up @@ -384,19 +424,7 @@ scan1_pg_clean <-
if(any(result_is_null))
stop("cluster problem: returned ", sum(result_is_null), " NULLs.")

npos_by_chr <- dim(genoprobs)[3,]
totpos <- sum(npos_by_chr)
pos_index <- split(seq_len(totpos), rep(seq_len(length(genoprobs)), npos_by_chr))

# to contain the results
result <- matrix(nrow=totpos, ncol=ncol(pheno))
for(batch in seq_along(batches$chr)) {
chr <- batches$chr[batch]
phecol <- batches$phecol[batch]
result[pos_index[[chr]], phecol] <- lod_list[[batch]]
}

result
matrix(nrow=sum(npos_by_chr), ncol=ncol(pheno), unlist(lod_list))
}


Expand Down

0 comments on commit f5d2fae

Please sign in to comment.