Skip to content

Commit

Permalink
uses generator in motifs injection
Browse files Browse the repository at this point in the history
  • Loading branch information
KrystynaGrzesiak committed Aug 20, 2024
1 parent a787c7e commit dcc3acd
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 106 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Imports:
checkmate (>= 2.0.0),
FCBF,
FSelectorRcpp,
itertools,
seqR,
praznik,
purrr
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(get_target_logic)
export(rbinom_vec)
export(validate_motifs)
import(checkmate)
import(itertools)
importFrom(FCBF,fcbf)
importFrom(FSelectorRcpp,information_gain)
importFrom(biogram,test_features)
Expand Down
158 changes: 55 additions & 103 deletions R/motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ generate_motif <- function(alphabet, n, d, motifProbs = NULL) {
#'
#' This function injects motifs to a sequence
#'
#' @import itertools
#'
#' @param motifs list of motifs to be injected
#' @param sequence vector of alphabet elements
#'
Expand All @@ -71,117 +73,67 @@ generate_motif <- function(alphabet, n, d, motifProbs = NULL) {
#'
#' @export

add_motifs <- function(motifs, sequence, max_attempts = 20) {
add_motifs <- function(motifs, sequence) {

sequence_len <- length(sequence)

# create list of possible motifs' positions
maximum_motifs_positions <- lapply(motifs, function(x)
seq(sequence_len - length(x) + 1))

positions_to_sample <- maximum_motifs_positions
motifs_positions <- numeric(length(motifs))
attempt <- 1
ith_motif <- 1

while(attempt <= max_attempts & ith_motif <= length(motifs)) {
dummy_ok <- TRUE
position <- sample(positions_to_sample[[ith_motif]], 1)
jth_motif <- 2

while(jth_motif <= length(motifs)) {

remained_positions <-
setdiff(positions_to_sample[[jth_motif]],
(position - length(motifs[[jth_motif]]) + 1):(position + length(motifs[[ith_motif]]) - 1))

if(length(remained_positions) == 0) {

if(attempt == max_attempts)
stop("Given motifs cannot be injected to a sequence!")

positions_to_sample <- maximum_motifs_positions
motifs_positions <- numeric(length(motifs))
ith_motif <<- 1
jth_motif <<- 2

attempt <- attempt + 1
print(paste0("attempt", attempt))
dummy_ok <- FALSE
break()
} else {
positions_to_sample[[jth_motif]] <- remained_positions
sample(seq(sequence_len - length(x) + 1))
)

prod_generator <- do.call(product, maximum_motifs_positions)

max_iter <- prod(lengths(maximum_motifs_positions))
remaining_iterations <- max_iter %% 10000

iterations <- as.vector(na.omit(
c(rep(10000, floor(max_iter/10000)),
ifelse(remaining_iterations == 0, NA, remaining_iterations))
))

for(iter in na.omit(iterations)) {

motifs_grid <- t(sapply(1:iter, function(i){nextElem(prod_generator)}))

for (i in 1:nrow(motifs_grid)) {
list_of_masks <- list()
injected_sequence <- sequence
injected_positions <- logical(length(sequence))

for (j in 1:ncol(motifs_grid)) {
mask <- rep(FALSE, sequence_len)
new_injected_sequence <- injected_sequence
motif <- motifs[[j]]
ids <- 0:(length(motif) - 1)
ids <- ids[motif != "_"] + unlist(motifs_grid[i, j])
mask[ids] <- TRUE
new_injected_sequence[ids] <- motif[motif != "_"]

if (j == 1) {
injected_sequence <- new_injected_sequence
injected_positions <- mask
} else {
if (all(injected_sequence[injected_positions] ==
new_injected_sequence[injected_positions])){
injected_sequence <- new_injected_sequence
injected_positions <- (injected_positions | mask)
} else {
break
}
}

list_of_masks[[j]] <- mask

if (j == ncol(motifs_grid)){
attr(injected_sequence, "motifs") <- motifs
attr(injected_sequence, "masks") <- list_of_masks
return(injected_sequence)
}
}

jth_motif <- jth_motif + 1
}

if(!dummy_ok)
next()

motifs_positions[ith_motif] <- position
ith_motif <- ith_motif + 1
}

list_of_masks <- list()

for(i in 1:length(motifs)) {
mask <- rep(FALSE, sequence_len)
motif <- motifs[[i]]
motif_ids <- motifs_positions[i]:(motifs_positions[i] + length(motif) - 1)
motif_ids <- setdiff(motif_ids, motif_ids[motif == "_"])

mask[motif_ids] <- TRUE
list_of_masks[[i]] <- mask

sequence[motif_ids] <- motif[motif != "_"]
}

attr(sequence, "motifs") <- motifs
attr(sequence, "masks") <- list_of_masks

sequence

# motifs_grid <- expand.grid(maximum_motifs_positions)
# motifs_grid <- motifs_grid[sample(1:nrow(motifs_grid)), , drop = FALSE]
#
# for (i in 1:nrow(motifs_grid)) {
# list_of_masks <- list()
# injected_sequence <- sequence
# injected_positions <- logical(length(sequence))
#
# for (j in 1:ncol(motifs_grid)) {
# mask <- rep(FALSE, sequence_len)
# new_injected_sequence <- injected_sequence
# motif <- motifs[[j]]
# ids <- 0:(length(motif) - 1)
# ids <- ids[motif != "_"] + motifs_grid[i, j]
# mask[ids] <- TRUE
# new_injected_sequence[ids] <- motif[motif != "_"]
#
# if (j == 1) {
# injected_sequence <- new_injected_sequence
# injected_positions <- mask
# } else {
# if (all(injected_sequence[injected_positions] ==
# new_injected_sequence[injected_positions])){
# injected_sequence <- new_injected_sequence
# injected_positions <- (injected_positions | mask)
# } else {
# break
# }
# }
#
# list_of_masks[[j]] <- mask
#
# if (j == ncol(motifs_grid)){
# attr(injected_sequence, "motifs") <- motifs
# attr(injected_sequence, "masks") <- list_of_masks
# return(injected_sequence)
# }
# }
# }
# stop("Given motifs cannot be injected to a sequence!")
stop("Given motifs cannot be injected to a sequence!")
}

#' Validate if given set of motifs can occur in a sequence at the same time
Expand Down
2 changes: 1 addition & 1 deletion man/add_motifs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions renv.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"R": {
"Version": "4.3.3",
"Version": "4.4.1",
"Repositories": [
{
"Name": "BioCsoft",
Expand Down Expand Up @@ -78,7 +78,6 @@
"Package": "BiocVersion",
"Version": "3.18.1",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.18",
"Requirements": [
"R"
],
Expand Down Expand Up @@ -902,6 +901,18 @@
],
"Hash": "8954069286b4b2b0d023d1b288dce978"
},
"itertools": {
"Package": "itertools",
"Version": "0.1-3",
"Source": "Repository",
"Repository": "CRAN",
"Requirements": [
"R",
"iterators",
"parallel"
],
"Hash": "9d0ef8912bc1d66357977f5b8e4dc5ed"
},
"jquerylib": {
"Package": "jquerylib",
"Version": "0.1.4",
Expand Down

0 comments on commit dcc3acd

Please sign in to comment.