Skip to content

Commit

Permalink
added updated local version of propr/grea module
Browse files Browse the repository at this point in the history
  • Loading branch information
suzannejin committed Oct 10, 2024
1 parent 38a32b6 commit 118b257
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 25 deletions.
2 changes: 1 addition & 1 deletion assets/tools_samplesheet.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ propd_fdr,propd,--permutation 100,,,,
pcorbshrink,,,propr,--metric pcor.bshrink,,
propr,,,propr,--metric rho,,
cor,,,propr,--metric cor,,
propd_grea,propd,,,,grea,--permutation 10
propd_grea,propd,,,,grea,
1 change: 0 additions & 1 deletion conf/test_experimental.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ params {
max_time = '6.h'

// Input data

input = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv'
matrix = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv'
contrasts = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.contrasts.csv'
Expand Down
8 changes: 4 additions & 4 deletions modules/local/propr/grea/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process PROPR_GREA {
tag "$meta.id"
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0':
'biocontainers/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0' }"
// conda "${moduleDir}/environment.yml"
// container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
// 'https://depot.galaxyproject.org/singularity/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0':
// 'biocontainers/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0' }"

input:
tuple val(meta), path(adj)
Expand Down
54 changes: 43 additions & 11 deletions modules/local/propr/grea/templates/grea.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,39 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names
#' Loads the .gmt file and converts it into a knowledge database
#'
#' Each non-empty line of the .gmt file is read as tab-separated fields: the concept
#' name, the concept description, and then the members (eg. genes) of the concept.
#'
#' @param filename path of the .gmt file
#' @param nodes vector of node (eg. gene) names. Note that this set should be as complete
#' as possible. So it should not only contain the target genes but also the background genes.
#' @return a list with: `db` A knowledge database (0/1 matrix) where each row is a graph node
#' (eg. gene) and each column is a concept (eg. GO term, pathway, etc) and `description`
#' A named list with the description of each concept (NA when the line has no description)
load_gmt <- function(filename, nodes) {

    # read gmt file, ignoring blank lines since they do not define any concept
    gmt <- readLines(filename)
    gmt <- gmt[nzchar(trimws(gmt))]
    gmt <- strsplit(gmt, "\\t")

    # concept names are the first field of each line
    # vapply() guarantees a character vector even when the file has no concepts
    concepts <- vapply(gmt, function(entry) entry[[1]], character(1))

    # initialize database matrix
    db <- matrix(0, nrow = length(nodes), ncol = length(gmt))
    rownames(db) <- nodes
    colnames(db) <- concepts

    # description of the concepts
    description <- list()

    # for concept in gmt
    # seq_along() instead of 1:length() so that an empty .gmt file gives an empty database
    for (i in seq_along(gmt)) {

        entry <- gmt[[i]]

        # get description; a line holding only the concept name has none
        description[[concepts[[i]]]] <- if (length(entry) >= 2) entry[[2]] else NA_character_

        # fill 1 if node is in concept; members that are not in `nodes` are ignored
        nodes_in_concept <- entry[-c(1, 2)]
        nodes_in_concept <- nodes_in_concept[nodes_in_concept %in% nodes]
        db[nodes_in_concept, i] <- 1
    }

    return(list(db = db, description = description))
}

################################################
Expand All @@ -92,6 +104,10 @@ opt <- list(
adj = '$adj', # adjacency matrix
gmt = '$gmt', # knowledge database .gmt file

# parameters for gene sets
set_min = 15, # minimum number of genes in a set
set_max = 500, # maximum number of genes in a set

# parameters for permutation test
permutation = 100,

Expand Down Expand Up @@ -173,30 +189,46 @@ if (!is.na(opt\$seed)) {
# load adjacency matrix
# this matrix should have gene x gene dimensions

adj <- read_delim_flexible(
adj <- as.matrix(read_delim_flexible(
opt\$adj,
header = TRUE,
row.names = 1,
check.names = TRUE
)
))
if (nrow(adj) != ncol(adj)) {
stop('Adjacency matrix is not square')
}
if (!all(rownames(adj) == colnames(adj))) {
stop('Adjacency matrix row names are not equal to column names')
}

# load and process knowledge database

db <- load_gmt(
gmt <- load_gmt(
opt\$gmt,
rownames(adj)
)

# filter gene sets
# gene sets with fewer than set_min or more than set_max genes are removed,
# so sets with exactly set_min or set_max genes are kept

set_size <- colSums(gmt\$db)
idx <- which(set_size >= opt\$set_min & set_size <= opt\$set_max)
# drop = FALSE keeps a matrix even when a single gene set passes the filter
gmt\$db <- gmt\$db[, idx, drop = FALSE]
# subset by concept name so the descriptions stay aligned with the columns of db
gmt\$description <- gmt\$description[colnames(gmt\$db)]

# run GREA
# Basically, it calculates the odds ratio of the graph being enriched in each concept,
# and the FDR of the odds ratio through permutation tests

odds <- runGraflex(
adj,
db,
gmt\$db,
p=opt\$permutation,
ncores=opt\$ncores
)
odds\$Description <- sapply(odds\$Concept, function(concept)
gmt\$description[[concept]]
)

################################################
################################################
Expand All @@ -208,7 +240,7 @@ write.table(
odds,
file = paste0(opt\$prefix, '.grea.tsv'),
col.names = TRUE,
row.names = TRUE,
row.names = FALSE,
sep = '\\t',
quote = FALSE

Expand Down
8 changes: 4 additions & 4 deletions modules/local/propr/propd/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process PROPR_PROPD {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0':
'biocontainers/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0' }"
// conda "${moduleDir}/environment.yml"
// container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
// 'https://depot.galaxyproject.org/singularity/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0':
// 'biocontainers/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0' }"

input:
tuple val(meta), path(count)
Expand Down
8 changes: 4 additions & 4 deletions modules/local/propr/propr/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process PROPR_PROPR {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0':
'biocontainers/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0' }"
// conda "${moduleDir}/environment.yml"
// container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
// 'https://depot.galaxyproject.org/singularity/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0':
// 'biocontainers/mulled-v2-401a215d4024df776a98d90a352048199e342a3d:5ba9bbf6cd4f4f98983526673c223d2e7d829b36-0' }"

input:
tuple val(meta), path(count)
Expand Down

0 comments on commit 118b257

Please sign in to comment.