diff --git a/api/r/cellxgene.census/R/get_helpers.R b/api/r/cellxgene.census/R/get_helpers.R index a8edb15ae..363734afd 100644 --- a/api/r/cellxgene.census/R/get_helpers.R +++ b/api/r/cellxgene.census/R/get_helpers.R @@ -4,15 +4,17 @@ #' @param organism The organism to query, usually one of `Homo sapiens` or `Mus musculus` #' @param measurement_name The measurement object to query. Defaults to `RNA`. #' -#' @return a [Matrix::sparseMatrix] with dataset join id & feature join id dimensions, -#' filled with 1s indicating presence +#' @return A sparse matrix-like object with dataset join id & feature join id dimensions, +#' filled with 1s indicating presence. The sparse matrix is accessed with zero-based +#' indexes, since the join id's may be zero. However, any vector- or matrix-valued +#' slices taken from the zero-based view will be conventional one-based R objects. #' @export #' #' @examples get_presence_matrix <- function(census, organism, measurement_name = "RNA") { exp <- get_experiment(census, organism) presence <- exp$ms$get(measurement_name)$get("feature_dataset_presence_matrix") - return(presence$read_sparse_matrix()) + return(presence$read_sparse_matrix_zero_based()) } #' Convenience wrapper around `SOMAExperimentAxisQuery`, to build and execute a diff --git a/api/r/cellxgene.census/tests/testthat/test-get_helpers.R b/api/r/cellxgene.census/tests/testthat/test-get_helpers.R index 5f9940700..369afcecc 100644 --- a/api/r/cellxgene.census/tests/testthat/test-get_helpers.R +++ b/api/r/cellxgene.census/tests/testthat/test-get_helpers.R @@ -21,14 +21,16 @@ test_that("get_presence_matrix", { datasets <- as.data.frame(census$get("census_info")$get("datasets")$read()) for (org in c("homo_sapiens", "mus_musculus")) { pm <- get_presence_matrix(census, org) - expect_s4_class(pm, "sparseMatrix") + expect_true(inherits(pm, "matrixZeroBasedView")) + pm1 <- tiledbsoma::as.one.based(pm) + expect_s4_class(pm1, "sparseMatrix") expect_equal(nrow(pm), nrow(datasets)) expect_equal( ncol(pm), nrow(census$get("census_data")$get(org)$ms$get("RNA")$var$read(column_names = "soma_joinid")) ) - expect_equal(min(pm), 0) - expect_equal(max(pm), 1) + expect_equal(min(pm1), 0) + expect_equal(max(pm1), 1) } }) diff --git a/api/r/cellxgene.census/vignettes/census_dataset_presence.Rmd b/api/r/cellxgene.census/vignettes/census_dataset_presence.Rmd index 15fc8b1d5..6b7539e78 100644 --- a/api/r/cellxgene.census/vignettes/census_dataset_presence.Rmd +++ b/api/r/cellxgene.census/vignettes/census_dataset_presence.Rmd @@ -61,7 +61,7 @@ print(var_df) ```{r} var_joinid <- var_df$soma_joinid[var_df$feature_id == "ENSG00000286096"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == "97a17473-e2b1-4f31-a544-44a60773e2dd"] -is_present <- presence_matrix[dataset_joinid + 1, var_joinid + 1] +is_present <- presence_matrix[dataset_joinid, var_joinid] cat(paste("Feature is", if (is_present) "present." else "not present.")) ``` @@ -75,9 +75,9 @@ var_joinid <- var_df$soma_joinid[var_df$feature_id == "ENSG00000286096"] # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. -dataset_joinids <- datasets_df$soma_joinid[presence_matrix[, var_joinid + 1] != 0] +dataset_joinids <- datasets_df$soma_joinid[presence_matrix[, var_joinid] != 0] -print(datasets_df[dataset_joinids + 1, ]) +print(datasets_df[dataset_joinids, ]) ``` ## What features are in a dataset? @@ -92,7 +92,7 @@ This example also demonstrates the ability to do the query on multiple datasets. dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == "17481d16-ee44-49e5-bcf0-28c0780d8c4a"] # Slice the presence matrix by the first dimension, i.e., by dataset -var_joinids <- var_df$soma_joinid[which(Matrix::colSums(presence_matrix[dataset_joinids + 1, ]) > 0)] +var_joinids <- var_df$soma_joinid[which(Matrix::colSums(presence_matrix[dataset_joinids, ]) > 0)] -print(var_df[var_joinids + 1, ]) +print(var_df[var_joinids, ]) ```