chanzuckerberg · ebezzi · May 12, 2023 · May 11, 2023 · May 11, 2023 · May 12, 2023
diff --git a/api/r/cellxgene.census/R/get_helpers.R b/api/r/cellxgene.census/R/get_helpers.R
@@ -4,15 +4,17 @@
 #' @param organism The organism to query, usually one of `Homo sapiens` or `Mus musculus`
 #' @param measurement_name The measurement object to query. Defaults to `RNA`.
 #'
-#' @return a [Matrix::sparseMatrix] with dataset join id & feature join id dimensions,
-#'         filled with 1s indicating presence
+#' @return A sparse matrix-like object with dataset join id & feature join id dimensions,
+#'         filled with 1s indicating presence. The sparse matrix is accessed with zero-based
+#'         indexes, since the join id's may be zero. However, any vector- or matrix-valued
+#'         slices taken from the zero-based view will be conventional one-based R objects.
 #' @export
 #'
 #' @examples
 get_presence_matrix <- function(census, organism, measurement_name = "RNA") {
   exp <- get_experiment(census, organism)
   presence <- exp$ms$get(measurement_name)$get("feature_dataset_presence_matrix")
-  return(presence$read_sparse_matrix())
+  return(presence$read_sparse_matrix_zero_based())
 }
 
 #' Convenience wrapper around `SOMAExperimentAxisQuery`, to build and execute a

diff --git a/api/r/cellxgene.census/tests/testthat/test-get_helpers.R b/api/r/cellxgene.census/tests/testthat/test-get_helpers.R
@@ -21,14 +21,16 @@ test_that("get_presence_matrix", {
   datasets <- as.data.frame(census$get("census_info")$get("datasets")$read())
   for (org in c("homo_sapiens", "mus_musculus")) {
     pm <- get_presence_matrix(census, org)
-    expect_s4_class(pm, "sparseMatrix")
+    expect_true(inherits(pm, "matrixZeroBasedView"))
+    pm1 <- tiledbsoma::as.one.based(pm)
+    expect_s4_class(pm1, "sparseMatrix")
     expect_equal(nrow(pm), nrow(datasets))
     expect_equal(
       ncol(pm),
       nrow(census$get("census_data")$get(org)$ms$get("RNA")$var$read(column_names = "soma_joinid"))
     )
-    expect_equal(min(pm), 0)
-    expect_equal(max(pm), 1)
+    expect_equal(min(pm1), 0)
+    expect_equal(max(pm1), 1)
   }
 })
 

diff --git a/api/r/cellxgene.census/vignettes/census_dataset_presence.Rmd b/api/r/cellxgene.census/vignettes/census_dataset_presence.Rmd
@@ -61,7 +61,7 @@ print(var_df)
 ```{r}
 var_joinid <- var_df$soma_joinid[var_df$feature_id == "ENSG00000286096"]
 dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == "97a17473-e2b1-4f31-a544-44a60773e2dd"]
-is_present <- presence_matrix[dataset_joinid + 1, var_joinid + 1]
+is_present <- presence_matrix[dataset_joinid, var_joinid]
 cat(paste("Feature is", if (is_present) "present." else "not present."))
 ```
 
@@ -75,9 +75,9 @@ var_joinid <- var_df$soma_joinid[var_df$feature_id == "ENSG00000286096"]
 
 # The presence matrix is indexed by the joinids of the dataset and var dataframes,
 # so slice out the feature of interest by its joinid.
-dataset_joinids <- datasets_df$soma_joinid[presence_matrix[, var_joinid + 1] != 0]
+dataset_joinids <- datasets_df$soma_joinid[presence_matrix[, var_joinid] != 0]
 
-print(datasets_df[dataset_joinids + 1, ])
+print(datasets_df[dataset_joinids, ])
 ```
 
 ## What features are in a dataset?
@@ -92,7 +92,7 @@ This example also demonstrates the ability to do the query on multiple datasets.
 dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == "17481d16-ee44-49e5-bcf0-28c0780d8c4a"]
 
 # Slice the presence matrix by the first dimension, i.e., by dataset
-var_joinids <- var_df$soma_joinid[which(Matrix::colSums(presence_matrix[dataset_joinids + 1, ]) > 0)]
+var_joinids <- var_df$soma_joinid[which(Matrix::colSums(presence_matrix[dataset_joinids, ]) > 0)]
 
-print(var_df[var_joinids + 1, ])
+print(var_df[var_joinids, ])
 ```