chanzuckerberg · mlin · Feb 28, 2023 · Feb 19, 2023 · Feb 19, 2023 · Feb 19, 2023
diff --git a/.github/workflows/Rcheck.yml b/.github/workflows/Rcheck.yml
@@ -0,0 +1,33 @@
+# Runs styler and `R CMD check` on the CellCensus R package.
+name: cell_census R package checks
+
+on:
+  pull_request:
+    # Skip PRs that only touch the Python API.
+    # NOTE(review): assumes that tree is api/python/, mirroring api/r/ - confirm.
+    paths-ignore:
+      - "api/python/**"
+  push:
+    branches: [main]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          # Same extra repository the package DESCRIPTION declares.
+          extra-repositories: https://tiledb-inc.r-universe.dev
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: ./api/r/CellCensus
+          extra-packages: any::rcmdcheck, any::styler
+          needs: check
+      # Fail the job if styler would reformat any file in the package.
+      - name: styler
+        run: Rscript -e 'library("styler"); style_pkg("api/r/CellCensus", dry="fail")'
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          working-directory: ./api/r/CellCensus
+          args: 'c("--no-manual", "--as-cran")'
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -2,6 +2,8 @@ name: cell_census build
 
 on:
   pull_request:
+    paths-ignore:
+      - "api/r/**"  # the R package lives under api/r/
   push:
     branches: [main]
   workflow_dispatch:

diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml
@@ -2,6 +2,8 @@ name: Python Linting
 
 on:
   pull_request:
+    paths-ignore:
+      - "api/r/**"  # the R package lives under api/r/
   push:
     branches: [main]
 

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -2,6 +2,8 @@ name: cell_census Python package unit tests
 
 on:
   pull_request:
+    paths-ignore:
+      - "api/r/**"  # the R package lives under api/r/
   push:
     branches: [main]
 

diff --git a/.gitignore b/.gitignore
@@ -134,3 +134,4 @@ temp
 
 # ruff
 .ruff_cache
+.Rproj.user
diff --git a/api/r/CellCensus/.Rbuildignore b/api/r/CellCensus/.Rbuildignore
@@ -0,0 +1,4 @@
+^CellCensus\.Rproj$
+^\.Rproj\.user$
+^LICENSE\.md$
+^README\.Rmd$
diff --git a/api/r/CellCensus/.gitignore b/api/r/CellCensus/.gitignore
@@ -0,0 +1,2 @@
+.Rproj.user
+.Rhistory
diff --git a/api/r/CellCensus/CellCensus.Rproj b/api/r/CellCensus/CellCensus.Rproj
@@ -0,0 +1,22 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/api/r/CellCensus/DESCRIPTION b/api/r/CellCensus/DESCRIPTION
@@ -0,0 +1,23 @@
+Package: CellCensus
+Title: CZI Science Cell Census API
+Version: 0.0.0.9000
+Authors@R: 
+    person("Chan Zuckerberg Initiative", email = "[email protected]",
+           role = c("aut", "cre", "cph", "fnd"))
+Description: API to facilitate use of the CZI Science Cell Census. The Cell
+  Census is a versioned container for the single-cell data hosted at CELLxGENE
+  Discover.
+License: MIT + file LICENSE
+URL: https://github.com/chanzuckerberg/cell-census
+BugReports: https://github.com/chanzuckerberg/cell-census/issues
+Encoding: UTF-8
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
+Additional_repositories: https://tiledb-inc.r-universe.dev
+Imports:
+  jsonlite,
+  tiledbsoma,
+  tiledb
+Suggests: 
+  testthat (>= 3.0.0)
+Config/testthat/edition: 3
diff --git a/api/r/CellCensus/LICENSE b/api/r/CellCensus/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2023
+COPYRIGHT HOLDER: Chan Zuckerberg Initiative
diff --git a/api/r/CellCensus/LICENSE.md b/api/r/CellCensus/LICENSE.md
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2023 Chan Zuckerberg Initiative
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/api/r/CellCensus/NAMESPACE b/api/r/CellCensus/NAMESPACE
@@ -0,0 +1,8 @@
+# Generated by roxygen2: do not edit by hand
+
+export(get_census_version_description)
+export(get_census_version_directory)
+export(open_soma)
+importFrom(jsonlite,fromJSON)
+importFrom(tiledb,tiledb_ctx)
+importFrom(tiledbsoma,SOMACollection)
diff --git a/api/r/CellCensus/R/open.R b/api/r/CellCensus/R/open.R
@@ -0,0 +1,34 @@
+# TileDB configuration applied to every context created by `open_soma()`.
+# Both buffer sizes are 1024^3 bytes (1 GiB), passed as strings.
+DEFAULT_TILEDB_CONFIGURATION <- c(
+  "py.init_buffer_bytes" = paste(1 * 1024^3),
+  "soma.init_buffer_bytes" = paste(1 * 1024^3)
+)
+
+#' Open the Cell Census
+#'
+#' @param census_version The version of the Census, e.g., "latest"
+#' @param uri The URI containing the Census SOMA objects. If specified, takes
+#'            precedence over `census_version`. `""` (the default) or `NULL`
+#'            means "not specified".
+#'
+#' @return Top-level `tiledbsoma::SOMACollection` object
+#' @importFrom tiledbsoma SOMACollection
+#' @importFrom tiledb tiledb_ctx
+#' @export
+open_soma <- function(census_version = "latest", uri = "") {
+  cfg <- DEFAULT_TILEDB_CONFIGURATION
+
+  # Without an explicit URI, look the release up in the census directory.
+  if (is.null(uri) || uri == "") {
+    description <- get_census_version_description(census_version)
+    uri <- description$soma.uri
+    # Only set the S3 region when the release description provides one.
+    if ("soma.s3_region" %in% names(description) &&
+      description$soma.s3_region != "") {
+      cfg <- c(cfg, c("vfs.s3.region" = description$soma.s3_region))
+    }
+  }
+
+  return(tiledbsoma::SOMACollection$new(uri, ctx = tiledb::tiledb_ctx(cfg)))
+}
diff --git a/api/r/CellCensus/R/release_directory.R b/api/r/CellCensus/R/release_directory.R
@@ -0,0 +1,71 @@
+# Location of the JSON document that lists the available Cell Census releases.
+CELL_CENSUS_RELEASE_DIRECTORY_URL <- "https://s3.us-west-2.amazonaws.com/cellxgene-data-public/cell-census/release.json"
+
+
+#' Get release description for given census version
+#'
+#' @param census_version The census version name. It must equal one of the row
+#'   names of the release directory (release names and aliases).
+#'
+#' @return List with the release location and metadata, plus a
+#'   `census_version` element holding the requested name.
+#' @export
+get_census_version_description <- function(census_version) {
+  census_directory <- get_census_version_directory()
+  # Indexing rows by name partial-matches and yields an all-NA row (not zero
+  # rows) on a miss, so unknown versions have to be rejected explicitly.
+  known <- length(census_version) == 1 &&
+    census_version %in% rownames(census_directory)
+  if (!known) {
+    stop(paste("unknown Cell Census version:", census_version))
+  }
+  ans <- as.list(census_directory[census_version, ])
+  ans$census_version <- census_version
+  return(ans)
+}
+
+#' Get the directory of cell census releases currently available
+#'
+#' @return Data frame of available cell census releases, including location and
+#'   metadata. Row names are the entry names of the release directory.
+#' @importFrom jsonlite fromJSON
+#' @export
+get_census_version_directory <- function() {
+  raw <- jsonlite::fromJSON(CELL_CENSUS_RELEASE_DIRECTORY_URL)
+
+  # Resolve all aliases for easier use. An alias is an entry whose value is the
+  # name of another entry; follow the chain until a non-character value is
+  # reached. An alias naming a missing entry resolves to NULL, which drops it.
+  for (field in names(raw)) {
+    points_at <- raw[[field]]
+    visited <- field
+    while (is.character(points_at)) {
+      # Guard against aliases that refer back to themselves.
+      if (any(points_at %in% visited)) {
+        stop(paste("cyclic alias in Cell Census release directory:", field))
+      }
+      visited <- c(visited, points_at)
+      points_at <- raw[[points_at]]
+    }
+    raw[[field]] <- points_at
+  }
+
+  # Replace NULLs with empty string to facilitate data frame conversion
+  raw <- simple_rapply(raw, function(x) ifelse(is.null(x), "", x))
+
+  # Convert nested list to data frame
+  df <- do.call(rbind, lapply(raw, as.data.frame))
+  rownames(df) <- names(raw)
+  return(df)
+}
+
+# Apply `fn` to every non-list element of a nested list, keeping the nesting.
+# NULL elements are passed to `fn` too, since `is.list(NULL)` is FALSE.
+# https://stackoverflow.com/a/38950304
+simple_rapply <- function(x, fn) {
+  if (is.list(x)) {
+    lapply(x, simple_rapply, fn)
+  } else {
+    fn(x)
+  }
+}
diff --git a/api/r/CellCensus/README.md b/api/r/CellCensus/README.md
@@ -0,0 +1,38 @@
+
+# CELLxGENE Cell Census
+
+<!-- badges: start -->
+<!-- badges: end -->
+
+The `CellCensus` package provides an API to facilitate use of the CZI Science Cell Census. The Cell Census is a versioned container for the single-cell data hosted at [CELLxGENE Discover](https://cellxgene.cziscience.com/).
+
+**Status**: Pre-release, under rapid development. Expect API changes.
+
+For more information, see the [cell_census repo](https://github.com/chanzuckerberg/cell-census/).
+
+## Installation
+
+You can install the development version of CellCensus from [GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("chanzuckerberg/cell-census/api/r/CellCensus")
+print(CellCensus::open_soma())
+```
+
+On Debian/Ubuntu, the minimal system packages needed are `r-base`, `cmake`, and `git`.
+
+## Example
+
+The example below opens the latest Cell Census release and prints the resulting top-level SOMA collection:
+
+``` r
+library(CellCensus)
+print(open_soma())
+```
+
+## For More Help
+
+For more help, please file an issue on the repo, or contact us at <[email protected]>.
+
+If you believe you have found a security issue, we would appreciate notification. Please send email to <[email protected]>.
diff --git a/api/r/CellCensus/man/get_census_version_description.Rd b/api/r/CellCensus/man/get_census_version_description.Rd
diff --git a/api/r/CellCensus/man/get_census_version_directory.Rd b/api/r/CellCensus/man/get_census_version_directory.Rd
diff --git a/api/r/CellCensus/man/open_soma.Rd b/api/r/CellCensus/man/open_soma.Rd
diff --git a/api/r/CellCensus/tests/testthat.R b/api/r/CellCensus/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/tests.html
+# * https://testthat.r-lib.org/reference/test_package.html#special-files
+
+library(testthat)
+library(CellCensus)
+
+test_check("CellCensus")
diff --git a/api/r/CellCensus/tests/testthat/test-open.R b/api/r/CellCensus/tests/testthat/test-open.R
@@ -0,0 +1,17 @@
+# Opening a pinned release should give a collection at that release's SOMA
+# URI, with the human data reachable beneath it.
+test_that("open_soma", {
+  expected_uri <- "s3://cellxgene-data-public/cell-census/2023-02-13/soma/"
+  census <- open_soma("2023-02-13")
+  expect_equal(census$uri, expected_uri)
+  expect_true(census$exists())
+  human <- census$get("census_data")$get("homo_sapiens")
+  expect_true(human$exists())
+})
+
+# Calling with no arguments must be equivalent to asking for "latest".
+test_that("open_soma latest/default", {
+  uri_default <- open_soma()$uri
+  uri_latest <- open_soma("latest")$uri
+  expect_equal(uri_default, uri_latest)
+})
diff --git a/api/r/CellCensus/tests/testthat/test-release_directory.R b/api/r/CellCensus/tests/testthat/test-release_directory.R
@@ -0,0 +1,27 @@
+test_that("get_census_version_description", {
+  # lookup by concrete release name
+  pinned <- get_census_version_description("2023-02-13")
+  expect_equal(pinned$release_build, "2023-02-13")
+  expect_equal(
+    pinned$soma.uri,
+    "s3://cellxgene-data-public/cell-census/2023-02-13/soma/"
+  )
+
+  # alias resolution
+  aliased <- get_census_version_description("latest")
+  expect_true(is.list(aliased))
+  expect_true(is.character(aliased$release_build))
+  expect_true(is.character(aliased$soma.uri))
+})
+
+test_that("get_census_version_directory", {
+  releases <- get_census_version_directory()
+  expect_true(is.data.frame(releases))
+  # rows are addressed by release name
+  pinned <- as.list(releases["2023-02-13", ])
+  expect_equal(pinned$release_build, "2023-02-13")
+  expect_equal(
+    pinned$soma.uri,
+    "s3://cellxgene-data-public/cell-census/2023-02-13/soma/"
+  )
+})