Skip to content

Commit

Permalink
[builder] Add collection_doi_label to the datasets dataframe (#1200)
Browse files: browse the repository at this point in the history
  • Loading branch information
ebezzi authored Jun 27, 2024
1 parent fad6746 commit 1298aa9
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class Dataset:
collection_id: str = "" # CELLxGENE collection id
collection_name: str = "" # CELLxGENE collection name
collection_doi: str = "" # CELLxGENE collection doi
collection_doi_label: str = "" # CELLxGENE collection doi label
asset_h5ad_filesize: int = -1
cell_count: int = -1
mean_genes_per_cell: float = -1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
("collection_id", pa.large_string()),
("collection_name", pa.large_string()),
("collection_doi", pa.large_string()),
("collection_doi_label", pa.large_string()),
("dataset_id", pa.large_string()),
("dataset_version_id", pa.large_string()),
("dataset_title", pa.large_string()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,15 @@ def load_manifest_from_CxG() -> list[Dataset]:
d = Dataset(
dataset_id=dataset_id,
dataset_asset_h5ad_uri=asset_h5ad_uri,
-dataset_title=null_to_empty_str(dataset["title"]),
+dataset_title=null_to_empty_str(dataset.get("title")),
citation=dataset["citation"],
collection_id=dataset["collection_id"],
-collection_name=null_to_empty_str(dataset["collection_name"]),
-collection_doi=null_to_empty_str(dataset["collection_doi"]),
+collection_name=null_to_empty_str(dataset.get("collection_name")),
+collection_doi=null_to_empty_str(dataset.get("collection_doi")),
+collection_doi_label=null_to_empty_str(dataset.get("collection_doi_label")),
asset_h5ad_filesize=asset_h5ad_filesize,
schema_version=schema_version,
-dataset_version_id=null_to_empty_str(dataset["dataset_version_id"]),
+dataset_version_id=null_to_empty_str(dataset.get("dataset_version_id")),
cell_count=dataset["cell_count"],
mean_genes_per_cell=dataset["mean_genes_per_cell"],
)
Expand Down
4 changes: 4 additions & 0 deletions tools/cellxgene_census_builder/tests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def test_load_manifest_from_cxg(empty_blocklist: str) -> None:
"collection_id": "collection_1",
"collection_name": "1",
"collection_doi": None,
"collection_doi_label": "Publication 1",
"citation": "citation",
"title": "dataset #1",
"schema_version": "5.0.0",
Expand All @@ -86,6 +87,7 @@ def test_load_manifest_from_cxg(empty_blocklist: str) -> None:
"collection_id": "collection_1",
"collection_name": "1",
"collection_doi": None,
"collection_doi_label": "Publication 2",
"citation": "citation",
"title": "dataset #2",
"schema_version": "5.0.0",
Expand Down Expand Up @@ -117,6 +119,7 @@ def test_load_manifest_from_cxg_errors_on_datasets_with_old_schema(
"collection_id": "collection_1",
"collection_name": "1",
"collection_doi": None,
"collection_doi_label": "Publication 1",
"citation": "citation",
"title": "dataset #1",
"schema_version": "5.0.0",
Expand All @@ -130,6 +133,7 @@ def test_load_manifest_from_cxg_errors_on_datasets_with_old_schema(
"collection_id": "collection_1",
"collection_name": "1",
"collection_doi": None,
"collection_doi_label": "Publication 2",
"citation": "citation",
"title": "dataset #2",
"schema_version": "2.0.0", # Old schema version
Expand Down

0 comments on commit 1298aa9

Please sign in to comment.