Skip to content

Commit

Permalink
does this work
Browse files (browse the repository at this point in the history)
  • Loading branch information
amva13 committed Oct 23, 2024
1 parent 58b1d52 commit 9e6c68e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
5 changes: 3 additions & 2 deletions tdc/model_server/tokenizers/geneformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def tokenize_cell_vectors(self,
cell_vector_adata,
target_sum=10_000,
chunk_size=512,
- ensembl_id="ensembl_id"):
+ ensembl_id="ensembl_id",
+ ncounts="ncounts"):
"""
Tokenizing single-cell gene expression vectors formatted as anndata types.
Expand Down Expand Up @@ -96,7 +97,7 @@ def tokenize_cell_vectors(self,
for i in range(0, len(filter_pass_loc), chunk_size):
idx = filter_pass_loc[i:i + chunk_size]

- n_counts = adata[idx].obs['ncounts'].values[:, None]
+ n_counts = adata[idx].obs[ncounts].values[:, None]
X_view = adata[idx, coding_miRNA_loc].X
X_norm = (X_view / n_counts * target_sum / norm_factor_vector)
X_norm = sp.csr_matrix(X_norm)
Expand Down
20 changes: 17 additions & 3 deletions tdc/test/test_model_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,26 @@ def testGeneformerTokenizer(self):
column_names = {"obs": ["assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease"]},
)
# adata.obs["ncounts"] = [2] * len(adata.obs)
raise Exception("obs", adata.obs.columns, "var", adata.var.columns)
adata.obs["ncounts"] = [2] * len(adata.obs)
# raise Exception("obs", adata.obs.columns, "var", adata.var.columns)
"""
Exception: ('obs', Index(['soma_joinid', 'dataset_id', 'assay', 'assay_ontology_term_id',
'cell_type', 'cell_type_ontology_term_id', 'development_stage',
'development_stage_ontology_term_id', 'disease',
'disease_ontology_term_id', 'donor_id', 'is_primary_data',
'observation_joinid', 'self_reported_ethnicity',
'self_reported_ethnicity_ontology_term_id', 'sex',
'sex_ontology_term_id', 'suspension_type', 'tissue',
'tissue_ontology_term_id', 'tissue_type', 'tissue_general',
'tissue_general_ontology_term_id', 'raw_sum', 'nnz', 'raw_mean_nnz',
'raw_variance_nnz', 'n_measured_vars'],
dtype='object'), 'var', Index(['soma_joinid', 'feature_id', 'feature_name', 'feature_length', 'nnz',
'n_measured_obs'],
dtype='object'))
"""
print("initializing tokenizer")
tokenizer = GeneformerTokenizer()
print("testing tokenizer")
x = tokenizer.tokenize_cell_vectors(adata)
x = tokenizer.tokenize_cell_vectors(adata, ensembl_id="feature_id", ncounts="n_measured_vars")
assert x[0]

# test Geneformer can serve the request
Expand Down

0 comments on commit 9e6c68e

Please sign in to comment.