Skip to content

Commit

Permalink
retrain across all data sets with model 1 (#412)
Browse files (browse the repository at this point in the history)
* fix: reenable model1 train config
* fix: regen config with model1 train
* add model1 training stages to pipeline script
* feat: reenable subset of model1 summary stages
* reenable subset of model1 summary in pipeline.sh
* reenable remainder of model1 summary stages
* update gitignores in reports folders
  • Loading branch information
cameronraysmith authored Sep 1, 2023
1 parent fcfeea5 commit a7d0003
Show file tree
Hide file tree
Showing 46 changed files with 3,645 additions and 53 deletions.
63 changes: 59 additions & 4 deletions .github/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,31 @@ function run_parallel_pipeline() {
dvc repro preprocess

# manually execute training stages to distribute over four GPUs
$DVC_COMMAND train@pancreas_model1 &
sleep 7
$DVC_COMMAND train@pbmc68k_model1 &
sleep 7
$DVC_COMMAND train@pons_model1 &
sleep 7
$DVC_COMMAND train@larry_model1 &
wait

$DVC_COMMAND train@larry_tips_model1 &
sleep 7
$DVC_COMMAND train@larry_mono_model1 &
sleep 7
$DVC_COMMAND train@larry_neu_model1 &
sleep 7
$DVC_COMMAND train@larry_multilineage_model1 &
wait

$DVC_COMMAND train@bonemarrow_model1 &
sleep 7
$DVC_COMMAND train@pbmc10k_model1 &
sleep 7
$DVC_COMMAND train@pbmc5k_model1 &
wait

$DVC_COMMAND train@pancreas_model2 &
sleep 7
$DVC_COMMAND train@pbmc68k_model2 &
Expand Down Expand Up @@ -90,7 +115,34 @@ function run_parallel_pipeline() {
# xargs -t -n 1 -P 6 bash -c 'sleep $((RANDOM % 15 + 5)); '"$DVC_COMMAND_SUMMARIZE"' "$@"' --
# wait

# manually execute training stages to distribute over four GPUs
# manually execute summarize stages to distribute over four GPUs
$DVC_COMMAND summarize@pancreas_model1 &
sleep 7
$DVC_COMMAND summarize@pbmc68k_model1 &
sleep 7
$DVC_COMMAND summarize@pons_model1 &
sleep 7
# $DVC_COMMAND summarize@larry_model1 &
wait

$DVC_COMMAND summarize@larry_tips_model1 &
sleep 7
$DVC_COMMAND summarize@larry_mono_model1 &
sleep 7
$DVC_COMMAND summarize@larry_neu_model1 &
sleep 7
$DVC_COMMAND summarize@larry_multilineage_model1 &
wait

$DVC_COMMAND summarize@bonemarrow_model1 &
sleep 7
$DVC_COMMAND summarize@pbmc10k_model1 &
sleep 7
$DVC_COMMAND summarize@pbmc5k_model1 &

wait


$DVC_COMMAND summarize@pancreas_model2 &
sleep 7
$DVC_COMMAND summarize@pbmc68k_model2 &
Expand Down Expand Up @@ -132,7 +184,7 @@ cd reproducibility/figures || exit

dvc pull
run_parallel_pipeline
dvc repro
# dvc repro
dvc push

npm update -g @dvcorg/cml
Expand Down Expand Up @@ -170,13 +222,16 @@ data_sets=(
"pbmc10k"
"pbmc5k"
"bonemarrow"
"larry"
# "larry"
"larry_mono"
"larry_neu"
"larry_multilineage"
"larry_tips"
)
models=("model2")
models=(
"model1"
"model2"
)

for data_set in "${data_sets[@]}"; do
for model in "${models[@]}"; do
Expand Down
110 changes: 106 additions & 4 deletions pyrovelocity/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ def create_reports_config(model_name: str, model_number: int):
"pancreas",
1,
"umap",
gpu_id=0,
guide_type="auto_t0_constraint",
max_epochs=2000,
),
Expand All @@ -625,6 +626,15 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=2000,
),
bonemarrow_model1=create_model_config(
"scvelo",
"bonemarrow",
1,
"umap",
gpu_id=0,
guide_type="auto_t0_constraint",
max_epochs=2000,
),
bonemarrow_model2=create_model_config(
"scvelo",
"bonemarrow",
Expand All @@ -639,6 +649,7 @@ def create_reports_config(model_name: str, model_number: int):
"pbmc68k",
1,
"tsne",
gpu_id=1,
guide_type="auto_t0_constraint",
cell_state="celltype",
max_epochs=2000,
Expand All @@ -658,6 +669,7 @@ def create_reports_config(model_name: str, model_number: int):
"pons",
1,
"umap",
gpu_id=2,
guide_type="auto_t0_constraint",
cell_state="celltype",
max_epochs=2000,
Expand All @@ -672,6 +684,18 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=2000,
),
larry_model1=create_model_config(
"pyrovelocity",
"larry",
1,
"emb",
gpu_id=3,
svi_train=True,
batch_size=4000,
cell_state="state_info",
guide_type="auto_t0_constraint",
max_epochs=1000,
),
larry_model2=create_model_config(
"pyrovelocity",
"larry",
Expand All @@ -684,6 +708,18 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=1000,
),
larry_tips_model1=create_model_config(
"pyrovelocity",
"larry_tips",
1,
"umap",
gpu_id=0,
svi_train=True,
batch_size=4000,
cell_state="state_info",
guide_type="auto_t0_constraint",
max_epochs=1000,
),
larry_tips_model2=create_model_config(
"pyrovelocity",
"larry_tips",
Expand All @@ -696,6 +732,18 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=1000,
),
larry_mono_model1=create_model_config(
"pyrovelocity",
"larry_mono",
1,
"emb",
gpu_id=1,
svi_train=True,
batch_size=4000,
cell_state="state_info",
guide_type="auto_t0_constraint",
max_epochs=1000,
),
larry_mono_model2=create_model_config(
"pyrovelocity",
"larry_mono",
Expand All @@ -708,6 +756,18 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=1000,
),
larry_neu_model1=create_model_config(
"pyrovelocity",
"larry_neu",
1,
"emb",
gpu_id=2,
svi_train=True,
batch_size=4000,
cell_state="state_info",
guide_type="auto_t0_constraint",
max_epochs=1000,
),
larry_neu_model2=create_model_config(
"pyrovelocity",
"larry_neu",
Expand All @@ -720,6 +780,18 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=1000,
),
larry_multilineage_model1=create_model_config(
"pyrovelocity",
"larry_multilineage",
1,
"emb",
gpu_id=3,
svi_train=True,
batch_size=4000,
cell_state="state_info",
guide_type="auto_t0_constraint",
max_epochs=1000,
),
larry_multilineage_model2=create_model_config(
"pyrovelocity",
"larry_multilineage",
Expand All @@ -732,6 +804,16 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=1000,
),
pbmc10k_model1=create_model_config(
"pyrovelocity",
"pbmc10k",
1,
"umap",
gpu_id=1,
cell_state="celltype",
guide_type="auto_t0_constraint",
max_epochs=2000,
),
pbmc10k_model2=create_model_config(
"pyrovelocity",
"pbmc10k",
Expand All @@ -742,6 +824,16 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=2000,
),
pbmc5k_model1=create_model_config(
"pyrovelocity",
"pbmc5k",
1,
"umap",
gpu_id=2,
cell_state="celltype",
guide_type="auto_t0_constraint",
max_epochs=2000,
),
pbmc5k_model2=create_model_config(
"pyrovelocity",
"pbmc5k",
Expand Down Expand Up @@ -919,28 +1011,38 @@ def create_reports_config(model_name: str, model_number: int):
model_training={
k: model_training[k]
for k in model_training
if fnmatch.fnmatch(k, "*_model2") or k == "pancreas_model1"
# if fnmatch.fnmatch(k, "*_model2") or k == "pancreas_model1"
},
reports=dict(
model_summary=dict(
# simulate_model1=create_reports_config("medium", 1),
# simulate_model2=create_reports_config("medium", 2),
pancreas_model1=create_reports_config("pancreas", 1),
pancreas_model2=create_reports_config("pancreas", 2),
bonemarrow_model1=create_reports_config("bonemarrow", 1),
bonemarrow_model2=create_reports_config("bonemarrow", 2),
# pbmc68k_model1=create_reports_config("pbmc68k", 1),
pbmc68k_model1=create_reports_config("pbmc68k", 1),
pbmc68k_model2=create_reports_config("pbmc68k", 2),
# pons_model1=create_reports_config("pons", 1),
pons_model1=create_reports_config("pons", 1),
pons_model2=create_reports_config("pons", 2),
pbmc10k_model1=create_reports_config("pbmc10k", 1),
pbmc10k_model2=create_reports_config("pbmc10k", 2),
pbmc5k_model1=create_reports_config("pbmc5k", 1),
pbmc5k_model2=create_reports_config("pbmc5k", 2),
larry_tips_model1=create_reports_config("larry_tips", 1),
larry_tips_model2=create_reports_config("larry_tips", 2),
larry_mono_model1=create_reports_config("larry_mono", 1),
larry_mono_model2=create_reports_config("larry_mono", 2),
larry_neu_model1=create_reports_config("larry_neu", 1),
larry_neu_model2=create_reports_config("larry_neu", 2),
larry_multilineage_model1=create_reports_config(
"larry_multilineage", 1
),
larry_multilineage_model2=create_reports_config(
"larry_multilineage", 2
),
larry_model2=create_reports_config("larry", 2),
# larry_model1=create_reports_config("larry", 1),
# larry_model2=create_reports_config("larry", 2),
),
figure2=dict(
tag="fig2",
Expand Down
Loading

0 comments on commit a7d0003

Please sign in to comment.