Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

example-dvc-experiments: Added modifications for DVCLive #95

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions example-dvc-experiments/code/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
tensorflow>=2.5,<2.6
ruamel.yaml>=0.17,<0.18
imageio>=2.9,<3
dvclive<0.5
82 changes: 31 additions & 51 deletions example-dvc-experiments/code/src/train.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,38 @@
import os

# Set tensorflow logging to minimum
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # or any {'0', '1', '2'}
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # or any {'0', '1', '2'}
import tensorflow as tf
import numpy as np
from util import load_params, read_labeled_images
import json
from dvclive.keras import DvcLiveCallback


INPUT_DIR = "data/images"
RESUME_PREVIOUS_MODEL = False
OUTPUT_DIR = "models"

METRICS_FILE = "metrics.json"
SEED = 20210715

BATCH_SIZE = 128


def get_model(dense_units=128,
conv_kernel=(3, 3),
conv_units=32,
dropout=0.5,
activation="relu"):
model = tf.keras.models.Sequential([
tf.keras.layers.Reshape(input_shape=(28, 28),
target_shape=(28, 28, 1)),
tf.keras.layers.Conv2D(conv_units,
kernel_size=conv_kernel,
activation=activation),
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Dropout(dropout),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(dense_units, activation=activation),
tf.keras.layers.Dense(10, activation="softmax")])
def get_model(
dense_units=128, conv_kernel=(3, 3), conv_units=32, dropout=0.5, activation="relu"
):
model = tf.keras.models.Sequential(
[
tf.keras.layers.Reshape(input_shape=(28, 28), target_shape=(28, 28, 1)),
tf.keras.layers.Conv2D(
conv_units, kernel_size=conv_kernel, activation=activation
),
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Dropout(dropout),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(dense_units, activation=activation),
tf.keras.layers.Dense(10, activation="softmax"),
]
)

loss = tf.keras.losses.CategoricalCrossentropy()
metrics = [tf.keras.metrics.CategoricalAccuracy(name="acc")]
Expand All @@ -49,31 +50,16 @@ def normalize(images_array):
return images_array / 255


def history_to_csv(history):
    """Render a training history as comma-separated text.

    Args:
        history: Object exposing a ``history`` attribute that maps each
            metric name to a sequence of per-epoch values (presumably a
            Keras ``History`` object -- confirm against the caller).

    Returns:
        A string whose first line is ``epoch`` followed by the metric
        names, then one line per epoch numbered from 1. Fields are
        separated by ``", "`` and every line ends with a newline. An
        empty history yields just the ``epoch`` header line instead of
        raising ``IndexError``.
    """
    metrics = history.history
    names = list(metrics)
    # The first metric decides how many epochs are written, as before;
    # with no metrics at all there are simply no data rows.
    n_epochs = len(metrics[names[0]]) if names else 0

    rows = [", ".join(["epoch", *names])]
    rows.extend(
        ", ".join([str(epoch + 1), *(str(metrics[name][epoch]) for name in names)])
        for epoch in range(n_epochs)
    )
    # Join once rather than growing the string row by row.
    return "\n".join(rows) + "\n"


def main():
params = load_params()
m = get_model()
m = get_model(conv_units=params["model"]["conv_units"])
m.summary()

training_images, training_labels = read_labeled_images(
os.path.join(INPUT_DIR, 'train/'))
os.path.join(INPUT_DIR, "train/")
)
testing_images, testing_labels = read_labeled_images(
os.path.join(INPUT_DIR, 'test/')
os.path.join(INPUT_DIR, "test/")
)

assert training_images.shape[0] + testing_images.shape[0] == 70000
Expand All @@ -83,41 +69,35 @@ def main():
testing_images = normalize(testing_images)

training_labels = tf.keras.utils.to_categorical(
training_labels, num_classes=10, dtype="float32")
training_labels, num_classes=10, dtype="float32"
)
testing_labels = tf.keras.utils.to_categorical(
testing_labels, num_classes=10, dtype="float32")
testing_labels, num_classes=10, dtype="float32"
)

# We use the test set as validation for simplicity
x_train = training_images
x_valid = testing_images
y_train = training_labels
y_valid = testing_labels

history = m.fit(
m.fit(
x_train,
y_train,
batch_size=BATCH_SIZE,
epochs=params["train"]["epochs"],
verbose=1,
validation_data=(x_valid, y_valid),
callbacks=[DvcLiveCallback(model_file=f"{OUTPUT_DIR}/model.h5")],
)

with open("logs.csv", "w") as f:
f.write(history_to_csv(history))

model_file = os.path.join(OUTPUT_DIR, "model.h5")
m.save(model_file)

metrics_dict = m.evaluate(
m.evaluate(
testing_images,
testing_labels,
batch_size=BATCH_SIZE,
return_dict=True,
)

with open(METRICS_FILE, "w") as f:
f.write(json.dumps(metrics_dict))


if __name__ == "__main__":
main()
43 changes: 25 additions & 18 deletions example-dvc-experiments/generate.bash
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,22 @@ add_main_pipeline() {

echo "/images/" >> data/.gitignore

mkdir -p models

dvc stage add -n train \
-d data/images/ \
-d src/train.py \
-p model.conv_units \
-p train.epochs \
--outs models/model.h5 \
--plots-no-cache logs.csv \
--metrics-no-cache metrics.json \
python3 src/train.py
mkdir models
# dvc stage add -n train \
# -d data/images/ \
# -d src/train.py \
# -p model.conv_units \
# -p train.epochs \
# -o models/model.h5 \
# --live metrics \
# python3 src/train.py

dvc exp init --name train \
--data data/ \
--models models/ \
--params params.yaml \
--live metrics \
python3 src/train.py

}

Expand All @@ -73,7 +78,9 @@ virtualenv -p python3 .venv
export VIRTUAL_ENV_DISABLE_PROMPT=true
source .venv/bin/activate
echo '.venv/' > .gitignore
pip install 'dvc[all]'
# install dvc from master until 2.9 is released
# pip install 'dvc[all]'
pip install git+https://github.com/iterative/dvc 'dvc[all]'

git init
git checkout -b main
Expand Down Expand Up @@ -113,7 +120,7 @@ git tag "added-data"

tag_tick
add_main_pipeline
git add dvc.yaml data/.gitignore models/.gitignore
git add dvc.yaml data/.gitignore .gitignore
git commit -m "Added experiments pipeline"
git tag "created-pipeline"

Expand All @@ -127,16 +134,16 @@ git tag "configured-remote"

git tag "get-started"

# dvc exp run is not suitable for the first run due to missing file warnings
dvc repro
# Normally, the following should be dvc exp run, but warnings appear about missing deps/files
# See: https://github.com/iterative/dvc/issues/6592
dvc exp run
tag_tick
git add models/.gitignore data/.gitignore dvc.lock logs.csv metrics.json
git add data/.gitignore dvc.lock metrics.json metrics_dvc_plots/index.html models/.gitignore
git commit -m "Baseline experiment run"
git tag "baseline-experiment"

dvc exp run -n cnn-32 --queue -S model.conv_units=32
dvc exp run -n cnn-64 --queue -S model.conv_units=64
dvc exp run -n cnn-96 --queue -S model.conv_units=96
dvc exp run -n cnn-128 --queue -S model.conv_units=128

dvc exp run --run-all --jobs 2
Expand All @@ -158,7 +165,7 @@ set -veux

pushd ${REPO_PATH}

dvc remote add --force --default storage s3://dvc-public/remote/${PROJECT_NAME}/
dvc remote add --local --force --default storage s3://dvc-public/remote/${PROJECT_NAME}/
dvc push

git remote add origin "[email protected]:iterative/${PROJECT_NAME}.git"
Expand Down