Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add working scripts for TF ZCC #36

Merged
merged 1 commit into from
Nov 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 22 additions & 14 deletions examples/tensorflow/scripts/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import smdebug.tensorflow as smd

def _str2bool(value):
    """Convert a command-line string into a real boolean.

    Accepts the usual spellings case-insensitively ("true"/"false",
    "yes"/"no", "t"/"f", "y"/"n", "1"/"0"). An empty string maps to False,
    which is the one input the previous ``type=bool`` parsing also treated
    as False.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value, got {!r}".format(value))


parser = argparse.ArgumentParser()
# NOTE: argparse's ``type=bool`` calls bool() on the raw string, so every
# non-empty value (including "False") would parse as True. Use an explicit
# converter so ``--script-mode False`` really disables script mode.
parser.add_argument("--script-mode", type=_str2bool, default=False)
parser.add_argument("--smdebug_path", type=str)
parser.add_argument("--train_frequency", type=int, help="How often to save TS data", default=50)
parser.add_argument("--eval_frequency", type=int, help="How often to save TS data", default=10)
Expand Down Expand Up @@ -86,7 +87,8 @@ def cnn_model_fn(features, labels, mode):
# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.lr)
optimizer = smd.get_hook().wrap_optimizer(optimizer)
if args.script_mode:
optimizer = smd.get_hook().wrap_optimizer(optimizer)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

Expand Down Expand Up @@ -116,19 +118,25 @@ def cnn_model_fn(features, labels, mode):
x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False
)

hook = smd.SessionHook(
out_dir=args.smdebug_path,
save_config=smd.SaveConfig(
{
smd.modes.TRAIN: smd.SaveConfigMode(args.train_frequency),
smd.modes.EVAL: smd.SaveConfigMode(args.eval_frequency),
}
),
)
if args.script_mode:
hook = smd.SessionHook(
out_dir=args.smdebug_path,
save_config=smd.SaveConfig(
{
smd.modes.TRAIN: smd.SaveConfigMode(args.train_frequency),
smd.modes.EVAL: smd.SaveConfigMode(args.eval_frequency),
}
),
)
hooks = [hook]
else:
hooks = []

hook.set_mode(smd.modes.TRAIN)
if args.script_mode:
hook.set_mode(smd.modes.TRAIN)
# train one step and display the probabilities
mnist_classifier.train(input_fn=train_input_fn, steps=args.num_steps, hooks=[hook])
mnist_classifier.train(input_fn=train_input_fn, steps=args.num_steps, hooks=hooks)

hook.set_mode(smd.modes.EVAL)
mnist_classifier.evaluate(input_fn=eval_input_fn, steps=args.num_eval_steps, hooks=[hook])
if args.script_mode:
hook.set_mode(smd.modes.EVAL)
mnist_classifier.evaluate(input_fn=eval_input_fn, steps=args.num_eval_steps, hooks=hooks)
53 changes: 31 additions & 22 deletions examples/tensorflow/scripts/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def str2bool(v):


# Command-line options for this example, registered from a table of
# (flag, add_argument keyword arguments) pairs. ``--script-mode`` is parsed
# with the str2bool helper defined earlier in this file.
parser = argparse.ArgumentParser()
for _flag, _options in (
    ("--script-mode", dict(type=str2bool, default=False)),
    ("--model_dir", dict(type=str, help="S3 path for the model")),
    ("--lr", dict(type=float, help="Learning Rate", default=0.001)),
    ("--steps", dict(type=int, help="Number of steps to run", default=100)),
):
    parser.add_argument(_flag, **_options)
Expand Down Expand Up @@ -52,22 +53,26 @@ def str2bool(v):
random.seed(12)


# save tensors as reductions if necessary
rdnc = (
smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
if args.reductions
else None
)

# create the hook
# Note that all tensors are saved only when args.save_all is set (it is passed as save_all)
hook = smd.SessionHook(
out_dir=args.smdebug_path,
save_all=args.save_all,
include_collections=["weights", "gradients", "losses"],
save_config=smd.SaveConfig(save_interval=args.save_frequency),
reduction_config=rdnc,
)
if args.script_mode:
# save tensors as reductions if necessary
rdnc = (
smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
if args.reductions
else None
)

# create the hook
# Note that all tensors are saved only when args.save_all is set (it is passed as save_all)
hook = smd.SessionHook(
out_dir=args.smdebug_path,
save_all=args.save_all,
include_collections=["weights", "gradients", "losses"],
save_config=smd.SaveConfig(save_interval=args.save_frequency),
reduction_config=rdnc,
)
hooks = [hook]
else:
hooks = []

# Network definition
# Note the use of name scopes
Expand All @@ -78,31 +83,35 @@ def str2bool(v):
w0 = [[1], [1.0]]
y = tf.matmul(x, w0)
loss = tf.reduce_mean((tf.matmul(x, w) - y) ** 2, name="loss")
hook.add_to_collection("losses", loss)

smd.get_hook("session", create_if_not_exists=True).add_to_collection("losses", loss)

global_step = tf.Variable(17, name="global_step", trainable=False)
increment_global_step_op = tf.assign(global_step, global_step + 1)

optimizer = tf.train.AdamOptimizer(args.lr)

# Wrap the optimizer with wrap_optimizer so Tornasole can find gradients and optimizer_variables to save
optimizer = hook.wrap_optimizer(optimizer)
if args.script_mode:
# Wrap the optimizer with wrap_optimizer so Tornasole can find gradients and optimizer_variables to save
optimizer = hook.wrap_optimizer(optimizer)

# use this wrapped optimizer to minimize loss
optimizer_op = optimizer.minimize(loss, global_step=increment_global_step_op)

hook.set_mode(smd.modes.TRAIN)
if args.script_mode:
hook.set_mode(smd.modes.TRAIN)

# pass the hook to hooks parameter of monitored session
sess = tf.train.MonitoredSession(hooks=[hook])
sess = tf.train.MonitoredSession(hooks=hooks)

# use this session for running the tensorflow model
for i in range(args.steps):
x_ = np.random.random((10, 2)) * args.scale
_loss, opt, gstep = sess.run([loss, optimizer_op, increment_global_step_op], {x: x_})
print(f"Step={i}, Loss={_loss}")

hook.set_mode(smd.modes.EVAL)
if args.script_mode:
hook.set_mode(smd.modes.EVAL)
for i in range(args.steps):
x_ = np.random.random((10, 2)) * args.scale
sess.run([loss, increment_global_step_op], {x: x_})