From 3634e6b7f4220de5855c343b01babbb45633ff00 Mon Sep 17 00:00:00 2001
From: Rahul Huilgol
Date: Sat, 23 Nov 2019 11:21:10 -0800
Subject: [PATCH] Add working scripts for ZCC

---
 examples/tensorflow/scripts/mnist.py  | 36 +++++++++++-------
 examples/tensorflow/scripts/simple.py | 53 ++++++++++++++++-----------
 2 files changed, 53 insertions(+), 36 deletions(-)

diff --git a/examples/tensorflow/scripts/mnist.py b/examples/tensorflow/scripts/mnist.py
index 6580a0937..078b06a3b 100644
--- a/examples/tensorflow/scripts/mnist.py
+++ b/examples/tensorflow/scripts/mnist.py
@@ -10,6 +10,7 @@ import smdebug.tensorflow as smd
 
 
 parser = argparse.ArgumentParser()
+parser.add_argument("--script-mode", type=bool, default=False)
 parser.add_argument("--smdebug_path", type=str)
 parser.add_argument("--train_frequency", type=int, help="How often to save TS data", default=50)
 parser.add_argument("--eval_frequency", type=int, help="How often to save TS data", default=10)
@@ -86,7 +87,8 @@ def cnn_model_fn(features, labels, mode):
     # Configure the Training Op (for TRAIN mode)
     if mode == tf.estimator.ModeKeys.TRAIN:
         optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.lr)
-        optimizer = smd.get_hook().wrap_optimizer(optimizer)
+        if args.script_mode:
+            optimizer = smd.get_hook().wrap_optimizer(optimizer)
         train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
         return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
 
@@ -116,19 +118,25 @@ def cnn_model_fn(features, labels, mode):
     x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False
 )
 
-hook = smd.SessionHook(
-    out_dir=args.smdebug_path,
-    save_config=smd.SaveConfig(
-        {
-            smd.modes.TRAIN: smd.SaveConfigMode(args.train_frequency),
-            smd.modes.EVAL: smd.SaveConfigMode(args.eval_frequency),
-        }
-    ),
-)
+if args.script_mode:
+    hook = smd.SessionHook(
+        out_dir=args.smdebug_path,
+        save_config=smd.SaveConfig(
+            {
+                smd.modes.TRAIN: smd.SaveConfigMode(args.train_frequency),
+                smd.modes.EVAL: smd.SaveConfigMode(args.eval_frequency),
+            }
+        ),
+    )
+    hooks = [hook]
+else:
+    hooks = []
 
-hook.set_mode(smd.modes.TRAIN)
+if args.script_mode:
+    hook.set_mode(smd.modes.TRAIN)
 # train one step and display the probabilties
-mnist_classifier.train(input_fn=train_input_fn, steps=args.num_steps, hooks=[hook])
+mnist_classifier.train(input_fn=train_input_fn, steps=args.num_steps, hooks=hooks)
 
-hook.set_mode(smd.modes.EVAL)
-mnist_classifier.evaluate(input_fn=eval_input_fn, steps=args.num_eval_steps, hooks=[hook])
+if args.script_mode:
+    hook.set_mode(smd.modes.EVAL)
+mnist_classifier.evaluate(input_fn=eval_input_fn, steps=args.num_eval_steps, hooks=hooks)
diff --git a/examples/tensorflow/scripts/simple.py b/examples/tensorflow/scripts/simple.py
index 7b6e4cdb5..b8268d0af 100644
--- a/examples/tensorflow/scripts/simple.py
+++ b/examples/tensorflow/scripts/simple.py
@@ -23,6 +23,7 @@ def str2bool(v):
 
 
 parser = argparse.ArgumentParser()
+parser.add_argument("--script-mode", type=str2bool, default=False)
 parser.add_argument("--model_dir", type=str, help="S3 path for the model")
 parser.add_argument("--lr", type=float, help="Learning Rate", default=0.001)
 parser.add_argument("--steps", type=int, help="Number of steps to run", default=100)
@@ -52,22 +53,26 @@ def str2bool(v):
 
 random.seed(12)
 
-# save tensors as reductions if necessary
-rdnc = (
-    smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
-    if args.reductions
-    else None
-)
-
-# create the hook
-# Note that we are saving all tensors here by passing save_all=True
-hook = smd.SessionHook(
-    out_dir=args.smdebug_path,
-    save_all=args.save_all,
-    include_collections=["weights", "gradients", "losses"],
-    save_config=smd.SaveConfig(save_interval=args.save_frequency),
-    reduction_config=rdnc,
-)
+if args.script_mode:
+    # save tensors as reductions if necessary
+    rdnc = (
+        smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
+        if args.reductions
+        else None
+    )
+
+    # create the hook
+    # Note that we are saving all tensors here by passing save_all=True
+    hook = smd.SessionHook(
+        out_dir=args.smdebug_path,
+        save_all=args.save_all,
+        include_collections=["weights", "gradients", "losses"],
+        save_config=smd.SaveConfig(save_interval=args.save_frequency),
+        reduction_config=rdnc,
+    )
+    hooks = [hook]
+else:
+    hooks = []
 
 # Network definition
 # Note the use of name scopes
@@ -78,23 +83,26 @@ def str2bool(v):
     w0 = [[1], [1.0]]
     y = tf.matmul(x, w0)
 loss = tf.reduce_mean((tf.matmul(x, w) - y) ** 2, name="loss")
-hook.add_to_collection("losses", loss)
+
+smd.get_hook("session", create_if_not_exists=True).add_to_collection("losses", loss)
 
 global_step = tf.Variable(17, name="global_step", trainable=False)
 increment_global_step_op = tf.assign(global_step, global_step + 1)
 
 optimizer = tf.train.AdamOptimizer(args.lr)
 
-# Wrap the optimizer with wrap_optimizer so Tornasole can find gradients and optimizer_variables to save
-optimizer = hook.wrap_optimizer(optimizer)
+if args.script_mode:
+    # Wrap the optimizer with wrap_optimizer so Tornasole can find gradients and optimizer_variables to save
+    optimizer = hook.wrap_optimizer(optimizer)
 
 # use this wrapped optimizer to minimize loss
 optimizer_op = optimizer.minimize(loss, global_step=increment_global_step_op)
 
-hook.set_mode(smd.modes.TRAIN)
+if args.script_mode:
+    hook.set_mode(smd.modes.TRAIN)
 
 # pass the hook to hooks parameter of monitored session
-sess = tf.train.MonitoredSession(hooks=[hook])
+sess = tf.train.MonitoredSession(hooks=hooks)
 
 # use this session for running the tensorflow model
 for i in range(args.steps):
@@ -102,7 +110,8 @@ def str2bool(v):
     _loss, opt, gstep = sess.run([loss, optimizer_op, increment_global_step_op], {x: x_})
     print(f"Step={i}, Loss={_loss}")
 
-hook.set_mode(smd.modes.EVAL)
+if args.script_mode:
+    hook.set_mode(smd.modes.EVAL)
 for i in range(args.steps):
     x_ = np.random.random((10, 2)) * args.scale
     sess.run([loss, increment_global_step_op], {x: x_})
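
Below is a minimal standalone sketch of the pattern this patch applies to both scripts: the smdebug hook is constructed and attached only when --script-mode is passed, and otherwise the script runs with an empty hooks list so that a hook created outside the script (the zero-code-change, ZCC, path) can take over. The body of str2bool is assumed here (only its name appears in the diff), and the --out_dir default and the tiny graph are hypothetical stand-ins; the smdebug calls used (SessionHook, SaveConfig, wrap_optimizer, set_mode) are the ones already exercised by the patch.

import argparse

import numpy as np
import tensorflow as tf

import smdebug.tensorflow as smd


def str2bool(v):
    # assumed helper: turn "true"/"false"-style strings into a real bool
    return str(v).lower() in ("yes", "true", "t", "1")


parser = argparse.ArgumentParser()
parser.add_argument("--script-mode", type=str2bool, default=False)
parser.add_argument("--out_dir", type=str, default="/tmp/smdebug_demo")  # hypothetical output path
parser.add_argument("--steps", type=int, default=10)
args = parser.parse_args()

if args.script_mode:
    # script mode: the training script owns the hook and decides what to save
    hook = smd.SessionHook(out_dir=args.out_dir, save_config=smd.SaveConfig(save_interval=10))
    hooks = [hook]
else:
    # ZCC mode: no hook is created here; one may be attached from outside the script
    hooks = []

# tiny stand-in TF1 graph so the hook has something to save
x = tf.placeholder(shape=(None, 2), dtype=tf.float32)
w = tf.Variable(initial_value=[[1.0], [1.0]], name="weight")
loss = tf.reduce_mean((tf.matmul(x, w) - 1.0) ** 2, name="loss")

optimizer = tf.train.GradientDescentOptimizer(0.1)
if args.script_mode:
    # wrap the optimizer so the hook can find gradients and optimizer variables
    optimizer = hook.wrap_optimizer(optimizer)
train_op = optimizer.minimize(loss)

if args.script_mode:
    hook.set_mode(smd.modes.TRAIN)
sess = tf.train.MonitoredSession(hooks=hooks)

for i in range(args.steps):
    _loss, _ = sess.run([loss, train_op], {x: np.random.random((8, 2))})
    print(f"Step={i}, Loss={_loss}")

Gating the hook behind a single boolean keeps one script usable both for explicit script-mode debugging and for ZCC runs, which is exactly what the conditional hooks list does in mnist.py and simple.py above.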