# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Demo of tfdbg v2: locating the source of bad numerical values in TF2.

This demo contains a classic example of a neural network for the MNIST
dataset, but modifications are made so that problematic numerical values (infs
and nans) appear in nodes of the graph during training.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

from absl import app
import tensorflow.compat.v2 as tf

IMAGE_SIZE = 28
HIDDEN_SIZE = 500
NUM_LABELS = 10

# If we set the weights randomly, the model will converge normally about half
# the time. We need a fixed seed to ensure that the bad numerical values
# reliably appear.
RAND_SEED = 42

tf.compat.v1.enable_v2_behavior()

FLAGS = None


def parse_args():
  """Parses command-line arguments.

  Returns:
    A tuple (parsed, unparsed) of the parsed argument object and a list of
    unparsed arguments that did not match the parser.
  """
  parser = argparse.ArgumentParser()
  parser.register("type", "bool", lambda v: v.lower() == "true")
  parser.add_argument(
      "--max_steps",
      type=int,
      default=10,
      help="Number of steps to run the trainer.")
  parser.add_argument(
      "--train_batch_size",
      type=int,
      default=100,
      help="Batch size used during training.")
  parser.add_argument(
      "--learning_rate",
      type=float,
      default=0.025,
      help="Initial learning rate.")
  parser.add_argument(
      "--data_dir",
      type=str,
      default="/tmp/mnist_data",
      help="Directory for storing data.")
  parser.add_argument(
      "--fake_data",
      type="bool",
      nargs="?",
      const=True,
      default=False,
      help="Use fake MNIST data for unit testing.")
  parser.add_argument(
      "--check_numerics",
      type="bool",
      nargs="?",
      const=True,
      default=False,
      help="Use tfdbg to track down bad values during training. "
      "Mutually exclusive with the --dump_dir flag.")
  parser.add_argument(
      "--dump_dir",
      type=str,
      default=None,
      help="Dump TensorFlow program debug data to the specified directory. "
      "The dumped data contains information regarding tf.function building, "
      "execution of ops and tf.functions, as well as their stack traces and "
      "associated source-code snapshots. "
      "Mutually exclusive with the --check_numerics flag.")
  parser.add_argument(
      "--dump_tensor_debug_mode",
      type=str,
      default="FULL_HEALTH",
      help="Mode for dumping tensor values. Options: NO_TENSOR, CURT_HEALTH, "
      "CONCISE_HEALTH, SHAPE, FULL_HEALTH. This is relevant only when "
      "--dump_dir is set.")
  # TODO(cais): Add more tensor debug mode strings once they are supported.
  parser.add_argument(
      "--dump_circular_buffer_size",
      type=int,
      default=-1,
      help="Size of the circular buffer used to dump execution events. "
      "A value <= 0 disables the circular-buffer behavior and causes "
      "all instrumented tensor values to be dumped. "
      "This is relevant only when --dump_dir is set.")
  parser.add_argument(
      "--use_random_config_path",
      type="bool",
      nargs="?",
      const=True,
      default=False,
      help="If set, use a random file in the temporary directory as the "
      "config file path.")
  return parser.parse_known_args()
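

# Example invocations of this demo (sketches only; `debug_mnist_v2.py` is an
# assumed file name for this script, so substitute the actual path):
#
#   # Raise an informative error as soon as the first inf/nan appears:
#   python debug_mnist_v2.py --check_numerics
#
#   # Or dump instrumented debug data (op executions, stack traces, and
#   # source snapshots) for offline inspection:
#   python debug_mnist_v2.py --dump_dir /tmp/tfdbg2_logdir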


def main(_):
  if FLAGS.check_numerics and FLAGS.dump_dir:
    raise ValueError(
        "The --check_numerics and --dump_dir flags are mutually "
        "exclusive.")
  if FLAGS.check_numerics:
    tf.debugging.enable_check_numerics()
  elif FLAGS.dump_dir:
    tf.debugging.experimental.enable_dump_debug_info(
        FLAGS.dump_dir,
        tensor_debug_mode=FLAGS.dump_tensor_debug_mode,
        circular_buffer_size=FLAGS.dump_circular_buffer_size)

  # Import data.
  if FLAGS.fake_data:
    imgs = tf.random.uniform(
        maxval=256, shape=(1000, IMAGE_SIZE, IMAGE_SIZE), dtype=tf.int32)
    labels = tf.random.uniform(
        maxval=NUM_LABELS, shape=(1000,), dtype=tf.int32)
    mnist_train = imgs, labels
    mnist_test = imgs, labels
  else:
    mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()

  @tf.function
  def format_example(imgs, labels):
    """Formats each training and test example to work with our model."""
    imgs = tf.reshape(imgs, [-1, IMAGE_SIZE * IMAGE_SIZE])
    imgs = tf.cast(imgs, tf.float32) / 255.0
    labels = tf.one_hot(labels, depth=NUM_LABELS, dtype=tf.float32)
    return imgs, labels

  train_ds = tf.data.Dataset.from_tensor_slices(mnist_train).shuffle(
      FLAGS.train_batch_size * FLAGS.max_steps,
      seed=RAND_SEED).batch(FLAGS.train_batch_size)
  train_ds = train_ds.map(format_example)

  test_ds = tf.data.Dataset.from_tensor_slices(mnist_test).repeat().batch(
      len(mnist_test[0]))
  test_ds = test_ds.map(format_example)

  def get_dense_weights(input_dim, output_dim):
    """Initializes the parameters for a single dense layer."""
    initial_kernel = tf.keras.initializers.TruncatedNormal(
        mean=0.0, stddev=0.1, seed=RAND_SEED)
    kernel = tf.Variable(initial_kernel([input_dim, output_dim]))
    bias = tf.Variable(tf.constant(0.1, shape=[output_dim]))

    return kernel, bias

  @tf.function
  def dense_layer(weights, input_tensor, act=tf.nn.relu):
    """Runs the forward computation for a single dense layer."""
    kernel, bias = weights
    preactivate = tf.matmul(input_tensor, kernel) + bias

    activations = act(preactivate)
    return activations

  # Initialize the model's weights: one hidden layer and one output layer.
  hidden_weights = get_dense_weights(IMAGE_SIZE**2, HIDDEN_SIZE)
  output_weights = get_dense_weights(HIDDEN_SIZE, NUM_LABELS)
  variables = hidden_weights + output_weights

  @tf.function
  def model(x):
    """Feed-forward function of the model.

    Args:
      x: a (?, 28*28) tensor consisting of the feature inputs for a batch of
        examples.

    Returns:
      A (?, 10) tensor containing the class scores for each example.
    """
    hidden_act = dense_layer(hidden_weights, x)
    logits_act = dense_layer(output_weights, hidden_act, tf.identity)
    y = tf.nn.softmax(logits_act)
    return y
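
  # Note on numerical stability: `model` above applies a softmax and `loss`
  # below takes its log. When the softmax saturates, log(0) yields -inf and
  # 0 * -inf yields nan; these are exactly the bad values that the
  # --check_numerics and --dump_dir flags are meant to expose. A stable
  # alternative (a sketch only, not used by this demo; `logits` would be the
  # pre-softmax output of the final dense layer) fuses the two steps:
  #
  #   loss_val = tf.reduce_mean(
  #       tf.nn.softmax_cross_entropy_with_logits(
  #           labels=labels, logits=logits))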

  @tf.function
  def loss(probs, labels):
    """Calculates cross entropy loss.

    Args:
      probs: Class probabilities predicted by the model. The shape is expected
        to be (?, 10).
      labels: Truth labels for the classes, as one-hot encoded vectors. The
        shape is expected to be the same as `probs`.

    Returns:
      A scalar loss tensor.
    """
    # Taking the log of softmax probabilities (rather than computing the
    # cross entropy directly from the logits) is the deliberately unstable
    # step that produces the infs and nans this demo tracks down.
    diff = -labels * tf.math.log(probs)
    loss = tf.reduce_mean(diff)
    return loss

  train_batches = iter(train_ds)
  test_batches = iter(test_ds)
  optimizer = tf.optimizers.Adam(learning_rate=FLAGS.learning_rate)
  for i in range(FLAGS.max_steps):
    x_train, y_train = next(train_batches)
    x_test, y_test = next(test_batches)

    # Train step.
    with tf.GradientTape() as tape:
      y = model(x_train)
      loss_val = loss(y, y_train)
    grads = tape.gradient(loss_val, variables)

    optimizer.apply_gradients(zip(grads, variables))

    # Evaluation step.
    y = model(x_test)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_test, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy at step %d: %s" % (i, accuracy.numpy()))


if __name__ == "__main__":
  FLAGS, unparsed = parse_args()
  app.run(main=main, argv=[sys.argv[0]] + unparsed)
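
# After a run with --dump_dir, the dumped debug data can be loaded and
# visualized with the Debugger V2 plugin of TensorBoard (the logdir path
# here is just the example used above):
#
#   tensorboard --logdir /tmp/tfdbg2_logdir
#
# and then select "Debugger V2" from TensorBoard's plugin menu.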