# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Demo of the tfdbg curses CLI: Locating the source of bad numerical values with TF v2.
16
17This demo contains a classical example of a neural network for the mnist
18dataset, but modifications are made so that problematic numerical values (infs
19and nans) appear in nodes of the graph during training.
20"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

import absl
import tensorflow.compat.v2 as tf

IMAGE_SIZE = 28
HIDDEN_SIZE = 500
NUM_LABELS = 10

# If the weights are initialized randomly, the model converges normally about
# half the time. A fixed seed ensures that the bad numerical values reliably
# appear.
RAND_SEED = 42

tf.compat.v1.enable_v2_behavior()

FLAGS = None


def parse_args():
  """Parses command-line arguments.

  Returns:
    A tuple (parsed, unparsed) of the parsed object and a group of unparsed
      arguments that did not match the parser.
  """
  parser = argparse.ArgumentParser()
  parser.register("type", "bool", lambda v: v.lower() == "true")
  parser.add_argument(
      "--max_steps",
      type=int,
      default=10,
      help="Number of steps to run trainer.")
  parser.add_argument(
      "--train_batch_size",
      type=int,
      default=100,
      help="Batch size used during training.")
  parser.add_argument(
      "--learning_rate",
      type=float,
      default=0.025,
      help="Initial learning rate.")
  parser.add_argument(
      "--data_dir",
      type=str,
      default="/tmp/mnist_data",
      help="Directory for storing data.")
  parser.add_argument(
      "--fake_data",
      type="bool",
      nargs="?",
      const=True,
      default=False,
      help="Use fake MNIST data for unit testing.")
  parser.add_argument(
      "--check_numerics",
      type="bool",
      nargs="?",
      const=True,
      default=False,
      help="Use tfdbg to track down bad values during training. "
      "Mutually exclusive with the --dump_dir flag.")
  parser.add_argument(
      "--dump_dir",
      type=str,
      default=None,
      help="Dump TensorFlow program debug data to the specified directory. "
      "The dumped data contains information regarding tf.function building, "
      "execution of ops and tf.functions, as well as their stack traces and "
      "associated source-code snapshots. "
      "Mutually exclusive with the --check_numerics flag.")
  parser.add_argument(
      "--dump_tensor_debug_mode",
      type=str,
      default="FULL_HEALTH",
      help="Mode for dumping tensor values. Options: NO_TENSOR, CURT_HEALTH, "
      "CONCISE_HEALTH, SHAPE, FULL_HEALTH. This is relevant only when "
      "--dump_dir is set.")
  # TODO(cais): Add more tensor debug mode strings once they are supported.
  parser.add_argument(
      "--dump_circular_buffer_size",
      type=int,
      default=-1,
      help="Size of the circular buffer used to dump execution events. "
      "A value <= 0 disables the circular-buffer behavior and causes "
      "all instrumented tensor values to be dumped. "
      "This is relevant only when --dump_dir is set.")
  parser.add_argument(
      "--use_random_config_path",
      type="bool",
      nargs="?",
      const=True,
      default=False,
      help="If set, sets the config file path to a random file in the "
      "temporary directory.")
  return parser.parse_known_args()


def main(_):
  if FLAGS.check_numerics and FLAGS.dump_dir:
    raise ValueError(
        "The --check_numerics and --dump_dir flags are mutually "
        "exclusive.")
  if FLAGS.check_numerics:
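    # Once check-numerics is enabled, the first op that produces an inf or
    # nan raises an error that names the op and includes the stack trace of
    # its creation, which is how this demo localizes the bad values.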
    tf.debugging.enable_check_numerics()
  elif FLAGS.dump_dir:
    tf.debugging.experimental.enable_dump_debug_info(
        FLAGS.dump_dir,
        tensor_debug_mode=FLAGS.dump_tensor_debug_mode,
        circular_buffer_size=FLAGS.dump_circular_buffer_size)
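    # The dump written above can later be inspected offline, e.g. with the
    # TensorBoard Debugger V2 plugin (assuming a TensorBoard version that
    # ships the plugin): `tensorboard --logdir <dump_dir>`.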

  # Import data.
  if FLAGS.fake_data:
    imgs = tf.random.uniform(maxval=256, shape=(1000, 28, 28), dtype=tf.int32)
    labels = tf.random.uniform(maxval=10, shape=(1000,), dtype=tf.int32)
    mnist_train = imgs, labels
    mnist_test = imgs, labels
  else:
    mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()

  @tf.function
  def format_example(imgs, labels):
    """Formats each training and test example to work with our model."""
    # Flatten the 28x28 images into 784-element vectors and scale the pixel
    # values from [0, 255] to [0.0, 1.0].
    imgs = tf.reshape(imgs, [-1, 28 * 28])
    imgs = tf.cast(imgs, tf.float32) / 255.0
    # Convert integer class labels into one-hot vectors of length 10.
    labels = tf.one_hot(labels, depth=10, dtype=tf.float32)
    return imgs, labels

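  # The shuffle buffer is sized to cover every example the training loop will
  # draw (train_batch_size * max_steps), and the fixed seed makes the batch
  # order reproducible from run to run.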
  train_ds = tf.data.Dataset.from_tensor_slices(mnist_train).shuffle(
      FLAGS.train_batch_size * FLAGS.max_steps,
      seed=RAND_SEED).batch(FLAGS.train_batch_size)
  train_ds = train_ds.map(format_example)

  test_ds = tf.data.Dataset.from_tensor_slices(mnist_test).repeat().batch(
      len(mnist_test[0]))
  test_ds = test_ds.map(format_example)

  def get_dense_weights(input_dim, output_dim):
    """Initializes the parameters for a single dense layer."""
    initial_kernel = tf.keras.initializers.TruncatedNormal(
        mean=0.0, stddev=0.1, seed=RAND_SEED)
    kernel = tf.Variable(initial_kernel([input_dim, output_dim]))
    bias = tf.Variable(tf.constant(0.1, shape=[output_dim]))

    return kernel, bias

  @tf.function
  def dense_layer(weights, input_tensor, act=tf.nn.relu):
    """Runs the forward computation for a single dense layer."""
    kernel, bias = weights
    preactivate = tf.matmul(input_tensor, kernel) + bias

    activations = act(preactivate)
    return activations

  # Initialize the model parameters.
  hidden_weights = get_dense_weights(IMAGE_SIZE**2, HIDDEN_SIZE)
  output_weights = get_dense_weights(HIDDEN_SIZE, NUM_LABELS)
  variables = hidden_weights + output_weights

  @tf.function
  def model(x):
    """Feed-forward function of the model.

    Args:
      x: A (?, 28*28) tensor consisting of the feature inputs for a batch of
        examples.

    Returns:
      A (?, 10) tensor containing the class scores for each example.
    """
    hidden_act = dense_layer(hidden_weights, x)
    logits_act = dense_layer(output_weights, hidden_act, tf.identity)
    y = tf.nn.softmax(logits_act)
    return y

  @tf.function
  def loss(probs, labels):
    """Calculates cross-entropy loss.

    Args:
      probs: Class probabilities predicted by the model. The shape is expected
        to be (?, 10).
      labels: Truth labels for the classes, as one-hot encoded vectors. The
        shape is expected to be the same as `probs`.

    Returns:
      A scalar loss tensor.
    """
    diff = -labels * tf.math.log(probs)
    loss = tf.reduce_mean(diff)
    return loss

  train_batches = iter(train_ds)
  test_batches = iter(test_ds)
  optimizer = tf.optimizers.Adam(learning_rate=FLAGS.learning_rate)
  for i in range(FLAGS.max_steps):
    x_train, y_train = next(train_batches)
    x_test, y_test = next(test_batches)

    # Train step.
    with tf.GradientTape() as tape:
      y = model(x_train)
      loss_val = loss(y, y_train)
    grads = tape.gradient(loss_val, variables)

    optimizer.apply_gradients(zip(grads, variables))

    # Evaluation step.
    y = model(x_test)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_test, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy at step %d: %s" % (i, accuracy.numpy()))


if __name__ == "__main__":
  FLAGS, unparsed = parse_args()
  absl.app.run(main=main, argv=[sys.argv[0]] + unparsed)