• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for tfdbg module debug_data."""
16import os
17import platform
18import tempfile
19
20import numpy as np
21
22from tensorflow.core.framework import graph_pb2
23from tensorflow.core.framework import tensor_pb2
24from tensorflow.python.debug.lib import debug_data
25from tensorflow.python.framework import test_util
26from tensorflow.python.lib.io import file_io
27from tensorflow.python.platform import gfile
28from tensorflow.python.platform import googletest
29from tensorflow.python.platform import test
30
31
class DeviceNamePathConversionTest(test_util.TensorFlowTestCase):
  """Exercises the device-name <-> device-path helpers in debug_data."""

  def testDeviceNameToDevicePath(self):
    """A device name converts to its prefixed, tagged path form."""
    device_name = "/job:ps/replica:1/task:2/cpu:0"
    expected_path = (
        debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
        ",job_ps,replica_1,task_2,cpu_0")

    actual_path = debug_data.device_name_to_device_path(device_name)

    self.assertEqual(expected_path, actual_path)

  def testDevicePathToDeviceName(self):
    """A prefixed, tagged device path converts back to the device name."""
    expected_name = "/job:ps/replica:1/task:2/cpu:0"
    device_path = (
        debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
        ",job_ps,replica_1,task_2,cpu_0")

    actual_name = debug_data.device_path_to_device_name(device_path)

    self.assertEqual(expected_name, actual_name)
46
47
class HasNanOrInfTest(test_util.TensorFlowTestCase):
  """Tests debug_data.has_inf_or_nan on a variety of array contents/dtypes."""

  def setUp(self):
    # has_inf_or_nan takes a datum as its first argument; every test below
    # passes this same placeholder datum together with the array under test.
    self._dummy_datum = debug_data.DebugTensorDatum(
        "/foo", "bar_0_DebugIdentity_42")

  def testNaN(self):
    """An array containing only nan (plus a finite value) is flagged."""
    a = np.array([np.nan, np.nan, 7.0])
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testInf(self):
    """An array containing only inf (plus a finite value) is flagged."""
    a = np.array([np.inf, np.inf, 7.0])
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testNanAndInf(self):
    """An array containing both inf and nan is flagged."""
    a = np.array([np.inf, np.nan, 7.0])
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testNoNanOrInf(self):
    """An all-finite array is not flagged."""
    a = np.array([0.0, 0.0, 7.0])
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testEmpty(self):
    """An empty array is not flagged."""
    a = np.array([])
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testInconvertibleTensorProto(self):
    """InconvertibleTensorProto values are not flagged, initialized or not."""
    for initialized in (False, True):
      self.assertFalse(debug_data.has_inf_or_nan(
          self._dummy_datum,
          debug_data.InconvertibleTensorProto(tensor_pb2.TensorProto(),
                                              initialized=initialized)))

  def testDTypeComplexWorks(self):
    """Complex arrays are handled: flagged only when a nan is present."""
    a = np.array([1j, 3j, 3j, 7j], dtype=np.complex128)
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

    b = np.array([1j, 3j, 3j, 7j, np.nan], dtype=np.complex128)
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, b))

  def testDTypeIntegerWorks(self):
    """Integer arrays are accepted and not flagged."""
    a = np.array([1, 3, 3, 7], dtype=np.int16)
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testDTypeStringGivesFalse(self):
    """isnan and isinf are not applicable to strings."""

    a = np.array(["s", "p", "a", "m"])
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testDTypeObjectGivesFalse(self):
    """A structured (record) dtype array is not flagged."""
    # The dtype mixes a fixed-width string field with a 2-element float field.
    dt = np.dtype([("spam", np.str_, 16), ("eggs", np.float64, (2,))])
    a = np.array([("spam", (8.0, 7.0)), ("eggs", (6.0, 5.0))], dtype=dt)
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))
105
106
class DebugTensorDatumTest(test_util.TensorFlowTestCase):
  """Tests the fields and string forms exposed by DebugTensorDatum."""

  def testDebugDatum(self):
    """Checks each datum field derived from a well-formed relative path."""
    root_dir = "/tmp/tfdbg_1"
    device_dir_name = (
        debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
        ",job_localhost,replica_0,task_0,cpu_0")
    rel_path = (
        device_dir_name +
        "/ns1/ns2/node_a_1_2_DebugIdentity_1472563253536385")

    datum = debug_data.DebugTensorDatum(root_dir, rel_path)

    self.assertEqual("DebugIdentity", datum.debug_op)
    self.assertEqual("ns1/ns2/node_a_1", datum.node_name)
    self.assertEqual(2, datum.output_slot)
    self.assertEqual("ns1/ns2/node_a_1:2", datum.tensor_name)
    self.assertEqual(1472563253536385, datum.timestamp)
    self.assertEqual("ns1/ns2/node_a_1:2:DebugIdentity", datum.watch_key)
    self.assertEqual(os.path.join(root_dir, rel_path), datum.file_path)

    # str() and repr() are both expected to yield this same description.
    description_format = (
        "{DebugTensorDatum (/job:localhost/replica:0/task:0/cpu:0) "
        "%s:%d @ %s @ %d}")
    expected_description = description_format % (
        datum.node_name, datum.output_slot, datum.debug_op, datum.timestamp)
    self.assertEqual(expected_description, str(datum))
    self.assertEqual(expected_description, repr(datum))

  def testDumpSizeBytesIsNoneForNonexistentFilePath(self):
    """dump_size_bytes is None when the datum's file is not on disk."""
    root_dir = "/tmp/tfdbg_1"
    rel_path = "ns1/ns2/node_foo_1_2_DebugIdentity_1472563253536385"

    datum = debug_data.DebugTensorDatum(root_dir, rel_path)

    self.assertIsNone(datum.dump_size_bytes)
145
146
class DebugDumpDirTest(test_util.TensorFlowTestCase):
  """Tests DebugDumpDir against dump directories built on local disk."""

  def setUp(self):
    # Each test gets its own freshly created dump root.
    self._dump_root = tempfile.mkdtemp()

  def tearDown(self):
    # Tear down temporary dump directory.
    file_io.delete_recursively(self._dump_root)

  def _makeDeviceDir(self, device_path_suffix):
    """Creates a device directory under the dump root.

    Args:
      device_path_suffix: Path-encoded device name including its leading
        comma, e.g. ",job_localhost,replica_0,task_0,cpu_0".

    Returns:
      Path of the newly created device directory.
    """
    device_dir = os.path.join(
        self._dump_root,
        debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
        device_path_suffix)
    os.makedirs(device_dir)
    return device_dir

  def _touchDumpFile(self, device_dir, file_name):
    """Creates an empty file in `device_dir`, closing the handle at once."""
    with open(os.path.join(device_dir, file_name), "wb"):
      pass

  def _makeFooGraphDef(self, device_name, num_nodes=1):
    """Builds a GraphDef of identical "node_foo_1" nodes on one device.

    Args:
      device_name: Value assigned to the `device` field of every node.
      num_nodes: How many identically named nodes to add; values above 1
        produce duplicate node names within the graph.

    Returns:
      The populated graph_pb2.GraphDef.
    """
    graph_def = graph_pb2.GraphDef()
    for _ in range(num_nodes):
      node = graph_def.node.add()
      node.name = "node_foo_1"
      node.op = "FooOp"
      node.device = device_name
    return graph_def

  def _makeDataDirWithMultipleDevicesAndDuplicateNodeNames(self):
    """Writes one empty dump file per device (CPU 0, GPU 0, GPU 1).

    All three files use the node name "node_foo_1" and differ only in the
    device directory and the trailing timestamp.
    """
    cpu_0_dir = self._makeDeviceDir(",job_localhost,replica_0,task_0,cpu_0")
    gpu_0_dir = self._makeDeviceDir(
        ",job_localhost,replica_0,task_0,device_GPU_0")
    gpu_1_dir = self._makeDeviceDir(
        ",job_localhost,replica_0,task_0,device_GPU_1")
    self._touchDumpFile(
        cpu_0_dir, "node_foo_1_2_DebugIdentity_1472563253536386")
    self._touchDumpFile(
        gpu_0_dir, "node_foo_1_2_DebugIdentity_1472563253536385")
    self._touchDumpFile(
        gpu_1_dir, "node_foo_1_2_DebugIdentity_1472563253536387")

  def testDebugDumpDir_nonexistentDumpRoot(self):
    """Loading a dump root that does not exist raises IOError."""
    # Use a path inside the (empty) per-test dump root rather than calling
    # mkdtemp() again, which would leave an extra directory behind.
    missing_root = os.path.join(self._dump_root, "nonexistent_foo")
    with self.assertRaisesRegex(IOError, "does not exist"):
      debug_data.DebugDumpDir(missing_root)

  def testDebugDumpDir_invalidFileNamingPattern(self):
    """A malformed dump file name makes loading raise ValueError."""
    # File name with too few underscores should lead to an exception.
    device_dir = self._makeDeviceDir(",job_localhost,replica_0,task_0,cpu_0")
    self._touchDumpFile(device_dir, "node1_DebugIdentity_1234")

    with self.assertRaisesRegex(ValueError,
                                "does not conform to the naming pattern"):
      debug_data.DebugDumpDir(self._dump_root)

  def testDebugDumpDir_validDuplicateNodeNamesWithMultipleDevices(self):
    """The same node name on different devices loads without error."""
    self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()

    graph_cpu_0 = self._makeFooGraphDef(
        "/job:localhost/replica:0/task:0/cpu:0")
    graph_gpu_0 = self._makeFooGraphDef(
        "/job:localhost/replica:0/task:0/device:GPU:0")
    graph_gpu_1 = self._makeFooGraphDef(
        "/job:localhost/replica:0/task:0/device:GPU:1")

    dump_dir = debug_data.DebugDumpDir(
        self._dump_root,
        partition_graphs=[graph_cpu_0, graph_gpu_0, graph_gpu_1])

    self.assertItemsEqual(
        ["/job:localhost/replica:0/task:0/cpu:0",
         "/job:localhost/replica:0/task:0/device:GPU:0",
         "/job:localhost/replica:0/task:0/device:GPU:1"], dump_dir.devices())
    # t0 is expected to equal the smallest timestamp among the dump files.
    self.assertEqual(1472563253536385, dump_dir.t0)
    self.assertEqual(3, dump_dir.size)

    with self.assertRaisesRegex(ValueError, r"Invalid device name: "):
      dump_dir.nodes("/job:localhost/replica:0/task:0/device:GPU:2")
    self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"],
                          dump_dir.nodes())
    self.assertItemsEqual(
        ["node_foo_1"],
        dump_dir.nodes(device_name="/job:localhost/replica:0/task:0/cpu:0"))

  def testDuplicateNodeNamesInGraphDefOfSingleDeviceRaisesException(self):
    """A node name repeated within one device's graph raises ValueError."""
    self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()
    graph_cpu_0 = self._makeFooGraphDef(
        "/job:localhost/replica:0/task:0/cpu:0")
    graph_gpu_0 = self._makeFooGraphDef(
        "/job:localhost/replica:0/task:0/device:GPU:0")
    # Here is the duplicate: two "node_foo_1" nodes on the same device.
    graph_gpu_1 = self._makeFooGraphDef(
        "/job:localhost/replica:0/task:0/device:GPU:1", num_nodes=2)

    with self.assertRaisesRegex(ValueError, r"Duplicate node name on device "):
      debug_data.DebugDumpDir(
          self._dump_root,
          partition_graphs=[graph_cpu_0, graph_gpu_0, graph_gpu_1])

  def testDebugDumpDir_emptyDumpDir(self):
    """An empty dump root loads with no t0 and no dumped tensor data."""
    dump_dir = debug_data.DebugDumpDir(self._dump_root)

    self.assertIsNone(dump_dir.t0)
    self.assertEqual([], dump_dir.dumped_tensor_data)

  def testDebugDumpDir_usesGfileGlob(self):
    """Loading calls gfile.Glob with each metadata file pattern."""
    if platform.system() == "Windows":
      self.skipTest("gfile.Glob is not used on Windows.")

    self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()

    def fake_gfile_glob(glob_pattern):
      del glob_pattern
      return []

    with test.mock.patch.object(
        gfile, "Glob", side_effect=fake_gfile_glob, autospec=True) as fake:
      debug_data.DebugDumpDir(self._dump_root)
      # One glob pattern is expected per metadata tag, rooted at the dump dir.
      metadata_tags = (
          debug_data.CORE_METADATA_TAG,
          debug_data.FETCHES_INFO_FILE_TAG,
          debug_data.FEED_KEYS_INFO_FILE_TAG,
          debug_data.DEVICE_TAG,
      )
      expected_calls = [
          test.mock.call(os.path.join(
              self._dump_root,
              debug_data.METADATA_FILE_PREFIX + tag + "*"))
          for tag in metadata_tags]
      fake.assert_has_calls(expected_calls, any_order=True)
298
299
# Hand off to the googletest runner when this file is executed as a script.
if __name__ == "__main__":
  googletest.main()
302