# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tfdbg module debug_data."""
import os
import platform
import tempfile

import numpy as np

from tensorflow.core.framework import graph_pb2
from tensorflow.core.framework import tensor_pb2
from tensorflow.python.debug.lib import debug_data
from tensorflow.python.framework import test_util
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import gfile
from tensorflow.python.platform import googletest
from tensorflow.python.platform import test


# Common prefix of every per-device dump directory name used in these tests.
_DEVICE_PATH_PREFIX = debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG


def _create_empty_file(path):
  """Creates (or truncates) an empty file at `path` and closes it right away.

  The tests only need the file to exist under a given name; using a `with`
  block makes sure the file handle is not leaked.

  Args:
    path: Path of the file to create.
  """
  with open(path, "wb"):
    pass


class DeviceNamePathConversionTest(test_util.TensorFlowTestCase):
  """Tests conversion between device names and device directory names."""

  def testDeviceNameToDevicePath(self):
    """A device name maps to the expected comma-separated path item."""
    self.assertEqual(
        _DEVICE_PATH_PREFIX + ",job_ps,replica_1,task_2,cpu_0",
        debug_data.device_name_to_device_path("/job:ps/replica:1/task:2/cpu:0"))

  def testDevicePathToDeviceName(self):
    """A device path item maps back to the original device name."""
    self.assertEqual(
        "/job:ps/replica:1/task:2/cpu:0",
        debug_data.device_path_to_device_name(
            _DEVICE_PATH_PREFIX + ",job_ps,replica_1,task_2,cpu_0"))


class HasNanOrInfTest(test_util.TensorFlowTestCase):
  """Tests debug_data.has_inf_or_nan on arrays of various dtypes."""

  def setUp(self):
    # has_inf_or_nan takes a datum as its first argument; these tests pass the
    # same placeholder datum every time and vary only the tensor value.
    self._dummy_datum = debug_data.DebugTensorDatum(
        "/foo", "bar_0_DebugIdentity_42")

  def testNaN(self):
    a = np.array([np.nan, np.nan, 7.0])
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testInf(self):
    a = np.array([np.inf, np.inf, 7.0])
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testNanAndInf(self):
    a = np.array([np.inf, np.nan, 7.0])
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testNoNanOrInf(self):
    a = np.array([0.0, 0.0, 7.0])
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testEmpty(self):
    a = np.array([])
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testInconvertibleTensorProto(self):
    """Inconvertible protos give False, whether initialized or not."""
    for initialized in (False, True):
      self.assertFalse(debug_data.has_inf_or_nan(
          self._dummy_datum,
          debug_data.InconvertibleTensorProto(tensor_pb2.TensorProto(),
                                              initialized=initialized)))

  def testDTypeComplexWorks(self):
    a = np.array([1j, 3j, 3j, 7j], dtype=np.complex128)
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

    b = np.array([1j, 3j, 3j, 7j, np.nan], dtype=np.complex128)
    self.assertTrue(debug_data.has_inf_or_nan(self._dummy_datum, b))

  def testDTypeIntegerWorks(self):
    a = np.array([1, 3, 3, 7], dtype=np.int16)
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testDTypeStringGivesFalse(self):
    """isnan and isinf are not applicable to strings."""

    a = np.array(["s", "p", "a", "m"])
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))

  def testDTypeObjectGivesFalse(self):
    dt = np.dtype([("spam", np.str_, 16), ("eggs", np.float64, (2,))])
    a = np.array([("spam", (8.0, 7.0)), ("eggs", (6.0, 5.0))], dtype=dt)
    self.assertFalse(debug_data.has_inf_or_nan(self._dummy_datum, a))


class DebugTensorDatumTest(test_util.TensorFlowTestCase):
  """Tests the properties DebugTensorDatum derives from a dump file path."""

  def testDebugDatum(self):
    """Fields parsed from the dump path match the components of the path."""
    dump_root = "/tmp/tfdbg_1"
    debug_dump_rel_path = (
        _DEVICE_PATH_PREFIX +
        ",job_localhost,replica_0,task_0,cpu_0" +
        "/ns1/ns2/node_a_1_2_DebugIdentity_1472563253536385")

    datum = debug_data.DebugTensorDatum(dump_root, debug_dump_rel_path)

    self.assertEqual("DebugIdentity", datum.debug_op)
    self.assertEqual("ns1/ns2/node_a_1", datum.node_name)
    self.assertEqual(2, datum.output_slot)
    self.assertEqual("ns1/ns2/node_a_1:2", datum.tensor_name)
    self.assertEqual(1472563253536385, datum.timestamp)
    self.assertEqual("ns1/ns2/node_a_1:2:DebugIdentity", datum.watch_key)
    self.assertEqual(
        os.path.join(dump_root, debug_dump_rel_path), datum.file_path)

    # str() and repr() are expected to produce the same text.
    expected_text = (
        "{DebugTensorDatum (/job:localhost/replica:0/task:0/cpu:0) "
        "%s:%d @ %s @ %d}" % (datum.node_name,
                              datum.output_slot,
                              datum.debug_op,
                              datum.timestamp))
    self.assertEqual(expected_text, str(datum))
    self.assertEqual(expected_text, repr(datum))

  def testDumpSizeBytesIsNoneForNonexistentFilePath(self):
    dump_root = "/tmp/tfdbg_1"
    debug_dump_rel_path = "ns1/ns2/node_foo_1_2_DebugIdentity_1472563253536385"
    datum = debug_data.DebugTensorDatum(dump_root, debug_dump_rel_path)

    self.assertIsNone(datum.dump_size_bytes)


class DebugDumpDirTest(test_util.TensorFlowTestCase):
  """Tests DebugDumpDir against dump directories built on the local disk."""

  # Device names used by the multi-device fixtures below.
  _CPU_0 = "/job:localhost/replica:0/task:0/cpu:0"
  _GPU_0 = "/job:localhost/replica:0/task:0/device:GPU:0"
  _GPU_1 = "/job:localhost/replica:0/task:0/device:GPU:1"

  def setUp(self):
    self._dump_root = tempfile.mkdtemp()

  def tearDown(self):
    # Tear down temporary dump directory.
    file_io.delete_recursively(self._dump_root)

  def _makeDataDirWithMultipleDevicesAndDuplicateNodeNames(self):
    """Populates the dump root with one dump file on each of three devices.

    Every file is a dump of slot 2 of a node called "node_foo_1"; only the
    device directory and the timestamp differ. The GPU:0 file carries the
    smallest timestamp.
    """
    dumps = (
        (",job_localhost,replica_0,task_0,cpu_0",
         "node_foo_1_2_DebugIdentity_1472563253536386"),
        (",job_localhost,replica_0,task_0,device_GPU_0",
         "node_foo_1_2_DebugIdentity_1472563253536385"),
        (",job_localhost,replica_0,task_0,device_GPU_1",
         "node_foo_1_2_DebugIdentity_1472563253536387"),
    )
    for device_suffix, dump_file_name in dumps:
      device_dir = os.path.join(
          self._dump_root, _DEVICE_PATH_PREFIX + device_suffix)
      os.makedirs(device_dir)
      _create_empty_file(os.path.join(device_dir, dump_file_name))

  def _addFooNode(self, graph_def, device_name):
    """Appends a "FooOp" node named "node_foo_1" on `device_name`."""
    node = graph_def.node.add()
    node.name = "node_foo_1"
    node.op = "FooOp"
    node.device = device_name

  def _makePartitionGraphs(self):
    """Builds one single-node GraphDef per device.

    Returns:
      A list of three GraphDefs, for CPU:0, GPU:0 and GPU:1 in that order,
      each holding one "node_foo_1" node placed on the respective device.
    """
    partition_graphs = []
    for device_name in (self._CPU_0, self._GPU_0, self._GPU_1):
      graph_def = graph_pb2.GraphDef()
      self._addFooNode(graph_def, device_name)
      partition_graphs.append(graph_def)
    return partition_graphs

  def testDebugDumpDir_nonexistentDumpRoot(self):
    # A path inside the freshly created (empty) dump root cannot exist, and it
    # gets cleaned up by tearDown() together with the dump root itself.
    nonexistent_root = os.path.join(self._dump_root, "nonexistent_foo")
    with self.assertRaisesRegex(IOError, "does not exist"):
      debug_data.DebugDumpDir(nonexistent_root)

  def testDebugDumpDir_invalidFileNamingPattern(self):
    # File name with too few underscores should lead to an exception.
    device_dir = os.path.join(
        self._dump_root,
        _DEVICE_PATH_PREFIX + ",job_localhost,replica_0,task_0,cpu_0")
    os.makedirs(device_dir)
    _create_empty_file(os.path.join(device_dir, "node1_DebugIdentity_1234"))

    with self.assertRaisesRegex(ValueError,
                                "does not conform to the naming pattern"):
      debug_data.DebugDumpDir(self._dump_root)

  def testDebugDumpDir_validDuplicateNodeNamesWithMultipleDevices(self):
    """The same node name on different devices is accepted."""
    self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()

    dump_dir = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=self._makePartitionGraphs())

    self.assertItemsEqual(
        [self._CPU_0, self._GPU_0, self._GPU_1], dump_dir.devices())
    self.assertEqual(1472563253536385, dump_dir.t0)
    self.assertEqual(3, dump_dir.size)

    with self.assertRaisesRegex(ValueError, r"Invalid device name: "):
      dump_dir.nodes("/job:localhost/replica:0/task:0/device:GPU:2")
    self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"],
                          dump_dir.nodes())
    self.assertItemsEqual(
        ["node_foo_1"],
        dump_dir.nodes(device_name=self._CPU_0))

  def testDuplicateNodeNamesInGraphDefOfSingleDeviceRaisesException(self):
    """The same node name twice within one device's graph is rejected."""
    self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()

    partition_graphs = self._makePartitionGraphs()
    # Here is the duplicate: a second "node_foo_1" in the GPU:1 graph.
    self._addFooNode(partition_graphs[-1], self._GPU_1)

    with self.assertRaisesRegex(ValueError, r"Duplicate node name on device "):
      debug_data.DebugDumpDir(
          self._dump_root, partition_graphs=partition_graphs)

  def testDebugDumpDir_emptyDumpDir(self):
    dump_dir = debug_data.DebugDumpDir(self._dump_root)

    self.assertIsNone(dump_dir.t0)
    self.assertEqual([], dump_dir.dumped_tensor_data)

  def testDebugDumpDir_usesGfileGlob(self):
    """DebugDumpDir globs for each kind of metadata file via gfile.Glob."""
    if platform.system() == "Windows":
      self.skipTest("gfile.Glob is not used on Windows.")

    self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()

    def fake_gfile_glob(glob_pattern):
      del glob_pattern
      return []

    with test.mock.patch.object(
        gfile, "Glob", side_effect=fake_gfile_glob, autospec=True) as fake:
      debug_data.DebugDumpDir(self._dump_root)
      metadata_tags = (
          debug_data.CORE_METADATA_TAG,
          debug_data.FETCHES_INFO_FILE_TAG,
          debug_data.FEED_KEYS_INFO_FILE_TAG,
          debug_data.DEVICE_TAG,
      )
      expected_calls = [
          test.mock.call(os.path.join(
              self._dump_root,
              debug_data.METADATA_FILE_PREFIX + tag + "*"))
          for tag in metadata_tags
      ]
      fake.assert_has_calls(expected_calls, any_order=True)


if __name__ == "__main__":
  googletest.main()