# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""test the RunMetadata proto."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from collections import defaultdict
22
23import six
24
25from tensorflow.core.protobuf import config_pb2
26from tensorflow.core.protobuf import rewriter_config_pb2
27from tensorflow.python.client import session
28from tensorflow.python.framework import ops
29from tensorflow.python.framework import test_util
30from tensorflow.python.ops import math_ops
31from tensorflow.python.ops import random_ops
32from tensorflow.python.ops import variables
33from tensorflow.python.platform import test
34from tensorflow.python.profiler import option_builder
35
36# pylint: disable=g-bad-import-order
37# XXX: this depends on pywrap_tensorflow and must come later
38from tensorflow.python.profiler import model_analyzer
39from tensorflow.python.profiler.internal import model_analyzer_testlib as lib
40
41SIZE = 1300
42builder = option_builder.ProfileOptionBuilder
43
44
45def _extract_node(run_meta, node_name):
46  ret = defaultdict(list)
47  for dev_stat in run_meta.step_stats.dev_stats:
48    dev = dev_stat.device.lower()
49    if dev.find('cpu:') > 0:
50      dev = dev[dev.find('cpu:'):]
51    elif dev.find('gpu:') > 0:
52      dev = dev[dev.find('gpu:'):]
53    elif '/host:cpu' not in dev:
54      assert False, 'Unrecognized device name: %s' % dev
55
56    for node_stat in dev_stat.node_stats:
57      nname = node_stat.node_name
58      if nname.find(':') > 0:
59        nname = nname[:nname.find(':')]
60      if nname == node_name:
61        ret[dev].append(node_stat)
62  return ret
63
64
65def _run_model():
66  x = random_ops.random_normal(shape=[1, SIZE])
67  w = random_ops.random_normal(shape=[SIZE, 2 * SIZE])
68  y = math_ops.matmul(x, w)
69
70  config = config_pb2.ConfigProto()
71  config.graph_options.rewrite_options.arithmetic_optimization = (
72      rewriter_config_pb2.RewriterConfig.OFF)
73  with session.Session(config=config) as sess:
74    run_metadata = config_pb2.RunMetadata()
75    opts = builder.time_and_memory()
76    opts['min_micros'] = 0
77    opts['min_bytes'] = 0
78    opts['order_by'] = 'name'
79    opts['output'] = 'none'
80    _ = sess.run(y,
81                 options=config_pb2.RunOptions(
82                     trace_level=config_pb2.RunOptions.FULL_TRACE),
83                 run_metadata=run_metadata)
84    tfprof_node = model_analyzer.profile(
85        sess.graph,
86        run_meta=run_metadata,
87        options=opts)
88
89    return tfprof_node, run_metadata
90
91
92def _run_loop_model():
93  with session.Session() as sess:
94    x = lib.BuildFullModel()
95
96    sess.run(variables.global_variables_initializer())
97    run_meta = config_pb2.RunMetadata()
98    _ = sess.run(x,
99                 options=config_pb2.RunOptions(
100                     trace_level=config_pb2.RunOptions.FULL_TRACE),
101                 run_metadata=run_meta)
102
103    opts = builder.time_and_memory()
104    opts['order_by'] = 'name'
105    opts['output'] = 'none'
106
107    tfprof_node = model_analyzer.profile(
108        sess.graph, run_meta, options=opts)
109    return tfprof_node, run_meta
110
111
112class RunMetadataTest(test.TestCase):
113
114  @test_util.run_deprecated_v1
115  def testGPU(self):
116    if not test.is_gpu_available(cuda_only=True):
117      return
118
119    gpu_dev = test.gpu_device_name()
120    ops.reset_default_graph()
121    with ops.device(gpu_dev):
122      tfprof_node, run_meta = _run_model()
123      self.assertEqual(tfprof_node.children[0].name, 'MatMul')
124      self.assertGreater(tfprof_node.children[0].exec_micros, 10)
125
126    ret = _extract_node(run_meta, 'MatMul')
127    self.assertEqual(len(ret['gpu:0']), 1)
128    self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta)
129
130  @test_util.run_deprecated_v1
131  def testAllocationHistory(self):
132    if not test.is_gpu_available(cuda_only=True):
133      return
134
135    gpu_dev = test.gpu_device_name()
136    ops.reset_default_graph()
137    with ops.device(gpu_dev):
138      _, run_meta = _run_model()
139
140    mm = _extract_node(run_meta, 'MatMul')['gpu:0'][0]
141    mm_allocs = mm.memory[0].allocation_records
142    # has allocation and deallocation.
143    self.assertEqual(len(mm_allocs), 2)
144    # first allocated.
145    self.assertGreater(mm_allocs[1].alloc_micros, mm_allocs[0].alloc_micros)
146    self.assertGreater(mm_allocs[0].alloc_bytes, 0)
147    # Then deallocated.
148    self.assertLess(mm_allocs[1].alloc_bytes, 0)
149    # All memory deallocated.
150    self.assertEqual(mm_allocs[0].alloc_bytes + mm_allocs[1].alloc_bytes, 0)
151
152    rand = _extract_node(
153        run_meta, 'random_normal/RandomStandardNormal')['gpu:0'][0]
154    random_allocs = rand.memory[0].allocation_records
155    # random normal must allocated first since matmul depends on it.
156    self.assertLess(random_allocs[0].alloc_micros, mm.all_start_micros)
157    # deallocates the memory after matmul started.
158    self.assertGreater(random_allocs[1].alloc_micros, mm.all_start_micros)
159
160  @test_util.run_deprecated_v1
161  def testCPU(self):
162    ops.reset_default_graph()
163    with ops.device('/cpu:0'):
164      tfprof_node, run_meta = _run_model()
165      self.assertEqual(tfprof_node.children[0].name, 'MatMul')
166      self.assertGreater(tfprof_node.children[0].exec_micros, 0)
167
168    ret = _extract_node(run_meta, 'MatMul')
169    self.assertEqual(len(ret['cpu:0']), 1)
170
171    ret = _extract_node(run_meta, 'MatMul:MatMul')
172    self.assertEqual(len(ret), 0)
173
174  @test_util.run_v1_only('b/120545219')
175  def testLoopCPU(self):
176    ops.reset_default_graph()
177    with ops.device('/cpu:0'):
178      tfprof_node, run_meta = _run_loop_model()
179      # The while-loop caused a node to appear 4 times in scheduling.
180      ret = _extract_node(run_meta,
181                          'rnn/while/basic_rnn_cell/MatMul')
182      self.assertEqual(len(ret['cpu:0']), 4)
183
184      total_cpu_execs = 0
185      for node in ret['cpu:0']:
186        total_cpu_execs += node.op_end_rel_micros
187
188      mm_node = lib.SearchTFProfNode(
189          tfprof_node,
190          'rnn/while/basic_rnn_cell/MatMul')
191
192      self.assertEqual(mm_node.run_count, 4)
193      self.assertEqual(mm_node.cpu_exec_micros, total_cpu_execs)
194      self.assertEqual(mm_node.exec_micros, total_cpu_execs)
195
196  def testGradientGraph(self):
197    # Note: Please don't just adjust the test to make it pass.
198    # The code view logic depends on it.
199    ops.reset_default_graph()
200    _, _ = _run_loop_model()
201    graph = ops.get_default_graph()
202    forward_op = set()
203    backward_op = set()
204    back_to_forward = dict()
205    for op in graph.get_operations():
206      if op.name.find('gradients/') > 0 and op.name.find('_grad/') > 0:
207        backward_op.add(op.name)
208        idx1 = op.name.find('gradients/') + 10
209        idx2 = op.name.find('_grad/')
210        back_to_forward[op.name] = op.name[idx1:idx2]
211      else:
212        forward_op.add(op.name)
213
214    for _, f in six.iteritems(back_to_forward):
215      self.assertTrue(f in forward_op)
216
217  def testLoopGPU(self):
218    if not test.is_gpu_available():
219      return
220
221    ops.reset_default_graph()
222    with ops.device('/device:GPU:0'):
223      _, run_meta = _run_loop_model()
224      # The while-loop caused a node to appear 4 times in scheduling.
225      ret = _extract_node(run_meta,
226                          'rnn/while/basic_rnn_cell/MatMul')
227      self.assertEqual(len(ret['gpu:0']), 4, '%s' % run_meta)
228
229      total_cpu_execs = 0
230      for node in ret['gpu:0']:
231        total_cpu_execs += node.op_end_rel_micros
232
233      self.assertGreaterEqual(len(ret['gpu:0/stream:all']), 4, '%s' % run_meta)
234
235
236if __name__ == '__main__':
237  test.main()
238