1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""TensorFlow 2.0 Profiler for both Eager Mode and Graph Mode. 16 17The profiler has two mode: 18- Programmatic Mode: start(), stop() and Profiler class. It will perform 19 when calling start() or create Profiler class and will stop 20 when calling stop() or destroying Profiler class. 21- On-demand Mode: start_profiler_server(). It will perform profiling when 22 receive profiling request. 23 24NOTE: Only one active profiler session is allowed. Use of simultaneous 25Programmatic Mode and On-demand Mode is undefined and will likely fail. 26 27NOTE: The Keras TensorBoard callback will automatically perform sampled 28profiling. Before enabling customized profiling, set the callback flag 29"profile_batches=[]" to disable automatic sampled profiling. 30customized profiling. 31""" 32 33from __future__ import absolute_import 34from __future__ import division 35from __future__ import print_function 36 37import datetime 38import os 39import threading 40 41from tensorflow.python import pywrap_tensorflow 42from tensorflow.python.eager import context 43from tensorflow.python.framework import c_api_util 44from tensorflow.python.platform import gfile 45from tensorflow.python.platform import tf_logging as logging 46from tensorflow.python.util import compat 47 48_profiler = None 49_profiler_lock = threading.Lock() 50_run_num = 0 51# This suffix should be kept in sync with kProfileEmptySuffix in 52# tensorflow/core/profiler/rpc/client/capture_profile.cc. 53_EVENT_FILE_SUFFIX = '.profile-empty' 54 55 56class ProfilerAlreadyRunningError(Exception): 57 pass 58 59 60class ProfilerNotRunningError(Exception): 61 pass 62 63 64def start(): 65 """Start profiling. 66 67 Raises: 68 ProfilerAlreadyRunningError: If another profiling session is running. 69 """ 70 global _profiler 71 with _profiler_lock: 72 if _profiler is not None: 73 raise ProfilerAlreadyRunningError('Another profiler is running.') 74 profiler_context = pywrap_tensorflow.TFE_NewProfilerContext() 75 if context.default_execution_mode == context.EAGER_MODE: 76 pywrap_tensorflow.TFE_ProfilerContextSetEagerContext( 77 profiler_context, 78 context.context()._handle) # pylint: disable=protected-access 79 _profiler = pywrap_tensorflow.TFE_NewProfiler(profiler_context) 80 pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context) 81 if not pywrap_tensorflow.TFE_ProfilerIsOk(_profiler): 82 logging.warning('Another profiler session is running which is probably ' 83 'created by profiler server. Please avoid using profiler ' 84 'server and profiler APIs at the same time.') 85 86 87def stop(): 88 """Stop current profiling session and return its result. 89 90 Returns: 91 A binary string of tensorflow.tpu.Trace. User can write the string 92 to file for offline analysis by tensorboard. 93 94 Raises: 95 ProfilerNotRunningError: If there is no active profiling session. 96 """ 97 global _profiler 98 global _run_num 99 with _profiler_lock: 100 if _profiler is None: 101 raise ProfilerNotRunningError( 102 'Cannot stop profiling. No profiler is running.') 103 with c_api_util.tf_buffer() as buffer_: 104 pywrap_tensorflow.TFE_ProfilerSerializeToString( 105 context.context()._handle, # pylint: disable=protected-access 106 _profiler, 107 buffer_) 108 result = pywrap_tensorflow.TF_GetBuffer(buffer_) 109 pywrap_tensorflow.TFE_DeleteProfiler(_profiler) 110 _profiler = None 111 _run_num += 1 112 return result 113 114 115def maybe_create_event_file(logdir): 116 """Create an empty event file if not already exists. 117 118 This event file indicates that we have a plugins/profile/ directory in the 119 current logdir. 120 121 Args: 122 logdir: log directory. 123 """ 124 for file_name in gfile.ListDirectory(logdir): 125 if file_name.endswith(_EVENT_FILE_SUFFIX): 126 return 127 # TODO(b/127330388): Use summary_ops_v2.create_file_writer instead. 128 event_writer = pywrap_tensorflow.EventsWriter( 129 compat.as_bytes(os.path.join(logdir, 'events'))) 130 event_writer.InitWithSuffix(compat.as_bytes(_EVENT_FILE_SUFFIX)) 131 132 133def save(logdir, result): 134 """Save profile result to TensorBoard logdir. 135 136 Args: 137 logdir: log directory read by TensorBoard. 138 result: profiling result returned by stop(). 139 """ 140 plugin_dir = os.path.join( 141 logdir, 'plugins', 'profile', 142 datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) 143 gfile.MakeDirs(plugin_dir) 144 maybe_create_event_file(logdir) 145 with gfile.Open(os.path.join(plugin_dir, 'local.trace'), 'wb') as f: 146 f.write(result) 147 148 149def start_profiler_server(port): 150 """Start a profiler grpc server that listens to given port. 151 152 The profiler server will keep the program running even the training finishes. 153 Please shutdown the server with CTRL-C. It can be used in both eager mode and 154 graph mode. The service defined in 155 tensorflow/core/profiler/profiler_service.proto. Please use 156 tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture tracable 157 file following https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace 158 159 Args: 160 port: port profiler server listens to. 161 """ 162 profiler_context = pywrap_tensorflow.TFE_NewProfilerContext() 163 if context.default_execution_mode == context.EAGER_MODE: 164 pywrap_tensorflow.TFE_ProfilerContextSetEagerContext( 165 profiler_context, 166 context.context()._handle) # pylint: disable=protected-access 167 pywrap_tensorflow.TFE_StartProfilerServer(profiler_context, port) 168 pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context) 169 170 171class Profiler(object): 172 """Context-manager eager profiler api. 173 174 Example usage: 175 ```python 176 with Profiler("/path/to/logdir"): 177 # do some work 178 ``` 179 """ 180 181 def __init__(self, logdir): 182 self._logdir = logdir 183 184 def __enter__(self): 185 start() 186 187 def __exit__(self, typ, value, tb): 188 result = stop() 189 save(self._logdir, result) 190