1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/python/profiler/internal/python_hooks.h"
16
17 #include "absl/strings/string_view.h"
18 #include "absl/strings/strip.h"
19 #include "tensorflow/core/platform/env.h"
20 #include "tensorflow/core/platform/path.h"
21 #include "tensorflow/core/profiler/utils/time_utils.h"
22 #include "tensorflow/core/profiler/utils/xplane_builder.h"
23 #include "tensorflow/core/profiler/utils/xplane_schema.h"
24 #include "tensorflow/core/profiler/utils/xplane_utils.h"
25
26 namespace tensorflow {
27 namespace profiler {
28
29 namespace py = ::pybind11;
30
31 namespace {
32
33 template <typename T>
ProfileFunction(PyObject * obj,PyFrameObject * frame,int what,PyObject * arg)34 int ProfileFunction(PyObject* obj, PyFrameObject* frame, int what,
35 PyObject* arg) {
36 T::GetSingleton()->ProfileFast(frame, what, arg);
37 return 0;
38 }
39
SysSetProfileNone()40 void SysSetProfileNone() {
41 py::object setprofile = py::module::import("sys").attr("setprofile");
42 setprofile(py::none());
43 }
44
ThreadingSetProfile(const py::object & callback)45 void ThreadingSetProfile(const py::object& callback) {
46 py::object setprofile = py::module::import("threading").attr("setprofile");
47 setprofile(callback);
48 }
49
GetEventName(PyCodeObject * py_code)50 std::string GetEventName(PyCodeObject* py_code) {
51 string filename(py::reinterpret_borrow<py::str>(py_code->co_filename));
52 string function;
53 if (py_code->co_name == nullptr) {
54 function = "<unknown>";
55 } else {
56 function = py::reinterpret_borrow<py::str>(py_code->co_name);
57 }
58
59 return absl::StrCat("$", io::Basename(filename), ":", py_code->co_firstlineno,
60 " ", function);
61 }
62
GetEventName(PyCFunctionObject * py_cfunc)63 string GetEventName(PyCFunctionObject* py_cfunc) {
64 PyObject* module = py_cfunc->m_module;
65 string filename;
66 bool filename_ok;
67 #if PY_MAJOR_VERSION < 3
68 filename_ok = (module != nullptr && PyString_Check(module));
69 #else
70 filename_ok = (module != nullptr && PyUnicode_Check(module));
71 #endif
72 if (filename_ok) {
73 filename = py::reinterpret_borrow<py::str>(module);
74 } else {
75 filename = "<unknown>";
76 }
77
78 return absl::StrCat("$", filename, " ", py_cfunc->m_ml->ml_name);
79 }
80
AddEventToXLine(const PythonTraceEntry & event,XLineBuilder * line,XPlaneBuilder * plane)81 void AddEventToXLine(const PythonTraceEntry& event, XLineBuilder* line,
82 XPlaneBuilder* plane) {
83 // TODO(jiesun): maybe add full filename as event stats.
84 auto xevent = line->AddEvent(*plane->GetOrCreateEventMetadata(event.Name()));
85 xevent.SetTimestampNs(event.start_time_ns);
86 xevent.SetEndTimestampNs(event.end_time_ns);
87 }
88
89 } // namespace
90
91 /*static*/ PythonHookContext* PythonHooks::e2e_context_ = nullptr;
92
Name() const93 std::string PythonTraceEntry::Name() const {
94 std::string event_name;
95 if (code_object) {
96 return GetEventName(code_object);
97 } else if (function_object) {
98 return GetEventName(function_object);
99 }
100 return "<unknown>";
101 }
102
GetSingleton()103 PythonHooks* PythonHooks::GetSingleton() {
104 static PythonHooks* singleton = new PythonHooks;
105 return singleton;
106 }
107
Start(const PythonHooksOptions & options)108 void PythonHookContext::Start(const PythonHooksOptions& options) {
109 if (!Py_IsInitialized()) return;
110
111 #if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7)
112 // Before Python 3.7, the GIL is created on demand by PyEval_InitThreads().
113 // When a thread was not started by Python (e.g., when starting profiling via
114 // RPC) there might be no GIL. Before Python 3.6, PyGILState_Ensure would
115 // crash. The crash was fixed in Python 3.6 but the fix introduced a race for
116 // GIL creation. Calling PyEval_InitThreads() prevents the race. This is a
117 // no-op when called for a second time so it is innocuous. See
118 // https://vstinner.github.io/python37-gil-change.html for details.
119 PyEval_InitThreads();
120 #endif
121
122 options_ = options;
123 start_timestamp_ns_ = GetCurrentTimeNanos();
124 if (options_.enable_python_traceme || options_.enable_trace_python_function) {
125 PyGILState_STATE gil_state = PyGILState_Ensure();
126 if (options_.enable_trace_python_function) {
127 SetProfilerInAllThreads();
128 }
129 if (options_.enable_python_traceme) {
130 EnableTraceMe(true);
131 }
132 if (options_.end_to_end_mode) {
133 // When end to end mode is used, Stop() and Finalize() i.e. symbolization
134 // and data collection happens during C's atexit(), when Py_FinalizeEx()
135 // already called.
136 try {
137 auto atexit = py::module::import("atexit");
138 atexit.attr("register")(py::cpp_function([]() {
139 PythonHooks* singleton = PythonHooks::GetSingleton();
140 auto e2e_context = singleton->Stop();
141 // Serialize into internal storage before the tracked PyCodeObjects
142 // went out of scope.
143 if (e2e_context) {
144 e2e_context->CollectData(nullptr);
145 PythonHooks::set_e2e_context(e2e_context.release());
146 }
147 }));
148 } catch (const py::error_already_set& e) {
149 LOG(ERROR) << "Can't install atexit handler for e2e mode." << e.what();
150 }
151 }
152 PyGILState_Release(gil_state);
153 }
154 }
155
Stop()156 void PythonHookContext::Stop() {
157 if (!Py_IsInitialized()) return;
158 if (options_.enable_python_traceme || options_.enable_trace_python_function) {
159 PyGILState_STATE gil_state = PyGILState_Ensure();
160 if (options_.enable_trace_python_function) {
161 ClearProfilerInAllThreads();
162 }
163 if (options_.enable_python_traceme) {
164 EnableTraceMe(false);
165 }
166 PyGILState_Release(gil_state);
167 }
168 }
169
CollectData(XPlane * raw_plane)170 void PythonHookContext::CollectData(XPlane* raw_plane) {
171 if (raw_plane == nullptr) {
172 end_to_end_xplane_.emplace();
173 raw_plane = &*end_to_end_xplane_;
174 }
175 XPlaneBuilder plane(raw_plane);
176 for (auto& it : entries_) {
177 uint64 thread_id = it.first;
178 auto& thread_events = it.second;
179 VLOG(1) << "Collecting " << thread_events.completed.size() << ":"
180 << thread_events.active.size() << " events on thread " << thread_id;
181 auto line = plane.GetOrCreateLine(thread_id);
182 line.SetTimestampNs(start_timestamp_ns_);
183 for (const auto& event : thread_events.completed) {
184 AddEventToXLine(event, &line, &plane);
185 }
186 if (options_.include_incomplete_events) {
187 uint64 now = GetCurrentTimeNanos();
188 while (!thread_events.active.empty()) {
189 auto& event = thread_events.active.top();
190 event.end_time_ns = now;
191 AddEventToXLine(event, &line, &plane);
192 thread_events.active.pop();
193 }
194 }
195 }
196 entries_.clear();
197 }
198
Finalize(XSpace * space)199 void PythonHookContext::Finalize(XSpace* space) {
200 if (space && options_.enable_trace_python_function) {
201 XPlane* plane =
202 FindOrAddMutablePlaneWithName(space, kPythonTracerPlaneName);
203 if (options_.end_to_end_mode) {
204 if (end_to_end_xplane_) {
205 end_to_end_xplane_->set_name(plane->name());
206 plane->Swap(&*end_to_end_xplane_);
207 end_to_end_xplane_.reset();
208 }
209 } else {
210 PyGILState_STATE gil_state = PyGILState_Ensure();
211 CollectData(plane);
212 PyGILState_Release(gil_state);
213 }
214 }
215 }
216
ProfileSlow(const py::object & frame,const string & event,const py::object & arg)217 void PythonHooks::ProfileSlow(const py::object& frame, const string& event,
218 const py::object& arg) {
219 int what;
220 absl::string_view event_name(event);
221
222 if (absl::ConsumePrefix(&event_name, "c_")) {
223 if (event_name == "call") {
224 what = PyTrace_C_CALL;
225 } else if (event_name == "return") {
226 what = PyTrace_C_RETURN;
227 } else if (event_name == "exception") {
228 what = PyTrace_C_EXCEPTION;
229 } else {
230 return;
231 }
232 } else {
233 if (event_name == "call") {
234 what = PyTrace_CALL;
235 } else if (event_name == "return") {
236 what = PyTrace_RETURN;
237 } else if (event_name == "exception") {
238 what = PyTrace_EXCEPTION;
239 } else {
240 return;
241 }
242 }
243
244 ProfileFast(reinterpret_cast<PyFrameObject*>(frame.ptr()), what, arg.ptr());
245 }
246
ProfileFast(PyFrameObject * frame,int what,PyObject * arg)247 void PythonHookContext::ProfileFast(PyFrameObject* frame, int what,
248 PyObject* arg) {
249 const int64 thread_id = Env::Default()->GetCurrentThreadId();
250 uint64 now = GetCurrentTimeNanos();
251 auto& thread_traces = entries_[thread_id];
252
253 switch (what) {
254 case PyTrace_CALL: {
255 PyCodeObject* f_code = frame->f_code;
256 thread_traces.active.emplace(now, 0, f_code, nullptr);
257 break;
258 }
259 case PyTrace_RETURN:
260 case PyTrace_EXCEPTION: {
261 if (!thread_traces.active.empty()) {
262 auto& entry = thread_traces.active.top();
263 entry.end_time_ns = now;
264 thread_traces.completed.emplace_back(std::move(entry));
265 thread_traces.active.pop();
266 } else if (options_.include_incomplete_events) {
267 PyCodeObject* f_code = frame->f_code;
268 thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code,
269 nullptr);
270 }
271 break;
272 }
273 case PyTrace_C_CALL: {
274 if (PyCFunction_Check(arg)) {
275 // Python stack does not have a filename/line_no for native calls.
276 auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
277 entries_[thread_id].active.emplace(now, 0, nullptr, func);
278 }
279 break;
280 }
281 case PyTrace_C_RETURN:
282 case PyTrace_C_EXCEPTION: {
283 if (!thread_traces.active.empty()) {
284 auto& entry = thread_traces.active.top();
285 entry.end_time_ns = now;
286 thread_traces.completed.emplace_back(std::move(entry));
287 thread_traces.active.pop();
288 } else if (options_.include_incomplete_events) {
289 // Only the end of the events is recorded, use profiler start as start.
290 if (PyCFunction_Check(arg)) {
291 // Python stack does not have a filename/line_no for native calls.
292 auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
293 entries_[thread_id].completed.emplace_back(start_timestamp_ns_, now,
294 nullptr, func);
295 }
296 }
297 break;
298 }
299 default:
300 break;
301 }
302 }
303
SetProfilerInAllThreads()304 void PythonHookContext::SetProfilerInAllThreads() {
305 // We also want any new threads started to use our profiler.
306 // NOTE: threading does not provide a C API equivalent to
307 // `threading.setprofile` so we are forced to go via Python to setup the
308 // profile when a new thread is created. After the first callback in that
309 // thread we unregister the Python profile function and use
310 // `PyEval_SetProfile` to register a C profiler which has significantly less
311 // overhead (>2x faster).
312 PythonHooks* singleton = PythonHooks::GetSingleton();
313 py::cpp_function callback =
314 py::cpp_function([singleton](const py::object& frame, const string& event,
315 const py::object& arg) {
316 singleton->ProfileSlow(frame, event, arg);
317 SysSetProfileNone();
318 PyEval_SetProfile(ProfileFunction<PythonHooks>, nullptr);
319 });
320
321 ThreadingSetProfile(callback);
322
323 // NOTE: This must be after `threading.setprofile` otherwise we
324 // end up recording that in our trace.
325 PyThreadState* curr_thread = PyThreadState_Get();
326 PyThreadState* next_thread = curr_thread;
327 while (next_thread != nullptr) {
328 VLOG(1) << "Setting profiler in " << next_thread->thread_id;
329 PyThreadState_Swap(next_thread);
330 PyEval_SetProfile(ProfileFunction<PythonHooks>, nullptr);
331 next_thread = next_thread->next;
332 }
333 PyThreadState_Swap(curr_thread);
334 }
335
ClearProfilerInAllThreads()336 /*static*/ void PythonHookContext::ClearProfilerInAllThreads() {
337 PyThreadState* curr_thread = PyThreadState_Get();
338 PyThreadState* next_thread = curr_thread;
339 while (next_thread != nullptr) {
340 VLOG(1) << "Clearing profiler in " << next_thread->thread_id;
341 PyThreadState_Swap(next_thread);
342 PyEval_SetProfile(nullptr, nullptr);
343 next_thread = next_thread->next;
344 }
345 PyThreadState_Swap(curr_thread);
346
347 // And notify the threading library that we're done.
348 ThreadingSetProfile(py::none());
349 }
350
EnableTraceMe(bool enable)351 /*static*/ void PythonHookContext::EnableTraceMe(bool enable) {
352 const char* kModuleName =
353 "tensorflow.python.profiler.trace";
354 try {
355 auto trace_module = py::module::import(kModuleName);
356 trace_module.attr("enabled") = py::bool_(enable);
357 } catch (const py::error_already_set& e) {
358 LOG(ERROR) << "Can't import " << kModuleName;
359 }
360 }
361
362 } // namespace profiler
363 } // namespace tensorflow
364