• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/python/profiler/internal/python_hooks.h"
16 
17 #include <atomic>
18 
19 #include "absl/strings/string_view.h"
20 #include "absl/strings/strip.h"
21 #include "tensorflow/core/platform/env.h"
22 #include "tensorflow/core/platform/path.h"
23 #include "tensorflow/core/profiler/utils/time_utils.h"
24 #include "tensorflow/core/profiler/utils/xplane_builder.h"
25 #include "tensorflow/core/profiler/utils/xplane_schema.h"
26 #include "tensorflow/core/profiler/utils/xplane_utils.h"
27 
28 namespace tensorflow {
29 namespace profiler {
30 
31 namespace py = ::pybind11;
32 
33 namespace {
34 
SysSetProfileNone()35 void SysSetProfileNone() {
36   py::object setprofile = py::module::import("sys").attr("setprofile");
37   setprofile(py::none());
38 }
39 
ThreadingSetProfile(const py::object & callback)40 void ThreadingSetProfile(const py::object& callback) {
41   py::object setprofile = py::module::import("threading").attr("setprofile");
42   setprofile(callback);
43 }
44 
GetEventName(PyObject * co_filename,PyObject * co_name,int co_firstlineno)45 std::string GetEventName(PyObject* co_filename, PyObject* co_name,
46                          int co_firstlineno) {
47   string filename(py::reinterpret_borrow<py::str>(co_filename));
48   string function;
49   if (co_name == nullptr) {
50     function = "<unknown>";
51   } else {
52     function = py::reinterpret_borrow<py::str>(co_name);
53   }
54 
55   return absl::StrCat("$", io::Basename(filename), ":", co_firstlineno, " ",
56                       function);
57 }
58 
GetEventName(PyMethodDef * method,PyObject * module)59 string GetEventName(PyMethodDef* method, PyObject* module) {
60   // Python stack does not have a filename/line_no for native calls.
61   // Use module name and function/method name instead.
62   string filename;
63   bool filename_ok;
64 #if PY_MAJOR_VERSION < 3
65   filename_ok = (module != nullptr && PyString_Check(module));
66 #else
67   filename_ok = (module != nullptr && PyUnicode_Check(module));
68 #endif
69   if (filename_ok) {
70     filename = py::reinterpret_borrow<py::str>(module);
71   } else {
72     filename = "<unknown>";
73   }
74 
75   return absl::StrCat("$", filename, " ", method->ml_name);
76 }
77 
AddEventToXLine(const PythonTraceEntry & event,XLineBuilder * line,XPlaneBuilder * plane)78 void AddEventToXLine(const PythonTraceEntry& event, XLineBuilder* line,
79                      XPlaneBuilder* plane) {
80   // TODO(jiesun): maybe add full filename as event stats.
81   auto xevent = line->AddEvent(*plane->GetOrCreateEventMetadata(event.Name()));
82   xevent.SetTimestampNs(event.start_time_ns);
83   xevent.SetEndTimestampNs(event.end_time_ns);
84 }
85 
86 template <typename ForEachThreadFunc>
ForEachThread(PyThreadState * curr_thread,ForEachThreadFunc && callback)87 void ForEachThread(PyThreadState* curr_thread, ForEachThreadFunc&& callback) {
88   // Note: PyThreadState's interp is not accessible in open source due to
89   // Py_LIMITED_API definition nuances. We can not iterate all threads through
90   // that PyInterpreterState.
91 #ifndef NDEBUG
92   // If debug version of python runtime is used, (e.g. monolithic binaries in
93   // g3). PyGILState_Check will fail because current thread's PyThreadState
94   // is not the one that holding GIL (after PyThreadState_Swap). This extra
95   // check in PyEval_SetProfile is not useful, but will sporadic crash if user
96   // use debug version of python runtime. In this case, we fallback to only
97   // set up profile hooks in current threads.
98   // In OSS, the python runtime and tensorflow profiler are built separately.
99   // So this workaround doesn't apply.
100   callback(curr_thread);
101 #else
102   for (PyThreadState* p = curr_thread; p != nullptr; p = p->next) {
103     PyThreadState_Swap(p);
104     callback(p);
105   }
106   for (PyThreadState* p = curr_thread->prev; p != nullptr; p = p->prev) {
107     PyThreadState_Swap(p);
108     callback(p);
109   }
110 #endif
111 }
112 
113 }  // namespace
114 
115 /*static*/ PythonHookContext* PythonHooks::e2e_context_ = nullptr;
116 
Name() const117 std::string PythonTraceEntry::Name() const {
118   std::string event_name;
119   if (co_filename) {
120     return GetEventName(co_filename, co_name, co_firstlineno);
121   } else {
122     return GetEventName(method_def, m_module);
123   }
124   return "<unknown>";
125 }
126 
GetSingleton()127 PythonHooks* PythonHooks::GetSingleton() {
128   static PythonHooks* singleton = new PythonHooks;
129   return singleton;
130 }
131 
Start(const PythonHooksOptions & options)132 void PythonHookContext::Start(const PythonHooksOptions& options) {
133   if (!Py_IsInitialized()) return;
134 
135 #if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7)
136   // Before Python 3.7, the GIL is created on demand by PyEval_InitThreads().
137   // When a thread was not started by Python (e.g., when starting profiling via
138   // RPC) there might be no GIL. Before Python 3.6, PyGILState_Ensure would
139   // crash. The crash was fixed in Python 3.6 but the fix introduced a race for
140   // GIL creation. Calling PyEval_InitThreads() prevents the race. This is a
141   // no-op when called for a second time so it is innocuous. See
142   // https://vstinner.github.io/python37-gil-change.html for details.
143   PyEval_InitThreads();
144 #endif
145 
146   options_ = options;
147   start_timestamp_ns_ = GetCurrentTimeNanos();
148   if (options_.enable_python_traceme || options_.enable_trace_python_function) {
149     PyGILState_STATE gil_state = PyGILState_Ensure();
150     if (options_.enable_python_traceme) {
151       EnableTraceMe(true);
152     }
153     if (options_.enable_trace_python_function) {
154       SetProfilerInAllThreads();
155     }
156     if (options_.end_to_end_mode) {
157       // When end to end mode is used, Stop() and Finalize() i.e. symbolization
158       // and data collection happens during C's atexit(), when Py_FinalizeEx()
159       // already called.
160       try {
161         auto atexit = py::module::import("atexit");
162         atexit.attr("register")(py::cpp_function([]() {
163           PythonHooks* singleton = PythonHooks::GetSingleton();
164           auto e2e_context = singleton->Stop();
165           // Serialize into internal storage before the tracked PyCodeObjects
166           // went out of scope.
167           if (e2e_context) {
168             e2e_context->CollectData(nullptr);
169             PythonHooks::set_e2e_context(e2e_context.release());
170           }
171         }));
172       } catch (const py::error_already_set& e) {
173         LOG(ERROR) << "Can't install atexit handler for e2e mode." << e.what();
174       }
175     }
176     PyGILState_Release(gil_state);
177   }
178 }
179 
Stop()180 void PythonHookContext::Stop() {
181   if (!Py_IsInitialized()) return;
182   if (options_.enable_python_traceme || options_.enable_trace_python_function) {
183     PyGILState_STATE gil_state = PyGILState_Ensure();
184     if (options_.enable_trace_python_function) {
185       ClearProfilerInAllThreads();
186     }
187     if (options_.enable_python_traceme) {
188       EnableTraceMe(false);
189     }
190     PyGILState_Release(gil_state);
191   }
192 }
193 
CollectData(XPlane * raw_plane)194 void PythonHookContext::CollectData(XPlane* raw_plane) {
195   if (raw_plane == nullptr) {
196     end_to_end_xplane_.emplace();
197     raw_plane = &*end_to_end_xplane_;
198   }
199   XPlaneBuilder plane(raw_plane);
200   for (auto& it : entries_) {
201     uint64 thread_id = it.first;
202     auto& thread_events = it.second;
203     VLOG(1) << "Collecting " << thread_events.completed.size() << ":"
204             << thread_events.active.size() << " events on thread " << thread_id;
205     auto line = plane.GetOrCreateLine(thread_id);
206     line.SetTimestampNs(start_timestamp_ns_);
207     for (const auto& event : thread_events.completed) {
208       AddEventToXLine(event, &line, &plane);
209     }
210     if (options_.include_incomplete_events) {
211       uint64 now = GetCurrentTimeNanos();
212       while (!thread_events.active.empty()) {
213         auto& event = thread_events.active.top();
214         event.end_time_ns = now;
215         AddEventToXLine(event, &line, &plane);
216         thread_events.active.pop();
217       }
218     }
219   }
220   PyGILState_STATE gil_state = PyGILState_Ensure();
221   entries_.clear();
222   PyGILState_Release(gil_state);
223 }
224 
Finalize(XSpace * space)225 void PythonHookContext::Finalize(XSpace* space) {
226   if (space && options_.enable_trace_python_function) {
227     XPlane* plane =
228         FindOrAddMutablePlaneWithName(space, kPythonTracerPlaneName);
229     if (options_.end_to_end_mode) {
230       if (end_to_end_xplane_) {
231         end_to_end_xplane_->set_name(plane->name());
232         plane->Swap(&*end_to_end_xplane_);
233         end_to_end_xplane_.reset();
234       }
235     } else {
236       PyGILState_STATE gil_state = PyGILState_Ensure();
237       CollectData(plane);
238       PyGILState_Release(gil_state);
239     }
240   }
241 }
242 
ProfileFunction(PyObject * obj,PyFrameObject * frame,int what,PyObject * arg)243 /*static*/ int PythonHooks::ProfileFunction(PyObject* obj, PyFrameObject* frame,
244                                             int what, PyObject* arg) {
245   GetSingleton()->ProfileFast(frame, what, arg);
246   return 0;
247 }
248 
ProfileSlow(const py::object & frame,const string & event,const py::object & arg)249 void PythonHooks::ProfileSlow(const py::object& frame, const string& event,
250                               const py::object& arg) {
251   int what;
252   absl::string_view event_name(event);
253 
254   if (absl::ConsumePrefix(&event_name, "c_")) {
255     if (event_name == "call") {
256       what = PyTrace_C_CALL;
257     } else if (event_name == "return") {
258       what = PyTrace_C_RETURN;
259     } else if (event_name == "exception") {
260       what = PyTrace_C_EXCEPTION;
261     } else {
262       return;
263     }
264   } else {
265     if (event_name == "call") {
266       what = PyTrace_CALL;
267     } else if (event_name == "return") {
268       what = PyTrace_RETURN;
269     } else if (event_name == "exception") {
270       what = PyTrace_EXCEPTION;
271     } else {
272       return;
273     }
274   }
275 
276   ProfileFast(reinterpret_cast<PyFrameObject*>(frame.ptr()), what, arg.ptr());
277 }
278 
ProfileFast(PyFrameObject * frame,int what,PyObject * arg)279 void PythonHookContext::ProfileFast(PyFrameObject* frame, int what,
280                                     PyObject* arg) {
281   const int64_t thread_id = Env::Default()->GetCurrentThreadId();
282   uint64 now = GetCurrentTimeNanos();
283   auto& thread_traces = entries_[thread_id];
284 
285   switch (what) {
286     case PyTrace_CALL: {
287 #if PY_VERSION_HEX < 0x030b0000
288       PyCodeObject* f_code = frame->f_code;
289       thread_traces.active.emplace(now, 0, f_code);
290 #else   // PY_VERSION_HEX < 0x030b0000
291       PyCodeObject* f_code = PyFrame_GetCode(frame);
292       thread_traces.active.emplace(now, 0, f_code);
293       Py_XDECREF(f_code);
294 #endif  // PY_VERSION_HEX < 0x030b0000
295       break;
296     }
297     case PyTrace_RETURN:
298     case PyTrace_EXCEPTION: {
299       if (!thread_traces.active.empty()) {
300         auto& entry = thread_traces.active.top();
301         entry.end_time_ns = now;
302         thread_traces.completed.emplace_back(std::move(entry));
303         thread_traces.active.pop();
304       } else if (options_.include_incomplete_events) {
305 #if PY_VERSION_HEX < 0x030b0000
306         PyCodeObject* f_code = frame->f_code;
307         thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code);
308 #else   // PY_VERSION_HEX < 0x030b0000
309         PyCodeObject* f_code = PyFrame_GetCode(frame);
310         thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code);
311         Py_XDECREF(f_code);
312 #endif  // PY_VERSION_HEX < 0x030b0000
313       }
314       break;
315     }
316     case PyTrace_C_CALL: {
317       if (PyCFunction_Check(arg)) {
318         // Python stack does not have a filename/line_no for native calls.
319         auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
320         entries_[thread_id].active.emplace(now, 0, func);
321       }
322       break;
323     }
324     case PyTrace_C_RETURN:
325     case PyTrace_C_EXCEPTION: {
326       if (PyCFunction_Check(arg)) {
327         if (!thread_traces.active.empty()) {
328           auto& entry = thread_traces.active.top();
329           entry.end_time_ns = now;
330           thread_traces.completed.emplace_back(std::move(entry));
331           thread_traces.active.pop();
332         } else if (options_.include_incomplete_events) {
333           // Only the end of the events is recorded, use profiler start as
334           // start timestamp of the new event.
335           auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
336           entries_[thread_id].completed.emplace_back(start_timestamp_ns_, now,
337                                                      func);
338         }
339       }
340       break;
341     }
342     default:
343       break;
344   }
345 }
346 
SetProfilerInAllThreads()347 /*static*/ void PythonHookContext::SetProfilerInAllThreads() {
348   // We also want any new threads started to use our profiler.
349   // NOTE: threading does not provide a C API equivalent to
350   // `threading.setprofile` so we are forced to go via Python to setup the
351   // profile when a new thread is created. After the first callback in that
352   // thread we unregister the Python profile function and use
353   // `PyEval_SetProfile` to register a C profiler which has significantly less
354   // overhead (>2x faster).
355   PythonHooks* singleton = PythonHooks::GetSingleton();
356   py::cpp_function callback =
357       py::cpp_function([singleton](const py::object& frame, const string& event,
358                                    const py::object& arg) {
359         singleton->ProfileSlow(frame, event, arg);
360         SysSetProfileNone();
361         PyEval_SetProfile(&PythonHooks::ProfileFunction, nullptr);
362       });
363 
364   ThreadingSetProfile(callback);
365 
366   // NOTE: This must be after `threading.setprofile` otherwise we
367   // end up recording that in our trace.
368   PyThreadState* curr_thread = PyThreadState_Get();
369   ForEachThread(curr_thread, [](PyThreadState* thread) {
370     VLOG(1) << "Setting profiler in " << thread->thread_id;
371     PyEval_SetProfile(&PythonHooks::ProfileFunction, nullptr);
372   });
373   PyThreadState_Swap(curr_thread);
374 }
375 
ClearProfilerInAllThreads()376 /*static*/ void PythonHookContext::ClearProfilerInAllThreads() {
377   PyThreadState* curr_thread = PyThreadState_Get();
378   ForEachThread(curr_thread, [](PyThreadState* thread) {
379     VLOG(1) << "Clearing profiler in " << thread->thread_id;
380     PyEval_SetProfile(nullptr, nullptr);
381   });
382   PyThreadState_Swap(curr_thread);
383 
384   // And notify the threading library that we're done.
385   ThreadingSetProfile(py::none());
386 }
387 
EnableTraceMe(bool enable)388 /*static*/ void PythonHookContext::EnableTraceMe(bool enable) {
389   const char* kModuleName =
390       "tensorflow.python.profiler.trace";
391   try {
392     auto trace_module = py::module::import(kModuleName);
393     trace_module.attr("enabled") = py::bool_(enable);
394   } catch (const py::error_already_set& e) {
395     LOG(ERROR) << "Can't import " << kModuleName;
396   }
397 }
398 
399 }  // namespace profiler
400 }  // namespace tensorflow
401