• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/python/profiler/internal/python_hooks.h"
16 
17 #include "absl/strings/string_view.h"
18 #include "absl/strings/strip.h"
19 #include "tensorflow/core/platform/env.h"
20 #include "tensorflow/core/platform/path.h"
21 #include "tensorflow/core/profiler/utils/time_utils.h"
22 #include "tensorflow/core/profiler/utils/xplane_builder.h"
23 #include "tensorflow/core/profiler/utils/xplane_schema.h"
24 #include "tensorflow/core/profiler/utils/xplane_utils.h"
25 
26 namespace tensorflow {
27 namespace profiler {
28 
29 namespace py = ::pybind11;
30 
31 namespace {
32 
33 template <typename T>
ProfileFunction(PyObject * obj,PyFrameObject * frame,int what,PyObject * arg)34 int ProfileFunction(PyObject* obj, PyFrameObject* frame, int what,
35                     PyObject* arg) {
36   T::GetSingleton()->ProfileFast(frame, what, arg);
37   return 0;
38 }
39 
SysSetProfileNone()40 void SysSetProfileNone() {
41   py::object setprofile = py::module::import("sys").attr("setprofile");
42   setprofile(py::none());
43 }
44 
ThreadingSetProfile(const py::object & callback)45 void ThreadingSetProfile(const py::object& callback) {
46   py::object setprofile = py::module::import("threading").attr("setprofile");
47   setprofile(callback);
48 }
49 
GetEventName(PyCodeObject * py_code)50 std::string GetEventName(PyCodeObject* py_code) {
51   string filename(py::reinterpret_borrow<py::str>(py_code->co_filename));
52   string function;
53   if (py_code->co_name == nullptr) {
54     function = "<unknown>";
55   } else {
56     function = py::reinterpret_borrow<py::str>(py_code->co_name);
57   }
58 
59   return absl::StrCat("$", io::Basename(filename), ":", py_code->co_firstlineno,
60                       " ", function);
61 }
62 
GetEventName(PyCFunctionObject * py_cfunc)63 string GetEventName(PyCFunctionObject* py_cfunc) {
64   PyObject* module = py_cfunc->m_module;
65   string filename;
66   bool filename_ok;
67 #if PY_MAJOR_VERSION < 3
68   filename_ok = (module != nullptr && PyString_Check(module));
69 #else
70   filename_ok = (module != nullptr && PyUnicode_Check(module));
71 #endif
72   if (filename_ok) {
73     filename = py::reinterpret_borrow<py::str>(module);
74   } else {
75     filename = "<unknown>";
76   }
77 
78   return absl::StrCat("$", filename, " ", py_cfunc->m_ml->ml_name);
79 }
80 
AddEventToXLine(const PythonTraceEntry & event,XLineBuilder * line,XPlaneBuilder * plane)81 void AddEventToXLine(const PythonTraceEntry& event, XLineBuilder* line,
82                      XPlaneBuilder* plane) {
83   // TODO(jiesun): maybe add full filename as event stats.
84   auto xevent = line->AddEvent(*plane->GetOrCreateEventMetadata(event.Name()));
85   xevent.SetTimestampNs(event.start_time_ns);
86   xevent.SetEndTimestampNs(event.end_time_ns);
87 }
88 
89 }  // namespace
90 
91 /*static*/ PythonHookContext* PythonHooks::e2e_context_ = nullptr;
92 
Name() const93 std::string PythonTraceEntry::Name() const {
94   std::string event_name;
95   if (code_object) {
96     return GetEventName(code_object);
97   } else if (function_object) {
98     return GetEventName(function_object);
99   }
100   return "<unknown>";
101 }
102 
GetSingleton()103 PythonHooks* PythonHooks::GetSingleton() {
104   static PythonHooks* singleton = new PythonHooks;
105   return singleton;
106 }
107 
Start(const PythonHooksOptions & options)108 void PythonHookContext::Start(const PythonHooksOptions& options) {
109   if (!Py_IsInitialized()) return;
110 
111 #if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7)
112   // Before Python 3.7, the GIL is created on demand by PyEval_InitThreads().
113   // When a thread was not started by Python (e.g., when starting profiling via
114   // RPC) there might be no GIL. Before Python 3.6, PyGILState_Ensure would
115   // crash. The crash was fixed in Python 3.6 but the fix introduced a race for
116   // GIL creation. Calling PyEval_InitThreads() prevents the race. This is a
117   // no-op when called for a second time so it is innocuous. See
118   // https://vstinner.github.io/python37-gil-change.html for details.
119   PyEval_InitThreads();
120 #endif
121 
122   options_ = options;
123   start_timestamp_ns_ = GetCurrentTimeNanos();
124   if (options_.enable_python_traceme || options_.enable_trace_python_function) {
125     PyGILState_STATE gil_state = PyGILState_Ensure();
126     if (options_.enable_trace_python_function) {
127       SetProfilerInAllThreads();
128     }
129     if (options_.enable_python_traceme) {
130       EnableTraceMe(true);
131     }
132     if (options_.end_to_end_mode) {
133       // When end to end mode is used, Stop() and Finalize() i.e. symbolization
134       // and data collection happens during C's atexit(), when Py_FinalizeEx()
135       // already called.
136       try {
137         auto atexit = py::module::import("atexit");
138         atexit.attr("register")(py::cpp_function([]() {
139           PythonHooks* singleton = PythonHooks::GetSingleton();
140           auto e2e_context = singleton->Stop();
141           // Serialize into internal storage before the tracked PyCodeObjects
142           // went out of scope.
143           if (e2e_context) {
144             e2e_context->CollectData(nullptr);
145             PythonHooks::set_e2e_context(e2e_context.release());
146           }
147         }));
148       } catch (const py::error_already_set& e) {
149         LOG(ERROR) << "Can't install atexit handler for e2e mode." << e.what();
150       }
151     }
152     PyGILState_Release(gil_state);
153   }
154 }
155 
Stop()156 void PythonHookContext::Stop() {
157   if (!Py_IsInitialized()) return;
158   if (options_.enable_python_traceme || options_.enable_trace_python_function) {
159     PyGILState_STATE gil_state = PyGILState_Ensure();
160     if (options_.enable_trace_python_function) {
161       ClearProfilerInAllThreads();
162     }
163     if (options_.enable_python_traceme) {
164       EnableTraceMe(false);
165     }
166     PyGILState_Release(gil_state);
167   }
168 }
169 
CollectData(XPlane * raw_plane)170 void PythonHookContext::CollectData(XPlane* raw_plane) {
171   if (raw_plane == nullptr) {
172     end_to_end_xplane_.emplace();
173     raw_plane = &*end_to_end_xplane_;
174   }
175   XPlaneBuilder plane(raw_plane);
176   for (auto& it : entries_) {
177     uint64 thread_id = it.first;
178     auto& thread_events = it.second;
179     VLOG(1) << "Collecting " << thread_events.completed.size() << ":"
180             << thread_events.active.size() << " events on thread " << thread_id;
181     auto line = plane.GetOrCreateLine(thread_id);
182     line.SetTimestampNs(start_timestamp_ns_);
183     for (const auto& event : thread_events.completed) {
184       AddEventToXLine(event, &line, &plane);
185     }
186     if (options_.include_incomplete_events) {
187       uint64 now = GetCurrentTimeNanos();
188       while (!thread_events.active.empty()) {
189         auto& event = thread_events.active.top();
190         event.end_time_ns = now;
191         AddEventToXLine(event, &line, &plane);
192         thread_events.active.pop();
193       }
194     }
195   }
196   entries_.clear();
197 }
198 
Finalize(XSpace * space)199 void PythonHookContext::Finalize(XSpace* space) {
200   if (space && options_.enable_trace_python_function) {
201     XPlane* plane =
202         FindOrAddMutablePlaneWithName(space, kPythonTracerPlaneName);
203     if (options_.end_to_end_mode) {
204       if (end_to_end_xplane_) {
205         end_to_end_xplane_->set_name(plane->name());
206         plane->Swap(&*end_to_end_xplane_);
207         end_to_end_xplane_.reset();
208       }
209     } else {
210       PyGILState_STATE gil_state = PyGILState_Ensure();
211       CollectData(plane);
212       PyGILState_Release(gil_state);
213     }
214   }
215 }
216 
ProfileSlow(const py::object & frame,const string & event,const py::object & arg)217 void PythonHooks::ProfileSlow(const py::object& frame, const string& event,
218                               const py::object& arg) {
219   int what;
220   absl::string_view event_name(event);
221 
222   if (absl::ConsumePrefix(&event_name, "c_")) {
223     if (event_name == "call") {
224       what = PyTrace_C_CALL;
225     } else if (event_name == "return") {
226       what = PyTrace_C_RETURN;
227     } else if (event_name == "exception") {
228       what = PyTrace_C_EXCEPTION;
229     } else {
230       return;
231     }
232   } else {
233     if (event_name == "call") {
234       what = PyTrace_CALL;
235     } else if (event_name == "return") {
236       what = PyTrace_RETURN;
237     } else if (event_name == "exception") {
238       what = PyTrace_EXCEPTION;
239     } else {
240       return;
241     }
242   }
243 
244   ProfileFast(reinterpret_cast<PyFrameObject*>(frame.ptr()), what, arg.ptr());
245 }
246 
ProfileFast(PyFrameObject * frame,int what,PyObject * arg)247 void PythonHookContext::ProfileFast(PyFrameObject* frame, int what,
248                                     PyObject* arg) {
249   const int64 thread_id = Env::Default()->GetCurrentThreadId();
250   uint64 now = GetCurrentTimeNanos();
251   auto& thread_traces = entries_[thread_id];
252 
253   switch (what) {
254     case PyTrace_CALL: {
255       PyCodeObject* f_code = frame->f_code;
256       thread_traces.active.emplace(now, 0, f_code, nullptr);
257       break;
258     }
259     case PyTrace_RETURN:
260     case PyTrace_EXCEPTION: {
261       if (!thread_traces.active.empty()) {
262         auto& entry = thread_traces.active.top();
263         entry.end_time_ns = now;
264         thread_traces.completed.emplace_back(std::move(entry));
265         thread_traces.active.pop();
266       } else if (options_.include_incomplete_events) {
267         PyCodeObject* f_code = frame->f_code;
268         thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code,
269                                              nullptr);
270       }
271       break;
272     }
273     case PyTrace_C_CALL: {
274       if (PyCFunction_Check(arg)) {
275         // Python stack does not have a filename/line_no for native calls.
276         auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
277         entries_[thread_id].active.emplace(now, 0, nullptr, func);
278       }
279       break;
280     }
281     case PyTrace_C_RETURN:
282     case PyTrace_C_EXCEPTION: {
283       if (!thread_traces.active.empty()) {
284         auto& entry = thread_traces.active.top();
285         entry.end_time_ns = now;
286         thread_traces.completed.emplace_back(std::move(entry));
287         thread_traces.active.pop();
288       } else if (options_.include_incomplete_events) {
289         // Only the end of the events is recorded, use profiler start as start.
290         if (PyCFunction_Check(arg)) {
291           // Python stack does not have a filename/line_no for native calls.
292           auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
293           entries_[thread_id].completed.emplace_back(start_timestamp_ns_, now,
294                                                      nullptr, func);
295         }
296       }
297       break;
298     }
299     default:
300       break;
301   }
302 }
303 
SetProfilerInAllThreads()304 void PythonHookContext::SetProfilerInAllThreads() {
305   // We also want any new threads started to use our profiler.
306   // NOTE: threading does not provide a C API equivalent to
307   // `threading.setprofile` so we are forced to go via Python to setup the
308   // profile when a new thread is created. After the first callback in that
309   // thread we unregister the Python profile function and use
310   // `PyEval_SetProfile` to register a C profiler which has significantly less
311   // overhead (>2x faster).
312   PythonHooks* singleton = PythonHooks::GetSingleton();
313   py::cpp_function callback =
314       py::cpp_function([singleton](const py::object& frame, const string& event,
315                                    const py::object& arg) {
316         singleton->ProfileSlow(frame, event, arg);
317         SysSetProfileNone();
318         PyEval_SetProfile(ProfileFunction<PythonHooks>, nullptr);
319       });
320 
321   ThreadingSetProfile(callback);
322 
323   // NOTE: This must be after `threading.setprofile` otherwise we
324   // end up recording that in our trace.
325   PyThreadState* curr_thread = PyThreadState_Get();
326   PyThreadState* next_thread = curr_thread;
327   while (next_thread != nullptr) {
328     VLOG(1) << "Setting profiler in " << next_thread->thread_id;
329     PyThreadState_Swap(next_thread);
330     PyEval_SetProfile(ProfileFunction<PythonHooks>, nullptr);
331     next_thread = next_thread->next;
332   }
333   PyThreadState_Swap(curr_thread);
334 }
335 
ClearProfilerInAllThreads()336 /*static*/ void PythonHookContext::ClearProfilerInAllThreads() {
337   PyThreadState* curr_thread = PyThreadState_Get();
338   PyThreadState* next_thread = curr_thread;
339   while (next_thread != nullptr) {
340     VLOG(1) << "Clearing profiler in " << next_thread->thread_id;
341     PyThreadState_Swap(next_thread);
342     PyEval_SetProfile(nullptr, nullptr);
343     next_thread = next_thread->next;
344   }
345   PyThreadState_Swap(curr_thread);
346 
347   // And notify the threading library that we're done.
348   ThreadingSetProfile(py::none());
349 }
350 
EnableTraceMe(bool enable)351 /*static*/ void PythonHookContext::EnableTraceMe(bool enable) {
352   const char* kModuleName =
353       "tensorflow.python.profiler.trace";
354   try {
355     auto trace_module = py::module::import(kModuleName);
356     trace_module.attr("enabled") = py::bool_(enable);
357   } catch (const py::error_already_set& e) {
358     LOG(ERROR) << "Can't import " << kModuleName;
359   }
360 }
361 
362 }  // namespace profiler
363 }  // namespace tensorflow
364