1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/python/profiler/internal/python_hooks.h"
16
17 #include <atomic>
18
19 #include "absl/strings/string_view.h"
20 #include "absl/strings/strip.h"
21 #include "tensorflow/core/platform/env.h"
22 #include "tensorflow/core/platform/path.h"
23 #include "tensorflow/core/profiler/utils/time_utils.h"
24 #include "tensorflow/core/profiler/utils/xplane_builder.h"
25 #include "tensorflow/core/profiler/utils/xplane_schema.h"
26 #include "tensorflow/core/profiler/utils/xplane_utils.h"
27
28 namespace tensorflow {
29 namespace profiler {
30
31 namespace py = ::pybind11;
32
33 namespace {
34
SysSetProfileNone()35 void SysSetProfileNone() {
36 py::object setprofile = py::module::import("sys").attr("setprofile");
37 setprofile(py::none());
38 }
39
ThreadingSetProfile(const py::object & callback)40 void ThreadingSetProfile(const py::object& callback) {
41 py::object setprofile = py::module::import("threading").attr("setprofile");
42 setprofile(callback);
43 }
44
GetEventName(PyObject * co_filename,PyObject * co_name,int co_firstlineno)45 std::string GetEventName(PyObject* co_filename, PyObject* co_name,
46 int co_firstlineno) {
47 string filename(py::reinterpret_borrow<py::str>(co_filename));
48 string function;
49 if (co_name == nullptr) {
50 function = "<unknown>";
51 } else {
52 function = py::reinterpret_borrow<py::str>(co_name);
53 }
54
55 return absl::StrCat("$", io::Basename(filename), ":", co_firstlineno, " ",
56 function);
57 }
58
GetEventName(PyMethodDef * method,PyObject * module)59 string GetEventName(PyMethodDef* method, PyObject* module) {
60 // Python stack does not have a filename/line_no for native calls.
61 // Use module name and function/method name instead.
62 string filename;
63 bool filename_ok;
64 #if PY_MAJOR_VERSION < 3
65 filename_ok = (module != nullptr && PyString_Check(module));
66 #else
67 filename_ok = (module != nullptr && PyUnicode_Check(module));
68 #endif
69 if (filename_ok) {
70 filename = py::reinterpret_borrow<py::str>(module);
71 } else {
72 filename = "<unknown>";
73 }
74
75 return absl::StrCat("$", filename, " ", method->ml_name);
76 }
77
AddEventToXLine(const PythonTraceEntry & event,XLineBuilder * line,XPlaneBuilder * plane)78 void AddEventToXLine(const PythonTraceEntry& event, XLineBuilder* line,
79 XPlaneBuilder* plane) {
80 // TODO(jiesun): maybe add full filename as event stats.
81 auto xevent = line->AddEvent(*plane->GetOrCreateEventMetadata(event.Name()));
82 xevent.SetTimestampNs(event.start_time_ns);
83 xevent.SetEndTimestampNs(event.end_time_ns);
84 }
85
86 template <typename ForEachThreadFunc>
ForEachThread(PyThreadState * curr_thread,ForEachThreadFunc && callback)87 void ForEachThread(PyThreadState* curr_thread, ForEachThreadFunc&& callback) {
88 // Note: PyThreadState's interp is not accessible in open source due to
89 // Py_LIMITED_API definition nuances. We can not iterate all threads through
90 // that PyInterpreterState.
91 #ifndef NDEBUG
92 // If debug version of python runtime is used, (e.g. monolithic binaries in
93 // g3). PyGILState_Check will fail because current thread's PyThreadState
94 // is not the one that holding GIL (after PyThreadState_Swap). This extra
95 // check in PyEval_SetProfile is not useful, but will sporadic crash if user
96 // use debug version of python runtime. In this case, we fallback to only
97 // set up profile hooks in current threads.
98 // In OSS, the python runtime and tensorflow profiler are built separately.
99 // So this workaround doesn't apply.
100 callback(curr_thread);
101 #else
102 for (PyThreadState* p = curr_thread; p != nullptr; p = p->next) {
103 PyThreadState_Swap(p);
104 callback(p);
105 }
106 for (PyThreadState* p = curr_thread->prev; p != nullptr; p = p->prev) {
107 PyThreadState_Swap(p);
108 callback(p);
109 }
110 #endif
111 }
112
113 } // namespace
114
115 /*static*/ PythonHookContext* PythonHooks::e2e_context_ = nullptr;
116
Name() const117 std::string PythonTraceEntry::Name() const {
118 std::string event_name;
119 if (co_filename) {
120 return GetEventName(co_filename, co_name, co_firstlineno);
121 } else {
122 return GetEventName(method_def, m_module);
123 }
124 return "<unknown>";
125 }
126
GetSingleton()127 PythonHooks* PythonHooks::GetSingleton() {
128 static PythonHooks* singleton = new PythonHooks;
129 return singleton;
130 }
131
Start(const PythonHooksOptions & options)132 void PythonHookContext::Start(const PythonHooksOptions& options) {
133 if (!Py_IsInitialized()) return;
134
135 #if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7)
136 // Before Python 3.7, the GIL is created on demand by PyEval_InitThreads().
137 // When a thread was not started by Python (e.g., when starting profiling via
138 // RPC) there might be no GIL. Before Python 3.6, PyGILState_Ensure would
139 // crash. The crash was fixed in Python 3.6 but the fix introduced a race for
140 // GIL creation. Calling PyEval_InitThreads() prevents the race. This is a
141 // no-op when called for a second time so it is innocuous. See
142 // https://vstinner.github.io/python37-gil-change.html for details.
143 PyEval_InitThreads();
144 #endif
145
146 options_ = options;
147 start_timestamp_ns_ = GetCurrentTimeNanos();
148 if (options_.enable_python_traceme || options_.enable_trace_python_function) {
149 PyGILState_STATE gil_state = PyGILState_Ensure();
150 if (options_.enable_python_traceme) {
151 EnableTraceMe(true);
152 }
153 if (options_.enable_trace_python_function) {
154 SetProfilerInAllThreads();
155 }
156 if (options_.end_to_end_mode) {
157 // When end to end mode is used, Stop() and Finalize() i.e. symbolization
158 // and data collection happens during C's atexit(), when Py_FinalizeEx()
159 // already called.
160 try {
161 auto atexit = py::module::import("atexit");
162 atexit.attr("register")(py::cpp_function([]() {
163 PythonHooks* singleton = PythonHooks::GetSingleton();
164 auto e2e_context = singleton->Stop();
165 // Serialize into internal storage before the tracked PyCodeObjects
166 // went out of scope.
167 if (e2e_context) {
168 e2e_context->CollectData(nullptr);
169 PythonHooks::set_e2e_context(e2e_context.release());
170 }
171 }));
172 } catch (const py::error_already_set& e) {
173 LOG(ERROR) << "Can't install atexit handler for e2e mode." << e.what();
174 }
175 }
176 PyGILState_Release(gil_state);
177 }
178 }
179
Stop()180 void PythonHookContext::Stop() {
181 if (!Py_IsInitialized()) return;
182 if (options_.enable_python_traceme || options_.enable_trace_python_function) {
183 PyGILState_STATE gil_state = PyGILState_Ensure();
184 if (options_.enable_trace_python_function) {
185 ClearProfilerInAllThreads();
186 }
187 if (options_.enable_python_traceme) {
188 EnableTraceMe(false);
189 }
190 PyGILState_Release(gil_state);
191 }
192 }
193
CollectData(XPlane * raw_plane)194 void PythonHookContext::CollectData(XPlane* raw_plane) {
195 if (raw_plane == nullptr) {
196 end_to_end_xplane_.emplace();
197 raw_plane = &*end_to_end_xplane_;
198 }
199 XPlaneBuilder plane(raw_plane);
200 for (auto& it : entries_) {
201 uint64 thread_id = it.first;
202 auto& thread_events = it.second;
203 VLOG(1) << "Collecting " << thread_events.completed.size() << ":"
204 << thread_events.active.size() << " events on thread " << thread_id;
205 auto line = plane.GetOrCreateLine(thread_id);
206 line.SetTimestampNs(start_timestamp_ns_);
207 for (const auto& event : thread_events.completed) {
208 AddEventToXLine(event, &line, &plane);
209 }
210 if (options_.include_incomplete_events) {
211 uint64 now = GetCurrentTimeNanos();
212 while (!thread_events.active.empty()) {
213 auto& event = thread_events.active.top();
214 event.end_time_ns = now;
215 AddEventToXLine(event, &line, &plane);
216 thread_events.active.pop();
217 }
218 }
219 }
220 PyGILState_STATE gil_state = PyGILState_Ensure();
221 entries_.clear();
222 PyGILState_Release(gil_state);
223 }
224
Finalize(XSpace * space)225 void PythonHookContext::Finalize(XSpace* space) {
226 if (space && options_.enable_trace_python_function) {
227 XPlane* plane =
228 FindOrAddMutablePlaneWithName(space, kPythonTracerPlaneName);
229 if (options_.end_to_end_mode) {
230 if (end_to_end_xplane_) {
231 end_to_end_xplane_->set_name(plane->name());
232 plane->Swap(&*end_to_end_xplane_);
233 end_to_end_xplane_.reset();
234 }
235 } else {
236 PyGILState_STATE gil_state = PyGILState_Ensure();
237 CollectData(plane);
238 PyGILState_Release(gil_state);
239 }
240 }
241 }
242
ProfileFunction(PyObject * obj,PyFrameObject * frame,int what,PyObject * arg)243 /*static*/ int PythonHooks::ProfileFunction(PyObject* obj, PyFrameObject* frame,
244 int what, PyObject* arg) {
245 GetSingleton()->ProfileFast(frame, what, arg);
246 return 0;
247 }
248
ProfileSlow(const py::object & frame,const string & event,const py::object & arg)249 void PythonHooks::ProfileSlow(const py::object& frame, const string& event,
250 const py::object& arg) {
251 int what;
252 absl::string_view event_name(event);
253
254 if (absl::ConsumePrefix(&event_name, "c_")) {
255 if (event_name == "call") {
256 what = PyTrace_C_CALL;
257 } else if (event_name == "return") {
258 what = PyTrace_C_RETURN;
259 } else if (event_name == "exception") {
260 what = PyTrace_C_EXCEPTION;
261 } else {
262 return;
263 }
264 } else {
265 if (event_name == "call") {
266 what = PyTrace_CALL;
267 } else if (event_name == "return") {
268 what = PyTrace_RETURN;
269 } else if (event_name == "exception") {
270 what = PyTrace_EXCEPTION;
271 } else {
272 return;
273 }
274 }
275
276 ProfileFast(reinterpret_cast<PyFrameObject*>(frame.ptr()), what, arg.ptr());
277 }
278
ProfileFast(PyFrameObject * frame,int what,PyObject * arg)279 void PythonHookContext::ProfileFast(PyFrameObject* frame, int what,
280 PyObject* arg) {
281 const int64_t thread_id = Env::Default()->GetCurrentThreadId();
282 uint64 now = GetCurrentTimeNanos();
283 auto& thread_traces = entries_[thread_id];
284
285 switch (what) {
286 case PyTrace_CALL: {
287 #if PY_VERSION_HEX < 0x030b0000
288 PyCodeObject* f_code = frame->f_code;
289 thread_traces.active.emplace(now, 0, f_code);
290 #else // PY_VERSION_HEX < 0x030b0000
291 PyCodeObject* f_code = PyFrame_GetCode(frame);
292 thread_traces.active.emplace(now, 0, f_code);
293 Py_XDECREF(f_code);
294 #endif // PY_VERSION_HEX < 0x030b0000
295 break;
296 }
297 case PyTrace_RETURN:
298 case PyTrace_EXCEPTION: {
299 if (!thread_traces.active.empty()) {
300 auto& entry = thread_traces.active.top();
301 entry.end_time_ns = now;
302 thread_traces.completed.emplace_back(std::move(entry));
303 thread_traces.active.pop();
304 } else if (options_.include_incomplete_events) {
305 #if PY_VERSION_HEX < 0x030b0000
306 PyCodeObject* f_code = frame->f_code;
307 thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code);
308 #else // PY_VERSION_HEX < 0x030b0000
309 PyCodeObject* f_code = PyFrame_GetCode(frame);
310 thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code);
311 Py_XDECREF(f_code);
312 #endif // PY_VERSION_HEX < 0x030b0000
313 }
314 break;
315 }
316 case PyTrace_C_CALL: {
317 if (PyCFunction_Check(arg)) {
318 // Python stack does not have a filename/line_no for native calls.
319 auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
320 entries_[thread_id].active.emplace(now, 0, func);
321 }
322 break;
323 }
324 case PyTrace_C_RETURN:
325 case PyTrace_C_EXCEPTION: {
326 if (PyCFunction_Check(arg)) {
327 if (!thread_traces.active.empty()) {
328 auto& entry = thread_traces.active.top();
329 entry.end_time_ns = now;
330 thread_traces.completed.emplace_back(std::move(entry));
331 thread_traces.active.pop();
332 } else if (options_.include_incomplete_events) {
333 // Only the end of the events is recorded, use profiler start as
334 // start timestamp of the new event.
335 auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
336 entries_[thread_id].completed.emplace_back(start_timestamp_ns_, now,
337 func);
338 }
339 }
340 break;
341 }
342 default:
343 break;
344 }
345 }
346
SetProfilerInAllThreads()347 /*static*/ void PythonHookContext::SetProfilerInAllThreads() {
348 // We also want any new threads started to use our profiler.
349 // NOTE: threading does not provide a C API equivalent to
350 // `threading.setprofile` so we are forced to go via Python to setup the
351 // profile when a new thread is created. After the first callback in that
352 // thread we unregister the Python profile function and use
353 // `PyEval_SetProfile` to register a C profiler which has significantly less
354 // overhead (>2x faster).
355 PythonHooks* singleton = PythonHooks::GetSingleton();
356 py::cpp_function callback =
357 py::cpp_function([singleton](const py::object& frame, const string& event,
358 const py::object& arg) {
359 singleton->ProfileSlow(frame, event, arg);
360 SysSetProfileNone();
361 PyEval_SetProfile(&PythonHooks::ProfileFunction, nullptr);
362 });
363
364 ThreadingSetProfile(callback);
365
366 // NOTE: This must be after `threading.setprofile` otherwise we
367 // end up recording that in our trace.
368 PyThreadState* curr_thread = PyThreadState_Get();
369 ForEachThread(curr_thread, [](PyThreadState* thread) {
370 VLOG(1) << "Setting profiler in " << thread->thread_id;
371 PyEval_SetProfile(&PythonHooks::ProfileFunction, nullptr);
372 });
373 PyThreadState_Swap(curr_thread);
374 }
375
ClearProfilerInAllThreads()376 /*static*/ void PythonHookContext::ClearProfilerInAllThreads() {
377 PyThreadState* curr_thread = PyThreadState_Get();
378 ForEachThread(curr_thread, [](PyThreadState* thread) {
379 VLOG(1) << "Clearing profiler in " << thread->thread_id;
380 PyEval_SetProfile(nullptr, nullptr);
381 });
382 PyThreadState_Swap(curr_thread);
383
384 // And notify the threading library that we're done.
385 ThreadingSetProfile(py::none());
386 }
387
EnableTraceMe(bool enable)388 /*static*/ void PythonHookContext::EnableTraceMe(bool enable) {
389 const char* kModuleName =
390 "tensorflow.python.profiler.trace";
391 try {
392 auto trace_module = py::module::import(kModuleName);
393 trace_module.attr("enabled") = py::bool_(enable);
394 } catch (const py::error_already_set& e) {
395 LOG(ERROR) << "Can't import " << kModuleName;
396 }
397 }
398
399 } // namespace profiler
400 } // namespace tensorflow
401