1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // We extract stack traces in Python using the logic in tf_stack.cc, which
17 // stores a list of PyCodeObject*. Such stack trace extraction is really fast.
18 //
19 // We store the retrieved stack trace within the Node object directly. Then
// whenever the graph is instantiated/copied, we copy the stack trace with it.
21 // Since the graph instantiation goes through the protobuf roundtrip, we store
22 // the original stack traces mapping attached in FunctionLibraryDefinition.
23
24 #include <Python.h>
25 #include <frameobject.h>
26
27 #include <algorithm>
28 #include <vector>
29
30 #include "absl/algorithm/container.h"
31 #include "absl/container/flat_hash_map.h"
32 #include "absl/container/flat_hash_set.h"
33 #include "absl/hash/hash.h"
34 #include "absl/strings/str_format.h"
35 #include "absl/strings/str_join.h"
36 #include "absl/types/span.h"
37 #include "pybind11/pybind11.h"
38 #include "pybind11/stl.h"
39 #include "pybind11/stl_bind.h"
40 #include "tensorflow/c/c_api_internal.h"
41 #include "tensorflow/core/graph/graph.h"
42 #include "tensorflow/core/platform/path.h"
43 #include "tensorflow/python/util/stack_trace.h"
44
45 struct StackFrame; // Forward declaration.
46 struct StackTrace;
47
48 PYBIND11_MAKE_OPAQUE(std::vector<StackFrame>);
49 PYBIND11_MAKE_OPAQUE(StackTrace);
50
51 namespace tensorflow {
52
53 namespace {
54
55 namespace py = pybind11;
56
57 using SourceLoc = std::tuple<std::string, int>;
58
59 using SourceMap = absl::flat_hash_map<SourceLoc, StackFrame>;
60
61 using StringSet = absl::flat_hash_set<std::string>;
62
63 // Python wrapper for a SourceMap.
64 class PyBindSourceMap {
65 public:
PyBindSourceMap()66 PyBindSourceMap() : source_map_(std::make_shared<SourceMap>()) {}
67
68 // Shares ownership with whoever captures traces in the scope of this map.
69 std::shared_ptr<SourceMap> source_map_;
70 };
71
72 // Python wrapper for a FileSet.
73 class PyBindFileSet {
74 public:
PyBindFileSet()75 PyBindFileSet() : file_set_(std::make_shared<StringSet>()) {}
76
77 // Shares ownership with whoever captures traces in the scope of this set.
78 std::shared_ptr<StringSet> file_set_;
79 };
80
81 // Returns contents of the line corresponding to the given frame.
82 //
83 // Precondition: must be holding Python GIL.
LineContents(const StackFrame & frame)84 py::str LineContents(const StackFrame& frame) {
85 DCheckPyGilStateForStackTrace();
86 static const auto* linecache =
87 new py::module(py::module::import("linecache"));
88 const auto& checkcache = linecache->attr("checkcache");
89 const auto& getline = linecache->attr("getline");
90 checkcache(py::str(frame.file_name));
91 return py::cast<py::str>(
92 getline(py::str(frame.file_name), py::int_(frame.line_number))
93 .attr("strip")());
94 }
95
// Frames whose file name contains this substring are ignored for the common
// prefix calculation and are printed with their file name left intact.
//
// Declared as a constexpr array (rather than a non-const `const char*`) so the
// constant itself cannot be reseated at runtime.
constexpr char kFilenameToIgnorePrefix[] = "<embedded";
98
99 // Converts the given stack frame to string, according to options defined in
100 // `opts`.
StackFrameToString(const StackFrame & frame,const AbstractStackTrace::TracePrintingOptions & opts,int shared_prefix_size=0)101 std::string StackFrameToString(
102 const StackFrame& frame,
103 const AbstractStackTrace::TracePrintingOptions& opts,
104 int shared_prefix_size = 0) {
105 std::string out = absl::StrFormat(
106 "File \"%s\", line %d, in %s",
107 absl::StrContains(frame.file_name, kFilenameToIgnorePrefix)
108 ? frame.file_name
109 : frame.file_name.substr(shared_prefix_size),
110 frame.line_number, frame.function_name);
111
112 if (opts.show_line_contents) {
113 PyGILState_STATE state = PyGILState_Ensure();
114 std::string line_contents = std::string(LineContents(frame));
115 PyGILState_Release(state);
116 if (!line_contents.empty()) {
117 absl::StrAppend(&out, "\n ", line_contents);
118 }
119 }
120 return out;
121 }
122
123 class StackTraceWrapper : public AbstractStackTrace {
124 public:
StackTraceWrapper(StackTrace && captured,const std::shared_ptr<SourceMap> & source_map,const std::shared_ptr<StringSet> & filter)125 StackTraceWrapper(StackTrace&& captured,
126 const std::shared_ptr<SourceMap>& source_map,
127 const std::shared_ptr<StringSet>& filter)
128 : captured_(std::move(captured)),
129 source_map_(source_map),
130 filter_(filter) {}
131
StackTraceWrapper(absl::Span<StackFrame const> stack_frames)132 explicit StackTraceWrapper(absl::Span<StackFrame const> stack_frames)
133 : stack_frames_cache_(std::vector<StackFrame>(stack_frames.begin(),
134 stack_frames.end())) {}
135
ExtractStack(const std::shared_ptr<SourceMap> & source_map,const std::shared_ptr<StringSet> & filter)136 static StackTraceWrapper ExtractStack(
137 const std::shared_ptr<SourceMap>& source_map,
138 const std::shared_ptr<StringSet>& filter) {
139 return StackTraceWrapper{StackTrace::Capture(-1), source_map, filter};
140 }
141
ToFrames() const142 absl::Span<StackFrame const> ToFrames() const override {
143 if (stack_frames_cache_) {
144 return *stack_frames_cache_;
145 }
146
147 // Grabbing the GIL solves two purposes: 1) makes the class thread-safe,
148 // and 2) ToStackFrames and LineContents actually need it.
149 PyGILState_STATE state = PyGILState_Ensure();
150
151 stack_frames_cache_ = captured_.ToStackFrames(
152 [&](std::pair<const char*, int> p) { return StackTraceMapping(p); },
153 [&](const char* f) { return StackTraceFiltering(f); });
154 stack_frames_cache_->pop_back(); // Drop last stack frame.
155 PyGILState_Release(state);
156 return *stack_frames_cache_;
157 }
158
LastUserFrame() const159 StackFrame LastUserFrame() const override {
160 if (last_stack_frame_cache_) {
161 return *last_stack_frame_cache_;
162 }
163
164 PyGILState_STATE state = PyGILState_Ensure();
165 std::vector<StackFrame> last_frame = captured_.ToStackFrames(
166 [&](std::pair<const char*, int> p) { return StackTraceMapping(p); },
167 [&](const char* file_name) {
168 return StackTraceFiltering(file_name) ||
169 IsInternalFrameForFilename(file_name);
170 },
171 /*reverse_traversal=*/true,
172 /*limit=*/1);
173
174 if (last_frame.empty()) {
175 last_stack_frame_cache_ = StackFrame{"", -1, ""};
176 } else {
177 DCHECK_EQ(last_frame.size(), 1);
178 last_stack_frame_cache_ = last_frame[0];
179 }
180 PyGILState_Release(state);
181 return *last_stack_frame_cache_;
182 }
183
ToString(const TracePrintingOptions & opts) const184 std::string ToString(const TracePrintingOptions& opts) const override {
185 std::vector<std::string> files_to_find_prefix;
186 for (const StackFrame& frame : ToFrames()) {
187 if (!absl::StrContains(frame.file_name, kFilenameToIgnorePrefix)) {
188 files_to_find_prefix.push_back(frame.file_name);
189 }
190 }
191 int shared_prefix_size =
192 opts.filter_common_prefix
193 ? io::CommonPathPrefix(files_to_find_prefix).size()
194 : 0;
195
196 if (!opts.drop_internal_frames) {
197 return ToStringHelper(*stack_frames_cache_, opts, shared_prefix_size);
198 }
199
200 std::vector<StackFrame> filtered_frames;
201 for (const StackFrame& frame : *stack_frames_cache_) {
202 if (!IsInternalFrameForFilename(frame.file_name)) {
203 filtered_frames.push_back(frame);
204 }
205 }
206 return ToStringHelper(filtered_frames, opts, shared_prefix_size);
207 }
208
209 StackTraceWrapper(StackTraceWrapper&&) = default;
~StackTraceWrapper()210 ~StackTraceWrapper() override {
211 PyGILState_STATE state = PyGILState_Ensure();
212 captured_.Clear();
213 source_map_.reset();
214 filter_.reset();
215 PyGILState_Release(state);
216 }
217
218 private:
ToStringHelper(absl::Span<StackFrame const> stack_frames,const TracePrintingOptions & opts,int shared_prefix_size)219 static std::string ToStringHelper(absl::Span<StackFrame const> stack_frames,
220 const TracePrintingOptions& opts,
221 int shared_prefix_size) {
222 return absl::StrJoin(
223 stack_frames, "\n", [&](std::string* out, const StackFrame& frame) {
224 absl::StrAppend(out,
225 StackFrameToString(frame, opts, shared_prefix_size));
226 });
227 }
228
StackTraceMapping(SourceLoc loc) const229 absl::optional<StackFrame> StackTraceMapping(SourceLoc loc) const {
230 if (source_map_->contains(loc)) {
231 return source_map_->at(loc);
232 }
233
234 return absl::nullopt;
235 }
236
StackTraceFiltering(const char * file_name) const237 bool StackTraceFiltering(const char* file_name) const {
238 return filter_->contains(file_name);
239 }
240
241 StackTrace captured_;
242 std::shared_ptr<SourceMap> source_map_;
243 std::shared_ptr<StringSet> filter_;
244
245 // Using optional to force destruction while we hold a GIL.
246 mutable absl::optional<std::vector<StackFrame>> stack_frames_cache_;
247 mutable absl::optional<StackFrame> last_stack_frame_cache_;
248 };
249
250 } // namespace
251
// Defines the `_tf_stack` Python extension module: the source-map and file-set
// holders, StackFrame, StackTraceWrapper, and the two stack extraction entry
// points.
PYBIND11_MODULE(_tf_stack, m) {
  py::class_<PyBindSourceMap>(m, "PyBindSourceMap")
      .def(py::init())
      // Replaces the map contents with `source_map`, a tuple of
      // ((filename, lineno), (filename, lineno, function_name_or_None)) items.
      .def("update_to",
           [](const PyBindSourceMap& self, const py::tuple& source_map) {
             self.source_map_->clear();
             for (const auto& item : source_map) {
               const auto& tuple_item = py::cast<py::tuple>(item);

               const auto& key = py::cast<py::tuple>(tuple_item[0]);
               std::string k_filename = py::cast<std::string>(key[0]);
               int k_lineno = py::cast<int>(key[1]);

               const auto& value = py::cast<py::tuple>(tuple_item[1]);
               std::string v_filename = py::cast<std::string>(value[0]);
               int v_lineno = py::cast<int>(value[1]);
               const auto& function_name_val = value[2];
               // A function name of None is stored as the empty string.
               std::string v_function_name =
                   function_name_val.is_none()
                       ? ""
                       : py::cast<std::string>(function_name_val);

               // The parsed strings are not used again; move them into place.
               self.source_map_->emplace(
                   SourceLoc(std::move(k_filename), k_lineno),
                   StackFrame{std::move(v_filename), v_lineno,
                              std::move(v_function_name)});
             }
           });

  py::class_<PyBindFileSet>(m, "PyBindFileSet")
      .def(py::init())
      // Replaces the set contents with the strings in `file_set`.
      .def("update_to", [](const PyBindFileSet& self, const py::set& file_set) {
        self.file_set_->clear();
        for (const auto& item : file_set) {
          self.file_set_->insert(py::cast<std::string>(item));
        }
      });

  py::class_<StackFrame>(m, "StackFrame")
      .def_property_readonly(
          "filename",
          [](const StackFrame& self) { return py::str(self.file_name); })
      .def_property_readonly(
          "lineno",
          [](const StackFrame& self) { return py::int_(self.line_number); })
      .def_property_readonly(
          "name",
          [](const StackFrame& self) { return py::str(self.function_name); })
      .def_property_readonly(
          "line", [](const StackFrame& self) { return LineContents(self); })

      // For compatibility with the traceback module: a frame behaves like the
      // 4-tuple (filename, lineno, name, line).
      .def("__eq__", &StackFrame::operator==)
      .def("__ne__", &StackFrame::operator!=)
      .def("__hash__",
           [](const StackFrame& self) {
             return absl::Hash<std::tuple<std::string, int, std::string>>()(
                 std::make_tuple(self.file_name, self.line_number,
                                 self.function_name));
           })
      .def("__getitem__",
           [](const StackFrame& self, const py::object& index) -> py::object {
             return py::make_tuple(
                 py::str(self.file_name), py::int_(self.line_number),
                 py::str(self.function_name), LineContents(self))[index];
           })
      .def("__iter__",
           [](const StackFrame& self) {
             return py::iter(py::make_tuple(
                 py::str(self.file_name), py::int_(self.line_number),
                 py::str(self.function_name), LineContents(self)));
           })
      .def("__repr__",
           [](const StackFrame& self) { return StackFrameToString(self, {}); })
      .def("__len__", [](const StackFrame&) { return 4; });

  py::class_<StackTraceWrapper>(m, "StackTraceWrapper", py::module_local(true))
      // TODO(slebedev): upstream negative indexing support into pybind11.
      .def(
          "__getitem__",
          [](const StackTraceWrapper& self, ssize_t index) {
            absl::Span<StackFrame const> frames = self.ToFrames();
            // A negative index counts from the end. One that is too negative
            // wraps to a huge unsigned value and is rejected below.
            const size_t eff_index =
                index < 0 ? frames.size() + index : static_cast<size_t>(index);
            if (eff_index >= frames.size()) {
              throw py::index_error();
            }
            return frames[eff_index];
          },
          py::return_value_policy::reference_internal)
      .def(
          "__getitem__",
          [](const StackTraceWrapper& self, py::slice slice) {
            absl::Span<StackFrame const> frames = self.ToFrames();
            py::ssize_t start, stop, step, slicelength;
            if (!slice.compute(frames.size(), &start, &stop, &step,
                               &slicelength)) {
              throw py::error_already_set();
            }
            if (step == 1) {
              return StackTraceWrapper{frames.subspan(start, slicelength)};
            }
            // TODO(cheshire): Cleanup, use Python slicing logic directly
            // instead.
            std::vector<StackFrame> out;
            out.reserve(slicelength);
            // Walk exactly `slicelength` elements. Comparing the running index
            // against `stop` is wrong for |step| > 1: the index can step over
            // `stop` without ever equalling it and then run out of bounds.
            py::ssize_t index = start;
            for (py::ssize_t i = 0; i < slicelength; ++i, index += step) {
              out.push_back(frames[index]);
            }
            return StackTraceWrapper{out};
          },
          py::return_value_policy::reference_internal)
      .def("__len__",
           [](const StackTraceWrapper& self) { return self.ToFrames().size(); })
      .def("__eq__",
           [](const StackTraceWrapper& self, const StackTraceWrapper& other) {
             return self.ToFrames() == other.ToFrames();
           })
      .def("__hash__",
           [](const StackTraceWrapper& self) {
             return py::hash(py::str(self.ToString({})));
           })
      .def("__repr__",
           [](const StackTraceWrapper& self) {
             return py::str(self.ToString({}));
           })
      .def("last_user_frame",
           [](const StackTraceWrapper& self) { return self.LastUserFrame(); });

  // Captures the current stack, attaches it to the node behind `op`, and
  // returns a reference to the attached trace.
  m.def(
      "extract_stack_for_node",
      [](const PyBindSourceMap& source_map, const PyBindFileSet& file_set,
         TF_Operation* op) -> const AbstractStackTrace& {
        Node* node = reinterpret_cast<Node*>(op);
        DCHECK(!node->GetStackTrace()) << "Should not reset the stack trace";
        node->SetStackTrace(
            std::make_shared<StackTraceWrapper>(StackTraceWrapper::ExtractStack(
                source_map.source_map_, file_set.file_set_)));
        return *node->GetStackTrace();
      },
      py::return_value_policy::reference);

  // Captures the current stack and returns it as a new StackTraceWrapper.
  m.def(
      "extract_stack",
      [](const PyBindSourceMap& source_map, const PyBindFileSet& file_set) {
        return StackTraceWrapper::ExtractStack(source_map.source_map_,
                                               file_set.file_set_);
      },
      py::return_value_policy::move);
}
404
405 } // namespace tensorflow
406