1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/sqlite/sql_source.h"
18
19 #include <sqlite3.h>
20 #include <algorithm>
21 #include <cstddef>
22 #include <cstdint>
23 #include <iterator>
24 #include <limits>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <utility>
29 #include <vector>
30
31 #include "perfetto/base/logging.h"
32 #include "perfetto/ext/base/string_utils.h"
33
34 #if SQLITE_VERSION_NUMBER < 3041002
35 // There is a bug in pre-3.41.2 versions of SQLite where sqlite3_error_offset
36 // can return an offset out of bounds. Make it a hard compiler error to prevent
37 // us from hitting this bug.
38 #error "SQLite version is too old."
39 #endif
40
41 namespace perfetto::trace_processor {
42
43 namespace {
44
GetLineAndColumnForOffset(const std::string & sql,uint32_t line,uint32_t column,uint32_t offset)45 std::pair<uint32_t, uint32_t> GetLineAndColumnForOffset(const std::string& sql,
46 uint32_t line,
47 uint32_t column,
48 uint32_t offset) {
49 if (offset == 0) {
50 return std::make_pair(line, column);
51 }
52
53 const char* new_start = sql.c_str() + offset;
54 size_t prev_nl = sql.rfind('\n', offset - 1);
55 int64_t nl_count = std::count(sql.c_str(), new_start, '\n');
56 PERFETTO_DCHECK((nl_count == 0) == (prev_nl == std::string_view::npos));
57
58 if (prev_nl == std::string::npos) {
59 return std::make_pair(line + static_cast<uint32_t>(nl_count),
60 column + static_cast<uint32_t>(offset));
61 }
62
63 int64_t new_column = std::distance(sql.c_str() + prev_nl, new_start);
64 return std::make_pair(line + static_cast<uint32_t>(nl_count),
65 static_cast<uint32_t>(new_column));
66 }
67
SqlContextAndCaretPos(const std::string & sql,uint32_t offset)68 std::pair<std::string, size_t> SqlContextAndCaretPos(const std::string& sql,
69 uint32_t offset) {
70 PERFETTO_DCHECK(offset <= sql.size());
71
72 // Go back 128 characters, until the start of the string or the start of the
73 // line (which we encounter first).
74 size_t start_idx = offset - std::min<size_t>(128ul, offset);
75 if (offset > 0) {
76 size_t prev_nl = sql.rfind('\n', offset - 1);
77 if (prev_nl != std::string::npos) {
78 start_idx = std::max(prev_nl + 1, start_idx);
79 }
80 }
81
82 // Go forward 128 characters, to the end of the string or the end of the
83 // line (which we encounter first).
84 size_t end_idx = std::min<size_t>(offset + 128ul, sql.size());
85 size_t next_nl = sql.find('\n', offset);
86 if (next_nl != std::string::npos) {
87 end_idx = std::min(next_nl, end_idx);
88 }
89 return std::make_pair(sql.substr(start_idx, end_idx - start_idx),
90 offset - start_idx);
91 }
92
93 } // namespace
94
// Defaulted constructor: every member takes its default-initialized state.
SqlSource::SqlSource() = default;
// Wraps an already-built |node| tree; the by-value argument is moved into
// |root_|.
SqlSource::SqlSource(Node node) : root_(std::move(node)) {}
97
SqlSource(std::string sql,std::string name,bool include_traceback_header)98 SqlSource::SqlSource(std::string sql,
99 std::string name,
100 bool include_traceback_header) {
101 root_.name = std::move(name);
102 root_.original_sql = sql;
103 root_.rewritten_sql = std::move(sql);
104 root_.include_traceback_header = include_traceback_header;
105 }
106
FromExecuteQuery(std::string sql)107 SqlSource SqlSource::FromExecuteQuery(std::string sql) {
108 return {std::move(sql), "File \"stdin\"", true};
109 }
110
FromMetric(std::string sql,const std::string & name)111 SqlSource SqlSource::FromMetric(std::string sql, const std::string& name) {
112 return {std::move(sql), "Metric \"" + name + "\"", true};
113 }
114
FromMetricFile(std::string sql,const std::string & name)115 SqlSource SqlSource::FromMetricFile(std::string sql, const std::string& name) {
116 return {std::move(sql), "Metric file \"" + name + "\"", false};
117 }
118
FromModuleInclude(std::string sql,const std::string & module)119 SqlSource SqlSource::FromModuleInclude(std::string sql,
120 const std::string& module) {
121 return {std::move(sql), "Module include \"" + module + "\"", false};
122 }
123
FromTraceProcessorImplementation(std::string sql)124 SqlSource SqlSource::FromTraceProcessorImplementation(std::string sql) {
125 return {std::move(sql), "Trace Processor Internal", false};
126 }
127
// Formats a traceback pointing at |offset| by delegating to the root node.
// |offset| is forwarded unchanged and is interpreted by the node as an offset
// into its rewritten SQL.
std::string SqlSource::AsTraceback(uint32_t offset) const {
  return root_.AsTraceback(offset);
}
131
AsTracebackForSqliteOffset(std::optional<uint32_t> opt_offset) const132 std::string SqlSource::AsTracebackForSqliteOffset(
133 std::optional<uint32_t> opt_offset) const {
134 uint32_t offset = opt_offset.value_or(0);
135 PERFETTO_CHECK(offset <= sql().size());
136 return AsTraceback(offset);
137 }
138
Substr(uint32_t offset,uint32_t len) const139 SqlSource SqlSource::Substr(uint32_t offset, uint32_t len) const {
140 SqlSource source;
141 source.root_ = root_.Substr(offset, len);
142 return source;
143 }
144
RewriteAllIgnoreExisting(SqlSource source) const145 SqlSource SqlSource::RewriteAllIgnoreExisting(SqlSource source) const {
146 // Reset any rewrites.
147 SqlSource copy = *this;
148 copy.root_.rewritten_sql = copy.root_.original_sql;
149 copy.root_.rewrites.clear();
150
151 SqlSource::Rewriter rewriter(std::move(copy));
152 rewriter.Rewrite(0, static_cast<uint32_t>(root_.original_sql.size()),
153 std::move(source));
154 return std::move(rewriter).Build();
155 }
156
ApplyRewrites(const std::string & original_sql,const std::vector<Rewrite> & rewrites)157 std::string SqlSource::ApplyRewrites(const std::string& original_sql,
158 const std::vector<Rewrite>& rewrites) {
159 std::string sql;
160 uint32_t prev_idx = 0;
161 for (const auto& rewrite : rewrites) {
162 PERFETTO_CHECK(prev_idx <= rewrite.original_sql_start);
163 sql.append(
164 original_sql.substr(prev_idx, rewrite.original_sql_start - prev_idx));
165 sql.append(rewrite.rewrite_node.rewritten_sql);
166 prev_idx = rewrite.original_sql_end;
167 }
168 sql.append(original_sql.substr(prev_idx, original_sql.size() - prev_idx));
169 return sql;
170 }
171
AsTraceback(uint32_t rewritten_offset) const172 std::string SqlSource::Node::AsTraceback(uint32_t rewritten_offset) const {
173 PERFETTO_CHECK(rewritten_offset <= rewritten_sql.size());
174 uint32_t original_offset = RewrittenOffsetToOriginalOffset(rewritten_offset);
175 std::string res = SelfTraceback(rewritten_offset, original_offset);
176 if (auto opt_idx = RewriteForOriginalOffset(original_offset); opt_idx) {
177 const Rewrite& rewrite = rewrites[*opt_idx];
178 PERFETTO_CHECK(rewritten_offset >= rewrite.rewritten_sql_start);
179 PERFETTO_CHECK(rewritten_offset < rewrite.rewritten_sql_end);
180 res.append(rewrite.rewrite_node.AsTraceback(rewritten_offset -
181 rewrite.rewritten_sql_start));
182 }
183 return res;
184 }
185
SelfTraceback(uint32_t rewritten_offset,uint32_t original_offset) const186 std::string SqlSource::Node::SelfTraceback(uint32_t rewritten_offset,
187 uint32_t original_offset) const {
188 PERFETTO_DCHECK(original_offset <= original_sql.size());
189 auto [o_context, o_caret_pos] =
190 SqlContextAndCaretPos(original_sql, original_offset);
191 std::string header;
192 if (include_traceback_header) {
193 if (!rewrites.empty()) {
194 auto [r_context, r_caret_pos] =
195 SqlContextAndCaretPos(rewritten_sql, rewritten_offset);
196 std::string caret = std::string(r_caret_pos, ' ') + "^";
197 base::StackString<1024> str("Fully expanded statement\n %s\n %s\n",
198 r_context.c_str(), caret.c_str());
199 header.append(str.c_str());
200 }
201 header += "Traceback (most recent call last):\n";
202 }
203
204 auto line_and_col =
205 GetLineAndColumnForOffset(original_sql, line, col, original_offset);
206 std::string caret = std::string(o_caret_pos, ' ') + "^";
207 base::StackString<1024> str("%s %s line %u col %u\n %s\n %s\n",
208 header.c_str(), name.c_str(), line_and_col.first,
209 line_and_col.second, o_context.c_str(),
210 caret.c_str());
211 return str.ToStdString();
212 }
213
Substr(uint32_t offset,uint32_t len) const214 SqlSource::Node SqlSource::Node::Substr(uint32_t offset, uint32_t len) const {
215 uint32_t offset_end = offset + len;
216 PERFETTO_CHECK(offset_end <= rewritten_sql.size());
217
218 uint32_t original_offset_start = RewrittenOffsetToOriginalOffset(offset);
219 uint32_t original_offset_end = RewrittenOffsetToOriginalOffset(offset_end);
220 std::vector<Rewrite> new_rewrites;
221 for (const Rewrite& rewrite : rewrites) {
222 if (offset >= rewrite.rewritten_sql_end) {
223 continue;
224 }
225 if (offset_end < rewrite.rewritten_sql_start) {
226 break;
227 }
228 // Special case: when the end of the substr is in the middle of a rewrite,
229 // we actually want to capture the original SQL up to the end of the
230 // rewrite, not just to the start as |ChildRewrittenOffset| returns.
231 if (offset_end < rewrite.rewritten_sql_end) {
232 original_offset_end = rewrite.original_sql_end;
233 }
234 uint32_t bounded_start = std::max(offset, rewrite.rewritten_sql_start);
235 uint32_t bounded_end = std::min(offset_end, rewrite.rewritten_sql_end);
236
237 uint32_t nested_start = bounded_start - rewrite.rewritten_sql_start;
238 uint32_t nested_len = bounded_end - bounded_start;
239
240 new_rewrites.push_back(Rewrite{
241 rewrite.original_sql_start - original_offset_start,
242 rewrite.original_sql_end - original_offset_start,
243 bounded_start - offset,
244 bounded_end - offset,
245 rewrite.rewrite_node.Substr(nested_start, nested_len),
246 });
247 }
248 std::string new_original = original_sql.substr(
249 original_offset_start, original_offset_end - original_offset_start);
250 std::string new_rewritten = rewritten_sql.substr(offset, len);
251 PERFETTO_DCHECK(ApplyRewrites(new_original, new_rewrites) == new_rewritten);
252
253 auto line_and_col = GetLineAndColumnForOffset(rewritten_sql, line, col,
254 original_offset_start);
255 return Node{
256 name,
257 include_traceback_header,
258 line_and_col.first,
259 line_and_col.second,
260 new_original,
261 std::move(new_rewrites),
262 new_rewritten,
263 };
264 }
265
RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const266 uint32_t SqlSource::Node::RewrittenOffsetToOriginalOffset(
267 uint32_t rewritten_offset) const {
268 uint32_t remaining = rewritten_offset;
269 for (const Rewrite& rewrite : rewrites) {
270 if (rewritten_offset >= rewrite.rewritten_sql_end) {
271 remaining -= rewrite.rewritten_sql_end - rewrite.rewritten_sql_start;
272 remaining += rewrite.original_sql_end - rewrite.original_sql_start;
273 continue;
274 }
275 if (rewritten_offset < rewrite.rewritten_sql_start) {
276 break;
277 }
278 // IMPORTANT: if the rewritten offset is anywhere inside a rewrite, we just
279 // map the original offset to point to the start of the rewrite. This is
280 // the only sane way we can handle arbitrary transformations of the
281 // original sql.
282 return rewrite.original_sql_start;
283 }
284 return remaining;
285 }
286
RewriteForOriginalOffset(uint32_t original_offset) const287 std::optional<uint32_t> SqlSource::Node::RewriteForOriginalOffset(
288 uint32_t original_offset) const {
289 for (uint32_t i = 0; i < rewrites.size(); ++i) {
290 if (original_offset >= rewrites[i].original_sql_start &&
291 original_offset < rewrites[i].original_sql_end) {
292 return i;
293 }
294 }
295 return std::nullopt;
296 }
297
// Convenience overload: starts a rewriter from the root node of |source|. The
// node is moved out of the by-value argument and handed to the Node overload.
SqlSource::Rewriter::Rewriter(SqlSource source)
    : Rewriter(std::move(source.root_)) {}
Rewriter(Node source)300 SqlSource::Rewriter::Rewriter(Node source) : orig_(std::move(source)) {
301 // Note: it's important that we *don't* move out of |orig_| here as we want to
302 // be able to access the untouched offsets through
303 // calls to |RewrittenOffsetToOriginalOffset| etc.
304 for (const SqlSource::Rewrite& rewrite : orig_.rewrites) {
305 nested_.push_back(SqlSource::Rewriter(rewrite.rewrite_node));
306 }
307 }
308
Rewrite(uint32_t rewritten_start,uint32_t rewritten_end,SqlSource source)309 void SqlSource::Rewriter::Rewrite(uint32_t rewritten_start,
310 uint32_t rewritten_end,
311 SqlSource source) {
312 PERFETTO_CHECK(rewritten_start <= rewritten_end);
313 PERFETTO_CHECK(rewritten_end <= orig_.rewritten_sql.size());
314
315 uint32_t original_start =
316 orig_.RewrittenOffsetToOriginalOffset(rewritten_start);
317 std::optional<uint32_t> maybe_rewrite =
318 orig_.RewriteForOriginalOffset(original_start);
319 if (maybe_rewrite) {
320 const SqlSource::Rewrite& rewrite = orig_.rewrites[*maybe_rewrite];
321 nested_[*maybe_rewrite].Rewrite(
322 rewritten_start - rewrite.rewritten_sql_start,
323 rewritten_end - rewrite.rewritten_sql_start, std::move(source));
324 } else {
325 uint32_t original_end =
326 orig_.RewrittenOffsetToOriginalOffset(rewritten_end);
327 non_nested_.push_back(SqlSource::Rewrite{
328 original_start,
329 original_end,
330 std::numeric_limits<uint32_t>::max(), // Dummy, corrected in |Build|.
331 std::numeric_limits<uint32_t>::max(), // Dummy, corrected in |Build|.
332 std::move(source.root_),
333 });
334 }
335 }
336
Build()337 SqlSource SqlSource::Rewriter::Build() && {
338 // Phase 1: finalize all the nested rewrites and merge both nested and
339 // non-nested into a single vector.
340 std::vector<SqlSource::Rewrite> all_rewrites = std::move(non_nested_);
341 for (uint32_t i = 0; i < nested_.size(); ++i) {
342 const SqlSource::Rewrite orig_rewrite = orig_.rewrites[i];
343 all_rewrites.push_back(SqlSource::Rewrite{
344 orig_rewrite.original_sql_start,
345 orig_rewrite.original_sql_end,
346 std::numeric_limits<uint32_t>::max(), // Dummy, corrected in phase 3.
347 std::numeric_limits<uint32_t>::max(), // Dummy, corrected in phase 3.
348 std::move(nested_[i]).Build().root_,
349 });
350 }
351
352 // Phase 2: sort the new rewrite vector by original offset and verify that the
353 // original offsets are monotonic and non-overlapping.
354 std::sort(all_rewrites.begin(), all_rewrites.end(),
355 [](const SqlSource::Rewrite& a, const SqlSource::Rewrite& b) {
356 return a.original_sql_start < b.original_sql_start;
357 });
358 for (uint32_t i = 1; i < all_rewrites.size(); ++i) {
359 PERFETTO_CHECK(all_rewrites[i - 1].original_sql_end <=
360 all_rewrites[i].original_sql_start);
361 }
362
363 // Phase 3: compute the new rewritten offsets and assign them to the rewrites.
364 // Also unset the traceback flag for all rewrites.
365 uint32_t original_bytes_in_rewrites = 0;
366 uint32_t rewritten_bytes_in_rewrites = 0;
367 for (SqlSource::Rewrite& rewrite : all_rewrites) {
368 uint32_t source_size =
369 static_cast<uint32_t>(rewrite.rewrite_node.rewritten_sql.size());
370
371 rewrite.rewritten_sql_start = rewrite.original_sql_start +
372 rewritten_bytes_in_rewrites -
373 original_bytes_in_rewrites;
374 rewrite.rewritten_sql_end = rewrite.rewritten_sql_start + source_size;
375 rewrite.rewrite_node.include_traceback_header = false;
376
377 original_bytes_in_rewrites +=
378 rewrite.original_sql_end - rewrite.original_sql_start;
379 rewritten_bytes_in_rewrites += source_size;
380 }
381
382 // Phase 4: update the node to reflect the new rewrites.
383 orig_.rewrites = std::move(all_rewrites);
384 orig_.rewritten_sql = ApplyRewrites(orig_.original_sql, orig_.rewrites);
385 return SqlSource(std::move(orig_));
386 }
387
388 } // namespace perfetto::trace_processor
389