1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/perfetto_sql/preprocessor/perfetto_sql_preprocessor.h"
18
19 #include <algorithm>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <list>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <unordered_set>
29 #include <utility>
30 #include <variant>
31 #include <vector>
32
33 #include "perfetto/base/compiler.h"
34 #include "perfetto/base/logging.h"
35 #include "perfetto/base/status.h"
36 #include "perfetto/ext/base/flat_hash_map.h"
37 #include "perfetto/ext/base/string_utils.h"
38 #include "src/trace_processor/perfetto_sql/preprocessor/preprocessor_grammar_interface.h"
39 #include "src/trace_processor/perfetto_sql/tokenizer/sqlite_tokenizer.h"
40 #include "src/trace_processor/sqlite/sql_source.h"
41
42 namespace perfetto::trace_processor {
43 namespace {
44
45 using State = PreprocessorGrammarState;
46
47 struct Preprocessor {
48 public:
Preprocessorperfetto::trace_processor::__anona35945a20111::Preprocessor49 explicit Preprocessor(State* state)
50 : parser_(PreprocessorGrammarParseAlloc(malloc, state)) {}
~Preprocessorperfetto::trace_processor::__anona35945a20111::Preprocessor51 ~Preprocessor() { PreprocessorGrammarParseFree(parser_, free); }
52
Parseperfetto::trace_processor::__anona35945a20111::Preprocessor53 void Parse(int token_type, PreprocessorGrammarToken token) {
54 PreprocessorGrammarParse(parser_, token_type, token);
55 }
56
57 private:
58 void* parser_;
59 };
60
// Implementation tag for the stringify intrinsic macros.
struct Stringify {
  // When true, stringification still goes ahead if the only variable left
  // without a substitution inside the argument is named "table".
  bool ignore_table;
};
// Implementation tag for the token-apply intrinsic macros.
struct Apply {
  // Grammar token fed to the parser to select the joiner (PPTK_COMMA or
  // PPTK_AND).
  int join_token;
  // Grammar token fed to the parser to select whether the result is prefixed
  // with the joiner (PPTK_TRUE or PPTK_FALSE).
  int prefix_token;
};
68 using MacroImpl =
69 std::variant<PerfettoSqlPreprocessor::Macro*, Stringify, Apply>;
70
71 // Synthetic "stackframe" representing the processing of a single piece of SQL.
72 struct Frame {
73 struct Root {};
74 struct Rewrite {
75 SqliteTokenizer& tokenizer;
76 SqlSource::Rewriter& rewriter;
77 SqliteTokenizer::Token start;
78 SqliteTokenizer::Token end;
79 };
80 struct Append {
81 std::vector<SqlSource>& result;
82 };
83 using Type = std::variant<Root, Rewrite, Append>;
84 struct ActiveMacro {
85 std::string name;
86 MacroImpl impl;
87 std::vector<SqlSource> args;
88 uint32_t nested_macro_count;
89 std::unordered_set<std::string> seen_variables;
90 std::unordered_set<std::string> expanded_variables;
91 };
92 enum VariableHandling { kLookup, kLookupOrIgnore, kIgnore };
93
Frameperfetto::trace_processor::__anona35945a20111::Frame94 explicit Frame(Type _type,
95 VariableHandling _var_handling,
96 State* s,
97 const SqlSource& source)
98 : type(_type),
99 var_handling(_var_handling),
100 preprocessor(s),
101 tokenizer(source),
102 rewriter(source),
103 substituitions(&owned_substituitions) {}
104 Frame(const Frame&) = delete;
105 Frame& operator=(const Frame&) = delete;
106 Frame(Frame&&) = delete;
107 Frame& operator=(Frame&&) = delete;
108
109 Type type;
110 VariableHandling var_handling;
111 Preprocessor preprocessor;
112 SqliteTokenizer tokenizer;
113
114 bool seen_semicolon = false;
115 SqlSource::Rewriter rewriter;
116 bool ignore_rewrite = false;
117
118 std::optional<ActiveMacro> active_macro;
119
120 base::FlatHashMap<std::string, SqlSource> owned_substituitions;
121 base::FlatHashMap<std::string, SqlSource>* substituitions;
122 };
123
124 struct ErrorToken {
125 SqliteTokenizer::Token token;
126 std::string message;
127 };
128
129 extern "C" struct PreprocessorGrammarState {
130 std::list<Frame> stack;
131 const base::FlatHashMap<std::string, PerfettoSqlPreprocessor::Macro>& macros;
132 std::optional<ErrorToken> error;
133 };
134
135 extern "C" struct PreprocessorGrammarApplyList {
136 std::vector<PreprocessorGrammarTokenBounds> args;
137 };
138
GrammarTokenToTokenizerToken(const PreprocessorGrammarToken & token)139 SqliteTokenizer::Token GrammarTokenToTokenizerToken(
140 const PreprocessorGrammarToken& token) {
141 return SqliteTokenizer::Token{std::string_view(token.ptr, token.n),
142 TK_ILLEGAL};
143 }
144
ErrorAtToken(const SqliteTokenizer & tokenizer,const SqliteTokenizer::Token & token,const char * error)145 base::Status ErrorAtToken(const SqliteTokenizer& tokenizer,
146 const SqliteTokenizer::Token& token,
147 const char* error) {
148 std::string traceback = tokenizer.AsTraceback(token);
149 return base::ErrStatus("%s%s", traceback.c_str(), error);
150 }
151
SqlSourceVectorToString(const std::vector<SqlSource> & vec)152 std::vector<std::string> SqlSourceVectorToString(
153 const std::vector<SqlSource>& vec) {
154 std::vector<std::string> pieces;
155 pieces.reserve(vec.size());
156 for (const auto& list : vec) {
157 pieces.emplace_back(list.sql());
158 }
159 return pieces;
160 }
161
BoundsToStringView(const PreprocessorGrammarTokenBounds & b)162 std::string_view BoundsToStringView(const PreprocessorGrammarTokenBounds& b) {
163 return {b.start.ptr, static_cast<size_t>(b.end.ptr + b.end.n - b.start.ptr)};
164 }
165
RewriteIntrinsicMacro(Frame & frame,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)166 void RewriteIntrinsicMacro(Frame& frame,
167 SqliteTokenizer::Token name,
168 SqliteTokenizer::Token rp) {
169 const auto& macro = *frame.active_macro;
170 frame.tokenizer.Rewrite(
171 frame.rewriter, name, rp,
172 SqlSource::FromTraceProcessorImplementation(
173 macro.name + "!(" +
174 base::Join(SqlSourceVectorToString(macro.args), ", ") + ")"),
175 SqliteTokenizer::EndToken::kInclusive);
176 }
177
ExecuteSqlMacro(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)178 void ExecuteSqlMacro(State* state,
179 Frame& frame,
180 Frame::ActiveMacro& macro,
181 SqliteTokenizer::Token name,
182 SqliteTokenizer::Token rp) {
183 auto& sql_macro = std::get<PerfettoSqlPreprocessor::Macro*>(macro.impl);
184 if (macro.args.size() != sql_macro->args.size()) {
185 state->error = ErrorToken{
186 name,
187 base::ErrStatus(
188 "wrong number of macro arguments, expected %zu actual %zu",
189 sql_macro->args.size(), macro.args.size())
190 .message(),
191 };
192 return;
193 }
194 // TODO(lalitm): switch back to kLookup once we have proper parser support.
195 state->stack.emplace_back(
196 Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
197 Frame::kLookupOrIgnore, state, sql_macro->sql);
198 auto& macro_frame = state->stack.back();
199 for (uint32_t i = 0; i < sql_macro->args.size(); ++i) {
200 macro_frame.owned_substituitions.Insert(sql_macro->args[i],
201 std::move(macro.args[i]));
202 }
203 }
204
ExecuteStringify(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)205 void ExecuteStringify(State* state,
206 Frame& frame,
207 Frame::ActiveMacro& macro,
208 SqliteTokenizer::Token name,
209 SqliteTokenizer::Token rp) {
210 auto& stringify = std::get<Stringify>(macro.impl);
211 if (macro.args.size() != 1) {
212 state->error = ErrorToken{
213 name,
214 base::ErrStatus(
215 "stringify: must specify exactly 1 argument, actual %zu",
216 macro.args.size())
217 .message(),
218 };
219 return;
220 }
221 bool can_stringify_outer =
222 macro.seen_variables.empty() ||
223 (stringify.ignore_table && macro.seen_variables.size() == 1 &&
224 macro.seen_variables.count("table"));
225 if (!can_stringify_outer) {
226 RewriteIntrinsicMacro(frame, name, rp);
227 return;
228 }
229 if (!macro.expanded_variables.empty()) {
230 state->stack.emplace_back(
231 Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
232 Frame::kIgnore, state,
233 SqlSource::FromTraceProcessorImplementation(macro.name + "!(" +
234 macro.args[0].sql() + ")"));
235 return;
236 }
237 auto res = SqlSource::FromTraceProcessorImplementation(
238 "'" + macro.args[0].sql() + "'");
239 frame.tokenizer.Rewrite(frame.rewriter, name, rp, std::move(res),
240 SqliteTokenizer::EndToken::kInclusive);
241 }
242
ExecuteApply(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)243 void ExecuteApply(State* state,
244 Frame& frame,
245 Frame::ActiveMacro& macro,
246 SqliteTokenizer::Token name,
247 SqliteTokenizer::Token rp) {
248 auto& apply = std::get<Apply>(macro.impl);
249 if (!macro.seen_variables.empty()) {
250 RewriteIntrinsicMacro(frame, name, rp);
251 return;
252 }
253 state->stack.emplace_back(
254 Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
255 Frame::VariableHandling::kIgnore, state,
256 SqlSource::FromTraceProcessorImplementation(
257 base::Join(SqlSourceVectorToString(macro.args), " ")));
258
259 auto& expansion_frame = state->stack.back();
260 expansion_frame.preprocessor.Parse(
261 PPTK_APPLY, PreprocessorGrammarToken{nullptr, 0, PPTK_APPLY});
262 expansion_frame.preprocessor.Parse(
263 apply.join_token, PreprocessorGrammarToken{nullptr, 0, apply.join_token});
264 expansion_frame.preprocessor.Parse(
265 apply.prefix_token,
266 PreprocessorGrammarToken{nullptr, 0, apply.prefix_token});
267 expansion_frame.ignore_rewrite = true;
268 }
269
OnPreprocessorSyntaxError(State * state,PreprocessorGrammarToken * token)270 extern "C" void OnPreprocessorSyntaxError(State* state,
271 PreprocessorGrammarToken* token) {
272 state->error = {GrammarTokenToTokenizerToken(*token),
273 "preprocessor syntax error"};
274 }
275
OnPreprocessorApply(PreprocessorGrammarState * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * join,PreprocessorGrammarToken * prefix,PreprocessorGrammarApplyList * raw_a,PreprocessorGrammarApplyList * raw_b)276 extern "C" void OnPreprocessorApply(PreprocessorGrammarState* state,
277 PreprocessorGrammarToken* name,
278 PreprocessorGrammarToken* join,
279 PreprocessorGrammarToken* prefix,
280 PreprocessorGrammarApplyList* raw_a,
281 PreprocessorGrammarApplyList* raw_b) {
282 std::unique_ptr<PreprocessorGrammarApplyList> a(raw_a);
283 std::unique_ptr<PreprocessorGrammarApplyList> b(raw_b);
284 auto& frame = state->stack.back();
285 size_t size = std::min(a->args.size(), b ? b->args.size() : a->args.size());
286 if (size == 0) {
287 auto& rewrite = std::get<Frame::Rewrite>(frame.type);
288 rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
289 SqlSource::FromTraceProcessorImplementation(""),
290 SqliteTokenizer::EndToken::kInclusive);
291 return;
292 }
293 std::string macro(name->ptr, name->n);
294 std::vector<std::string> args;
295 for (uint32_t i = 0; i < size; ++i) {
296 std::string arg = macro;
297 arg.append("!(").append(BoundsToStringView(a->args[i]));
298 if (b) {
299 arg.append(",").append(BoundsToStringView(b->args[i]));
300 }
301 arg.append(")");
302 args.emplace_back(std::move(arg));
303 }
304 std::string joiner = join->major == PPTK_AND ? " AND " : " , ";
305 std::string res = prefix->major == PPTK_TRUE ? joiner : "";
306 res.append(base::Join(args, joiner));
307 state->stack.emplace_back(
308 frame.type, Frame::VariableHandling::kLookupOrIgnore, state,
309 SqlSource::FromTraceProcessorImplementation(std::move(res)));
310 }
311
OnPreprocessorVariable(State * state,PreprocessorGrammarToken * var)312 extern "C" void OnPreprocessorVariable(State* state,
313 PreprocessorGrammarToken* var) {
314 if (var->n == 0 || var->ptr[0] != '$') {
315 state->error = {GrammarTokenToTokenizerToken(*var),
316 "variable must start with '$'"};
317 return;
318 }
319 auto& frame = state->stack.back();
320 if (frame.active_macro) {
321 std::string name(var->ptr + 1, var->n - 1);
322 if (frame.substituitions->Find(name)) {
323 frame.active_macro->expanded_variables.insert(name);
324 } else {
325 frame.active_macro->seen_variables.insert(name);
326 }
327 return;
328 }
329 switch (frame.var_handling) {
330 case Frame::kLookup:
331 case Frame::kLookupOrIgnore: {
332 auto* it =
333 frame.substituitions->Find(std::string(var->ptr + 1, var->n - 1));
334 if (!it) {
335 if (frame.var_handling == Frame::kLookup) {
336 state->error = {GrammarTokenToTokenizerToken(*var),
337 "variable not defined"};
338 }
339 return;
340 }
341 frame.tokenizer.RewriteToken(frame.rewriter,
342 GrammarTokenToTokenizerToken(*var), *it);
343 break;
344 }
345 case Frame::kIgnore:
346 break;
347 }
348 }
349
OnPreprocessorMacroId(State * state,PreprocessorGrammarToken * name_tok)350 extern "C" void OnPreprocessorMacroId(State* state,
351 PreprocessorGrammarToken* name_tok) {
352 auto& invocation = state->stack.back();
353 if (invocation.active_macro) {
354 invocation.active_macro->nested_macro_count++;
355 return;
356 }
357 std::string name(name_tok->ptr, name_tok->n);
358 MacroImpl impl;
359 if (name == "__intrinsic_stringify") {
360 impl = Stringify();
361 } else if (name == "__intrinsic_stringify_ignore_table") {
362 impl = Stringify{true};
363 } else if (name == "__intrinsic_token_apply") {
364 impl = Apply{PPTK_COMMA, PPTK_FALSE};
365 } else if (name == "__intrinsic_token_apply_prefix") {
366 impl = Apply{PPTK_COMMA, PPTK_TRUE};
367 } else if (name == "__intrinsic_token_apply_and") {
368 impl = Apply{PPTK_AND, PPTK_FALSE};
369 } else if (name == "__intrinsic_token_apply_and_prefix") {
370 impl = Apply{PPTK_AND, PPTK_TRUE};
371 } else {
372 auto* sql_macro = state->macros.Find(name);
373 if (!sql_macro) {
374 state->error = {GrammarTokenToTokenizerToken(*name_tok),
375 "no such macro defined"};
376 return;
377 }
378 impl = sql_macro;
379 }
380 invocation.active_macro =
381 Frame::ActiveMacro{std::move(name), impl, {}, 0, {}, {}};
382 }
383
OnPreprocessorMacroArg(State * state,PreprocessorGrammarTokenBounds * arg)384 extern "C" void OnPreprocessorMacroArg(State* state,
385 PreprocessorGrammarTokenBounds* arg) {
386 auto& frame = state->stack.back();
387 auto& macro = *frame.active_macro;
388 if (macro.nested_macro_count > 0) {
389 return;
390 }
391 auto start_token = GrammarTokenToTokenizerToken(arg->start);
392 auto end_token = GrammarTokenToTokenizerToken(arg->end);
393 state->stack.emplace_back(
394 Frame::Append{macro.args}, frame.var_handling, state,
395 frame.tokenizer.Substr(start_token, end_token,
396 SqliteTokenizer::EndToken::kInclusive));
397
398 auto& arg_frame = state->stack.back();
399 arg_frame.substituitions = frame.substituitions;
400 }
401
OnPreprocessorMacroEnd(State * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * rp)402 extern "C" void OnPreprocessorMacroEnd(State* state,
403 PreprocessorGrammarToken* name,
404 PreprocessorGrammarToken* rp) {
405 auto& frame = state->stack.back();
406 auto& macro = *frame.active_macro;
407 if (macro.nested_macro_count > 0) {
408 --macro.nested_macro_count;
409 return;
410 }
411 switch (macro.impl.index()) {
412 case base::variant_index<MacroImpl, PerfettoSqlPreprocessor::Macro*>():
413 ExecuteSqlMacro(state, frame, macro, GrammarTokenToTokenizerToken(*name),
414 GrammarTokenToTokenizerToken(*rp));
415 break;
416 case base::variant_index<MacroImpl, Stringify>():
417 ExecuteStringify(state, frame, macro, GrammarTokenToTokenizerToken(*name),
418 GrammarTokenToTokenizerToken(*rp));
419 break;
420 case base::variant_index<MacroImpl, Apply>():
421 ExecuteApply(state, frame, macro, GrammarTokenToTokenizerToken(*name),
422 GrammarTokenToTokenizerToken(*rp));
423 break;
424 default:
425 PERFETTO_FATAL("Unknown variant type");
426 }
427 frame.active_macro = std::nullopt;
428 }
429
OnPreprocessorEnd(State * state)430 extern "C" void OnPreprocessorEnd(State* state) {
431 auto& frame = state->stack.back();
432 PERFETTO_CHECK(!frame.active_macro);
433
434 if (frame.ignore_rewrite) {
435 return;
436 }
437 switch (frame.type.index()) {
438 case base::variant_index<Frame::Type, Frame::Append>(): {
439 auto& append = std::get<Frame::Append>(frame.type);
440 append.result.push_back(std::move(frame.rewriter).Build());
441 break;
442 }
443 case base::variant_index<Frame::Type, Frame::Rewrite>(): {
444 auto& rewrite = std::get<Frame::Rewrite>(frame.type);
445 rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
446 std::move(frame.rewriter).Build(),
447 SqliteTokenizer::EndToken::kInclusive);
448 break;
449 }
450 case base::variant_index<Frame::Type, Frame::Root>():
451 break;
452 default:
453 PERFETTO_FATAL("Unknown frame type");
454 }
455 }
456
457 } // namespace
458
PerfettoSqlPreprocessor(SqlSource source,const base::FlatHashMap<std::string,Macro> & macros)459 PerfettoSqlPreprocessor::PerfettoSqlPreprocessor(
460 SqlSource source,
461 const base::FlatHashMap<std::string, Macro>& macros)
462 : global_tokenizer_(std::move(source)), macros_(¯os) {}
463
NextStatement()464 bool PerfettoSqlPreprocessor::NextStatement() {
465 PERFETTO_CHECK(status_.ok());
466
467 // Skip through any number of semi-colons (representing empty statements).
468 SqliteTokenizer::Token tok = global_tokenizer_.NextNonWhitespace();
469 while (tok.token_type == TK_SEMI) {
470 tok = global_tokenizer_.NextNonWhitespace();
471 }
472
473 // If we still see a terminal token at this point, we must have hit EOF.
474 if (tok.IsTerminal()) {
475 PERFETTO_DCHECK(tok.token_type != TK_SEMI);
476 return false;
477 }
478
479 SqlSource stmt =
480 global_tokenizer_.Substr(tok, global_tokenizer_.NextTerminal(),
481 SqliteTokenizer::EndToken::kExclusive);
482
483 State s{{}, *macros_, {}};
484 s.stack.emplace_back(Frame::Root(), Frame::kIgnore, &s, std::move(stmt));
485 for (;;) {
486 auto* frame = &s.stack.back();
487 auto& tk = frame->tokenizer;
488 SqliteTokenizer::Token t = tk.NextNonWhitespace();
489 int token_type;
490 if (t.str.empty()) {
491 token_type = frame->seen_semicolon ? 0 : PPTK_SEMI;
492 frame->seen_semicolon = true;
493 } else if (t.token_type == TK_SEMI) {
494 token_type = PPTK_SEMI;
495 frame->seen_semicolon = true;
496 } else if (t.token_type == TK_ILLEGAL) {
497 if (t.str.size() == 1 && t.str[0] == '!') {
498 token_type = PPTK_EXCLAIM;
499 } else {
500 status_ = ErrorAtToken(tk, t, "illegal token");
501 return false;
502 }
503 } else if (t.token_type == TK_ID) {
504 token_type = PPTK_ID;
505 } else if (t.token_type == TK_LP) {
506 token_type = PPTK_LP;
507 } else if (t.token_type == TK_RP) {
508 token_type = PPTK_RP;
509 } else if (t.token_type == TK_COMMA) {
510 token_type = PPTK_COMMA;
511 } else if (t.token_type == TK_VARIABLE) {
512 token_type = PPTK_VARIABLE;
513 } else {
514 token_type = PPTK_OPAQUE;
515 }
516 frame->preprocessor.Parse(
517 token_type,
518 PreprocessorGrammarToken{t.str.data(), t.str.size(), token_type});
519 if (s.error) {
520 status_ = ErrorAtToken(tk, s.error->token, s.error->message.c_str());
521 return false;
522 }
523 if (token_type == 0) {
524 if (s.stack.size() == 1) {
525 statement_ = std::move(frame->rewriter).Build();
526 return true;
527 }
528 s.stack.pop_back();
529 frame = &s.stack.back();
530 }
531 }
532 }
533
OnPreprocessorCreateApplyList()534 extern "C" PreprocessorGrammarApplyList* OnPreprocessorCreateApplyList() {
535 return std::make_unique<PreprocessorGrammarApplyList>().release();
536 }
537
OnPreprocessorAppendApplyList(PreprocessorGrammarApplyList * list,PreprocessorGrammarTokenBounds * bounds)538 extern "C" PreprocessorGrammarApplyList* OnPreprocessorAppendApplyList(
539 PreprocessorGrammarApplyList* list,
540 PreprocessorGrammarTokenBounds* bounds) {
541 list->args.push_back(*bounds);
542 return list;
543 }
544
OnPreprocessorFreeApplyList(PreprocessorGrammarState *,PreprocessorGrammarApplyList * list)545 extern "C" void OnPreprocessorFreeApplyList(
546 PreprocessorGrammarState*,
547 PreprocessorGrammarApplyList* list) {
548 std::unique_ptr<PreprocessorGrammarApplyList> l(list);
549 }
550
551 } // namespace perfetto::trace_processor
552