1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/perfetto_sql/preprocessor/perfetto_sql_preprocessor.h"
18 
19 #include <algorithm>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <list>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <unordered_set>
29 #include <utility>
30 #include <variant>
31 #include <vector>
32 
33 #include "perfetto/base/compiler.h"
34 #include "perfetto/base/logging.h"
35 #include "perfetto/base/status.h"
36 #include "perfetto/ext/base/flat_hash_map.h"
37 #include "perfetto/ext/base/string_utils.h"
38 #include "src/trace_processor/perfetto_sql/preprocessor/preprocessor_grammar_interface.h"
39 #include "src/trace_processor/perfetto_sql/tokenizer/sqlite_tokenizer.h"
40 #include "src/trace_processor/sqlite/sql_source.h"
41 
42 namespace perfetto::trace_processor {
43 namespace {
44 
// Short alias for the state object that is threaded through the grammar
// callbacks in this file.
using State = PreprocessorGrammarState;
46 
47 struct Preprocessor {
48  public:
Preprocessorperfetto::trace_processor::__anona35945a20111::Preprocessor49   explicit Preprocessor(State* state)
50       : parser_(PreprocessorGrammarParseAlloc(malloc, state)) {}
~Preprocessorperfetto::trace_processor::__anona35945a20111::Preprocessor51   ~Preprocessor() { PreprocessorGrammarParseFree(parser_, free); }
52 
Parseperfetto::trace_processor::__anona35945a20111::Preprocessor53   void Parse(int token_type, PreprocessorGrammarToken token) {
54     PreprocessorGrammarParse(parser_, token_type, token);
55   }
56 
57  private:
58   void* parser_;
59 };
60 
// Macro implementation tag for the stringify intrinsics.
struct Stringify {
  // When true, an unresolved variable named "table" does not prevent the
  // argument from being stringified. Defaults to false so that a
  // default-initialized instance never carries an indeterminate value.
  bool ignore_table = false;
};
// Macro implementation tag for the token-apply intrinsics.
struct Apply {
  // Grammar token type describing how expanded items are joined (PPTK_COMMA or
  // PPTK_AND for the intrinsics registered in this file).
  int join_token = 0;
  // Grammar token type describing whether the joiner is also emitted as a
  // prefix (PPTK_TRUE or PPTK_FALSE for the intrinsics registered in this
  // file).
  int prefix_token = 0;
};
// The implementation behind a macro invocation: either a user-defined SQL
// macro or one of the built-in intrinsics (stringify / token apply).
using MacroImpl =
    std::variant<PerfettoSqlPreprocessor::Macro*, Stringify, Apply>;
70 
// Synthetic "stackframe" representing the processing of a single piece of SQL.
//
// Each frame owns a grammar parser, a tokenizer and a rewriter over its own
// SQL source. Frames are not copyable or movable.
struct Frame {
  // The frame for the top-level statement; nothing is done with its output
  // when the frame ends.
  struct Root {};
  // When the frame ends, its rewritten SQL replaces the tokens [start, end]
  // (inclusive) via |tokenizer| and |rewriter|.
  struct Rewrite {
    SqliteTokenizer& tokenizer;
    SqlSource::Rewriter& rewriter;
    SqliteTokenizer::Token start;
    SqliteTokenizer::Token end;
  };
  // When the frame ends, its rewritten SQL is appended to |result|.
  struct Append {
    std::vector<SqlSource>& result;
  };
  using Type = std::variant<Root, Rewrite, Append>;
  // Bookkeeping for the macro invocation currently being parsed in a frame.
  struct ActiveMacro {
    std::string name;
    MacroImpl impl;
    // Processed arguments of the invocation, in order.
    std::vector<SqlSource> args;
    // Number of macro invocations currently open inside this one.
    uint32_t nested_macro_count;
    // Names of variables seen in the invocation which have no substitution.
    std::unordered_set<std::string> seen_variables;
    // Names of variables seen in the invocation which have a substitution.
    std::unordered_set<std::string> expanded_variables;
  };
  // How a variable outside of a macro invocation is treated:
  //  - kLookup: substitute it; an unknown variable is an error.
  //  - kLookupOrIgnore: substitute it if known, otherwise leave it untouched.
  //  - kIgnore: never substitute.
  enum VariableHandling { kLookup, kLookupOrIgnore, kIgnore };

  explicit Frame(Type _type,
                 VariableHandling _var_handling,
                 State* s,
                 const SqlSource& source)
      : type(_type),
        var_handling(_var_handling),
        preprocessor(s),
        tokenizer(source),
        rewriter(source),
        substituitions(&owned_substituitions) {}
  Frame(const Frame&) = delete;
  Frame& operator=(const Frame&) = delete;
  Frame(Frame&&) = delete;
  Frame& operator=(Frame&&) = delete;

  Type type;
  VariableHandling var_handling;
  Preprocessor preprocessor;
  SqliteTokenizer tokenizer;

  // Set once a semicolon (real, or synthesized at end of input) has been fed
  // to the parser for this frame.
  bool seen_semicolon = false;
  SqlSource::Rewriter rewriter;
  // When true, nothing is done with this frame's output when the frame ends.
  bool ignore_rewrite = false;

  // Engaged while a macro invocation is being parsed in this frame.
  std::optional<ActiveMacro> active_macro;

  // Substitutions owned by this frame.
  base::FlatHashMap<std::string, SqlSource> owned_substituitions;
  // Substitutions used for lookups. Points at |owned_substituitions| by
  // default; macro argument frames are pointed at their parent's map instead.
  base::FlatHashMap<std::string, SqlSource>* substituitions;
};
123 
// An error raised while preprocessing: the token it is attributed to and a
// human-readable message.
struct ErrorToken {
  SqliteTokenizer::Token token;
  std::string message;
};
128 
// State shared between the statement driver and the grammar callbacks.
extern "C" struct PreprocessorGrammarState {
  // Stack of frames; the back of the list is the frame currently being
  // processed. Frames are neither copyable nor movable and are referenced by
  // other frames, hence a std::list.
  std::list<Frame> stack;
  // Macros which can be invoked, keyed by name.
  const base::FlatHashMap<std::string, PerfettoSqlPreprocessor::Macro>& macros;
  // Set by a callback when preprocessing fails.
  std::optional<ErrorToken> error;
};
134 
// A list of argument token ranges collected by the grammar for the token-apply
// intrinsics.
extern "C" struct PreprocessorGrammarApplyList {
  std::vector<PreprocessorGrammarTokenBounds> args;
};
138 
GrammarTokenToTokenizerToken(const PreprocessorGrammarToken & token)139 SqliteTokenizer::Token GrammarTokenToTokenizerToken(
140     const PreprocessorGrammarToken& token) {
141   return SqliteTokenizer::Token{std::string_view(token.ptr, token.n),
142                                 TK_ILLEGAL};
143 }
144 
ErrorAtToken(const SqliteTokenizer & tokenizer,const SqliteTokenizer::Token & token,const char * error)145 base::Status ErrorAtToken(const SqliteTokenizer& tokenizer,
146                           const SqliteTokenizer::Token& token,
147                           const char* error) {
148   std::string traceback = tokenizer.AsTraceback(token);
149   return base::ErrStatus("%s%s", traceback.c_str(), error);
150 }
151 
SqlSourceVectorToString(const std::vector<SqlSource> & vec)152 std::vector<std::string> SqlSourceVectorToString(
153     const std::vector<SqlSource>& vec) {
154   std::vector<std::string> pieces;
155   pieces.reserve(vec.size());
156   for (const auto& list : vec) {
157     pieces.emplace_back(list.sql());
158   }
159   return pieces;
160 }
161 
BoundsToStringView(const PreprocessorGrammarTokenBounds & b)162 std::string_view BoundsToStringView(const PreprocessorGrammarTokenBounds& b) {
163   return {b.start.ptr, static_cast<size_t>(b.end.ptr + b.end.n - b.start.ptr)};
164 }
165 
RewriteIntrinsicMacro(Frame & frame,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)166 void RewriteIntrinsicMacro(Frame& frame,
167                            SqliteTokenizer::Token name,
168                            SqliteTokenizer::Token rp) {
169   const auto& macro = *frame.active_macro;
170   frame.tokenizer.Rewrite(
171       frame.rewriter, name, rp,
172       SqlSource::FromTraceProcessorImplementation(
173           macro.name + "!(" +
174           base::Join(SqlSourceVectorToString(macro.args), ", ") + ")"),
175       SqliteTokenizer::EndToken::kInclusive);
176 }
177 
ExecuteSqlMacro(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)178 void ExecuteSqlMacro(State* state,
179                      Frame& frame,
180                      Frame::ActiveMacro& macro,
181                      SqliteTokenizer::Token name,
182                      SqliteTokenizer::Token rp) {
183   auto& sql_macro = std::get<PerfettoSqlPreprocessor::Macro*>(macro.impl);
184   if (macro.args.size() != sql_macro->args.size()) {
185     state->error = ErrorToken{
186         name,
187         base::ErrStatus(
188             "wrong number of macro arguments, expected %zu actual %zu",
189             sql_macro->args.size(), macro.args.size())
190             .message(),
191     };
192     return;
193   }
194   // TODO(lalitm): switch back to kLookup once we have proper parser support.
195   state->stack.emplace_back(
196       Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
197       Frame::kLookupOrIgnore, state, sql_macro->sql);
198   auto& macro_frame = state->stack.back();
199   for (uint32_t i = 0; i < sql_macro->args.size(); ++i) {
200     macro_frame.owned_substituitions.Insert(sql_macro->args[i],
201                                             std::move(macro.args[i]));
202   }
203 }
204 
ExecuteStringify(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)205 void ExecuteStringify(State* state,
206                       Frame& frame,
207                       Frame::ActiveMacro& macro,
208                       SqliteTokenizer::Token name,
209                       SqliteTokenizer::Token rp) {
210   auto& stringify = std::get<Stringify>(macro.impl);
211   if (macro.args.size() != 1) {
212     state->error = ErrorToken{
213         name,
214         base::ErrStatus(
215             "stringify: must specify exactly 1 argument, actual %zu",
216             macro.args.size())
217             .message(),
218     };
219     return;
220   }
221   bool can_stringify_outer =
222       macro.seen_variables.empty() ||
223       (stringify.ignore_table && macro.seen_variables.size() == 1 &&
224        macro.seen_variables.count("table"));
225   if (!can_stringify_outer) {
226     RewriteIntrinsicMacro(frame, name, rp);
227     return;
228   }
229   if (!macro.expanded_variables.empty()) {
230     state->stack.emplace_back(
231         Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
232         Frame::kIgnore, state,
233         SqlSource::FromTraceProcessorImplementation(macro.name + "!(" +
234                                                     macro.args[0].sql() + ")"));
235     return;
236   }
237   auto res = SqlSource::FromTraceProcessorImplementation(
238       "'" + macro.args[0].sql() + "'");
239   frame.tokenizer.Rewrite(frame.rewriter, name, rp, std::move(res),
240                           SqliteTokenizer::EndToken::kInclusive);
241 }
242 
ExecuteApply(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)243 void ExecuteApply(State* state,
244                   Frame& frame,
245                   Frame::ActiveMacro& macro,
246                   SqliteTokenizer::Token name,
247                   SqliteTokenizer::Token rp) {
248   auto& apply = std::get<Apply>(macro.impl);
249   if (!macro.seen_variables.empty()) {
250     RewriteIntrinsicMacro(frame, name, rp);
251     return;
252   }
253   state->stack.emplace_back(
254       Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
255       Frame::VariableHandling::kIgnore, state,
256       SqlSource::FromTraceProcessorImplementation(
257           base::Join(SqlSourceVectorToString(macro.args), " ")));
258 
259   auto& expansion_frame = state->stack.back();
260   expansion_frame.preprocessor.Parse(
261       PPTK_APPLY, PreprocessorGrammarToken{nullptr, 0, PPTK_APPLY});
262   expansion_frame.preprocessor.Parse(
263       apply.join_token, PreprocessorGrammarToken{nullptr, 0, apply.join_token});
264   expansion_frame.preprocessor.Parse(
265       apply.prefix_token,
266       PreprocessorGrammarToken{nullptr, 0, apply.prefix_token});
267   expansion_frame.ignore_rewrite = true;
268 }
269 
OnPreprocessorSyntaxError(State * state,PreprocessorGrammarToken * token)270 extern "C" void OnPreprocessorSyntaxError(State* state,
271                                           PreprocessorGrammarToken* token) {
272   state->error = {GrammarTokenToTokenizerToken(*token),
273                   "preprocessor syntax error"};
274 }
275 
OnPreprocessorApply(PreprocessorGrammarState * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * join,PreprocessorGrammarToken * prefix,PreprocessorGrammarApplyList * raw_a,PreprocessorGrammarApplyList * raw_b)276 extern "C" void OnPreprocessorApply(PreprocessorGrammarState* state,
277                                     PreprocessorGrammarToken* name,
278                                     PreprocessorGrammarToken* join,
279                                     PreprocessorGrammarToken* prefix,
280                                     PreprocessorGrammarApplyList* raw_a,
281                                     PreprocessorGrammarApplyList* raw_b) {
282   std::unique_ptr<PreprocessorGrammarApplyList> a(raw_a);
283   std::unique_ptr<PreprocessorGrammarApplyList> b(raw_b);
284   auto& frame = state->stack.back();
285   size_t size = std::min(a->args.size(), b ? b->args.size() : a->args.size());
286   if (size == 0) {
287     auto& rewrite = std::get<Frame::Rewrite>(frame.type);
288     rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
289                               SqlSource::FromTraceProcessorImplementation(""),
290                               SqliteTokenizer::EndToken::kInclusive);
291     return;
292   }
293   std::string macro(name->ptr, name->n);
294   std::vector<std::string> args;
295   for (uint32_t i = 0; i < size; ++i) {
296     std::string arg = macro;
297     arg.append("!(").append(BoundsToStringView(a->args[i]));
298     if (b) {
299       arg.append(",").append(BoundsToStringView(b->args[i]));
300     }
301     arg.append(")");
302     args.emplace_back(std::move(arg));
303   }
304   std::string joiner = join->major == PPTK_AND ? " AND " : " , ";
305   std::string res = prefix->major == PPTK_TRUE ? joiner : "";
306   res.append(base::Join(args, joiner));
307   state->stack.emplace_back(
308       frame.type, Frame::VariableHandling::kLookupOrIgnore, state,
309       SqlSource::FromTraceProcessorImplementation(std::move(res)));
310 }
311 
OnPreprocessorVariable(State * state,PreprocessorGrammarToken * var)312 extern "C" void OnPreprocessorVariable(State* state,
313                                        PreprocessorGrammarToken* var) {
314   if (var->n == 0 || var->ptr[0] != '$') {
315     state->error = {GrammarTokenToTokenizerToken(*var),
316                     "variable must start with '$'"};
317     return;
318   }
319   auto& frame = state->stack.back();
320   if (frame.active_macro) {
321     std::string name(var->ptr + 1, var->n - 1);
322     if (frame.substituitions->Find(name)) {
323       frame.active_macro->expanded_variables.insert(name);
324     } else {
325       frame.active_macro->seen_variables.insert(name);
326     }
327     return;
328   }
329   switch (frame.var_handling) {
330     case Frame::kLookup:
331     case Frame::kLookupOrIgnore: {
332       auto* it =
333           frame.substituitions->Find(std::string(var->ptr + 1, var->n - 1));
334       if (!it) {
335         if (frame.var_handling == Frame::kLookup) {
336           state->error = {GrammarTokenToTokenizerToken(*var),
337                           "variable not defined"};
338         }
339         return;
340       }
341       frame.tokenizer.RewriteToken(frame.rewriter,
342                                    GrammarTokenToTokenizerToken(*var), *it);
343       break;
344     }
345     case Frame::kIgnore:
346       break;
347   }
348 }
349 
OnPreprocessorMacroId(State * state,PreprocessorGrammarToken * name_tok)350 extern "C" void OnPreprocessorMacroId(State* state,
351                                       PreprocessorGrammarToken* name_tok) {
352   auto& invocation = state->stack.back();
353   if (invocation.active_macro) {
354     invocation.active_macro->nested_macro_count++;
355     return;
356   }
357   std::string name(name_tok->ptr, name_tok->n);
358   MacroImpl impl;
359   if (name == "__intrinsic_stringify") {
360     impl = Stringify();
361   } else if (name == "__intrinsic_stringify_ignore_table") {
362     impl = Stringify{true};
363   } else if (name == "__intrinsic_token_apply") {
364     impl = Apply{PPTK_COMMA, PPTK_FALSE};
365   } else if (name == "__intrinsic_token_apply_prefix") {
366     impl = Apply{PPTK_COMMA, PPTK_TRUE};
367   } else if (name == "__intrinsic_token_apply_and") {
368     impl = Apply{PPTK_AND, PPTK_FALSE};
369   } else if (name == "__intrinsic_token_apply_and_prefix") {
370     impl = Apply{PPTK_AND, PPTK_TRUE};
371   } else {
372     auto* sql_macro = state->macros.Find(name);
373     if (!sql_macro) {
374       state->error = {GrammarTokenToTokenizerToken(*name_tok),
375                       "no such macro defined"};
376       return;
377     }
378     impl = sql_macro;
379   }
380   invocation.active_macro =
381       Frame::ActiveMacro{std::move(name), impl, {}, 0, {}, {}};
382 }
383 
OnPreprocessorMacroArg(State * state,PreprocessorGrammarTokenBounds * arg)384 extern "C" void OnPreprocessorMacroArg(State* state,
385                                        PreprocessorGrammarTokenBounds* arg) {
386   auto& frame = state->stack.back();
387   auto& macro = *frame.active_macro;
388   if (macro.nested_macro_count > 0) {
389     return;
390   }
391   auto start_token = GrammarTokenToTokenizerToken(arg->start);
392   auto end_token = GrammarTokenToTokenizerToken(arg->end);
393   state->stack.emplace_back(
394       Frame::Append{macro.args}, frame.var_handling, state,
395       frame.tokenizer.Substr(start_token, end_token,
396                              SqliteTokenizer::EndToken::kInclusive));
397 
398   auto& arg_frame = state->stack.back();
399   arg_frame.substituitions = frame.substituitions;
400 }
401 
OnPreprocessorMacroEnd(State * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * rp)402 extern "C" void OnPreprocessorMacroEnd(State* state,
403                                        PreprocessorGrammarToken* name,
404                                        PreprocessorGrammarToken* rp) {
405   auto& frame = state->stack.back();
406   auto& macro = *frame.active_macro;
407   if (macro.nested_macro_count > 0) {
408     --macro.nested_macro_count;
409     return;
410   }
411   switch (macro.impl.index()) {
412     case base::variant_index<MacroImpl, PerfettoSqlPreprocessor::Macro*>():
413       ExecuteSqlMacro(state, frame, macro, GrammarTokenToTokenizerToken(*name),
414                       GrammarTokenToTokenizerToken(*rp));
415       break;
416     case base::variant_index<MacroImpl, Stringify>():
417       ExecuteStringify(state, frame, macro, GrammarTokenToTokenizerToken(*name),
418                        GrammarTokenToTokenizerToken(*rp));
419       break;
420     case base::variant_index<MacroImpl, Apply>():
421       ExecuteApply(state, frame, macro, GrammarTokenToTokenizerToken(*name),
422                    GrammarTokenToTokenizerToken(*rp));
423       break;
424     default:
425       PERFETTO_FATAL("Unknown variant type");
426   }
427   frame.active_macro = std::nullopt;
428 }
429 
OnPreprocessorEnd(State * state)430 extern "C" void OnPreprocessorEnd(State* state) {
431   auto& frame = state->stack.back();
432   PERFETTO_CHECK(!frame.active_macro);
433 
434   if (frame.ignore_rewrite) {
435     return;
436   }
437   switch (frame.type.index()) {
438     case base::variant_index<Frame::Type, Frame::Append>(): {
439       auto& append = std::get<Frame::Append>(frame.type);
440       append.result.push_back(std::move(frame.rewriter).Build());
441       break;
442     }
443     case base::variant_index<Frame::Type, Frame::Rewrite>(): {
444       auto& rewrite = std::get<Frame::Rewrite>(frame.type);
445       rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
446                                 std::move(frame.rewriter).Build(),
447                                 SqliteTokenizer::EndToken::kInclusive);
448       break;
449     }
450     case base::variant_index<Frame::Type, Frame::Root>():
451       break;
452     default:
453       PERFETTO_FATAL("Unknown frame type");
454   }
455 }
456 
457 }  // namespace
458 
PerfettoSqlPreprocessor(SqlSource source,const base::FlatHashMap<std::string,Macro> & macros)459 PerfettoSqlPreprocessor::PerfettoSqlPreprocessor(
460     SqlSource source,
461     const base::FlatHashMap<std::string, Macro>& macros)
462     : global_tokenizer_(std::move(source)), macros_(¯os) {}
463 
NextStatement()464 bool PerfettoSqlPreprocessor::NextStatement() {
465   PERFETTO_CHECK(status_.ok());
466 
467   // Skip through any number of semi-colons (representing empty statements).
468   SqliteTokenizer::Token tok = global_tokenizer_.NextNonWhitespace();
469   while (tok.token_type == TK_SEMI) {
470     tok = global_tokenizer_.NextNonWhitespace();
471   }
472 
473   // If we still see a terminal token at this point, we must have hit EOF.
474   if (tok.IsTerminal()) {
475     PERFETTO_DCHECK(tok.token_type != TK_SEMI);
476     return false;
477   }
478 
479   SqlSource stmt =
480       global_tokenizer_.Substr(tok, global_tokenizer_.NextTerminal(),
481                                SqliteTokenizer::EndToken::kExclusive);
482 
483   State s{{}, *macros_, {}};
484   s.stack.emplace_back(Frame::Root(), Frame::kIgnore, &s, std::move(stmt));
485   for (;;) {
486     auto* frame = &s.stack.back();
487     auto& tk = frame->tokenizer;
488     SqliteTokenizer::Token t = tk.NextNonWhitespace();
489     int token_type;
490     if (t.str.empty()) {
491       token_type = frame->seen_semicolon ? 0 : PPTK_SEMI;
492       frame->seen_semicolon = true;
493     } else if (t.token_type == TK_SEMI) {
494       token_type = PPTK_SEMI;
495       frame->seen_semicolon = true;
496     } else if (t.token_type == TK_ILLEGAL) {
497       if (t.str.size() == 1 && t.str[0] == '!') {
498         token_type = PPTK_EXCLAIM;
499       } else {
500         status_ = ErrorAtToken(tk, t, "illegal token");
501         return false;
502       }
503     } else if (t.token_type == TK_ID) {
504       token_type = PPTK_ID;
505     } else if (t.token_type == TK_LP) {
506       token_type = PPTK_LP;
507     } else if (t.token_type == TK_RP) {
508       token_type = PPTK_RP;
509     } else if (t.token_type == TK_COMMA) {
510       token_type = PPTK_COMMA;
511     } else if (t.token_type == TK_VARIABLE) {
512       token_type = PPTK_VARIABLE;
513     } else {
514       token_type = PPTK_OPAQUE;
515     }
516     frame->preprocessor.Parse(
517         token_type,
518         PreprocessorGrammarToken{t.str.data(), t.str.size(), token_type});
519     if (s.error) {
520       status_ = ErrorAtToken(tk, s.error->token, s.error->message.c_str());
521       return false;
522     }
523     if (token_type == 0) {
524       if (s.stack.size() == 1) {
525         statement_ = std::move(frame->rewriter).Build();
526         return true;
527       }
528       s.stack.pop_back();
529       frame = &s.stack.back();
530     }
531   }
532 }
533 
OnPreprocessorCreateApplyList()534 extern "C" PreprocessorGrammarApplyList* OnPreprocessorCreateApplyList() {
535   return std::make_unique<PreprocessorGrammarApplyList>().release();
536 }
537 
OnPreprocessorAppendApplyList(PreprocessorGrammarApplyList * list,PreprocessorGrammarTokenBounds * bounds)538 extern "C" PreprocessorGrammarApplyList* OnPreprocessorAppendApplyList(
539     PreprocessorGrammarApplyList* list,
540     PreprocessorGrammarTokenBounds* bounds) {
541   list->args.push_back(*bounds);
542   return list;
543 }
544 
OnPreprocessorFreeApplyList(PreprocessorGrammarState *,PreprocessorGrammarApplyList * list)545 extern "C" void OnPreprocessorFreeApplyList(
546     PreprocessorGrammarState*,
547     PreprocessorGrammarApplyList* list) {
548   std::unique_ptr<PreprocessorGrammarApplyList> l(list);
549 }
550 
551 }  // namespace perfetto::trace_processor
552