1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/perfetto_sql/engine/perfetto_sql_parser.h"
18
19 #include <algorithm>
20 #include <cctype>
21 #include <functional>
22 #include <optional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #include "perfetto/base/logging.h"
28 #include "perfetto/base/status.h"
29 #include "perfetto/ext/base/flat_hash_map.h"
30 #include "perfetto/ext/base/string_utils.h"
31 #include "src/trace_processor/perfetto_sql/engine/perfetto_sql_preprocessor.h"
32 #include "src/trace_processor/sqlite/sql_source.h"
33 #include "src/trace_processor/sqlite/sqlite_tokenizer.h"
34
35 namespace perfetto {
36 namespace trace_processor {
37 namespace {
38
39 using Token = SqliteTokenizer::Token;
40 using Statement = PerfettoSqlParser::Statement;
41
// States of the keyword-prefix state machine driven by
// PerfettoSqlParser::Next(). Each state records which leading keywords of the
// current statement have been consumed so far.
enum class State {
  // No non-space token of the statement has been consumed yet.
  kStmtStart,
  // Seen: CREATE
  kCreate,
  // Seen: INCLUDE
  kInclude,
  // Seen: INCLUDE PERFETTO
  kIncludePerfetto,
  // Seen: CREATE OR
  kCreateOr,
  // Seen: CREATE OR REPLACE
  kCreateOrReplace,
  // Seen: CREATE OR REPLACE PERFETTO
  kCreateOrReplacePerfetto,
  // Seen: CREATE PERFETTO
  kCreatePerfetto,
  // The statement is not PerfettoSQL-specific; it is handed through as
  // plain SQLite SQL.
  kPassthrough,
};
53
KeywordEqual(std::string_view expected,std::string_view actual)54 bool KeywordEqual(std::string_view expected, std::string_view actual) {
55 PERFETTO_DCHECK(std::all_of(expected.begin(), expected.end(), islower));
56 return std::equal(expected.begin(), expected.end(), actual.begin(),
57 actual.end(),
58 [](char a, char b) { return a == tolower(b); });
59 }
60
TokenIsSqliteKeyword(std::string_view keyword,SqliteTokenizer::Token t)61 bool TokenIsSqliteKeyword(std::string_view keyword, SqliteTokenizer::Token t) {
62 return t.token_type == SqliteTokenType::TK_GENERIC_KEYWORD &&
63 KeywordEqual(keyword, t.str);
64 }
65
TokenIsCustomKeyword(std::string_view keyword,SqliteTokenizer::Token t)66 bool TokenIsCustomKeyword(std::string_view keyword, SqliteTokenizer::Token t) {
67 return t.token_type == SqliteTokenType::TK_ID && KeywordEqual(keyword, t.str);
68 }
69
// Returns true if every character of |word| is alphanumeric or an
// underscore. An empty |word| is considered valid.
//
// NOTE(review): uppercase letters are accepted, exactly as before. The
// previous condition also tested islower(), but that term could never change
// the outcome because every lowercase character is already alphanumeric.
bool IsValidModuleWord(const std::string& word) {
  // Taking the character as unsigned char keeps the argument of
  // std::isalnum within the range it is defined for; a plain char holding a
  // byte >= 0x80 may be negative, which is undefined behaviour.
  return std::all_of(word.begin(), word.end(), [](unsigned char c) {
    return std::isalnum(c) != 0 || c == '_';
  });
}
78
ValidateModuleName(const std::string & name)79 bool ValidateModuleName(const std::string& name) {
80 if (name.empty()) {
81 return false;
82 }
83
84 std::vector<std::string> packages = base::SplitString(name, ".");
85
86 // The last part of the path can be a wildcard.
87 if (!packages.empty() && packages.back() == "*") {
88 packages.pop_back();
89 }
90
91 // The rest of the path must be valid words.
92 return std::find_if(packages.begin(), packages.end(),
93 std::not_fn(IsValidModuleWord)) == packages.end();
94 }
95
96 } // namespace
97
PerfettoSqlParser(SqlSource source,const base::FlatHashMap<std::string,PerfettoSqlPreprocessor::Macro> & macros)98 PerfettoSqlParser::PerfettoSqlParser(
99 SqlSource source,
100 const base::FlatHashMap<std::string, PerfettoSqlPreprocessor::Macro>&
101 macros)
102 : preprocessor_(std::move(source), macros),
103 tokenizer_(SqlSource::FromTraceProcessorImplementation("")) {}
104
Next()105 bool PerfettoSqlParser::Next() {
106 PERFETTO_CHECK(status_.ok());
107
108 if (!preprocessor_.NextStatement()) {
109 status_ = preprocessor_.status();
110 return false;
111 }
112 tokenizer_.Reset(preprocessor_.statement());
113
114 State state = State::kStmtStart;
115 std::optional<Token> first_non_space_token;
116 for (Token token = tokenizer_.Next();; token = tokenizer_.Next()) {
117 // Space should always be completely ignored by any logic below as it will
118 // never change the current state in the state machine.
119 if (token.token_type == SqliteTokenType::TK_SPACE) {
120 continue;
121 }
122
123 if (token.IsTerminal()) {
124 // If we have a non-space character we've seen, just return all the stuff
125 // after that point.
126 if (first_non_space_token) {
127 statement_ = SqliteSql{};
128 statement_sql_ = tokenizer_.Substr(*first_non_space_token, token);
129 return true;
130 }
131 // This means we've seen a semi-colon without any non-space content. Just
132 // try and find the next statement as this "statement" is a noop.
133 if (token.token_type == SqliteTokenType::TK_SEMI) {
134 continue;
135 }
136 // This means we've reached the end of the SQL.
137 PERFETTO_DCHECK(token.str.empty());
138 return false;
139 }
140
141 // If we've not seen a space character, keep track of the current position.
142 if (!first_non_space_token) {
143 first_non_space_token = token;
144 }
145
146 switch (state) {
147 case State::kPassthrough:
148 statement_ = SqliteSql{};
149 statement_sql_ = preprocessor_.statement();
150 return true;
151 case State::kStmtStart:
152 if (TokenIsSqliteKeyword("create", token)) {
153 state = State::kCreate;
154 } else if (TokenIsCustomKeyword("include", token)) {
155 state = State::kInclude;
156 } else {
157 state = State::kPassthrough;
158 }
159 break;
160 case State::kInclude:
161 if (TokenIsCustomKeyword("perfetto", token)) {
162 state = State::kIncludePerfetto;
163 } else {
164 return ErrorAtToken(token,
165 "Use 'INCLUDE PERFETTO MODULE {include_key}'.");
166 }
167 break;
168 case State::kIncludePerfetto:
169 if (TokenIsCustomKeyword("module", token)) {
170 return ParseIncludePerfettoModule(*first_non_space_token);
171 } else {
172 return ErrorAtToken(token,
173 "Use 'INCLUDE PERFETTO MODULE {include_key}'.");
174 }
175 case State::kCreate:
176 if (TokenIsSqliteKeyword("trigger", token)) {
177 // TODO(lalitm): add this to the "errors" documentation page
178 // explaining why this is the case.
179 return ErrorAtToken(
180 token, "Creating triggers is not supported in PerfettoSQL.");
181 }
182 if (TokenIsCustomKeyword("perfetto", token)) {
183 state = State::kCreatePerfetto;
184 } else if (TokenIsSqliteKeyword("or", token)) {
185 state = State::kCreateOr;
186 } else {
187 state = State::kPassthrough;
188 }
189 break;
190 case State::kCreateOr:
191 state = TokenIsSqliteKeyword("replace", token) ? State::kCreateOrReplace
192 : State::kPassthrough;
193 break;
194 case State::kCreateOrReplace:
195 state = TokenIsCustomKeyword("perfetto", token)
196 ? State::kCreateOrReplacePerfetto
197 : State::kPassthrough;
198 break;
199 case State::kCreateOrReplacePerfetto:
200 case State::kCreatePerfetto:
201 bool replace = state == State::kCreateOrReplacePerfetto;
202 if (TokenIsCustomKeyword("function", token)) {
203 return ParseCreatePerfettoFunction(replace, *first_non_space_token);
204 }
205 if (TokenIsSqliteKeyword("table", token)) {
206 return ParseCreatePerfettoTableOrView(replace, *first_non_space_token,
207 TableOrView::kTable);
208 }
209 if (TokenIsSqliteKeyword("view", token)) {
210 return ParseCreatePerfettoTableOrView(replace, *first_non_space_token,
211 TableOrView::kView);
212 }
213 if (TokenIsCustomKeyword("macro", token)) {
214 return ParseCreatePerfettoMacro(replace);
215 }
216 base::StackString<1024> err(
217 "Expected 'FUNCTION', 'TABLE' or 'MACRO' after 'CREATE PERFETTO', "
218 "received '%*s'.",
219 static_cast<int>(token.str.size()), token.str.data());
220 return ErrorAtToken(token, err.c_str());
221 }
222 }
223 }
224
ParseIncludePerfettoModule(Token first_non_space_token)225 bool PerfettoSqlParser::ParseIncludePerfettoModule(
226 Token first_non_space_token) {
227 auto tok = tokenizer_.NextNonWhitespace();
228 auto terminal = tokenizer_.NextTerminal();
229 std::string key = tokenizer_.Substr(tok, terminal).sql();
230
231 if (!ValidateModuleName(key)) {
232 base::StackString<1024> err(
233 "Include key should be a dot-separated list of module names, with the "
234 "last name optionally being a wildcard: '%s'",
235 key.c_str());
236 return ErrorAtToken(tok, err.c_str());
237 }
238
239 statement_ = Include{key};
240 statement_sql_ = tokenizer_.Substr(first_non_space_token, terminal);
241 return true;
242 }
243
ParseCreatePerfettoTableOrView(bool replace,Token first_non_space_token,TableOrView table_or_view)244 bool PerfettoSqlParser::ParseCreatePerfettoTableOrView(
245 bool replace,
246 Token first_non_space_token,
247 TableOrView table_or_view) {
248 Token table_name = tokenizer_.NextNonWhitespace();
249 if (table_name.token_type != SqliteTokenType::TK_ID) {
250 base::StackString<1024> err("Invalid table name %.*s",
251 static_cast<int>(table_name.str.size()),
252 table_name.str.data());
253 return ErrorAtToken(table_name, err.c_str());
254 }
255 std::string name(table_name.str);
256 std::vector<sql_argument::ArgumentDefinition> schema;
257
258 auto token = tokenizer_.NextNonWhitespace();
259
260 // If the next token is a left parenthesis, then the table or view have a
261 // schema.
262 if (token.token_type == SqliteTokenType::TK_LP) {
263 if (!ParseArguments(schema)) {
264 return false;
265 }
266 token = tokenizer_.NextNonWhitespace();
267 }
268
269 if (!TokenIsSqliteKeyword("as", token)) {
270 base::StackString<1024> err(
271 "Expected 'AS' after table_name, received "
272 "%*s.",
273 static_cast<int>(token.str.size()), token.str.data());
274 return ErrorAtToken(token, err.c_str());
275 }
276
277 Token first = tokenizer_.NextNonWhitespace();
278 Token terminal = tokenizer_.NextTerminal();
279 switch (table_or_view) {
280 case TableOrView::kTable:
281 statement_ = CreateTable{replace, std::move(name),
282 tokenizer_.Substr(first, terminal), schema};
283 break;
284 case TableOrView::kView:
285 SqlSource original_statement =
286 tokenizer_.Substr(first_non_space_token, terminal);
287 SqlSource header = SqlSource::FromTraceProcessorImplementation(
288 "CREATE VIEW " + name + " AS ");
289 SqlSource::Rewriter rewriter(original_statement);
290 tokenizer_.Rewrite(rewriter, first_non_space_token, first, header,
291 SqliteTokenizer::EndToken::kExclusive);
292 statement_ = CreateView{replace, std::move(name),
293 tokenizer_.Substr(first, terminal),
294 std::move(rewriter).Build(), schema};
295 break;
296 }
297 statement_sql_ = tokenizer_.Substr(first_non_space_token, terminal);
298 return true;
299 }
300
ParseCreatePerfettoFunction(bool replace,Token first_non_space_token)301 bool PerfettoSqlParser::ParseCreatePerfettoFunction(
302 bool replace,
303 Token first_non_space_token) {
304 Token function_name = tokenizer_.NextNonWhitespace();
305 if (function_name.token_type != SqliteTokenType::TK_ID) {
306 // TODO(lalitm): add a link to create function documentation.
307 base::StackString<1024> err("Invalid function name %.*s",
308 static_cast<int>(function_name.str.size()),
309 function_name.str.data());
310 return ErrorAtToken(function_name, err.c_str());
311 }
312
313 // TK_LP == '(' (i.e. left parenthesis).
314 if (Token lp = tokenizer_.NextNonWhitespace();
315 lp.token_type != SqliteTokenType::TK_LP) {
316 // TODO(lalitm): add a link to create function documentation.
317 return ErrorAtToken(lp, "Malformed function prototype: '(' expected");
318 }
319
320 std::vector<sql_argument::ArgumentDefinition> args;
321 if (!ParseArguments(args)) {
322 return false;
323 }
324
325 if (Token returns = tokenizer_.NextNonWhitespace();
326 !TokenIsCustomKeyword("returns", returns)) {
327 // TODO(lalitm): add a link to create function documentation.
328 return ErrorAtToken(returns, "Expected keyword 'returns'");
329 }
330
331 Token ret_token = tokenizer_.NextNonWhitespace();
332 std::string ret;
333 bool table_return = TokenIsSqliteKeyword("table", ret_token);
334 if (table_return) {
335 if (Token lp = tokenizer_.NextNonWhitespace();
336 lp.token_type != SqliteTokenType::TK_LP) {
337 // TODO(lalitm): add a link to create function documentation.
338 return ErrorAtToken(lp, "Malformed table return: '(' expected");
339 }
340 // Table function return.
341 std::vector<sql_argument::ArgumentDefinition> ret_args;
342 if (!ParseArguments(ret_args)) {
343 return false;
344 }
345 ret = sql_argument::SerializeArguments(ret_args);
346 } else if (ret_token.token_type != SqliteTokenType::TK_ID) {
347 // TODO(lalitm): add a link to create function documentation.
348 return ErrorAtToken(ret_token, "Invalid return type");
349 } else {
350 // Scalar function return.
351 ret = ret_token.str;
352 }
353
354 if (Token as_token = tokenizer_.NextNonWhitespace();
355 !TokenIsSqliteKeyword("as", as_token)) {
356 // TODO(lalitm): add a link to create function documentation.
357 return ErrorAtToken(as_token, "Expected keyword 'as'");
358 }
359
360 Token first = tokenizer_.NextNonWhitespace();
361 Token terminal = tokenizer_.NextTerminal();
362 statement_ = CreateFunction{
363 replace,
364 FunctionPrototype{std::string(function_name.str), std::move(args)},
365 std::move(ret), tokenizer_.Substr(first, terminal), table_return};
366 statement_sql_ = tokenizer_.Substr(first_non_space_token, terminal);
367 return true;
368 }
369
ParseCreatePerfettoMacro(bool replace)370 bool PerfettoSqlParser::ParseCreatePerfettoMacro(bool replace) {
371 Token name = tokenizer_.NextNonWhitespace();
372 if (name.token_type != SqliteTokenType::TK_ID) {
373 // TODO(lalitm): add a link to create macro documentation.
374 base::StackString<1024> err("Invalid macro name %.*s",
375 static_cast<int>(name.str.size()),
376 name.str.data());
377 return ErrorAtToken(name, err.c_str());
378 }
379
380 // TK_LP == '(' (i.e. left parenthesis).
381 if (Token lp = tokenizer_.NextNonWhitespace();
382 lp.token_type != SqliteTokenType::TK_LP) {
383 // TODO(lalitm): add a link to create macro documentation.
384 return ErrorAtToken(lp, "Malformed macro prototype: '(' expected");
385 }
386
387 std::vector<RawArgument> raw_args;
388 std::vector<std::pair<SqlSource, SqlSource>> args;
389 if (!ParseRawArguments(raw_args)) {
390 return false;
391 }
392 for (const auto& arg : raw_args) {
393 args.emplace_back(tokenizer_.SubstrToken(arg.name),
394 tokenizer_.SubstrToken(arg.type));
395 }
396
397 if (Token returns = tokenizer_.NextNonWhitespace();
398 !TokenIsCustomKeyword("returns", returns)) {
399 // TODO(lalitm): add a link to create macro documentation.
400 return ErrorAtToken(returns, "Expected keyword 'returns'");
401 }
402
403 Token returns_value = tokenizer_.NextNonWhitespace();
404 if (returns_value.token_type != SqliteTokenType::TK_ID) {
405 // TODO(lalitm): add a link to create function documentation.
406 return ErrorAtToken(returns_value, "Expected return type");
407 }
408
409 if (Token as_token = tokenizer_.NextNonWhitespace();
410 !TokenIsSqliteKeyword("as", as_token)) {
411 // TODO(lalitm): add a link to create macro documentation.
412 return ErrorAtToken(as_token, "Expected keyword 'as'");
413 }
414
415 Token first = tokenizer_.NextNonWhitespace();
416 Token tok = tokenizer_.NextTerminal();
417 statement_ = CreateMacro{
418 replace, tokenizer_.SubstrToken(name), std::move(args),
419 tokenizer_.SubstrToken(returns_value), tokenizer_.Substr(first, tok)};
420 return true;
421 }
422
ParseRawArguments(std::vector<RawArgument> & args)423 bool PerfettoSqlParser::ParseRawArguments(std::vector<RawArgument>& args) {
424 enum TokenType {
425 kIdOrRp,
426 kId,
427 kType,
428 kCommaOrRp,
429 };
430
431 std::optional<Token> id = std::nullopt;
432 TokenType expected = kIdOrRp;
433 for (Token tok = tokenizer_.NextNonWhitespace();;
434 tok = tokenizer_.NextNonWhitespace()) {
435 // Keywords can be used as names accidentally so have an explicit error
436 // message for those.
437 if (tok.token_type == SqliteTokenType::TK_GENERIC_KEYWORD) {
438 base::StackString<1024> err(
439 "Malformed function/macro prototype: %.*s is a SQL keyword so "
440 "cannot appear in a prototype",
441 static_cast<int>(tok.str.size()), tok.str.data());
442 return ErrorAtToken(tok, err.c_str());
443 }
444 if (expected == kCommaOrRp) {
445 PERFETTO_CHECK(expected == kCommaOrRp);
446 if (tok.token_type == SqliteTokenType::TK_RP) {
447 return true;
448 }
449 if (tok.token_type == SqliteTokenType::TK_COMMA) {
450 expected = kId;
451 continue;
452 }
453 return ErrorAtToken(tok, "')' or ',' expected");
454 }
455 if (expected == kType) {
456 if (tok.token_type != SqliteTokenType::TK_ID) {
457 // TODO(lalitm): add a link to documentation.
458 base::StackString<1024> err("%.*s is not a valid argument type",
459 static_cast<int>(tok.str.size()),
460 tok.str.data());
461 return ErrorAtToken(tok, err.c_str());
462 }
463 PERFETTO_CHECK(id);
464 args.push_back({*id, tok});
465 id = std::nullopt;
466 expected = kCommaOrRp;
467 continue;
468 }
469
470 // kIdOrRp only happens on the very first token.
471 if (tok.token_type == SqliteTokenType::TK_RP && expected == kIdOrRp) {
472 return true;
473 }
474
475 if (tok.token_type != SqliteTokenType::TK_ID) {
476 // TODO(lalitm): add a link to documentation.
477 base::StackString<1024> err("%.*s is not a valid argument name",
478 static_cast<int>(tok.str.size()),
479 tok.str.data());
480 return ErrorAtToken(tok, err.c_str());
481 }
482 id = tok;
483 expected = kType;
484 continue;
485 }
486 }
487
ParseArguments(std::vector<sql_argument::ArgumentDefinition> & args)488 bool PerfettoSqlParser::ParseArguments(
489 std::vector<sql_argument::ArgumentDefinition>& args) {
490 std::vector<RawArgument> raw_args;
491 if (!ParseRawArguments(raw_args)) {
492 return false;
493 }
494 for (const auto& raw_arg : raw_args) {
495 std::optional<sql_argument::ArgumentDefinition> arg =
496 ResolveRawArgument(raw_arg);
497 if (!arg) {
498 return false;
499 }
500 args.emplace_back(std::move(*arg));
501 }
502 return true;
503 }
504
505 std::optional<sql_argument::ArgumentDefinition>
ResolveRawArgument(RawArgument arg)506 PerfettoSqlParser::ResolveRawArgument(RawArgument arg) {
507 std::string arg_name = tokenizer_.SubstrToken(arg.name).sql();
508 std::string arg_type = tokenizer_.SubstrToken(arg.type).sql();
509 if (!sql_argument::IsValidName(base::StringView(arg_name))) {
510 base::StackString<1024> err("Name %s is not alphanumeric",
511 arg_name.c_str());
512 ErrorAtToken(arg.name, err.c_str());
513 return std::nullopt;
514 }
515 std::optional<sql_argument::Type> parsed_arg_type =
516 sql_argument::ParseType(base::StringView(arg_type));
517 if (!parsed_arg_type) {
518 base::StackString<1024> err("Invalid type %s", arg_type.c_str());
519 ErrorAtToken(arg.type, err.c_str());
520 return std::nullopt;
521 }
522 return sql_argument::ArgumentDefinition("$" + arg_name, *parsed_arg_type);
523 }
524
ErrorAtToken(const SqliteTokenizer::Token & token,const char * error,...)525 bool PerfettoSqlParser::ErrorAtToken(const SqliteTokenizer::Token& token,
526 const char* error,
527 ...) {
528 std::string traceback = tokenizer_.AsTraceback(token);
529 status_ = base::ErrStatus("%s%s", traceback.c_str(), error);
530 return false;
531 }
532
533 } // namespace trace_processor
534 } // namespace perfetto
535