1 /* 2 * Copyright (C) 2023 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_ 18 #define SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_ 19 20 #include <optional> 21 #include <string_view> 22 #include "src/trace_processor/sqlite/sql_source.h" 23 24 namespace perfetto { 25 namespace trace_processor { 26 27 // List of token types returnable by |SqliteTokenizer| 28 // 1:1 matches the defintions in SQLite. 29 enum class SqliteTokenType : uint32_t { 30 TK_SEMI = 1, 31 TK_LP = 22, 32 TK_RP = 23, 33 TK_COMMA = 25, 34 TK_NE = 52, 35 TK_EQ = 53, 36 TK_GT = 54, 37 TK_LE = 55, 38 TK_LT = 56, 39 TK_GE = 57, 40 TK_ID = 59, 41 TK_BITAND = 102, 42 TK_BITOR = 103, 43 TK_LSHIFT = 104, 44 TK_RSHIFT = 105, 45 TK_PLUS = 106, 46 TK_MINUS = 107, 47 TK_STAR = 108, 48 TK_SLASH = 109, 49 TK_REM = 110, 50 TK_CONCAT = 111, 51 TK_PTR = 112, 52 TK_BITNOT = 114, 53 TK_STRING = 117, 54 TK_DOT = 141, 55 TK_FLOAT = 153, 56 TK_BLOB = 154, 57 TK_INTEGER = 155, 58 TK_VARIABLE = 156, 59 TK_SPACE = 183, 60 TK_ILLEGAL = 184, 61 62 // Generic constant which replaces all the keywords in SQLite as we do not 63 // care about the distinguishing between the vast majority of them. 64 TK_GENERIC_KEYWORD = 1000, 65 }; 66 67 // Tokenizes SQL statements according to SQLite SQL language specification: 68 // https://www2.sqlite.org/hlr40000.html 69 // 70 // Usage of this class: 71 // SqliteTokenizer tzr(std::move(my_sql_source)); 72 // for (auto t = tzr.Next(); t.token_type != TK_SEMI; t = tzr.Next()) { 73 // // Handle t here 74 // } 75 class SqliteTokenizer { 76 public: 77 // A single SQL token according to the SQLite standard. 78 struct Token { 79 // The string contents of the token. 80 std::string_view str; 81 82 // The type of the token. 83 SqliteTokenType token_type = SqliteTokenType::TK_ILLEGAL; 84 85 bool operator==(const Token& o) const { 86 return str == o.str && token_type == o.token_type; 87 } 88 89 // Returns if the token is empty or semicolon. IsTerminalToken90 bool IsTerminal() { 91 return token_type == SqliteTokenType::TK_SEMI || str.empty(); 92 } 93 }; 94 95 enum class EndToken { 96 kExclusive, 97 kInclusive, 98 }; 99 100 // Creates a tokenizer which tokenizes |sql|. 101 explicit SqliteTokenizer(SqlSource sql); 102 103 // Returns the next SQL token. 104 Token Next(); 105 106 // Returns the next SQL token which is not of type TK_SPACE. 107 Token NextNonWhitespace(); 108 109 // Returns the next SQL token which is terminal. 110 Token NextTerminal(); 111 112 // Returns an SqlSource containing all the tokens between |start| and |end|. 113 // 114 // Note: |start| and |end| must both have been previously returned by this 115 // tokenizer. 116 SqlSource Substr(const Token& start, const Token& end) const; 117 118 // Returns an SqlSource containing only the SQL backing |token|. 119 // 120 // Note: |token| must have been previously returned by this tokenizer. 121 SqlSource SubstrToken(const Token& token) const; 122 123 // Returns a traceback error message for the SqlSource backing this tokenizer 124 // pointing to |token|. See SqlSource::AsTraceback for more information about 125 // this method. 126 // 127 // Note: |token| must have been previously returned by this tokenizer. 128 std::string AsTraceback(const Token&) const; 129 130 // Replaces the SQL in |rewriter| between |start| and |end| with the contents 131 // of |rewrite|. If |end_token| == kInclusive, the end token is also included 132 // in the rewrite. 133 void Rewrite(SqlSource::Rewriter& rewriter, 134 const Token& start, 135 const Token& end, 136 SqlSource rewrite, 137 EndToken end_token = EndToken::kExclusive) const; 138 139 // Replaces the SQL in |rewriter| backing |token| with the contents of 140 // |rewrite|. 141 void RewriteToken(SqlSource::Rewriter&, 142 const Token&, 143 SqlSource rewrite) const; 144 145 // Resets this tokenizer to tokenize |source|. Any previous returned tokens 146 // are invalidated. Reset(SqlSource source)147 void Reset(SqlSource source) { 148 source_ = std::move(source); 149 offset_ = 0; 150 } 151 152 private: 153 SqliteTokenizer(const SqliteTokenizer&) = delete; 154 SqliteTokenizer& operator=(const SqliteTokenizer&) = delete; 155 156 SqliteTokenizer(SqliteTokenizer&&) = delete; 157 SqliteTokenizer& operator=(SqliteTokenizer&&) = delete; 158 159 SqlSource source_; 160 uint32_t offset_ = 0; 161 }; 162 163 } // namespace trace_processor 164 } // namespace perfetto 165 166 #endif // SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_ 167