• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
18 #define SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
19 
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <utility>

#include "src/trace_processor/sqlite/sql_source.h"
23 
24 namespace perfetto {
25 namespace trace_processor {
26 
// List of token types returnable by |SqliteTokenizer|.
// 1:1 matches the definitions in SQLite.
enum class SqliteTokenType : uint32_t {
  // NOTE(review): the numeric values are intended to mirror SQLite's own
  // token codes, so they must not be renumbered independently. The grouping
  // comments below are derived from the enumerator names only.

  // Statement terminator, parentheses and separator.
  TK_SEMI = 1,
  TK_LP = 22,
  TK_RP = 23,
  TK_COMMA = 25,

  // Comparison operators.
  TK_NE = 52,
  TK_EQ = 53,
  TK_GT = 54,
  TK_LE = 55,
  TK_LT = 56,
  TK_GE = 57,

  // Identifiers.
  TK_ID = 59,

  // Bitwise, shift, arithmetic and other operators.
  TK_BITAND = 102,
  TK_BITOR = 103,
  TK_LSHIFT = 104,
  TK_RSHIFT = 105,
  TK_PLUS = 106,
  TK_MINUS = 107,
  TK_STAR = 108,
  TK_SLASH = 109,
  TK_REM = 110,
  TK_CONCAT = 111,
  TK_PTR = 112,
  TK_BITNOT = 114,

  // Literals, dot and variables.
  TK_STRING = 117,
  TK_DOT = 141,
  TK_FLOAT = 153,
  TK_BLOB = 154,
  TK_INTEGER = 155,
  TK_VARIABLE = 156,

  // Whitespace and input which could not be tokenized.
  TK_SPACE = 183,
  TK_ILLEGAL = 184,

  // Generic constant which replaces all the keywords in SQLite as we do not
  // care about the distinguishing between the vast majority of them.
  TK_GENERIC_KEYWORD = 1000,
};
66 
// Tokenizes SQL statements according to SQLite SQL language specification:
// https://www2.sqlite.org/hlr40000.html
//
// Usage of this class:
// SqliteTokenizer tzr(std::move(my_sql_source));
// for (auto t = tzr.Next(); t.token_type != SqliteTokenType::TK_SEMI;
//      t = tzr.Next()) {
//   // Handle t here
// }
75 class SqliteTokenizer {
76  public:
77   // A single SQL token according to the SQLite standard.
78   struct Token {
79     // The string contents of the token.
80     std::string_view str;
81 
82     // The type of the token.
83     SqliteTokenType token_type = SqliteTokenType::TK_ILLEGAL;
84 
85     bool operator==(const Token& o) const {
86       return str == o.str && token_type == o.token_type;
87     }
88 
89     // Returns if the token is empty or semicolon.
IsTerminalToken90     bool IsTerminal() {
91       return token_type == SqliteTokenType::TK_SEMI || str.empty();
92     }
93   };
94 
95   enum class EndToken {
96     kExclusive,
97     kInclusive,
98   };
99 
100   // Creates a tokenizer which tokenizes |sql|.
101   explicit SqliteTokenizer(SqlSource sql);
102 
103   // Returns the next SQL token.
104   Token Next();
105 
106   // Returns the next SQL token which is not of type TK_SPACE.
107   Token NextNonWhitespace();
108 
109   // Returns the next SQL token which is terminal.
110   Token NextTerminal();
111 
112   // Returns an SqlSource containing all the tokens between |start| and |end|.
113   //
114   // Note: |start| and |end| must both have been previously returned by this
115   // tokenizer.
116   SqlSource Substr(const Token& start, const Token& end) const;
117 
118   // Returns an SqlSource containing only the SQL backing |token|.
119   //
120   // Note: |token| must have been previously returned by this tokenizer.
121   SqlSource SubstrToken(const Token& token) const;
122 
123   // Returns a traceback error message for the SqlSource backing this tokenizer
124   // pointing to |token|. See SqlSource::AsTraceback for more information about
125   // this method.
126   //
127   // Note: |token| must have been previously returned by this tokenizer.
128   std::string AsTraceback(const Token&) const;
129 
130   // Replaces the SQL in |rewriter| between |start| and |end| with the contents
131   // of |rewrite|. If |end_token| == kInclusive, the end token is also included
132   // in the rewrite.
133   void Rewrite(SqlSource::Rewriter& rewriter,
134                const Token& start,
135                const Token& end,
136                SqlSource rewrite,
137                EndToken end_token = EndToken::kExclusive) const;
138 
139   // Replaces the SQL in |rewriter| backing |token| with the contents of
140   // |rewrite|.
141   void RewriteToken(SqlSource::Rewriter&,
142                     const Token&,
143                     SqlSource rewrite) const;
144 
145   // Resets this tokenizer to tokenize |source|. Any previous returned tokens
146   // are invalidated.
Reset(SqlSource source)147   void Reset(SqlSource source) {
148     source_ = std::move(source);
149     offset_ = 0;
150   }
151 
152  private:
153   SqliteTokenizer(const SqliteTokenizer&) = delete;
154   SqliteTokenizer& operator=(const SqliteTokenizer&) = delete;
155 
156   SqliteTokenizer(SqliteTokenizer&&) = delete;
157   SqliteTokenizer& operator=(SqliteTokenizer&&) = delete;
158 
159   SqlSource source_;
160   uint32_t offset_ = 0;
161 };
162 
163 }  // namespace trace_processor
164 }  // namespace perfetto
165 
166 #endif  // SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
167