/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "parser/lexer.h"

#include <memory>
#include <utility>
#include "util/string_builder.h"

namespace OHOS {
namespace Idl {
Lexer::Lexer()
{
    InitializeKeywords();
}

Lexer::~Lexer()
{
    if (currentFile_ != nullptr) {
        currentFile_->Close();
    }
}

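// Map each IDL keyword to its token value so that ReadIdentifier() can tell
// keywords apart from ordinary identifiers.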
void Lexer::InitializeKeywords()
{
    keywords_[String("boolean")] = Token::BOOLEAN;
    keywords_[String("byte")] = Token::BYTE;
    keywords_[String("char")] = Token::CHAR;
    keywords_[String("double")] = Token::DOUBLE;
    keywords_[String("float")] = Token::FLOAT;
    keywords_[String("in")] = Token::IN;
    keywords_[String("inout")] = Token::INOUT;
    keywords_[String("int")] = Token::INTEGER;
    keywords_[String("interface")] = Token::INTERFACE;
    keywords_[String("List")] = Token::LIST;
    keywords_[String("long")] = Token::LONG;
    keywords_[String("Map")] = Token::MAP;
    keywords_[String("oneway")] = Token::ONEWAY;
    keywords_[String("out")] = Token::OUT;
    keywords_[String("sequenceable")] = Token::SEQUENCEABLE;
    keywords_[String("short")] = Token::SHORT;
    keywords_[String("String")] = Token::STRING;
}

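// Open the source file to be tokenized; returns false if the file cannot be opened for reading.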
bool Lexer::OpenSourceFile(const String& filePath)
{
    currentFile_ = std::make_shared<File>(filePath, File::READ);
    if (!currentFile_->IsValid()) {
        return false;
    }

    return true;
}

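// Return the next token and consume it. If a token was buffered by PeekToken(),
// that token is returned and the buffer is cleared; otherwise a new token is
// read from the source file.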
Token Lexer::GetToken(bool skipComment)
{
    if (!havePeek_) {
        currentToken_ = ReadToken(skipComment);
    }
    havePeek_ = false;
    return currentToken_;
}

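// Look at the next token without consuming it; the following GetToken() call
// returns the same token.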
Token Lexer::PeekToken(bool skipComment)
{
    if (!havePeek_) {
        currentToken_ = ReadToken(skipComment);
        havePeek_ = true;
    }
    return currentToken_;
}

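// Scan the next token from the source file: whitespace is skipped, letters and
// '_' start an identifier or keyword, '/' may start a comment (skipped or
// returned depending on skipComment), and single-character punctuation maps
// directly to its token. Anything else yields Token::UNKNOWN.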
Token Lexer::ReadToken(bool skipComment)
{
    while (!currentFile_->IsEof()) {
        char c = currentFile_->GetChar();
        tokenLineNo_ = currentFile_->GetCharLineNumber();
        tokenColumnNo_ = currentFile_->GetCharColumnNumber();
        if (IsSpace(c)) {
            continue;
        } else if (IsAlphabet(c) || c == '_') {
            return ReadIdentifier(c);
        }
        switch (c) {
            case '<':
                currentToken_ = Token::ANGLE_BRACKETS_LEFT;
                return currentToken_;
            case '>':
                currentToken_ = Token::ANGLE_BRACKETS_RIGHT;
                return currentToken_;
            case '{':
                currentToken_ = Token::BRACES_LEFT;
                return currentToken_;
            case '}':
                currentToken_ = Token::BRACES_RIGHT;
                return currentToken_;
            case '[':
                currentToken_ = Token::BRACKETS_LEFT;
                return currentToken_;
            case ']':
                currentToken_ = Token::BRACKETS_RIGHT;
                return currentToken_;
            case ',':
                currentToken_ = Token::COMMA;
                return currentToken_;
            case '/':
                if (currentFile_->PeekChar() == '/') {
                    ReadLineComment(c);
                    if (!skipComment) {
                        return currentToken_;
                    }
                    continue;
                } else if (currentFile_->PeekChar() == '*') {
                    ReadBlockComment(c);
                    if (!skipComment) {
                        return currentToken_;
                    }
                    continue;
                }
                currentToken_ = Token::UNKNOWN;
                return currentToken_;
            case '(':
                currentToken_ = Token::PARENTHESES_LEFT;
                return currentToken_;
            case ')':
                currentToken_ = Token::PARENTHESES_RIGHT;
                return currentToken_;
            case '.':
                currentToken_ = Token::DOT;
                return currentToken_;
            case ';':
                currentToken_ = Token::SEMICOLON;
                return currentToken_;
            default:
                currentToken_ = Token::UNKNOWN;
                return currentToken_;
        }
    }
    currentToken_ = Token::END_OF_FILE;
    return currentToken_;
}

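// Accumulate an identifier starting with the already-read character c; letters,
// digits, '_' and '.' are accepted. The result is looked up in keywords_ to
// decide between a keyword token and Token::IDENTIFIER.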
Token Lexer::ReadIdentifier(char c)
{
    StringBuilder sb;

    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->PeekChar();
        if (IsAlphabet(c) || c == '_' || IsDecimalDigital(c) || c == '.') {
            c = currentFile_->GetChar();
            sb.Append(c);
            continue;
        }
        if (IsSpace(c)) {
            currentFile_->GetChar();
        }
        break;
    }
    String key = sb.ToString();
    auto it = keywords_.find(key);
    if (it == keywords_.end()) {
        identifier_ = key;
        currentToken_ = Token::IDENTIFIER;
    } else {
        currentToken_ = it->second;
    }
    return currentToken_;
}

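// Consume a "//" comment up to the end of the line and store its text in comment_.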
Token Lexer::ReadLineComment(char c)
{
    StringBuilder sb;

    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->GetChar();
        if (c == '\n') {
            break;
        }
        sb.Append(c);
    }
    comment_ = sb.ToString();
    currentToken_ = Token::COMMENT_LINE;
    return currentToken_;
}

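// Consume a "/* ... */" comment, including the closing "*/", and store its text in comment_.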
Token Lexer::ReadBlockComment(char c)
{
    StringBuilder sb;

    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->GetChar();
        sb.Append(c);
        if (c == '*' && currentFile_->PeekChar() == '/') {
            c = currentFile_->GetChar();
            sb.Append(c);
            break;
        }
    }
    comment_ = sb.ToString();
    currentToken_ = Token::COMMENT_BLOCK;
    return currentToken_;
}

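// Discard the remaining characters of the current line.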
void Lexer::SkipCurrentLine()
{
    while (!currentFile_->IsEof()) {
        char c = currentFile_->GetChar();
        if (c == '\n') {
            // The newline has already been consumed; the next read starts on the following line.
            return;
        }
    }
}

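// Discard characters until untilChar is found on the current line. Returns true
// if untilChar (or end of file) is reached, false if the line ends first.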
bool Lexer::SkipCurrentLine(char untilChar)
{
    while (!currentFile_->IsEof()) {
        char c = currentFile_->GetChar();
        if (c == untilChar) {
            return true;
        }
        if (c == '\n') {
            // Reached the end of the line without finding untilChar.
            return false;
        }
    }
    return true;
}

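// Map a single-character token back to its character, or return -1 for tokens
// that have no single-character form.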
int Lexer::TokenToChar(Token token)
{
    switch (token) {
        case Token::ANGLE_BRACKETS_LEFT:
            return '<';
        case Token::ANGLE_BRACKETS_RIGHT:
            return '>';
        case Token::BRACES_LEFT:
            return '{';
        case Token::BRACES_RIGHT:
            return '}';
        case Token::BRACKETS_LEFT:
            return '[';
        case Token::BRACKETS_RIGHT:
            return ']';
        case Token::COMMA:
            return ',';
        case Token::DOT:
            return '.';
        case Token::PARENTHESES_LEFT:
            return '(';
        case Token::PARENTHESES_RIGHT:
            return ')';
        case Token::SEMICOLON:
            return ';';
        default:
            return -1;
    }
}

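// Return a printable representation of the current token, mainly for diagnostics.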
String Lexer::DumpToken() const
{
    switch (currentToken_) {
        case Token::ANGLE_BRACKETS_LEFT:
            return "<";
        case Token::ANGLE_BRACKETS_RIGHT:
            return ">";
        case Token::BOOLEAN:
            return "boolean";
        case Token::BRACES_LEFT:
            return "{";
        case Token::BRACES_RIGHT:
            return "}";
        case Token::BRACKETS_LEFT:
            return "[";
        case Token::BRACKETS_RIGHT:
            return "]";
        case Token::BYTE:
            return "byte";
        case Token::CHAR:
            return "char";
        case Token::COMMA:
            return ",";
        case Token::COMMENT_BLOCK:
        case Token::COMMENT_LINE:
            return comment_;
        case Token::DOT:
            return ".";
        case Token::DOUBLE:
            return "double";
        case Token::END_OF_FILE:
            return "eof";
        case Token::FLOAT:
            return "float";
        case Token::IDENTIFIER:
            return identifier_;
        case Token::IN:
            return "in";
        case Token::INOUT:
            return "inout";
        case Token::INTEGER:
            return "int";
        case Token::INTERFACE:
            return "interface";
        case Token::LIST:
            return "List";
        case Token::LONG:
            return "long";
        case Token::MAP:
            return "Map";
        case Token::ONEWAY:
            return "oneway";
        case Token::OUT:
            return "out";
        case Token::SEQUENCEABLE:
            return "sequenceable";
        case Token::PARENTHESES_LEFT:
            return "(";
        case Token::PARENTHESES_RIGHT:
            return ")";
        case Token::SEMICOLON:
            return ";";
        case Token::SHORT:
            return "short";
        case Token::STRING:
            return "String";
        default:
            return "unknown token";
    }
}
} // namespace Idl
} // namespace OHOS
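/*
 * Illustrative usage sketch (an addition, not part of the original file): a
 * caller such as the IDL parser could drive the lexer roughly like this, using
 * only the methods defined above. "IExample.idl" is a hypothetical file name.
 *
 *     Lexer lexer;
 *     if (lexer.OpenSourceFile(String("IExample.idl"))) {
 *         Token token = lexer.GetToken(true);        // true: skip comments
 *         while (token != Token::END_OF_FILE) {
 *             // ... handle the token, e.g. log lexer.DumpToken() ...
 *             token = lexer.GetToken(true);
 *         }
 *     }
 */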