• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (c) 2021 Huawei Device Co., Ltd.
 *
 * HDF is dual licensed: you can use it either under the terms of
 * the GPL, or the BSD license, at your option.
 * See the LICENSE file in the root of this repository for complete details.
 */
8 
9 #include "parser/lexer.h"
10 #include "util/string_builder.h"
11 
12 namespace OHOS {
13 namespace HDI {
Lexer()14 Lexer::Lexer()
15 {
16     InitializeKeywords();
17 }
18 
~Lexer()19 Lexer::~Lexer()
20 {
21     if (currentFile_ != nullptr) {
22         currentFile_->Close();
23     }
24 }
25 
InitializeKeywords()26 void Lexer::InitializeKeywords()
27 {
28     keywords_ = {
29         { "boolean", Token::BOOLEAN }, { "byte", Token::BYTE }, { "callback", Token::CALLBACK },
30         { "char", Token::CHAR}, { "double", Token::DOUBLE }, { "enum", Token::ENUM },
31         { "extends", Token::EXTENDS }, { "float", Token::FLOAT }, { "full", Token::FULL },
32         { "import", Token::IMPORT }, { "in", Token::IN }, { "int", Token::INTEGER }, { "interface", Token::INTERFACE },
33         { "List", Token::LIST }, { "lite", Token::LITE }, { "long", Token::LONG },
34         { "Map", Token::MAP }, { "oneway", Token::ONEWAY }, { "out", Token::OUT },
35         { "package", Token::PACKAGE}, { "sequenceable", Token::SEQUENCEABLE }, { "short", Token::SHORT },
36         { "String", Token::STRING }, { "struct", Token::STRUCT }, { "union", Token::UNION },
37         { "unsigned", Token::UNSIGNED }, { "FileDescriptor", Token::FILEDESCRIPTOR },
38         {"SharedMemQueue", Token::SMEMQUEUE},
39     };
40 
41     delimiters_ = {
42         {'<', Token::ANGLE_BRACKETS_LEFT}, {'>', Token::ANGLE_BRACKETS_RIGHT},
43         {'{', Token::BRACES_LEFT}, {'}', Token::BRACES_RIGHT},
44         {'[', Token::BRACKETS_LEFT}, {']', Token::BRACKETS_RIGHT},
45         {',', Token::COMMA}, {'(', Token::PARENTHESES_LEFT},
46         {')', Token::PARENTHESES_RIGHT}, {'.', Token::DOT},
47         {':', Token::COLON}, {';', Token::SEMICOLON},
48         {'=', Token::ASSIGN}
49     };
50 
51     tokenDumps_ = {
52         { Token::ANGLE_BRACKETS_LEFT, "<" }, { Token::ANGLE_BRACKETS_RIGHT, ">"}, { Token::BOOLEAN, "boolean"},
53         { Token::BRACES_LEFT, "{"}, { Token::BRACES_RIGHT, "}"}, { Token::BRACKETS_LEFT, "["},
54         { Token::BRACKETS_RIGHT, "]"}, { Token::BYTE, "byte"}, { Token::CALLBACK, "callback"},
55         { Token::CHAR, "char"}, { Token::COLON, ":"}, { Token::COMMA, ","},
56         { Token::DOT, "."}, { Token::DOUBLE, "double"}, { Token::END_OF_FILE, "eof"},
57         { Token::ENUM, "enum"}, { Token::EXTENDS, "extends"}, { Token::FLOAT, "float"},
58         { Token::FULL, "full"}, { Token::IMPORT, "import"}, { Token::IN, "in"},
59         { Token::INTEGER, "int"}, { Token::INTERFACE, "interface"}, { Token::LITE, "lite"},
60         { Token::LIST, "List"}, { Token::LONG, "long"}, { Token::MAP, "Map"},
61         { Token::ONEWAY, "oneway"}, { Token::OUT, "out"}, { Token::PACKAGE, "package"},
62         { Token::SEQUENCEABLE, "sequenceable"}, { Token::STRUCT, "struct"}, { Token::PARENTHESES_LEFT, "("},
63         { Token::PARENTHESES_RIGHT, ")"}, { Token::SEMICOLON, ";"}, { Token::SHORT, "short"},
64         { Token::STRING, "String"}, { Token::UNION, "union"}, { Token::UNSIGNED, "unsigned"},
65         { Token::FILEDESCRIPTOR, "FileDescriptor"}, {Token::SMEMQUEUE, "SharedMemQueue"},
66     };
67 }
68 
OpenSourceFile(const String & filePath)69 bool Lexer::OpenSourceFile(const String& filePath)
70 {
71     currentFile_ = std::make_unique<File>(filePath, int(File::READ));
72     if (!currentFile_->IsValid()) {
73         return false;
74     }
75 
76     return true;
77 }
78 
GetToken(bool skipComment)79 Token Lexer::GetToken(bool skipComment)
80 {
81     if (!havePeek_) {
82         currentToken_ = ReadToken(skipComment);
83     }
84     havePeek_ = false;
85     return currentToken_;
86 }
87 
PeekToken(bool skipComment)88 Token Lexer::PeekToken(bool skipComment)
89 {
90     if (!havePeek_) {
91         currentToken_ = ReadToken(skipComment);
92         havePeek_ = true;
93     }
94     return currentToken_;
95 }
96 
ReadToken(bool skipComment)97 Token Lexer::ReadToken(bool skipComment)
98 {
99     while (!currentFile_->IsEof()) {
100         char c = currentFile_->GetChar();
101         tokenLineNo_ = currentFile_->GetCharLineNumber();
102         tokenColumnNo_ = currentFile_->GetCharColumnNumber();
103         if (IsSpace(c)) {
104             continue;
105         } else if (IsAlphabet(c) || c == '_') {
106             Token t = ReadIdentifier(c);
107             return t;
108         } else if (IsDecimalDigital(c)) {
109             Token t = ReadDecimalDigital(c);
110             return t;
111         }
112 
113         auto iter = delimiters_.find(c);
114         if (iter != delimiters_.end()) {
115             return iter->second;
116         }
117 
118         if (c == '/') {
119             if (currentFile_->PeekChar() == '/') {
120                 ReadLineComment(c);
121             } else if (currentFile_->PeekChar() == '*') {
122                 ReadBlockComment(c);
123             } else {
124                 currentToken_ = Token::UNKNOWN;
125                 return currentToken_;
126             }
127 
128             if (skipComment) {
129                 continue;
130             }
131 
132             return currentToken_;
133         }
134 
135         currentToken_ = Token::UNKNOWN;
136         return currentToken_;
137     }
138     currentToken_ = Token::END_OF_FILE;
139     return currentToken_;
140 }
141 
ReadIdentifier(char c)142 Token Lexer::ReadIdentifier(char c)
143 {
144     StringBuilder sb;
145 
146     sb.Append(c);
147     while (!currentFile_->IsEof()) {
148         c = currentFile_->PeekChar();
149         if (IsAlphabet(c) || c == '_' || IsDecimalDigital(c) || c == '.') {
150             c = currentFile_->GetChar();
151             sb.Append(c);
152             continue;
153         }
154         if (IsSpace(c)) {
155             currentFile_->GetChar();
156         }
157         break;
158     }
159     String key = sb.ToString();
160     auto it = keywords_.find(key);
161     if (it == keywords_.end()) {
162         identifier_ = key;
163         currentToken_ = Token::IDENTIFIER;
164     } else {
165         currentToken_ = it->second;
166     }
167 
168     return currentToken_;
169 }
170 
ReadDecimalDigital(char c)171 Token Lexer::ReadDecimalDigital(char c)
172 {
173     StringBuilder sb;
174     sb.Append(c);
175     while (!currentFile_->IsEof()) {
176         c = currentFile_->PeekChar();
177         if (IsDecimalDigital(c)) {
178             c = currentFile_->GetChar();
179             sb.Append(c);
180             continue;
181         } else {
182             break;
183         }
184     }
185     digit_ = sb.ToString();
186     currentToken_ = Token::DIGIT;
187     return currentToken_;
188 }
189 
ReadLineComment(char c)190 Token Lexer::ReadLineComment(char c)
191 {
192     StringBuilder sb;
193 
194     sb.Append(c);
195     while (!currentFile_->IsEof()) {
196         c = currentFile_->GetChar();
197         if (c == '\n') {
198             break;
199         }
200         sb.Append(c);
201     }
202     comment_ = sb.ToString();
203     currentToken_ = Token::COMMENT_LINE;
204     return currentToken_;
205 }
206 
ReadBlockComment(char c)207 Token Lexer::ReadBlockComment(char c)
208 {
209     StringBuilder sb;
210 
211     sb.Append(c);
212     while (!currentFile_->IsEof()) {
213         c = currentFile_->GetChar();
214         sb.Append(c);
215         if (c == '*' && currentFile_->PeekChar() == '/') {
216             c = currentFile_->GetChar();
217             sb.Append(c);
218             break;
219         }
220     }
221     comment_ = sb.ToString();
222     currentToken_ = Token::COMMENT_BLOCK;
223     return currentToken_;
224 }
225 
SkipCurrentLine()226 void Lexer::SkipCurrentLine()
227 {
228     while (!currentFile_->IsEof()) {
229         char c = currentFile_->GetChar();
230         if (c == '\n') {
231             currentFile_->GetChar();
232             return;
233         }
234     }
235 }
236 
SkipCurrentLine(char untilChar)237 bool Lexer::SkipCurrentLine(char untilChar)
238 {
239     while (!currentFile_->IsEof()) {
240         int c = currentFile_->GetChar();
241         if (c == untilChar) {
242             return true;
243         }
244         if (c == '\n') {
245             currentFile_->GetChar();
246             return false;
247         }
248     }
249     return true;
250 }
251 
Skip(char untilChar)252 void Lexer::Skip(char untilChar)
253 {
254     while (!currentFile_->IsEof()) {
255         int c = currentFile_->GetChar();
256         if (c == untilChar) {
257             return;
258         }
259     }
260 }
261 
SkipEof()262 void Lexer::SkipEof()
263 {
264     while (!currentFile_->IsEof()) {}
265 }
266 
TokenToChar(Token token)267 int Lexer::TokenToChar(Token token)
268 {
269     switch (token) {
270         case Token::ANGLE_BRACKETS_LEFT:
271             return '<';
272         case Token::ANGLE_BRACKETS_RIGHT:
273             return '>';
274         case Token::BRACES_LEFT:
275             return '{';
276         case Token::BRACES_RIGHT:
277             return '}';
278         case Token::BRACKETS_LEFT:
279             return '[';
280         case Token::BRACKETS_RIGHT:
281             return ']';
282         case Token::COMMA:
283             return ',';
284         case Token::DOT:
285             return '.';
286         case Token::PARENTHESES_LEFT:
287             return '(';
288         case Token::PARENTHESES_RIGHT:
289             return ')';
290         case Token::COLON:
291             return ':';
292         case Token::SEMICOLON:
293             return ';';
294         default:
295             return -1;
296     }
297 }
298 
DumpToken() const299 String Lexer::DumpToken() const
300 {
301     auto iter = tokenDumps_.find(currentToken_);
302     if (iter != tokenDumps_.end()) {
303         return iter->second;
304     }
305 
306     if (currentToken_ == Token::COMMENT_BLOCK || currentToken_ == Token::COMMENT_LINE) {
307         return comment_;
308     }
309 
310     if (currentToken_ == Token::IDENTIFIER) {
311         return identifier_;
312     }
313 
314     return "unknown token";
315 }
316 } // namespace HDI
317 } // namespace OHOS