• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  *
4  * HDF is dual licensed: you can use it either under the terms of
5  * the GPL, or the BSD license, at your option.
6  * See the LICENSE file in the root of this repository for complete details.
7  */
8 
9 #include "lexer.h"
10 #include <sstream>
11 #include <string>
12 #include "logger.h"
13 
14 using namespace OHOS::Hardware;
15 
// Radix values handed to the strto* conversions when parsing numeric literals.
namespace {
constexpr int BINARY_NUM = 2;
constexpr int OCTAL_NUM = 8;
constexpr int DECIMAL_NUM = 10;
constexpr int HEX_NUM = 16;
} // namespace
20 
Lexer()21 Lexer::Lexer() : lineno_(0), lineLoc_(0) {}
22 
23 std::map<std::string, TokenType> Lexer::keyWords_ = {
24     {"#include", INCLUDE },
25     {"root",     ROOT    },
26     {"delete",   DELETE  },
27     {"template", TEMPLATE},
28 };
29 
Initialize(const std::string & sourceName)30 bool Lexer::Initialize(const std::string &sourceName)
31 {
32     srcName_ = std::make_shared<std::string>(sourceName);
33 
34     if (src_.is_open()) {
35         src_.close();
36     }
37     bufferStart_ = nullptr;
38     bufferEnd_ = nullptr;
39     lineno_ = 1;
40     lineLoc_ = 1;
41     src_.open(srcName_->c_str(), std::ifstream::binary);
42     if (!src_.is_open()) {
43         Logger().Error() << "Failed to open source file: " << srcName_->data();
44         return false;
45     }
46     return true;
47 }
48 
SetTokenCharacter(char c,Token & token)49 bool Lexer::SetTokenCharacter(char c, Token &token)
50 {
51     switch (c) {
52         case ';': /* fall-through */
53         case ',': /* fall-through */
54         case '[': /* fall-through */
55         case ']': /* fall-through */
56         case '{': /* fall-through */
57         case '}': /* fall-through */
58         case '=': /* fall-through */
59         case '&': /* fall-through */
60         case ':':
61             ConsumeChar();
62             token.type = c;
63             token.lineNo = lineno_;
64             break;
65         case '"':
66             return LexFromString(token);
67         case '+': /* fall-through */
68         case '-':
69             return LexFromNumber(token);
70         case EOF:
71             token.type = EOF;
72             break;
73         default:
74             Logger().Error() << *this << "can not recognized character '" << c << "'";
75             return false;
76     }
77     return true;
78 }
79 
Lex(Token & token)80 bool Lexer::Lex(Token &token)
81 {
82     char c;
83     InitToken(token);
84     do {
85         if (!PeekChar(c, true)) {
86             token.type = EOF;
87             return true;
88         }
89         if (c == '#') {
90             return LexInclude(token);
91         }
92         if (isalpha(c)) {
93             LexFromLiteral(token);
94             return true;
95         }
96 
97         if (IsNum(c)) {
98             return LexFromNumber(token);
99         }
100 
101         if (c == '/') {
102             if (!ProcessComment()) {
103                 return false;
104             }
105             continue;
106         }
107 
108         return SetTokenCharacter(c, token);
109     } while (true);
110 
111     return true;
112 }
113 
GetRawChar()114 char Lexer::GetRawChar()
115 {
116     if (!FillBuffer()) {
117         return EOF;
118     }
119     lineLoc_++;
120     return *bufferStart_++;
121 }
122 
GetChar(char & c,bool skipSpace)123 bool Lexer::GetChar(char &c, bool skipSpace)
124 {
125     char chr = GetRawChar();
126     if (skipSpace) {
127         while (IsSpace(chr)) {
128             chr = GetRawChar();
129         }
130     }
131 
132     if (chr == '\n') {
133         lineno_++;
134         lineLoc_ = 0;
135     }
136     c = chr;
137     return chr != EOF;
138 }
139 
PeekChar(char & c,bool skipSpace)140 bool Lexer::PeekChar(char &c, bool skipSpace)
141 {
142     if (!FillBuffer()) {
143         return false;
144     }
145 
146     if (skipSpace) {
147         while (bufferStart_ <= bufferEnd_ && (IsSpace(*bufferStart_) || *bufferStart_ == '\n')) {
148             lineLoc_++;
149             if (*bufferStart_ == '\n') {
150                 lineLoc_ = 0;
151                 lineno_++;
152             }
153             bufferStart_++;
154         }
155     }
156 
157     if (bufferStart_ > bufferEnd_) {
158         return false;
159     }
160     c = *bufferStart_;
161     return true;
162 }
163 
IsSpace(char c)164 bool Lexer::IsSpace(char c)
165 {
166     return c == ' ' || c == '\t' || c == '\r';
167 }
168 
FillBuffer()169 bool Lexer::FillBuffer()
170 {
171     if (bufferStart_ != nullptr && bufferStart_ <= bufferEnd_) {
172         return true;
173     }
174     auto size = src_.readsome(buffer_, BUFFER_SIZE);
175     if (size == 0) {
176         return false;
177     }
178     bufferStart_ = buffer_;
179     bufferEnd_ = bufferStart_ + size - 1;
180     return true;
181 }
182 
ProcessComment()183 bool Lexer::ProcessComment()
184 {
185     char c = 0;
186     ConsumeChar(); // skip first '/'
187     if (!GetChar(c)) {
188         Logger().Error() << *this << "unterminated comment";
189         return false;
190     }
191 
192     if (c == '/') {
193         while (c != '\n' && GetChar(c)) {}
194         if (c != '\n' && c != EOF) {
195             Logger().Error() << *this << "unterminated signal line comment";
196             return false;
197         }
198     } else if (c == '*') {
199         while (GetChar(c)) {
200             if (c == '*' && GetChar(c) && c == '/') {
201                 return true;
202             }
203         }
204         if (c != '/') {
205             Logger().Error() << *this << "unterminated multi-line comment";
206             return false;
207         }
208     } else {
209         Logger().Error() << *this << "invalid character";
210         return false;
211     }
212 
213     return true;
214 }
215 
GetSourceName() const216 std::shared_ptr<std::string> Lexer::GetSourceName() const
217 {
218     return srcName_;
219 }
220 
GetLineno() const221 int32_t Lexer::GetLineno() const
222 {
223     return lineno_;
224 }
225 
GetLineLoc() const226 int32_t Lexer::GetLineLoc() const
227 {
228     return lineLoc_;
229 }
230 
operator <<(std::ostream & stream,const Lexer & p)231 std::ostream &OHOS::Hardware::operator<<(std::ostream &stream, const Lexer &p)
232 {
233     return stream << p.GetSourceName()->data() << ":" << p.GetLineno() << ":" << p.GetLineLoc() << ": ";
234 }
235 
InitToken(Token & token)236 void Lexer::InitToken(Token &token)
237 {
238     token.type = 0;
239     token.numval = 0;
240     token.strval.clear();
241     token.src = srcName_;
242     token.lineNo = lineno_;
243 }
244 
LexFromString(Token & token)245 bool Lexer::LexFromString(Token &token)
246 {
247     char c;
248     GetChar(c, false); // skip first '"'
249     std::string value;
250     while (GetChar(c, false) && c != '"') {
251         value.push_back(c);
252     }
253 
254     if (c != '"') {
255         Logger().Error() << *this << "unterminated string";
256         return false;
257     }
258     token.type = STRING;
259     token.strval = std::move(value);
260     token.lineNo = lineno_;
261     return true;
262 }
263 
LexHexAndBinaryNum(std::string & value,char & c,uint64_t & v)264 void Lexer::LexHexAndBinaryNum(std::string &value, char &c, uint64_t &v)
265 {
266     switch (c) {
267         case 'x': // fall-through
268         case 'X': // hex number
269             ConsumeChar();
270             while (PeekChar(c, false) && (IsNum(c) || (c >= 'a' && c <= 'f')
271                                             || (c >= 'A' && c <= 'F'))) {
272                 value.push_back(c);
273                 ConsumeChar();
274             }
275             v = strtoll(value.data(), nullptr, HEX_NUM);
276             break;
277         case 'b': // binary number
278             ConsumeChar();
279             while (PeekChar(c, false) && (c == '0' || c == '1')) {
280                 value.push_back(c);
281                 ConsumeChar();
282             }
283             v = strtoll(value.data(), nullptr, BINARY_NUM);
284             break;
285         default:; // fall-through
286     }
287 }
288 
LexFromNumber(Token & token)289 bool Lexer::LexFromNumber(Token &token)
290 {
291     std::string value;
292     char c = 0;
293     uint64_t v = 0;
294     errno = 0;
295 
296     GetChar(c, false);
297     switch (c) {
298         case '0':
299             if (!PeekChar(c, true)) {
300                 break;
301             }
302 
303             if (IsNum(c)) { // Octal number
304                 while (PeekChar(c) && IsNum(c)) {
305                     ConsumeChar();
306                     value.push_back(c);
307                 }
308                 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, OCTAL_NUM));
309                 break;
310             }
311             LexHexAndBinaryNum(value, c, v);
312             break;
313         case '+': // fall-through
314         case '-': // fall-through, signed decimal number
315         default:  // unsigned decimal number
316             value.push_back(c);
317             while (PeekChar(c, true) && IsNum(c)) {
318                 ConsumeChar();
319                 value.push_back(c);
320             }
321             v = strtoll(value.data(), nullptr, DECIMAL_NUM);
322             break;
323     }
324 
325     if (errno != 0) {
326         Logger().Error() << *this << "illegal number: " << value.data();
327         return false;
328     }
329     token.type = NUMBER;
330     token.numval = v;
331     token.lineNo = lineno_;
332     return true;
333 }
334 
LexFromLiteral(Token & token)335 void Lexer::LexFromLiteral(Token &token)
336 {
337     std::string value;
338     char c;
339 
340     while (PeekChar(c, false) && !IsSpace(c)) {
341         if (!isalnum(c) && c != '_' && c != '.' && c != '\\') {
342             break;
343         }
344         value.push_back(c);
345         ConsumeChar();
346     }
347 
348     do {
349         if (value == "true") {
350             token.type = NUMBER;
351             token.numval = 1;
352             break;
353         } else if (value == "false") {
354             token.type = NUMBER;
355             token.numval = 0;
356             break;
357         }
358         auto keyword = keyWords_.find(value);
359         if (keyword != keyWords_.end()) {
360             token.type = keyword->second;
361             break;
362         }
363 
364         if (value.find('.') != std::string::npos) {
365             token.type = REF_PATH;
366         } else {
367             token.type = LITERAL;
368         }
369     } while (false);
370 
371     token.strval = std::move(value);
372     token.lineNo = lineno_;
373 }
374 
ConsumeChar()375 void Lexer::ConsumeChar()
376 {
377     char c;
378     (void)GetChar(c, false);
379 }
380 
IsNum(char c)381 bool Lexer::IsNum(char c)
382 {
383     return c >= '0' && c <= '9';
384 }
385 
LexInclude(Token & token)386 bool Lexer::LexInclude(Token &token)
387 {
388     ConsumeChar();
389     LexFromLiteral(token);
390     if (token.strval != "include") {
391         return false;
392     }
393 
394     token.type = INCLUDE;
395     return true;
396 }
397