1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 *
4 * HDF is dual licensed: you can use it either under the terms of
5 * the GPL, or the BSD license, at your option.
6 * See the LICENSE file in the root of this repository for complete details.
7 */
8
#include "lexer.h"

#include <cerrno>
#include <cstdlib>
#include <sstream>
#include <string>

#include "logger.h"
15
16 using namespace OHOS::Hardware;
17
// Radix values handed to the string-to-integer conversions when decoding numeric literals.
namespace {
constexpr int BINARY_NUM = 2;
constexpr int OCTAL_NUM = 8;
constexpr int DECIMAL_NUM = 10;
constexpr int HEX_NUM = 16;
} // namespace
22
Lexer()23 Lexer::Lexer() : lineno_(0), lineLoc_(0) {}
24
25 std::map<std::string, TokenType> Lexer::keyWords_ = {
26 {"#include", INCLUDE },
27 {"root", ROOT },
28 {"delete", DELETE },
29 {"template", TEMPLATE},
30 };
31
Initialize(const std::string & sourceName)32 bool Lexer::Initialize(const std::string &sourceName)
33 {
34 srcName_ = std::make_shared<std::string>(sourceName);
35
36 if (src_.is_open()) {
37 src_.close();
38 }
39 bufferStart_ = nullptr;
40 bufferEnd_ = nullptr;
41 lineno_ = 1;
42 lineLoc_ = 1;
43 src_.open(srcName_->c_str(), std::ifstream::binary);
44 if (!src_.is_open()) {
45 Logger().Error() << "Failed to open source file: " << srcName_->data();
46 return false;
47 }
48 return true;
49 }
50
SetTokenCharacter(char c,Token & token)51 bool Lexer::SetTokenCharacter(char c, Token &token)
52 {
53 switch (c) {
54 case ';': /* fall-through */
55 case ',': /* fall-through */
56 case '[': /* fall-through */
57 case ']': /* fall-through */
58 case '{': /* fall-through */
59 case '}': /* fall-through */
60 case '=': /* fall-through */
61 case '&': /* fall-through */
62 case ':':
63 ConsumeChar();
64 token.type = c;
65 token.lineNo = lineno_;
66 break;
67 case '"':
68 return LexFromString(token);
69 case '+': /* fall-through */
70 case '-':
71 return LexFromNumber(token);
72 case EOF:
73 token.type = EOF;
74 break;
75 default:
76 Logger().Error() << *this << "can not recognized character '" << c << "'";
77 return false;
78 }
79 return true;
80 }
81
Lex(Token & token)82 bool Lexer::Lex(Token &token)
83 {
84 char c;
85 InitToken(token);
86 do {
87 if (!PeekChar(c, true)) {
88 token.type = EOF;
89 return true;
90 }
91 if (c == '#') {
92 return LexInclude(token);
93 }
94 if (isalpha(c)) {
95 LexFromLiteral(token);
96 return true;
97 }
98
99 if (IsNum(c)) {
100 return LexFromNumber(token);
101 }
102
103 if (c == '/') {
104 if (!ProcessComment()) {
105 return false;
106 }
107 continue;
108 }
109
110 return SetTokenCharacter(c, token);
111 } while (true);
112
113 return true;
114 }
115
GetRawChar()116 char Lexer::GetRawChar()
117 {
118 if (!FillBuffer()) {
119 return EOF;
120 }
121 lineLoc_++;
122 return *bufferStart_++;
123 }
124
GetChar(char & c,bool skipSpace)125 bool Lexer::GetChar(char &c, bool skipSpace)
126 {
127 char chr = GetRawChar();
128 if (skipSpace) {
129 while (IsSpace(chr)) {
130 chr = GetRawChar();
131 }
132 }
133
134 if (chr == '\n') {
135 lineno_++;
136 lineLoc_ = 0;
137 }
138 c = chr;
139 return chr != EOF;
140 }
141
PeekChar(char & c,bool skipSpace)142 bool Lexer::PeekChar(char &c, bool skipSpace)
143 {
144 if (!FillBuffer()) {
145 return false;
146 }
147
148 if (skipSpace) {
149 while (bufferStart_ <= bufferEnd_ && (IsSpace(*bufferStart_) || *bufferStart_ == '\n')) {
150 lineLoc_++;
151 if (*bufferStart_ == '\n') {
152 lineLoc_ = 0;
153 lineno_++;
154 }
155 bufferStart_++;
156 }
157 }
158
159 if (bufferStart_ > bufferEnd_) {
160 return false;
161 }
162 c = *bufferStart_;
163 return true;
164 }
165
IsSpace(char c)166 bool Lexer::IsSpace(char c)
167 {
168 return c == ' ' || c == '\t' || c == '\r';
169 }
170
FillBuffer()171 bool Lexer::FillBuffer()
172 {
173 if (bufferStart_ != nullptr && bufferStart_ <= bufferEnd_) {
174 return true;
175 }
176 auto size = src_.readsome(buffer_, BUFFER_SIZE);
177 if (size == 0) {
178 return false;
179 }
180 bufferStart_ = buffer_;
181 bufferEnd_ = bufferStart_ + size - 1;
182 return true;
183 }
184
ProcessComment()185 bool Lexer::ProcessComment()
186 {
187 char c = 0;
188 ConsumeChar(); // skip first '/'
189 if (!GetChar(c)) {
190 Logger().Error() << *this << "unterminated comment";
191 return false;
192 }
193
194 if (c == '/') {
195 while (c != '\n' && GetChar(c)) {}
196 if (c != '\n' && c != EOF) {
197 Logger().Error() << *this << "unterminated signal line comment";
198 return false;
199 }
200 } else if (c == '*') {
201 while (GetChar(c)) {
202 if (c == '*' && GetChar(c) && c == '/') {
203 return true;
204 }
205 }
206 if (c != '/') {
207 Logger().Error() << *this << "unterminated multi-line comment";
208 return false;
209 }
210 } else {
211 Logger().Error() << *this << "invalid character";
212 return false;
213 }
214
215 return true;
216 }
217
GetSourceName() const218 std::shared_ptr<std::string> Lexer::GetSourceName() const
219 {
220 return srcName_;
221 }
222
GetLineno() const223 int32_t Lexer::GetLineno() const
224 {
225 return lineno_;
226 }
227
GetLineLoc() const228 int32_t Lexer::GetLineLoc() const
229 {
230 return lineLoc_;
231 }
232
operator <<(std::ostream & stream,const Lexer & p)233 std::ostream &OHOS::Hardware::operator<<(std::ostream &stream, const Lexer &p)
234 {
235 return stream << p.GetSourceName()->data() << ":" << p.GetLineno() << ":" << p.GetLineLoc() << ": ";
236 }
237
InitToken(Token & token) const238 void Lexer::InitToken(Token &token) const
239 {
240 token.type = 0;
241 token.numval = 0;
242 token.strval.clear();
243 token.src = srcName_;
244 token.lineNo = lineno_;
245 }
246
LexFromString(Token & token)247 bool Lexer::LexFromString(Token &token)
248 {
249 char c;
250 GetChar(c, false); // skip first '"'
251 std::string value;
252 while (GetChar(c, false) && c != '"') {
253 value.push_back(c);
254 }
255
256 if (c != '"') {
257 Logger().Error() << *this << "unterminated string";
258 return false;
259 }
260 token.type = STRING;
261 token.strval = std::move(value);
262 token.lineNo = lineno_;
263 return true;
264 }
265
LexHexAndBinaryNum(std::string & value,char & c,uint64_t & v)266 void Lexer::LexHexAndBinaryNum(std::string &value, char &c, uint64_t &v)
267 {
268 switch (c) {
269 case 'x': // fall-through
270 case 'X': // hex number
271 ConsumeChar();
272 while (PeekChar(c, false) && (IsNum(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
273 value.push_back(c);
274 ConsumeChar();
275 }
276 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, HEX_NUM));
277 break;
278 case 'b': // binary number
279 ConsumeChar();
280 while (PeekChar(c, false) && (c == '0' || c == '1')) {
281 value.push_back(c);
282 ConsumeChar();
283 }
284 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, BINARY_NUM));
285 break;
286 default:; // fall-through
287 }
288 }
289
LexFromNumber(Token & token)290 bool Lexer::LexFromNumber(Token &token)
291 {
292 std::string value;
293 char c = 0;
294 uint64_t v = 0;
295 errno = 0;
296
297 GetChar(c, false);
298 switch (c) {
299 case '0':
300 if (!PeekChar(c, true)) {
301 break;
302 }
303
304 if (IsNum(c)) { // Octal number
305 while (PeekChar(c) && IsNum(c)) {
306 ConsumeChar();
307 value.push_back(c);
308 }
309 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, OCTAL_NUM));
310 break;
311 }
312 LexHexAndBinaryNum(value, c, v);
313 break;
314 case '+': // fall-through
315 case '-': // fall-through, signed decimal number
316 default: // unsigned decimal number
317 value.push_back(c);
318 while (PeekChar(c, true) && IsNum(c)) {
319 ConsumeChar();
320 value.push_back(c);
321 }
322 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, DECIMAL_NUM));
323 break;
324 }
325
326 if (errno != 0) {
327 Logger().Error() << *this << "illegal number: " << value.data();
328 return false;
329 }
330 token.type = NUMBER;
331 token.numval = v;
332 token.lineNo = lineno_;
333 return true;
334 }
335
LexFromLiteral(Token & token)336 void Lexer::LexFromLiteral(Token &token)
337 {
338 std::string value;
339 char c;
340
341 while (PeekChar(c, false) && !IsSpace(c)) {
342 if (!isalnum(c) && c != '_' && c != '.' && c != '\\') {
343 break;
344 }
345 value.push_back(c);
346 ConsumeChar();
347 }
348
349 do {
350 if (value == "true") {
351 token.type = NUMBER;
352 token.numval = 1;
353 break;
354 } else if (value == "false") {
355 token.type = NUMBER;
356 token.numval = 0;
357 break;
358 }
359 auto keyword = keyWords_.find(value);
360 if (keyword != keyWords_.end()) {
361 token.type = keyword->second;
362 break;
363 }
364
365 if (value.find('.') != std::string::npos) {
366 token.type = REF_PATH;
367 } else {
368 token.type = LITERAL;
369 }
370 } while (false);
371
372 token.strval = std::move(value);
373 token.lineNo = lineno_;
374 }
375
ConsumeChar()376 void Lexer::ConsumeChar()
377 {
378 char c;
379 (void)GetChar(c, false);
380 }
381
IsNum(char c)382 bool Lexer::IsNum(char c)
383 {
384 return c >= '0' && c <= '9';
385 }
386
LexInclude(Token & token)387 bool Lexer::LexInclude(Token &token)
388 {
389 ConsumeChar();
390 LexFromLiteral(token);
391 if (token.strval != "include") {
392 return false;
393 }
394
395 token.type = INCLUDE;
396 return true;
397 }
398