1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 *
4 * HDF is dual licensed: you can use it either under the terms of
5 * the GPL, or the BSD license, at your option.
6 * See the LICENSE file in the root of this repository for complete details.
7 */
8
#include "lexer.h"
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <sstream>
#include <string>
#include "logger.h"
13
14 using namespace OHOS::Hardware;
15
// Radix values passed to the string-to-integer conversions for each supported number notation.
static constexpr int BINARY_NUM{2};
static constexpr int OCTAL_NUM{8};
static constexpr int DECIMAL_NUM{10};
static constexpr int HEX_NUM{16};
20
Lexer()21 Lexer::Lexer() : lineno_(0), lineLoc_(0) {}
22
23 std::map<std::string, TokenType> Lexer::keyWords_ = {
24 {"#include", INCLUDE },
25 {"root", ROOT },
26 {"delete", DELETE },
27 {"template", TEMPLATE},
28 };
29
Initialize(const std::string & sourceName)30 bool Lexer::Initialize(const std::string &sourceName)
31 {
32 srcName_ = std::make_shared<std::string>(sourceName);
33
34 if (src_.is_open()) {
35 src_.close();
36 }
37 bufferStart_ = nullptr;
38 bufferEnd_ = nullptr;
39 lineno_ = 1;
40 lineLoc_ = 1;
41 src_.open(srcName_->c_str(), std::ifstream::binary);
42 if (!src_.is_open()) {
43 Logger().Error() << "Failed to open source file: " << srcName_->data();
44 return false;
45 }
46 return true;
47 }
48
SetTokenCharacter(char c,Token & token)49 bool Lexer::SetTokenCharacter(char c, Token &token)
50 {
51 switch (c) {
52 case ';': /* fall-through */
53 case ',': /* fall-through */
54 case '[': /* fall-through */
55 case ']': /* fall-through */
56 case '{': /* fall-through */
57 case '}': /* fall-through */
58 case '=': /* fall-through */
59 case '&': /* fall-through */
60 case ':':
61 ConsumeChar();
62 token.type = c;
63 token.lineNo = lineno_;
64 break;
65 case '"':
66 return LexFromString(token);
67 case '+': /* fall-through */
68 case '-':
69 return LexFromNumber(token);
70 case EOF:
71 token.type = EOF;
72 break;
73 default:
74 Logger().Error() << *this << "can not recognized character '" << c << "'";
75 return false;
76 }
77 return true;
78 }
79
Lex(Token & token)80 bool Lexer::Lex(Token &token)
81 {
82 char c;
83 InitToken(token);
84 do {
85 if (!PeekChar(c, true)) {
86 token.type = EOF;
87 return true;
88 }
89 if (c == '#') {
90 return LexInclude(token);
91 }
92 if (isalpha(c)) {
93 LexFromLiteral(token);
94 return true;
95 }
96
97 if (IsNum(c)) {
98 return LexFromNumber(token);
99 }
100
101 if (c == '/') {
102 if (!ProcessComment()) {
103 return false;
104 }
105 continue;
106 }
107
108 return SetTokenCharacter(c, token);
109 } while (true);
110
111 return true;
112 }
113
GetRawChar()114 char Lexer::GetRawChar()
115 {
116 if (!FillBuffer()) {
117 return EOF;
118 }
119 lineLoc_++;
120 return *bufferStart_++;
121 }
122
GetChar(char & c,bool skipSpace)123 bool Lexer::GetChar(char &c, bool skipSpace)
124 {
125 char chr = GetRawChar();
126 if (skipSpace) {
127 while (IsSpace(chr)) {
128 chr = GetRawChar();
129 }
130 }
131
132 if (chr == '\n') {
133 lineno_++;
134 lineLoc_ = 0;
135 }
136 c = chr;
137 return chr != EOF;
138 }
139
PeekChar(char & c,bool skipSpace)140 bool Lexer::PeekChar(char &c, bool skipSpace)
141 {
142 if (!FillBuffer()) {
143 return false;
144 }
145
146 if (skipSpace) {
147 while (bufferStart_ <= bufferEnd_ && (IsSpace(*bufferStart_) || *bufferStart_ == '\n')) {
148 lineLoc_++;
149 if (*bufferStart_ == '\n') {
150 lineLoc_ = 0;
151 lineno_++;
152 }
153 bufferStart_++;
154 }
155 }
156
157 if (bufferStart_ > bufferEnd_) {
158 return false;
159 }
160 c = *bufferStart_;
161 return true;
162 }
163
IsSpace(char c)164 bool Lexer::IsSpace(char c)
165 {
166 return c == ' ' || c == '\t' || c == '\r';
167 }
168
FillBuffer()169 bool Lexer::FillBuffer()
170 {
171 if (bufferStart_ != nullptr && bufferStart_ <= bufferEnd_) {
172 return true;
173 }
174 auto size = src_.readsome(buffer_, BUFFER_SIZE);
175 if (size == 0) {
176 return false;
177 }
178 bufferStart_ = buffer_;
179 bufferEnd_ = bufferStart_ + size - 1;
180 return true;
181 }
182
ProcessComment()183 bool Lexer::ProcessComment()
184 {
185 char c = 0;
186 ConsumeChar(); // skip first '/'
187 if (!GetChar(c)) {
188 Logger().Error() << *this << "unterminated comment";
189 return false;
190 }
191
192 if (c == '/') {
193 while (c != '\n' && GetChar(c)) {}
194 if (c != '\n' && c != EOF) {
195 Logger().Error() << *this << "unterminated signal line comment";
196 return false;
197 }
198 } else if (c == '*') {
199 while (GetChar(c)) {
200 if (c == '*' && GetChar(c) && c == '/') {
201 return true;
202 }
203 }
204 if (c != '/') {
205 Logger().Error() << *this << "unterminated multi-line comment";
206 return false;
207 }
208 } else {
209 Logger().Error() << *this << "invalid character";
210 return false;
211 }
212
213 return true;
214 }
215
GetSourceName() const216 std::shared_ptr<std::string> Lexer::GetSourceName() const
217 {
218 return srcName_;
219 }
220
GetLineno() const221 int32_t Lexer::GetLineno() const
222 {
223 return lineno_;
224 }
225
GetLineLoc() const226 int32_t Lexer::GetLineLoc() const
227 {
228 return lineLoc_;
229 }
230
operator <<(std::ostream & stream,const Lexer & p)231 std::ostream &OHOS::Hardware::operator<<(std::ostream &stream, const Lexer &p)
232 {
233 return stream << p.GetSourceName()->data() << ":" << p.GetLineno() << ":" << p.GetLineLoc() << ": ";
234 }
235
InitToken(Token & token)236 void Lexer::InitToken(Token &token)
237 {
238 token.type = 0;
239 token.numval = 0;
240 token.strval.clear();
241 token.src = srcName_;
242 token.lineNo = lineno_;
243 }
244
LexFromString(Token & token)245 bool Lexer::LexFromString(Token &token)
246 {
247 char c;
248 GetChar(c, false); // skip first '"'
249 std::string value;
250 while (GetChar(c, false) && c != '"') {
251 value.push_back(c);
252 }
253
254 if (c != '"') {
255 Logger().Error() << *this << "unterminated string";
256 return false;
257 }
258 token.type = STRING;
259 token.strval = std::move(value);
260 token.lineNo = lineno_;
261 return true;
262 }
263
LexHexAndBinaryNum(std::string & value,char & c,uint64_t & v)264 void Lexer::LexHexAndBinaryNum(std::string &value, char &c, uint64_t &v)
265 {
266 switch (c) {
267 case 'x': // fall-through
268 case 'X': // hex number
269 ConsumeChar();
270 while (PeekChar(c, false) && (IsNum(c) || (c >= 'a' && c <= 'f')
271 || (c >= 'A' && c <= 'F'))) {
272 value.push_back(c);
273 ConsumeChar();
274 }
275 v = strtoll(value.data(), nullptr, HEX_NUM);
276 break;
277 case 'b': // binary number
278 ConsumeChar();
279 while (PeekChar(c, false) && (c == '0' || c == '1')) {
280 value.push_back(c);
281 ConsumeChar();
282 }
283 v = strtoll(value.data(), nullptr, BINARY_NUM);
284 break;
285 default:; // fall-through
286 }
287 }
288
LexFromNumber(Token & token)289 bool Lexer::LexFromNumber(Token &token)
290 {
291 std::string value;
292 char c = 0;
293 uint64_t v = 0;
294 errno = 0;
295
296 GetChar(c, false);
297 switch (c) {
298 case '0':
299 if (!PeekChar(c, true)) {
300 break;
301 }
302
303 if (IsNum(c)) { // Octal number
304 while (PeekChar(c) && IsNum(c)) {
305 ConsumeChar();
306 value.push_back(c);
307 }
308 v = static_cast<uint64_t>(strtoll(value.data(), nullptr, OCTAL_NUM));
309 break;
310 }
311 LexHexAndBinaryNum(value, c, v);
312 break;
313 case '+': // fall-through
314 case '-': // fall-through, signed decimal number
315 default: // unsigned decimal number
316 value.push_back(c);
317 while (PeekChar(c, true) && IsNum(c)) {
318 ConsumeChar();
319 value.push_back(c);
320 }
321 v = strtoll(value.data(), nullptr, DECIMAL_NUM);
322 break;
323 }
324
325 if (errno != 0) {
326 Logger().Error() << *this << "illegal number: " << value.data();
327 return false;
328 }
329 token.type = NUMBER;
330 token.numval = v;
331 token.lineNo = lineno_;
332 return true;
333 }
334
LexFromLiteral(Token & token)335 void Lexer::LexFromLiteral(Token &token)
336 {
337 std::string value;
338 char c;
339
340 while (PeekChar(c, false) && !IsSpace(c)) {
341 if (!isalnum(c) && c != '_' && c != '.' && c != '\\') {
342 break;
343 }
344 value.push_back(c);
345 ConsumeChar();
346 }
347
348 do {
349 if (value == "true") {
350 token.type = NUMBER;
351 token.numval = 1;
352 break;
353 } else if (value == "false") {
354 token.type = NUMBER;
355 token.numval = 0;
356 break;
357 }
358 auto keyword = keyWords_.find(value);
359 if (keyword != keyWords_.end()) {
360 token.type = keyword->second;
361 break;
362 }
363
364 if (value.find('.') != std::string::npos) {
365 token.type = REF_PATH;
366 } else {
367 token.type = LITERAL;
368 }
369 } while (false);
370
371 token.strval = std::move(value);
372 token.lineNo = lineno_;
373 }
374
ConsumeChar()375 void Lexer::ConsumeChar()
376 {
377 char c;
378 (void)GetChar(c, false);
379 }
380
IsNum(char c)381 bool Lexer::IsNum(char c)
382 {
383 return c >= '0' && c <= '9';
384 }
385
LexInclude(Token & token)386 bool Lexer::LexInclude(Token &token)
387 {
388 ConsumeChar();
389 LexFromLiteral(token);
390 if (token.strval != "include") {
391 return false;
392 }
393
394 token.type = INCLUDE;
395 return true;
396 }
397