1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17 #include <cmath>
18 #include <climits>
19 #include <cstdlib>
20 #include "mpl_logging.h"
21 #include "debug_info.h"
22 #include "mir_module.h"
23 #include "securec.h"
24 #include "utils.h"
25
26 namespace maple {
HexCharToDigit(char c)27 int32 HexCharToDigit(char c)
28 {
29 int32 ret = utils::ToDigit<16, int32>(c);
30 return (ret != INT32_MAX ? ret : 0);
31 }
32
33 // Read (next) line from the MIR (text) file, and return the read
34 // number of chars.
35 // if the line is empty (nothing but a newline), returns 0.
36 // if EOF, return -1.
37 // The trailing new-line character has been removed.
ReadALine()38 int MIRLexer::ReadALine()
39 {
40 if (airFile == nullptr) {
41 line = "";
42 return -1;
43 }
44
45 curIdx = 0;
46 if (!std::getline(*airFile, line)) { // EOF
47 line = "";
48 airFile = nullptr;
49 currentLineSize = 0;
50 return -1;
51 }
52
53 RemoveReturnInline(line);
54 currentLineSize = line.length();
55 return currentLineSize;
56 }
57
ReadALineByMirQueue()58 int MIRLexer::ReadALineByMirQueue()
59 {
60 if (mirQueue.empty()) {
61 line = "";
62 return -1;
63 }
64 curIdx = 0;
65 line = mirQueue.front();
66 RemoveReturnInline(line);
67 currentLineSize = line.length();
68 mirQueue.pop();
69 return currentLineSize;
70 }
71
MIRLexer(MIRModule & mod)72 MIRLexer::MIRLexer(MIRModule &mod)
73 : module(mod), seenComments(mod.GetMPAllocator().Adapter()), keywordMap(mod.GetMPAllocator().Adapter())
74 {
75 // initialize keywordMap
76 keywordMap.clear();
77 #define KEYWORD(STR) \
78 { \
79 std::string str; \
80 str = #STR; \
81 keywordMap[str] = TK_##STR; \
82 }
83 #include "keywords.def"
84 #undef KEYWORD
85 }
86
PrepareForFile(const std::string & filename)87 void MIRLexer::PrepareForFile(const std::string &filename)
88 {
89 // open MIR file
90 airFileInternal.open(filename);
91 CHECK_FATAL(airFileInternal.is_open(), "cannot open MIR file %s\n", &filename);
92
93 airFile = &airFileInternal;
94 // try to read the first line
95 if (ReadALine() < 0) {
96 lineNum = 0;
97 } else {
98 lineNum = 1;
99 }
100 module.GetDbgInfo()->UpdateMsg(lineNum, line.c_str());
101 kind = TK_invalid;
102 }
103
PrepareForString(const std::string & src)104 void MIRLexer::PrepareForString(const std::string &src)
105 {
106 SetMirQueue(src);
107 if (ReadALineByMirQueue() < 0) {
108 lineNum = 0;
109 } else {
110 lineNum = 1;
111 }
112 module.GetDbgInfo()->UpdateMsg(lineNum, line.c_str());
113 kind = TK_invalid;
114 }
115
GenName()116 void MIRLexer::GenName()
117 {
118 uint32 startIdx = curIdx;
119 char c = GetNextCurrentCharWithUpperCheck();
120 CHECK_FATAL(curIdx > 0, "must not be zero");
121 char cp = GetCharAt(curIdx - 1);
122 if (c == '@' && (cp == 'h' || cp == 'f')) {
123 // special pattern for exception handling labels: catch or finally
124 c = GetNextCurrentCharWithUpperCheck();
125 }
126 while (utils::IsAlnum(c) || c < 0 || c == '_' || c == '$' || c == ';' || c == '/' || c == '|' || c == '.' ||
127 c == '?' || c == '@') {
128 c = GetNextCurrentCharWithUpperCheck();
129 }
130 name = line.substr(startIdx, curIdx - startIdx);
131 }
132
GetSpecialFloatConst()133 TokenKind MIRLexer::GetSpecialFloatConst()
134 {
135 constexpr uint32 lenSpecFloat = 4;
136 constexpr uint32 lenSpecDouble = 3;
137 if (line.compare(curIdx, lenSpecFloat, "inff") == 0 &&
138 !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecFloat))) {
139 curIdx += lenSpecFloat;
140 theFloatVal = -INFINITY;
141 return TK_floatconst;
142 }
143 if (line.compare(curIdx, lenSpecDouble, "inf") == 0 &&
144 !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecDouble))) {
145 curIdx += lenSpecDouble;
146 theDoubleVal = -INFINITY;
147 return TK_doubleconst;
148 }
149 if (line.compare(curIdx, lenSpecFloat, "nanf") == 0 &&
150 !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecFloat))) {
151 curIdx += lenSpecFloat;
152 theFloatVal = -NAN;
153 return TK_floatconst;
154 }
155 if (line.compare(curIdx, lenSpecDouble, "nan") == 0 &&
156 !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecDouble))) {
157 curIdx += lenSpecDouble;
158 theDoubleVal = -NAN;
159 return TK_doubleconst;
160 }
161 return TK_invalid;
162 }
163
GetHexConst(uint32 valStart,bool negative)164 TokenKind MIRLexer::GetHexConst(uint32 valStart, bool negative)
165 {
166 char c = GetCharAtWithUpperCheck(curIdx);
167 if (!isxdigit(c)) {
168 name = line.substr(valStart, curIdx - valStart);
169 return TK_invalid;
170 }
171 uint64 tmp = static_cast<uint32>(HexCharToDigit(c));
172 c = GetNextCurrentCharWithUpperCheck();
173 while (isxdigit(c)) {
174 tmp = (tmp << k16BitShift) + static_cast<uint32>(HexCharToDigit(c));
175 c = GetNextCurrentCharWithUpperCheck();
176 }
177 theIntVal = static_cast<uint64>(static_cast<uint64>(tmp));
178 if (negative) {
179 theIntVal = -theIntVal;
180 }
181 theFloatVal = static_cast<float>(theIntVal);
182 theDoubleVal = static_cast<double>(theIntVal);
183 if (negative && theIntVal == 0) {
184 theFloatVal = -theFloatVal;
185 theDoubleVal = -theDoubleVal;
186 }
187 name = line.substr(valStart, curIdx - valStart);
188 return TK_intconst;
189 }
190
GetIntConst(uint32 valStart,bool negative)191 TokenKind MIRLexer::GetIntConst(uint32 valStart, bool negative)
192 {
193 auto negOrSelf = [negative](uint64 val) { return negative ? ~val + 1 : val; };
194
195 theIntVal = static_cast<uint64>(HexCharToDigit(GetCharAtWithUpperCheck(curIdx)));
196
197 uint64 radix = theIntVal == 0 ? 8 : 10;
198
199 char c = GetNextCurrentCharWithUpperCheck();
200
201 for (theIntVal = negOrSelf(theIntVal); isdigit(c); c = GetNextCurrentCharWithUpperCheck()) {
202 theIntVal = (theIntVal * radix) + negOrSelf(HexCharToDigit(c));
203 }
204
205 if (c == 'u' || c == 'U') { // skip 'u' or 'U'
206 c = GetNextCurrentCharWithUpperCheck();
207 if (c == 'l' || c == 'L') {
208 c = GetNextCurrentCharWithUpperCheck();
209 }
210 }
211
212 if (c == 'l' || c == 'L') {
213 c = GetNextCurrentCharWithUpperCheck();
214 if (c == 'l' || c == 'L' || c == 'u' || c == 'U') {
215 ++curIdx;
216 }
217 }
218
219 name = line.substr(valStart, curIdx - valStart);
220
221 if (negative) {
222 theFloatVal = static_cast<float>(static_cast<int64>(theIntVal));
223 theDoubleVal = static_cast<double>(static_cast<int64>(theIntVal));
224
225 if (theIntVal == 0) {
226 theFloatVal = -theFloatVal;
227 theDoubleVal = -theDoubleVal;
228 }
229 } else {
230 theFloatVal = static_cast<float>(theIntVal);
231 theDoubleVal = static_cast<double>(theIntVal);
232 }
233
234 return TK_intconst;
235 }
236
GetTokenWithPrefixDollar()237 TokenKind MIRLexer::GetTokenWithPrefixDollar()
238 {
239 // token with prefix '$'
240 char c = GetCharAtWithUpperCheck(curIdx);
241 if (utils::IsAlpha(c) || c == '_' || c == '$') {
242 GenName();
243 return TK_gname;
244 } else {
245 // for error reporting.
246 const uint32 printLength = 2;
247 DEBUG_ASSERT(curIdx > 0, "must not be zero");
248 name = line.substr(curIdx - 1, printLength);
249 return TK_invalid;
250 }
251 }
252
GetTokenWithPrefixPercent()253 TokenKind MIRLexer::GetTokenWithPrefixPercent()
254 {
255 // token with prefix '%'
256 char c = GetCharAtWithUpperCheck(curIdx);
257 if (isdigit(c)) {
258 int valStart = static_cast<int>(curIdx) - 1;
259 theIntVal = static_cast<uint64>(HexCharToDigit(c));
260 c = GetNextCurrentCharWithUpperCheck();
261 while (isdigit(c)) {
262 theIntVal = (theIntVal * 10) + static_cast<uint64>(HexCharToDigit(c)); // 10 for decimal
263 DEBUG_ASSERT(theIntVal >= 0, "int value overflow");
264 c = GetNextCurrentCharWithUpperCheck();
265 }
266 name = line.substr(valStart, curIdx - valStart);
267 return TK_preg;
268 }
269 if (utils::IsAlpha(c) || c == '_' || c == '$') {
270 GenName();
271 return TK_lname;
272 }
273 if (c == '%' && utils::IsAlpha(GetCharAtWithUpperCheck(curIdx + 1))) {
274 ++curIdx;
275 GenName();
276 return TK_specialreg;
277 }
278 return TK_invalid;
279 }
280
GetTokenWithPrefixAmpersand()281 TokenKind MIRLexer::GetTokenWithPrefixAmpersand()
282 {
283 // token with prefix '&'
284 char c = GetCurrentCharWithUpperCheck();
285 if (utils::IsAlpha(c) || c == '_') {
286 GenName();
287 return TK_fname;
288 }
289 // for error reporting.
290 constexpr uint32 printLength = 2;
291 CHECK_FATAL(curIdx > 0, "must not be zero");
292 name = line.substr(curIdx - 1, printLength);
293 return TK_invalid;
294 }
295
GetTokenWithPrefixAtOrCircumflex(char prefix)296 TokenKind MIRLexer::GetTokenWithPrefixAtOrCircumflex(char prefix)
297 {
298 // token with prefix '@' or `^`
299 char c = GetCurrentCharWithUpperCheck();
300 if (utils::IsAlnum(c) || c < 0 || c == '_' || c == '@' || c == '$' || c == '|') {
301 GenName();
302 if (prefix == '@') {
303 return TK_label;
304 }
305 return TK_prntfield;
306 }
307 return TK_invalid;
308 }
309
GetTokenWithPrefixExclamation()310 TokenKind MIRLexer::GetTokenWithPrefixExclamation()
311 {
312 // token with prefix '!'
313 char c = GetCurrentCharWithUpperCheck();
314 if (utils::IsAlpha(c)) {
315 GenName();
316 return TK_typeparam;
317 }
318 // for error reporting.
319 const uint32 printLength = 2;
320 CHECK_FATAL(curIdx > 0, "must not be zero");
321 name = line.substr(curIdx - 1, printLength);
322 return TK_invalid;
323 }
324
GetTokenWithPrefixQuotation()325 TokenKind MIRLexer::GetTokenWithPrefixQuotation()
326 {
327 if (GetCharAtWithUpperCheck(curIdx + 1) == '\'') {
328 theIntVal = GetCharAtWithUpperCheck(curIdx);
329 constexpr uint32 hexLength = 2;
330 curIdx += hexLength;
331 return TK_intconst;
332 }
333 return TK_invalid;
334 }
335
GetTokenSpecial()336 TokenKind MIRLexer::GetTokenSpecial()
337 {
338 --curIdx;
339 char c = GetCharAtWithLowerCheck(curIdx);
340 if (utils::IsAlpha(c) || c < 0 || c == '_') {
341 GenName();
342 TokenKind tk = keywordMap[name];
343 switch (tk) {
344 case TK_nanf:
345 theFloatVal = NAN;
346 return TK_floatconst;
347 case TK_nan:
348 theDoubleVal = NAN;
349 return TK_doubleconst;
350 case TK_inff:
351 theFloatVal = INFINITY;
352 return TK_floatconst;
353 case TK_inf:
354 theDoubleVal = INFINITY;
355 return TK_doubleconst;
356 default:
357 return tk;
358 }
359 }
360 MIR_ERROR("error in input file\n");
361 return TK_eof;
362 }
363
NextToken()364 TokenKind MIRLexer::NextToken()
365 {
366 return kind;
367 }
368 } // namespace maple
369