• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "lexer.h"
17 #include <cmath>
18 #include <climits>
19 #include <cstdlib>
20 #include "mpl_logging.h"
21 #include "debug_info.h"
22 #include "mir_module.h"
23 #include "securec.h"
24 #include "utils.h"
25 
26 namespace maple {
HexCharToDigit(char c)27 int32 HexCharToDigit(char c)
28 {
29     int32 ret = utils::ToDigit<16, int32>(c);
30     return (ret != INT32_MAX ? ret : 0);
31 }
32 
33 // Read (next) line from the MIR (text) file, and return the read
34 // number of chars.
35 // if the line is empty (nothing but a newline), returns 0.
36 // if EOF, return -1.
37 // The trailing new-line character has been removed.
ReadALine()38 int MIRLexer::ReadALine()
39 {
40     if (airFile == nullptr) {
41         line = "";
42         return -1;
43     }
44 
45     curIdx = 0;
46     if (!std::getline(*airFile, line)) {  // EOF
47         line = "";
48         airFile = nullptr;
49         currentLineSize = 0;
50         return -1;
51     }
52 
53     RemoveReturnInline(line);
54     currentLineSize = line.length();
55     return currentLineSize;
56 }
57 
ReadALineByMirQueue()58 int MIRLexer::ReadALineByMirQueue()
59 {
60     if (mirQueue.empty()) {
61         line = "";
62         return -1;
63     }
64     curIdx = 0;
65     line = mirQueue.front();
66     RemoveReturnInline(line);
67     currentLineSize = line.length();
68     mirQueue.pop();
69     return currentLineSize;
70 }
71 
MIRLexer(MIRModule & mod)72 MIRLexer::MIRLexer(MIRModule &mod)
73     : module(mod), seenComments(mod.GetMPAllocator().Adapter()), keywordMap(mod.GetMPAllocator().Adapter())
74 {
75     // initialize keywordMap
76     keywordMap.clear();
77 #define KEYWORD(STR)                \
78     {                               \
79         std::string str;            \
80         str = #STR;                 \
81         keywordMap[str] = TK_##STR; \
82     }
83 #include "keywords.def"
84 #undef KEYWORD
85 }
86 
PrepareForFile(const std::string & filename)87 void MIRLexer::PrepareForFile(const std::string &filename)
88 {
89     // open MIR file
90     airFileInternal.open(filename);
91     CHECK_FATAL(airFileInternal.is_open(), "cannot open MIR file %s\n", &filename);
92 
93     airFile = &airFileInternal;
94     // try to read the first line
95     if (ReadALine() < 0) {
96         lineNum = 0;
97     } else {
98         lineNum = 1;
99     }
100     module.GetDbgInfo()->UpdateMsg(lineNum, line.c_str());
101     kind = TK_invalid;
102 }
103 
PrepareForString(const std::string & src)104 void MIRLexer::PrepareForString(const std::string &src)
105 {
106     SetMirQueue(src);
107     if (ReadALineByMirQueue() < 0) {
108         lineNum = 0;
109     } else {
110         lineNum = 1;
111     }
112     module.GetDbgInfo()->UpdateMsg(lineNum, line.c_str());
113     kind = TK_invalid;
114 }
115 
GenName()116 void MIRLexer::GenName()
117 {
118     uint32 startIdx = curIdx;
119     char c = GetNextCurrentCharWithUpperCheck();
120     CHECK_FATAL(curIdx > 0, "must not be zero");
121     char cp = GetCharAt(curIdx - 1);
122     if (c == '@' && (cp == 'h' || cp == 'f')) {
123         // special pattern for exception handling labels: catch or finally
124         c = GetNextCurrentCharWithUpperCheck();
125     }
126     while (utils::IsAlnum(c) || c < 0 || c == '_' || c == '$' || c == ';' || c == '/' || c == '|' || c == '.' ||
127            c == '?' || c == '@') {
128         c = GetNextCurrentCharWithUpperCheck();
129     }
130     name = line.substr(startIdx, curIdx - startIdx);
131 }
132 
GetSpecialFloatConst()133 TokenKind MIRLexer::GetSpecialFloatConst()
134 {
135     constexpr uint32 lenSpecFloat = 4;
136     constexpr uint32 lenSpecDouble = 3;
137     if (line.compare(curIdx, lenSpecFloat, "inff") == 0 &&
138         !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecFloat))) {
139         curIdx += lenSpecFloat;
140         theFloatVal = -INFINITY;
141         return TK_floatconst;
142     }
143     if (line.compare(curIdx, lenSpecDouble, "inf") == 0 &&
144         !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecDouble))) {
145         curIdx += lenSpecDouble;
146         theDoubleVal = -INFINITY;
147         return TK_doubleconst;
148     }
149     if (line.compare(curIdx, lenSpecFloat, "nanf") == 0 &&
150         !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecFloat))) {
151         curIdx += lenSpecFloat;
152         theFloatVal = -NAN;
153         return TK_floatconst;
154     }
155     if (line.compare(curIdx, lenSpecDouble, "nan") == 0 &&
156         !utils::IsAlnum(GetCharAtWithUpperCheck(curIdx + lenSpecDouble))) {
157         curIdx += lenSpecDouble;
158         theDoubleVal = -NAN;
159         return TK_doubleconst;
160     }
161     return TK_invalid;
162 }
163 
GetHexConst(uint32 valStart,bool negative)164 TokenKind MIRLexer::GetHexConst(uint32 valStart, bool negative)
165 {
166     char c = GetCharAtWithUpperCheck(curIdx);
167     if (!isxdigit(c)) {
168         name = line.substr(valStart, curIdx - valStart);
169         return TK_invalid;
170     }
171     uint64 tmp = static_cast<uint32>(HexCharToDigit(c));
172     c = GetNextCurrentCharWithUpperCheck();
173     while (isxdigit(c)) {
174         tmp = (tmp << k16BitShift) + static_cast<uint32>(HexCharToDigit(c));
175         c = GetNextCurrentCharWithUpperCheck();
176     }
177     theIntVal = static_cast<uint64>(static_cast<uint64>(tmp));
178     if (negative) {
179         theIntVal = -theIntVal;
180     }
181     theFloatVal = static_cast<float>(theIntVal);
182     theDoubleVal = static_cast<double>(theIntVal);
183     if (negative && theIntVal == 0) {
184         theFloatVal = -theFloatVal;
185         theDoubleVal = -theDoubleVal;
186     }
187     name = line.substr(valStart, curIdx - valStart);
188     return TK_intconst;
189 }
190 
GetIntConst(uint32 valStart,bool negative)191 TokenKind MIRLexer::GetIntConst(uint32 valStart, bool negative)
192 {
193     auto negOrSelf = [negative](uint64 val) { return negative ? ~val + 1 : val; };
194 
195     theIntVal = static_cast<uint64>(HexCharToDigit(GetCharAtWithUpperCheck(curIdx)));
196 
197     uint64 radix = theIntVal == 0 ? 8 : 10;
198 
199     char c = GetNextCurrentCharWithUpperCheck();
200 
201     for (theIntVal = negOrSelf(theIntVal); isdigit(c); c = GetNextCurrentCharWithUpperCheck()) {
202         theIntVal = (theIntVal * radix) + negOrSelf(HexCharToDigit(c));
203     }
204 
205     if (c == 'u' || c == 'U') {  // skip 'u' or 'U'
206         c = GetNextCurrentCharWithUpperCheck();
207         if (c == 'l' || c == 'L') {
208             c = GetNextCurrentCharWithUpperCheck();
209         }
210     }
211 
212     if (c == 'l' || c == 'L') {
213         c = GetNextCurrentCharWithUpperCheck();
214         if (c == 'l' || c == 'L' || c == 'u' || c == 'U') {
215             ++curIdx;
216         }
217     }
218 
219     name = line.substr(valStart, curIdx - valStart);
220 
221     if (negative) {
222         theFloatVal = static_cast<float>(static_cast<int64>(theIntVal));
223         theDoubleVal = static_cast<double>(static_cast<int64>(theIntVal));
224 
225         if (theIntVal == 0) {
226             theFloatVal = -theFloatVal;
227             theDoubleVal = -theDoubleVal;
228         }
229     } else {
230         theFloatVal = static_cast<float>(theIntVal);
231         theDoubleVal = static_cast<double>(theIntVal);
232     }
233 
234     return TK_intconst;
235 }
236 
GetTokenWithPrefixDollar()237 TokenKind MIRLexer::GetTokenWithPrefixDollar()
238 {
239     // token with prefix '$'
240     char c = GetCharAtWithUpperCheck(curIdx);
241     if (utils::IsAlpha(c) || c == '_' || c == '$') {
242         GenName();
243         return TK_gname;
244     } else {
245         // for error reporting.
246         const uint32 printLength = 2;
247         DEBUG_ASSERT(curIdx > 0, "must not be zero");
248         name = line.substr(curIdx - 1, printLength);
249         return TK_invalid;
250     }
251 }
252 
GetTokenWithPrefixPercent()253 TokenKind MIRLexer::GetTokenWithPrefixPercent()
254 {
255     // token with prefix '%'
256     char c = GetCharAtWithUpperCheck(curIdx);
257     if (isdigit(c)) {
258         int valStart = static_cast<int>(curIdx) - 1;
259         theIntVal = static_cast<uint64>(HexCharToDigit(c));
260         c = GetNextCurrentCharWithUpperCheck();
261         while (isdigit(c)) {
262             theIntVal = (theIntVal * 10) + static_cast<uint64>(HexCharToDigit(c)); // 10 for decimal
263             DEBUG_ASSERT(theIntVal >= 0, "int value overflow");
264             c = GetNextCurrentCharWithUpperCheck();
265         }
266         name = line.substr(valStart, curIdx - valStart);
267         return TK_preg;
268     }
269     if (utils::IsAlpha(c) || c == '_' || c == '$') {
270         GenName();
271         return TK_lname;
272     }
273     if (c == '%' && utils::IsAlpha(GetCharAtWithUpperCheck(curIdx + 1))) {
274         ++curIdx;
275         GenName();
276         return TK_specialreg;
277     }
278     return TK_invalid;
279 }
280 
GetTokenWithPrefixAmpersand()281 TokenKind MIRLexer::GetTokenWithPrefixAmpersand()
282 {
283     // token with prefix '&'
284     char c = GetCurrentCharWithUpperCheck();
285     if (utils::IsAlpha(c) || c == '_') {
286         GenName();
287         return TK_fname;
288     }
289     // for error reporting.
290     constexpr uint32 printLength = 2;
291     CHECK_FATAL(curIdx > 0, "must not be zero");
292     name = line.substr(curIdx - 1, printLength);
293     return TK_invalid;
294 }
295 
GetTokenWithPrefixAtOrCircumflex(char prefix)296 TokenKind MIRLexer::GetTokenWithPrefixAtOrCircumflex(char prefix)
297 {
298     // token with prefix '@' or `^`
299     char c = GetCurrentCharWithUpperCheck();
300     if (utils::IsAlnum(c) || c < 0 || c == '_' || c == '@' || c == '$' || c == '|') {
301         GenName();
302         if (prefix == '@') {
303             return TK_label;
304         }
305         return TK_prntfield;
306     }
307     return TK_invalid;
308 }
309 
GetTokenWithPrefixExclamation()310 TokenKind MIRLexer::GetTokenWithPrefixExclamation()
311 {
312     // token with prefix '!'
313     char c = GetCurrentCharWithUpperCheck();
314     if (utils::IsAlpha(c)) {
315         GenName();
316         return TK_typeparam;
317     }
318     // for error reporting.
319     const uint32 printLength = 2;
320     CHECK_FATAL(curIdx > 0, "must not be zero");
321     name = line.substr(curIdx - 1, printLength);
322     return TK_invalid;
323 }
324 
GetTokenWithPrefixQuotation()325 TokenKind MIRLexer::GetTokenWithPrefixQuotation()
326 {
327     if (GetCharAtWithUpperCheck(curIdx + 1) == '\'') {
328         theIntVal = GetCharAtWithUpperCheck(curIdx);
329         constexpr uint32 hexLength = 2;
330         curIdx += hexLength;
331         return TK_intconst;
332     }
333     return TK_invalid;
334 }
335 
GetTokenSpecial()336 TokenKind MIRLexer::GetTokenSpecial()
337 {
338     --curIdx;
339     char c = GetCharAtWithLowerCheck(curIdx);
340     if (utils::IsAlpha(c) || c < 0 || c == '_') {
341         GenName();
342         TokenKind tk = keywordMap[name];
343         switch (tk) {
344             case TK_nanf:
345                 theFloatVal = NAN;
346                 return TK_floatconst;
347             case TK_nan:
348                 theDoubleVal = NAN;
349                 return TK_doubleconst;
350             case TK_inff:
351                 theFloatVal = INFINITY;
352                 return TK_floatconst;
353             case TK_inf:
354                 theDoubleVal = INFINITY;
355                 return TK_doubleconst;
356             default:
357                 return tk;
358         }
359     }
360     MIR_ERROR("error in input file\n");
361     return TK_eof;
362 }
363 
NextToken()364 TokenKind MIRLexer::NextToken()
365 {
366     return kind;
367 }
368 }  // namespace maple
369