1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <cerrno>
17 #include "mdlexer.h"
18
19 namespace MDGen {
PrepareFile(const std::string & mdfileName)20 void MDLexer::PrepareFile(const std::string &mdfileName)
21 {
22 mdFileInternal.open(mdfileName);
23 if (!mdFileInternal.is_open()) {
24 CHECK_FATAL(false, "Open target file failed");
25 }
26 mdFile = &mdFileInternal;
27 }
28
ReturnError() const29 MDTokenKind MDLexer::ReturnError() const
30 {
31 maple::LogInfo::MapleLogger() << "Unexpect character at Line" << lineNumber << "\n";
32 return kMDError;
33 }
34
ReadOneLine()35 int MDLexer::ReadOneLine()
36 {
37 if (mdFile == nullptr) {
38 strLine = "";
39 return -1;
40 }
41 curPos = 0;
42 if (!std::getline(*mdFile, strLine)) { /* EOF */
43 strLine = "";
44 mdFile = nullptr;
45 return -1;
46 }
47 RemoveInValidAtBack();
48 return GetStrLineSize();
49 }
50
NextToken()51 MDTokenKind MDLexer::NextToken()
52 {
53 curKind = LexToken();
54 return curKind;
55 }
56
LexToken()57 MDTokenKind MDLexer::LexToken()
58 {
59 char c = GetCurChar();
60 while (c == ' ' || c == '\t') { /* skip space && tab */
61 c = GetNextChar();
62 }
63 while (c == 0) {
64 if (ReadOneLine() < 0) {
65 return kMDEOF;
66 }
67 lineNumber++;
68 c = GetCurChar();
69 while (c == ' ' || c == '\t') {
70 c = GetNextChar();
71 }
72 }
73 curPos++;
74 switch (c) {
75 case '(':
76 return kMDOpenParen;
77 case ')':
78 return kMDCloseParen;
79 case '{':
80 return kMDOpenBrace;
81 case '}':
82 return kMDCloseBrace;
83 case '[':
84 return kMDOpenSquare;
85 case ']':
86 return kMDCloseSquare;
87 case '<':
88 return kMDLess;
89 case '>':
90 return kMDGreater;
91 case ';':
92 return kMDSemi;
93 case ',':
94 return kMDComma;
95 case ':':
96 return kMDColon;
97 case '=':
98 return kMDEqual;
99 case '&':
100 return kMDLgAnd;
101 case '|':
102 return kMDLgOr;
103 case '0': /* start handling number */
104 case '1':
105 case '2':
106 case '3':
107 case '4':
108 case '5':
109 case '6':
110 case '7':
111 case '8':
112 case '9':
113 case '-':
114 curPos--; /* support HEX AND INTERGER at present */
115 return GetTokenConstVal();
116 case '/': { /* handle comment; */
117 char cn = GetCurChar();
118 if (cn == '/') {
119 SkipALineComment();
120 } else if (cn == '*') {
121 if (!SkipCComment()) {
122 return kMDError;
123 }
124 } else {
125 return ReturnError();
126 }
127 return LexToken();
128 }
129 default:
130 if (isalpha(c) || c == '_') {
131 return GetTokenIdentifier(); /* identifier need to be modify */
132 }
133 return ReturnError();
134 }
135 }
136
GetTokenConstVal()137 MDTokenKind MDLexer::GetTokenConstVal()
138 {
139 bool negative = false;
140 char curC = GetCurChar();
141 if (curC == '-') {
142 curC = GetNextChar();
143 /* have Special Float const? */
144 negative = true;
145 }
146 const uint32 hexPrefixLength = 2;
147 if (strLine.compare(curPos, hexPrefixLength, "0x") == 0) {
148 curPos += hexPrefixLength;
149 return GetHexConst(curPos, negative);
150 }
151 uint32 digitStartPos = curPos;
152 char digitStartC = GetCurChar();
153 while (isdigit(curC)) {
154 curC = GetNextChar();
155 }
156 if (!isdigit(digitStartC) && curC != '.') {
157 return kMDInvalid;
158 }
159 if (curC != '.' && curC != 'e' && curC != 'E') {
160 return GetIntConst(digitStartPos, negative);
161 }
162 return GetFloatConst();
163 }
164
GetHexConst(uint32 digitStartPos,bool isNegative)165 MDTokenKind MDLexer::GetHexConst(uint32 digitStartPos, bool isNegative)
166 {
167 if (digitStartPos >= strLine.length()) {
168 return ReturnError();
169 }
170 char c = GetCurChar();
171 if (!isxdigit(c)) {
172 return kMDInvalid;
173 }
174 int loopDepth = 0;
175 while (isxdigit(c)) {
176 c = GetNextChar();
177 ++loopDepth;
178 if (loopDepth > maxNumLength) {
179 return ReturnError();
180 }
181 }
182 std::string hexStr = strLine.substr(digitStartPos, curPos - digitStartPos);
183 const char *hexStrPtr = hexStr.c_str();
184 errno = 0;
185 constexpr int hexInDec = 16;
186 intVal = static_cast<int32>(std::strtoll(hexStrPtr, nullptr, hexInDec));
187 if (errno == EINVAL) { /* Invalid hexadecimal number */
188 return ReturnError();
189 }
190 if (errno == ERANGE) {
191 errno = 0;
192 intVal = static_cast<uint32>(std::strtoll(hexStrPtr, nullptr, hexInDec));
193 if (errno == EINVAL) { /* Invalid hexadecimal number */
194 return ReturnError();
195 }
196 if (errno == ERANGE) { /* input number is out of range */
197 return ReturnError();
198 }
199 }
200 if (isNegative) {
201 intVal = -intVal;
202 }
203 return kMDIntVal;
204 }
205
GetIntConst(uint32 digitStartPos,bool isNegative)206 MDTokenKind MDLexer::GetIntConst(uint32 digitStartPos, bool isNegative)
207 {
208 char c = GetCharAt(digitStartPos);
209 /* no ULL LL suffix at present */
210 int loopDepth = 0;
211 while (isdigit(c)) {
212 c = GetNextChar();
213 ++loopDepth;
214 if (loopDepth > maxNumLength) {
215 return ReturnError();
216 }
217 }
218 curPos--;
219 if (digitStartPos >= strLine.length() || digitStartPos > curPos) {
220 return ReturnError();
221 }
222 std::string intStr = strLine.substr(digitStartPos, curPos - digitStartPos);
223 const char *intStrPtr = intStr.c_str();
224 errno = 0;
225 constexpr int decInDec = 10;
226 intVal = static_cast<int32>(std::strtoll(intStrPtr, nullptr, decInDec));
227 if (errno == ERANGE) {
228 return ReturnError();
229 }
230 if (isNegative) {
231 intVal = -intVal;
232 }
233 return kMDIntVal;
234 }
235
GetFloatConst()236 MDTokenKind MDLexer::GetFloatConst()
237 {
238 floatVal = 0;
239 return kMDInvalid;
240 }
241
GetTokenIdentifier()242 MDTokenKind MDLexer::GetTokenIdentifier()
243 {
244 --curPos;
245 uint32 startPos = curPos;
246 char curC = GetCurChar();
247
248 while (isalnum(curC) || curC == '_' || curC == '-' || curC < 0) {
249 curC = GetNextChar();
250 }
251 if (startPos >= strLine.length()) {
252 return ReturnError();
253 }
254 strToken = strLine.substr(startPos, curPos - startPos);
255 auto it = keywords.find(strToken);
256 if (it != keywords.end()) {
257 return it->second;
258 }
259 return kMDIdentifier;
260 }
261
SkipALineComment()262 void MDLexer::SkipALineComment()
263 {
264 while (curPos < GetStrLineSize()) {
265 curPos++;
266 }
267 /* if comment is required to be stored. it can be done here */
268 }
269
SkipCComment()270 bool MDLexer::SkipCComment()
271 {
272 bool startAnewLine = false;
273 char commentNext;
274 while (true) {
275 if (!startAnewLine) {
276 commentNext = GetNextChar();
277 } else {
278 commentNext = GetCurChar();
279 startAnewLine = false;
280 }
281 switch (commentNext) {
282 case 0:
283 if (ReadOneLine() < 0) {
284 DEBUG_ASSERT(false, "Untermianted comment");
285 return false;
286 }
287 ++lineNumber;
288 startAnewLine = true;
289 break;
290 case '*':
291 commentNext = GetNextChar();
292 if (commentNext == '/') {
293 ++curPos;
294 return true;
295 }
296 break;
297 default:
298 break;
299 }
300 }
301 return false;
302 }
303 } // namespace MDGen
304