1 /*
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef PANDA_ASSEMBLER_ASSEMBLY_PARSER_H
17 #define PANDA_ASSEMBLER_ASSEMBLY_PARSER_H
18
19 #include <iostream>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23
24 #include "assembly-context.h"
25 #include "assembly-emitter.h"
26 #include "assembly-field.h"
27 #include "assembly-function.h"
28 #include "assembly-ins.h"
29 #include "assembly-label.h"
30 #include "assembly-program.h"
31 #include "assembly-record.h"
32 #include "assembly-type.h"
33 #include "define.h"
34 #include "error.h"
35 #include "ide_helpers.h"
36 #include "lexer.h"
37 #include "meta.h"
38 #include "utils/expected.h"
39
40 namespace ark::pandasm {
41
/* A list of instructions paired with an Error value. */
using Instructions = std::pair<std::vector<Ins>, Error>;

/* A string-keyed table of Function objects paired with a string-keyed table of Record objects. */
using Functions = std::pair<std::unordered_map<std::string, Function>, std::unordered_map<std::string, Record>>;
45
46 class Parser {
47 public:
48 Parser() = default;
49
50 NO_MOVE_SEMANTIC(Parser);
51 NO_COPY_SEMANTIC(Parser);
52
53 ~Parser() = default;
54
55 /*
56 * The main function of parsing, which takes a vector of token vectors and a name of the source file.
57 * Returns a program or an error value: Expected<Program, Error>
58 * This function analyzes code containing several functions:
59 * - Each function used must be declared.
60 * - The correct function declaration looks like this: .function ret_type fun_name([param_type aN,]) [<metadata>]
61 * ([data] shows that this 'data' is optional).
62 * - N in function parameters must increase when number of parameters increases
63 * (Possible: a0, a1,..., aN. Impossible: a1, a10, a13).
64 * - Each function has its own label table.
65 */
66 PANDA_PUBLIC_API Expected<Program, Error> Parse(TokenSet &vectorsTokens, const std::string &fileName = "");
67
68 /*
69 * The main function of parsing, which takes a string with source and a name of the source file.
70 * Returns a program or an error value: Expected<Program, Error>
71 */
72 PANDA_PUBLIC_API Expected<Program, Error> Parse(const std::string &source, const std::string &fileName = "");
73
74 /*
75 * Returns a set error
76 */
ShowError()77 Error ShowError() const
78 {
79 return err_;
80 }
81
ShowWarnings()82 ErrorList ShowWarnings() const
83 {
84 return war_;
85 }
IsUnderscoreOrDollarOrHyphen(char c)86 inline bool IsUnderscoreOrDollarOrHyphen(char c)
87 {
88 return c == '_' || c == '$' || c == '-';
89 }
90
IsAlphaNumeric(char c)91 inline bool IsAlphaNumeric(char c)
92 {
93 return std::isalnum(c) != 0 || IsUnderscoreOrDollarOrHyphen(c);
94 }
95
IsNonDigit(char c)96 inline bool IsNonDigit(char c)
97 {
98 return std::isdigit(c) == 0;
99 }
100
101 private:
102 ark::pandasm::Program program_;
103 std::unordered_map<std::string, ark::pandasm::Label> *labelTable_ = nullptr;
104 Metadata *metadata_ = nullptr;
105 Context context_; /* token iterator */
106 ark::pandasm::Record *currRecord_ = nullptr;
107 ark::pandasm::LiteralArray *currArray_ = nullptr;
108 bool isConstArray_ = false;
109 ark::pandasm::LiteralArray::Literal *currArrayElem_ = nullptr;
110 ark::pandasm::Function *currFunc_ = nullptr;
111 std::map<std::pair<std::string, bool>, ark::pandasm::Function> ambiguousFunctionTable_;
112 ark::pandasm::Ins *currIns_ = nullptr;
113 ark::pandasm::Field *currFld_ = nullptr;
114 size_t lineStric_ = 0;
115 ark::pandasm::Error err_;
116 ark::pandasm::ErrorList war_;
117 bool open_ = false; /* flag of being in a code section */
118 bool recordDef_ = false;
119 bool arrayDef_ = false;
120 bool funcDef_ = false;
121 static constexpr uint32_t INTRO_CONST_ARRAY_LITERALS_NUMBER = 2;
122
123 enum class BracketOptions : uint8_t {
124 NOT_ALLOW_BRACKETS = 0,
125 ALLOW_BRACKETS = 1,
126 ALLOW_ANGLE_BRACKETS = 2,
127 ALL_BRACKETS = ALLOW_BRACKETS | ALLOW_ANGLE_BRACKETS
128 };
129
IsAllowAngleBrackets(BracketOptions options)130 bool IsAllowAngleBrackets(BracketOptions options)
131 {
132 return (static_cast<uint8_t>(options) & static_cast<uint8_t>(BracketOptions::ALLOW_ANGLE_BRACKETS)) != 0;
133 }
IsAllowBrackets(BracketOptions options)134 bool IsAllowBrackets(BracketOptions options)
135 {
136 return (static_cast<uint8_t>(options) & static_cast<uint8_t>(BracketOptions::ALLOW_BRACKETS)) != 0;
137 }
138
139 inline Error GetError(const std::string &mess = "", Error::ErrorType err = Error::ErrorType::ERR_NONE,
140 int8_t shift = 0, int tokenShift = 0, const std::string &addMess = "") const
141 {
142 return Error(mess, lineStric_, err, addMess,
143 context_.tokens[static_cast<int>(context_.number) + tokenShift - 1].boundLeft + shift,
144 context_.tokens[static_cast<int>(context_.number) + tokenShift - 1].boundRight,
145 context_.tokens[static_cast<int>(context_.number) + tokenShift - 1].wholeLine);
146 }
147
148 inline void GetWarning(const std::string &mess = "", Error::ErrorType err = Error::ErrorType::ERR_NONE,
149 int8_t shift = 0, const std::string &addMess = "")
150 {
151 war_.emplace_back(mess, lineStric_, err, addMess,
152 context_.tokens[context_.number - 1].boundLeft + static_cast<size_t>(shift),
153 context_.tokens[context_.number - 1].boundRight,
154 context_.tokens[context_.number - 1].wholeLine, Error::ErrorClass::WARNING);
155 }
156
GetCurrentPosition(bool leftBound)157 SourcePosition GetCurrentPosition(bool leftBound) const
158 {
159 if (leftBound) {
160 return SourcePosition {lineStric_, context_.tokens[context_.number - 1].boundLeft};
161 }
162 return SourcePosition {lineStric_, context_.tokens[context_.number - 1].boundRight};
163 }
164
165 bool LabelValidName();
166 bool TypeValidName();
167 bool RegValidName();
168 bool ParamValidName();
169 bool FunctionValidName();
170 bool ParseFunctionName();
171 bool ParseLabel();
172 bool ParseOperation();
173 bool ParseOperands();
174 bool ParseFunctionCode();
175 bool ParseFunctionInstruction();
176 bool ParseFunctionFullSign();
177 bool UpdateFunctionName();
178 bool UpdateFunctionName(bool isHomonym);
179 bool ParseFunctionReturn();
180 bool ParseFunctionArg();
181 bool ParseFunctionArgComma(bool &comma);
182 bool ParseFunctionArgs();
183 bool ParseType(Type *type);
184 bool PrefixedValidName(BracketOptions options = BracketOptions::NOT_ALLOW_BRACKETS);
185 bool ParseMetaListComma(bool &comma, bool eq);
186 bool MeetExpMetaList(bool eq);
187 bool BuildMetaListAttr(bool &eq, std::string &attributeName, std::string &attributeValue);
188 bool ParseMetaList(bool flag);
189 bool ParseMetaDef();
190 bool ParseRecordFullSign();
191 bool ParseRecordFields();
192 bool ParseRecordField();
193 bool ParseRecordName();
194 bool RecordValidName();
195 bool ParseArrayFullSign();
196 bool IsConstArray();
197 bool ParseArrayName();
198 bool ArrayValidName();
199 bool ArrayElementsValidNumber();
200 bool ParseArrayElements();
201 bool ParseArrayElement();
202 bool ParseArrayElementType();
203 bool ParseArrayElementValue();
204 bool ParseArrayElementValueInteger();
205 bool ParseArrayElementValueFloat();
206 bool ParseArrayElementValueString();
207 bool ParseFieldName();
208 bool ParseFieldType();
209 std::optional<std::string> ParseStringLiteral();
210 int64_t MnemonicToBuiltinId();
211 uint8_t ParseMultiArrayHallmark();
212
213 bool ParseInteger(int64_t *value);
214 bool ParseFloat(double *value, bool is64bit);
215 bool ParseOperandVreg();
216 bool ParseOperandComma();
217 bool ParseOperandInteger();
218 bool ParseOperandFloat(bool is64bit);
219 bool ParseOperandId();
220 bool ParseOperandLabel();
221 bool ParseOperandField();
222 bool ParseOperandType(Type::VerificationType verType);
223 bool ParseOperandNone();
224 bool ParseOperandString();
225 bool ParseOperandLiteralArray();
226 bool ParseOperandCall();
227 bool ParseOperandSignature(std::string *sign);
228 bool ParseOperandSignatureTypesList(std::string *sign);
229 bool ParseOperandBuiltinMnemonic();
230 bool ParseOperandInitobj();
231
232 void SetFunctionInformation();
233 void SetRecordInformation(const std::string &recordName);
234 void SetArrayInformation();
235 void SetOperationInformation();
236 void ParseAsCatchall(const std::vector<Token> &tokens);
237 void ParseAsLanguage(const std::vector<Token> &tokens, bool &isLangParsed, bool &isFirstStatement);
238 void ParseAsRecord(const std::vector<Token> &tokens);
239 void ParseAsArray(const std::vector<Token> &tokens);
240 void ParseAsFunction(const std::vector<Token> &tokens);
241 void ParseAsBraceRight(const std::vector<Token> &tokens);
242 bool ParseAfterLine(bool &isFirstStatement);
243 void ParseContextByType(const std::vector<Token> &tokens, bool &isLangParsed, bool &isFirstStatement);
244 Expected<Program, Error> ParseAfterMainLoop(const std::string &fileName);
245 void ParseResetFunctionLabelsAndParams();
246 void ParseResetFunctionParams(bool isHomonym);
247 void ParseResetTables();
248 void ParseResetFunctionTable();
249 void ParseInsFromFuncTable(ark::pandasm::Function &func);
250 void CheckVirtualCalls(const std::map<std::string, ark::pandasm::Function> &functionTable);
251 bool CheckVirtualCallInsn(const ark::pandasm::Ins &insn);
252 void ParseResetRecordTable();
253 void ParseResetRecords(const ark::pandasm::Record &record);
254 void ParseResetArrayTable();
255 void ParseAsLanguageDirective();
256 Function::CatchBlock PrepareCatchBlock(bool isCatchall, size_t size, size_t catchallTokensNum,
257 size_t catchTokensNum);
258 void ParseAsCatchDirective();
259 void SetError();
260 void SetMetadataContextError(const Metadata::Error &err, bool hasValue);
261
262 Expected<char, Error> ParseOctalEscapeSequence(std::string_view s, size_t *i);
263 Expected<char, Error> ParseHexEscapeSequence(std::string_view s, size_t *i);
264 Expected<char, Error> ParseEscapeSequence(std::string_view s, size_t *i);
265
AnalyzeEmplacement(bool isDefinition,bool isInserted,FileLocation * fileLocation)266 bool AnalyzeEmplacement(bool isDefinition, bool isInserted, FileLocation *fileLocation)
267 {
268 if (isInserted) {
269 return true;
270 }
271
272 if (fileLocation->isDefined && isDefinition) {
273 return false;
274 }
275
276 if (!fileLocation->isDefined && isDefinition) {
277 fileLocation->isDefined = true;
278 return true;
279 }
280
281 if (!fileLocation->isDefined) {
282 fileLocation->boundLeft = context_.tokens[context_.number - 1].boundLeft;
283 fileLocation->boundRight = context_.tokens[context_.number - 1].boundRight;
284 fileLocation->wholeLine = context_.tokens[context_.number - 1].wholeLine;
285 fileLocation->lineNumber = lineStric_;
286 }
287
288 return true;
289 }
290
291 template <class T, class E>
TryEmplaceInTable(bool isDefinition,T & item,const E & elem,const std::string & name)292 auto TryEmplaceInTable(bool isDefinition, T &item, const E &elem, const std::string &name)
293 {
294 return item.try_emplace(elem, name, program_.lang, context_.tokens[context_.number - 1].boundLeft,
295 context_.tokens[context_.number - 1].boundRight,
296 context_.tokens[context_.number - 1].wholeLine, isDefinition, lineStric_);
297 }
298
299 template <class T>
300 bool AddObjectInTable(bool isDefinition, T &item, const std::string &cid = "")
301 {
302 std::string name = !cid.empty() ? cid : std::string(context_.GiveToken().data(), context_.GiveToken().length());
303 FileLocation *fileLocation {nullptr};
304 bool isInserted = false;
305 if constexpr (std::is_same_v<T, std::map<std::pair<std::string, bool>, ark::pandasm::Function>>) {
306 auto res = TryEmplaceInTable(isDefinition, item, std::make_pair(name, false), name);
307 isInserted = res.second;
308 fileLocation = &(res.first->second.fileLocation.value());
309 } else {
310 auto res = TryEmplaceInTable(isDefinition, item, name, name);
311 isInserted = res.second;
312 fileLocation = &(res.first->second.fileLocation.value());
313 }
314 return AnalyzeEmplacement(isDefinition, isInserted, fileLocation);
315 }
316 };
317
318 template <>
TryEmplaceInTable(bool isDefinition,std::unordered_map<std::string,ark::pandasm::Label> & item,const std::string & elem,const std::string & name)319 inline auto Parser::TryEmplaceInTable(bool isDefinition, std::unordered_map<std::string, ark::pandasm::Label> &item,
320 [[maybe_unused]] const std::string &elem,
321 [[maybe_unused]] const std::string &name)
322 {
323 return item.try_emplace(std::string(context_.GiveToken().data(), context_.GiveToken().length()),
324 std::string(context_.GiveToken().data(), context_.GiveToken().length()),
325 context_.tokens[context_.number - 1].boundLeft,
326 context_.tokens[context_.number - 1].boundRight,
327 context_.tokens[context_.number - 1].wholeLine, isDefinition, lineStric_);
328 }
329
330 } // namespace ark::pandasm
331
332 #endif // PANDA_ASSEMBLER_ASSEMBLY_PARSER_H
333