1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCInstrInfo.h" 15 #include "llvm/MC/MCParser/MCAsmLexer.h" 16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 17 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 18 #include "llvm/MC/MCTargetOptions.h" 19 #include "llvm/MC/SubtargetFeature.h" 20 #include "llvm/Support/SMLoc.h" 21 #include <cstdint> 22 #include <memory> 23 24 namespace llvm { 25 26 class MCInst; 27 class MCParsedAsmOperand; 28 class MCStreamer; 29 class MCSubtargetInfo; 30 template <typename T> class SmallVectorImpl; 31 32 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 33 34 enum AsmRewriteKind { 35 AOK_Align, // Rewrite align as .align. 36 AOK_EVEN, // Rewrite even as .even. 37 AOK_Emit, // Rewrite _emit as .byte. 38 AOK_CallInput, // Rewrite in terms of ${N:P}. 39 AOK_Input, // Rewrite in terms of $N. 40 AOK_Output, // Rewrite in terms of $N. 41 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 42 AOK_Label, // Rewrite local labels. 43 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 44 AOK_Skip, // Skip emission (e.g., offset/type operators). 45 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 46 }; 47 48 const char AsmRewritePrecedence [] = { 49 2, // AOK_Align 50 2, // AOK_EVEN 51 2, // AOK_Emit 52 3, // AOK_Input 53 3, // AOK_CallInput 54 3, // AOK_Output 55 5, // AOK_SizeDirective 56 1, // AOK_Label 57 5, // AOK_EndOfStatement 58 2, // AOK_Skip 59 2 // AOK_IntelExpr 60 }; 61 62 // Represnt the various parts which makes up an intel expression, 63 // used for emitting compound intel expressions 64 struct IntelExpr { 65 bool NeedBracs; 66 int64_t Imm; 67 StringRef BaseReg; 68 StringRef IndexReg; 69 StringRef OffsetName; 70 unsigned Scale; 71 IntelExprIntelExpr72 IntelExpr() 73 : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), 74 OffsetName(StringRef()), Scale(1) {} 75 // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] IntelExprIntelExpr76 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, 77 StringRef offsetName, int64_t imm, bool needBracs) 78 : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), 79 OffsetName(offsetName), Scale(1) { 80 if (scale) 81 Scale = scale; 82 } hasBaseRegIntelExpr83 bool hasBaseReg() const { return !BaseReg.empty(); } hasIndexRegIntelExpr84 bool hasIndexReg() const { return !IndexReg.empty(); } hasRegsIntelExpr85 bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } hasOffsetIntelExpr86 bool hasOffset() const { return !OffsetName.empty(); } 87 // Normally we won't emit immediates unconditionally, 88 // unless we've got no other components emitImmIntelExpr89 bool emitImm() const { return !(hasRegs() || hasOffset()); } isValidIntelExpr90 bool isValid() const { 91 return (Scale == 1) || 92 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 93 } 94 }; 95 96 struct AsmRewrite { 97 AsmRewriteKind Kind; 98 SMLoc Loc; 99 unsigned Len; 100 bool Done; 101 int64_t Val; 102 StringRef Label; 103 IntelExpr IntelExp; 104 105 public: 106 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) KindAsmRewrite107 : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {} AsmRewriteAsmRewrite108 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 109 : AsmRewrite(kind, loc, len) { Label = label; } AsmRewriteAsmRewrite110 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 111 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 112 }; 113 114 struct ParseInstructionInfo { 115 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 116 117 ParseInstructionInfo() = default; ParseInstructionInfoParseInstructionInfo118 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 119 : AsmRewrites(rewrites) {} 120 }; 121 122 enum OperandMatchResultTy { 123 MatchOperand_Success, // operand matched successfully 124 MatchOperand_NoMatch, // operand did not match 125 MatchOperand_ParseFail // operand matched but had errors 126 }; 127 128 enum class DiagnosticPredicateTy { 129 Match, 130 NearMatch, 131 NoMatch, 132 }; 133 134 // When an operand is parsed, the assembler will try to iterate through a set of 135 // possible operand classes that the operand might match and call the 136 // corresponding PredicateMethod to determine that. 137 // 138 // If there are two AsmOperands that would give a specific diagnostic if there 139 // is no match, there is currently no mechanism to distinguish which operand is 140 // a closer match. The DiagnosticPredicate distinguishes between 'completely 141 // no match' and 'near match', so the assembler can decide whether to give a 142 // specific diagnostic, or use 'InvalidOperand' and continue to find a 143 // 'better matching' diagnostic. 144 // 145 // For example: 146 // opcode opnd0, onpd1, opnd2 147 // 148 // where: 149 // opnd2 could be an 'immediate of range [-8, 7]' 150 // opnd2 could be a 'register + shift/extend'. 151 // 152 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 153 // little sense to give a diagnostic that the operand should be an immediate 154 // in range [-8, 7]. 155 // 156 // This is a light-weight alternative to the 'NearMissInfo' approach 157 // below which collects *all* possible diagnostics. This alternative 158 // is optional and fully backward compatible with existing 159 // PredicateMethods that return a 'bool' (match or no match). 160 struct DiagnosticPredicate { 161 DiagnosticPredicateTy Type; 162 DiagnosticPredicateDiagnosticPredicate163 explicit DiagnosticPredicate(bool Match) 164 : Type(Match ? DiagnosticPredicateTy::Match 165 : DiagnosticPredicateTy::NearMatch) {} DiagnosticPredicateDiagnosticPredicate166 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} 167 DiagnosticPredicate(const DiagnosticPredicate &) = default; 168 DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default; 169 170 operator bool() const { return Type == DiagnosticPredicateTy::Match; } isMatchDiagnosticPredicate171 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } isNearMatchDiagnosticPredicate172 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } isNoMatchDiagnosticPredicate173 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } 174 }; 175 176 // When matching of an assembly instruction fails, there may be multiple 177 // encodings that are close to being a match. It's often ambiguous which one 178 // the programmer intended to use, so we want to report an error which mentions 179 // each of these "near-miss" encodings. This struct contains information about 180 // one such encoding, and why it did not match the parsed instruction. 181 class NearMissInfo { 182 public: 183 enum NearMissKind { 184 NoNearMiss, 185 NearMissOperand, 186 NearMissFeature, 187 NearMissPredicate, 188 NearMissTooFewOperands, 189 }; 190 191 // The encoding is valid for the parsed assembly string. This is only used 192 // internally to the table-generated assembly matcher. getSuccess()193 static NearMissInfo getSuccess() { return NearMissInfo(); } 194 195 // The instruction encoding is not valid because it requires some target 196 // features that are not currently enabled. MissingFeatures has a bit set for 197 // each feature that the encoding needs but which is not enabled. getMissedFeature(const FeatureBitset & MissingFeatures)198 static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { 199 NearMissInfo Result; 200 Result.Kind = NearMissFeature; 201 Result.Features = MissingFeatures; 202 return Result; 203 } 204 205 // The instruction encoding is not valid because the target-specific 206 // predicate function returned an error code. FailureCode is the 207 // target-specific error code returned by the predicate. getMissedPredicate(unsigned FailureCode)208 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 209 NearMissInfo Result; 210 Result.Kind = NearMissPredicate; 211 Result.PredicateError = FailureCode; 212 return Result; 213 } 214 215 // The instruction encoding is not valid because one (and only one) parsed 216 // operand is not of the correct type. OperandError is the error code 217 // relating to the operand class expected by the encoding. OperandClass is 218 // the type of the expected operand. Opcode is the opcode of the encoding. 219 // OperandIndex is the index into the parsed operand list. getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)220 static NearMissInfo getMissedOperand(unsigned OperandError, 221 unsigned OperandClass, unsigned Opcode, 222 unsigned OperandIndex) { 223 NearMissInfo Result; 224 Result.Kind = NearMissOperand; 225 Result.MissedOperand.Error = OperandError; 226 Result.MissedOperand.Class = OperandClass; 227 Result.MissedOperand.Opcode = Opcode; 228 Result.MissedOperand.Index = OperandIndex; 229 return Result; 230 } 231 232 // The instruction encoding is not valid because it expects more operands 233 // than were parsed. OperandClass is the class of the expected operand that 234 // was not provided. Opcode is the instruction encoding. getTooFewOperands(unsigned OperandClass,unsigned Opcode)235 static NearMissInfo getTooFewOperands(unsigned OperandClass, 236 unsigned Opcode) { 237 NearMissInfo Result; 238 Result.Kind = NearMissTooFewOperands; 239 Result.TooFewOperands.Class = OperandClass; 240 Result.TooFewOperands.Opcode = Opcode; 241 return Result; 242 } 243 244 operator bool() const { return Kind != NoNearMiss; } 245 getKind()246 NearMissKind getKind() const { return Kind; } 247 248 // Feature flags required by the instruction, that the current target does 249 // not have. getFeatures()250 const FeatureBitset& getFeatures() const { 251 assert(Kind == NearMissFeature); 252 return Features; 253 } 254 // Error code returned by the target predicate when validating this 255 // instruction encoding. getPredicateError()256 unsigned getPredicateError() const { 257 assert(Kind == NearMissPredicate); 258 return PredicateError; 259 } 260 // MatchClassKind of the operand that we expected to see. getOperandClass()261 unsigned getOperandClass() const { 262 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 263 return MissedOperand.Class; 264 } 265 // Opcode of the encoding we were trying to match. getOpcode()266 unsigned getOpcode() const { 267 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 268 return MissedOperand.Opcode; 269 } 270 // Error code returned when validating the operand. getOperandError()271 unsigned getOperandError() const { 272 assert(Kind == NearMissOperand); 273 return MissedOperand.Error; 274 } 275 // Index of the actual operand we were trying to match in the list of parsed 276 // operands. getOperandIndex()277 unsigned getOperandIndex() const { 278 assert(Kind == NearMissOperand); 279 return MissedOperand.Index; 280 } 281 282 private: 283 NearMissKind Kind; 284 285 // These two structs share a common prefix, so we can safely rely on the fact 286 // that they overlap in the union. 287 struct MissedOpInfo { 288 unsigned Class; 289 unsigned Opcode; 290 unsigned Error; 291 unsigned Index; 292 }; 293 294 struct TooFewOperandsInfo { 295 unsigned Class; 296 unsigned Opcode; 297 }; 298 299 union { 300 FeatureBitset Features; 301 unsigned PredicateError; 302 MissedOpInfo MissedOperand; 303 TooFewOperandsInfo TooFewOperands; 304 }; 305 NearMissInfo()306 NearMissInfo() : Kind(NoNearMiss) {} 307 }; 308 309 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 310 class MCTargetAsmParser : public MCAsmParserExtension { 311 public: 312 enum MatchResultTy { 313 Match_InvalidOperand, 314 Match_InvalidTiedOperand, 315 Match_MissingFeature, 316 Match_MnemonicFail, 317 Match_Success, 318 Match_NearMisses, 319 FIRST_TARGET_MATCH_RESULT_TY 320 }; 321 322 protected: // Can only create subclasses. 323 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 324 const MCInstrInfo &MII); 325 326 /// Create a copy of STI and return a non-const reference to it. 327 MCSubtargetInfo ©STI(); 328 329 /// AvailableFeatures - The current set of available features. 330 FeatureBitset AvailableFeatures; 331 332 /// ParsingInlineAsm - Are we parsing ms-style inline assembly? 333 bool ParsingInlineAsm = false; 334 335 /// SemaCallback - The Sema callback implementation. Must be set when parsing 336 /// ms-style inline assembly. 337 MCAsmParserSemaCallback *SemaCallback; 338 339 /// Set of options which affects instrumentation of inline assembly. 340 MCTargetOptions MCOptions; 341 342 /// Current STI. 343 const MCSubtargetInfo *STI; 344 345 const MCInstrInfo &MII; 346 347 public: 348 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 349 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 350 351 ~MCTargetAsmParser() override; 352 353 const MCSubtargetInfo &getSTI() const; 354 getAvailableFeatures()355 const FeatureBitset& getAvailableFeatures() const { 356 return AvailableFeatures; 357 } setAvailableFeatures(const FeatureBitset & Value)358 void setAvailableFeatures(const FeatureBitset& Value) { 359 AvailableFeatures = Value; 360 } 361 isParsingInlineAsm()362 bool isParsingInlineAsm () { return ParsingInlineAsm; } setParsingInlineAsm(bool Value)363 void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; } 364 getTargetOptions()365 MCTargetOptions getTargetOptions() const { return MCOptions; } 366 setSemaCallback(MCAsmParserSemaCallback * Callback)367 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 368 SemaCallback = Callback; 369 } 370 371 // Target-specific parsing of expression. parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)372 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 373 return getParser().parsePrimaryExpr(Res, EndLoc); 374 } 375 376 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 377 SMLoc &EndLoc) = 0; 378 379 /// ParseInstruction - Parse one assembly instruction. 380 /// 381 /// The parser is positioned following the instruction name. The target 382 /// specific instruction parser should parse the entire instruction and 383 /// construct the appropriate MCInst, or emit an error. On success, the entire 384 /// line should be parsed up to and including the end-of-statement token. On 385 /// failure, the parser is not required to read to the end of the line. 386 // 387 /// \param Name - The instruction name. 388 /// \param NameLoc - The source location of the name. 389 /// \param Operands [out] - The list of parsed operands, this returns 390 /// ownership of them to the caller. 391 /// \return True on failure. 392 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 393 SMLoc NameLoc, OperandVector &Operands) = 0; ParseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)394 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 395 AsmToken Token, OperandVector &Operands) { 396 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 397 } 398 399 /// ParseDirective - Parse a target specific assembler directive 400 /// 401 /// The parser is positioned following the directive name. The target 402 /// specific directive parser should parse the entire directive doing or 403 /// recording any target specific work, or return true and do nothing if the 404 /// directive is not target specific. If the directive is specific for 405 /// the target, the entire line is parsed up to and including the 406 /// end-of-statement token and false is returned. 407 /// 408 /// \param DirectiveID - the identifier token of the directive. 409 virtual bool ParseDirective(AsmToken DirectiveID) = 0; 410 411 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 412 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 413 /// This returns false on success and returns true on failure to match. 414 /// 415 /// On failure, the target parser is responsible for emitting a diagnostic 416 /// explaining the match failure. 417 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 418 OperandVector &Operands, MCStreamer &Out, 419 uint64_t &ErrorInfo, 420 bool MatchingInlineAsm) = 0; 421 422 /// Allows targets to let registers opt out of clobber lists. OmitRegisterFromClobberLists(unsigned RegNo)423 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 424 425 /// Allow a target to add special case operand matching for things that 426 /// tblgen doesn't/can't handle effectively. For example, literal 427 /// immediates on ARM. TableGen expects a token operand, but the parser 428 /// will recognize them as immediates. validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)429 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 430 unsigned Kind) { 431 return Match_InvalidOperand; 432 } 433 434 /// Validate the instruction match against any complex target predicates 435 /// before rendering any operands to it. 436 virtual unsigned checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)437 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 438 return Match_Success; 439 } 440 441 /// checkTargetMatchPredicate - Validate the instruction match against 442 /// any complex target predicates not expressible via match classes. checkTargetMatchPredicate(MCInst & Inst)443 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 444 return Match_Success; 445 } 446 447 virtual void convertToMapAndConstraints(unsigned Kind, 448 const OperandVector &Operands) = 0; 449 450 /// Returns whether two registers are equal and is used by the tied-operands 451 /// checks in the AsmMatcher. This method can be overridden allow e.g. a 452 /// sub- or super-register as the tied operand. regsEqual(const MCParsedAsmOperand & Op1,const MCParsedAsmOperand & Op2)453 virtual bool regsEqual(const MCParsedAsmOperand &Op1, 454 const MCParsedAsmOperand &Op2) const { 455 assert(Op1.isReg() && Op2.isReg() && "Operands not all regs"); 456 return Op1.getReg() == Op2.getReg(); 457 } 458 459 // Return whether this parser uses assignment statements with equals tokens equalIsAsmAssignment()460 virtual bool equalIsAsmAssignment() { return true; }; 461 // Return whether this start of statement identifier is a label isLabel(AsmToken & Token)462 virtual bool isLabel(AsmToken &Token) { return true; }; 463 // Return whether this parser accept star as start of statement starIsStartOfStatement()464 virtual bool starIsStartOfStatement() { return false; }; 465 applyModifierToExpr(const MCExpr * E,MCSymbolRefExpr::VariantKind,MCContext & Ctx)466 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 467 MCSymbolRefExpr::VariantKind, 468 MCContext &Ctx) { 469 return nullptr; 470 } 471 472 // For actions that have to be performed before a label is emitted doBeforeLabelEmit(MCSymbol * Symbol)473 virtual void doBeforeLabelEmit(MCSymbol *Symbol) {} 474 onLabelParsed(MCSymbol * Symbol)475 virtual void onLabelParsed(MCSymbol *Symbol) {} 476 477 /// Ensure that all previously parsed instructions have been emitted to the 478 /// output streamer, if the target does not emit them immediately. flushPendingInstructions(MCStreamer & Out)479 virtual void flushPendingInstructions(MCStreamer &Out) {} 480 createTargetUnaryExpr(const MCExpr * E,AsmToken::TokenKind OperatorToken,MCContext & Ctx)481 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 482 AsmToken::TokenKind OperatorToken, 483 MCContext &Ctx) { 484 return nullptr; 485 } 486 487 // For any checks or cleanups at the end of parsing. onEndOfFile()488 virtual void onEndOfFile() {} 489 }; 490 491 } // end namespace llvm 492 493 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 494