1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86MCExpr.h"
12 #include "MCTargetDesc/X86TargetStreamer.h"
13 #include "TargetInfo/X86TargetInfo.h"
14 #include "X86AsmParserCommon.h"
15 #include "X86Operand.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSection.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/SourceMgr.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <algorithm>
40 #include <memory>
41 
42 using namespace llvm;
43 
44 static cl::opt<bool> LVIInlineAsmHardening(
45     "x86-experimental-lvi-inline-asm-hardening",
46     cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
47              " Injection (LVI). This feature is experimental."), cl::Hidden);
48 
49 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
50   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
51     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
52     return true;
53   }
54   return false;
55 }
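// For example, an Intel-syntax operand such as "[eax + ebx*3]" reaches this
// check with Scale == 3 and is rejected with the message above, while
// "[eax + ebx*4]" passes with Scale == 4.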
56 
57 namespace {
58 
59 static const char OpPrecedence[] = {
60     0,  // IC_OR
61     1,  // IC_XOR
62     2,  // IC_AND
63     4,  // IC_LSHIFT
64     4,  // IC_RSHIFT
65     5,  // IC_PLUS
66     5,  // IC_MINUS
67     6,  // IC_MULTIPLY
68     6,  // IC_DIVIDE
69     6,  // IC_MOD
70     7,  // IC_NOT
71     8,  // IC_NEG
72     9,  // IC_RPAREN
73     10, // IC_LPAREN
74     0,  // IC_IMM
75     0,  // IC_REGISTER
76     3,  // IC_EQ
77     3,  // IC_NE
78     3,  // IC_LT
79     3,  // IC_LE
80     3,  // IC_GT
81     3   // IC_GE
82 };
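// Illustrative reading of the table: with these precedences, an Intel operand
// expression like "1 OR 2 + 3 * 4" is grouped as "1 OR (2 + (3 * 4))", since
// IC_MULTIPLY (6) binds tighter than IC_PLUS (5), which binds tighter than
// IC_OR (0). The comparison operators (EQ/NE/LT/LE/GT/GE) sit at precedence 3,
// between IC_AND (2) and the shifts (4).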
83 
84 class X86AsmParser : public MCTargetAsmParser {
85   ParseInstructionInfo *InstInfo;
86   bool Code16GCC;
87   unsigned ForcedDataPrefix = 0;
88 
89   enum VEXEncoding {
90     VEXEncoding_Default,
91     VEXEncoding_VEX,
92     VEXEncoding_VEX2,
93     VEXEncoding_VEX3,
94     VEXEncoding_EVEX,
95   };
96 
97   VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
98 
99   enum DispEncoding {
100     DispEncoding_Default,
101     DispEncoding_Disp8,
102     DispEncoding_Disp32,
103   };
104 
105   DispEncoding ForcedDispEncoding = DispEncoding_Default;
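  // These forced encodings correspond to assembler pseudo-prefixes written in
  // the source, e.g. "{vex}", "{vex2}", "{vex3}", "{evex}" to pick an encoding
  // and "{disp8}"/"{disp32}" to pick a displacement size, as in
  // "{vex3} vpaddd %xmm0, %xmm1, %xmm2" or "{disp32} movl %eax, (%rbx)".
  // The exact set accepted depends on the LLVM version.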
106 
107 private:
108   SMLoc consumeToken() {
109     MCAsmParser &Parser = getParser();
110     SMLoc Result = Parser.getTok().getLoc();
111     Parser.Lex();
112     return Result;
113   }
114 
115   X86TargetStreamer &getTargetStreamer() {
116     assert(getParser().getStreamer().getTargetStreamer() &&
117            "do not have a target streamer");
118     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
119     return static_cast<X86TargetStreamer &>(TS);
120   }
121 
122   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
123                             uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
124                             bool matchingInlineAsm, unsigned VariantID = 0) {
125     // In Code16GCC mode, match as 32-bit.
126     if (Code16GCC)
127       SwitchMode(X86::Mode32Bit);
128     unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
129                                        MissingFeatures, matchingInlineAsm,
130                                        VariantID);
131     if (Code16GCC)
132       SwitchMode(X86::Mode16Bit);
133     return rv;
134   }
135 
136   enum InfixCalculatorTok {
137     IC_OR = 0,
138     IC_XOR,
139     IC_AND,
140     IC_LSHIFT,
141     IC_RSHIFT,
142     IC_PLUS,
143     IC_MINUS,
144     IC_MULTIPLY,
145     IC_DIVIDE,
146     IC_MOD,
147     IC_NOT,
148     IC_NEG,
149     IC_RPAREN,
150     IC_LPAREN,
151     IC_IMM,
152     IC_REGISTER,
153     IC_EQ,
154     IC_NE,
155     IC_LT,
156     IC_LE,
157     IC_GT,
158     IC_GE
159   };
160 
161   enum IntelOperatorKind {
162     IOK_INVALID = 0,
163     IOK_LENGTH,
164     IOK_SIZE,
165     IOK_TYPE,
166   };
167 
168   enum MasmOperatorKind {
169     MOK_INVALID = 0,
170     MOK_LENGTHOF,
171     MOK_SIZEOF,
172     MOK_TYPE,
173   };
174 
175   class InfixCalculator {
176     typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
177     SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
178     SmallVector<ICToken, 4> PostfixStack;
179 
180     bool isUnaryOperator(InfixCalculatorTok Op) const {
181       return Op == IC_NEG || Op == IC_NOT;
182     }
183 
184   public:
185     int64_t popOperand() {
186       assert (!PostfixStack.empty() && "Popped an empty stack!");
187       ICToken Op = PostfixStack.pop_back_val();
188       if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
189         return -1; // The invalid Scale value will be caught later by checkScale
190       return Op.second;
191     }
192     void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
193       assert ((Op == IC_IMM || Op == IC_REGISTER) &&
194               "Unexpected operand!");
195       PostfixStack.push_back(std::make_pair(Op, Val));
196     }
197 
198     void popOperator() { InfixOperatorStack.pop_back(); }
199     void pushOperator(InfixCalculatorTok Op) {
200       // Push the new operator if the stack is empty.
201       if (InfixOperatorStack.empty()) {
202         InfixOperatorStack.push_back(Op);
203         return;
204       }
205 
206       // Push the new operator if it has a higher precedence than the operator
207       // on the top of the stack or the operator on the top of the stack is a
208       // left parenthesis.
209       unsigned Idx = InfixOperatorStack.size() - 1;
210       InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
211       if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
212         InfixOperatorStack.push_back(Op);
213         return;
214       }
215 
216       // The operator on the top of the stack has higher precedence than the
217       // new operator.
218       unsigned ParenCount = 0;
219       while (1) {
220         // Nothing to process.
221         if (InfixOperatorStack.empty())
222           break;
223 
224         Idx = InfixOperatorStack.size() - 1;
225         StackOp = InfixOperatorStack[Idx];
226         if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
227           break;
228 
229         // If there is no pending right parenthesis and we see a left
230         // parenthesis, then stop processing.
231         if (!ParenCount && StackOp == IC_LPAREN)
232           break;
233 
234         if (StackOp == IC_RPAREN) {
235           ++ParenCount;
236           InfixOperatorStack.pop_back();
237         } else if (StackOp == IC_LPAREN) {
238           --ParenCount;
239           InfixOperatorStack.pop_back();
240         } else {
241           InfixOperatorStack.pop_back();
242           PostfixStack.push_back(std::make_pair(StackOp, 0));
243         }
244       }
245       // Push the new operator.
246       InfixOperatorStack.push_back(Op);
247     }
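    // Sketch of the conversion this implements (a shunting-yard variant):
    // pushing the infix sequence "2 + 3 * 4" as
    //   pushOperand(IC_IMM, 2); pushOperator(IC_PLUS);
    //   pushOperand(IC_IMM, 3); pushOperator(IC_MULTIPLY);
    //   pushOperand(IC_IMM, 4);
    // leaves the postfix stack as "2 3 4" with "+ *" still on the operator
    // stack; execute() below flushes the operators to give "2 3 4 * +".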
248 
249     int64_t execute() {
250       // Push any remaining operators onto the postfix stack.
251       while (!InfixOperatorStack.empty()) {
252         InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
253         if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
254           PostfixStack.push_back(std::make_pair(StackOp, 0));
255       }
256 
257       if (PostfixStack.empty())
258         return 0;
259 
260       SmallVector<ICToken, 16> OperandStack;
261       for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
262         ICToken Op = PostfixStack[i];
263         if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
264           OperandStack.push_back(Op);
265         } else if (isUnaryOperator(Op.first)) {
266           assert (OperandStack.size() > 0 && "Too few operands.");
267           ICToken Operand = OperandStack.pop_back_val();
268           assert (Operand.first == IC_IMM &&
269                   "Unary operation with a register!");
270           switch (Op.first) {
271           default:
272             report_fatal_error("Unexpected operator!");
273             break;
274           case IC_NEG:
275             OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
276             break;
277           case IC_NOT:
278             OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
279             break;
280           }
281         } else {
282           assert (OperandStack.size() > 1 && "Too few operands.");
283           int64_t Val;
284           ICToken Op2 = OperandStack.pop_back_val();
285           ICToken Op1 = OperandStack.pop_back_val();
286           switch (Op.first) {
287           default:
288             report_fatal_error("Unexpected operator!");
289             break;
290           case IC_PLUS:
291             Val = Op1.second + Op2.second;
292             OperandStack.push_back(std::make_pair(IC_IMM, Val));
293             break;
294           case IC_MINUS:
295             Val = Op1.second - Op2.second;
296             OperandStack.push_back(std::make_pair(IC_IMM, Val));
297             break;
298           case IC_MULTIPLY:
299             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
300                     "Multiply operation with an immediate and a register!");
301             Val = Op1.second * Op2.second;
302             OperandStack.push_back(std::make_pair(IC_IMM, Val));
303             break;
304           case IC_DIVIDE:
305             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
306                     "Divide operation with an immediate and a register!");
307             assert (Op2.second != 0 && "Division by zero!");
308             Val = Op1.second / Op2.second;
309             OperandStack.push_back(std::make_pair(IC_IMM, Val));
310             break;
311           case IC_MOD:
312             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
313                     "Modulo operation with an immediate and a register!");
314             Val = Op1.second % Op2.second;
315             OperandStack.push_back(std::make_pair(IC_IMM, Val));
316             break;
317           case IC_OR:
318             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
319                     "Or operation with an immediate and a register!");
320             Val = Op1.second | Op2.second;
321             OperandStack.push_back(std::make_pair(IC_IMM, Val));
322             break;
323           case IC_XOR:
324             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
325               "Xor operation with an immediate and a register!");
326             Val = Op1.second ^ Op2.second;
327             OperandStack.push_back(std::make_pair(IC_IMM, Val));
328             break;
329           case IC_AND:
330             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
331                     "And operation with an immediate and a register!");
332             Val = Op1.second & Op2.second;
333             OperandStack.push_back(std::make_pair(IC_IMM, Val));
334             break;
335           case IC_LSHIFT:
336             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
337                     "Left shift operation with an immediate and a register!");
338             Val = Op1.second << Op2.second;
339             OperandStack.push_back(std::make_pair(IC_IMM, Val));
340             break;
341           case IC_RSHIFT:
342             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
343                     "Right shift operation with an immediate and a register!");
344             Val = Op1.second >> Op2.second;
345             OperandStack.push_back(std::make_pair(IC_IMM, Val));
346             break;
347           case IC_EQ:
348             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
349                    "Equals operation with an immediate and a register!");
350             Val = (Op1.second == Op2.second) ? -1 : 0;
351             OperandStack.push_back(std::make_pair(IC_IMM, Val));
352             break;
353           case IC_NE:
354             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
355                    "Not-equals operation with an immediate and a register!");
356             Val = (Op1.second != Op2.second) ? -1 : 0;
357             OperandStack.push_back(std::make_pair(IC_IMM, Val));
358             break;
359           case IC_LT:
360             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
361                    "Less-than operation with an immediate and a register!");
362             Val = (Op1.second < Op2.second) ? -1 : 0;
363             OperandStack.push_back(std::make_pair(IC_IMM, Val));
364             break;
365           case IC_LE:
366             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
367                    "Less-than-or-equal operation with an immediate and a "
368                    "register!");
369             Val = (Op1.second <= Op2.second) ? -1 : 0;
370             OperandStack.push_back(std::make_pair(IC_IMM, Val));
371             break;
372           case IC_GT:
373             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
374                    "Greater-than operation with an immediate and a register!");
375             Val = (Op1.second > Op2.second) ? -1 : 0;
376             OperandStack.push_back(std::make_pair(IC_IMM, Val));
377             break;
378           case IC_GE:
379             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
380                    "Greater-than-or-equal operation with an immediate and a "
381                    "register!");
382             Val = (Op1.second >= Op2.second) ? -1 : 0;
383             OperandStack.push_back(std::make_pair(IC_IMM, Val));
384             break;
385           }
386         }
387       }
388       assert (OperandStack.size() == 1 && "Expected a single result.");
389       return OperandStack.pop_back_val().second;
390     }
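    // Continuing the sketch above: evaluating the postfix "2 3 4 * +" pops 3
    // and 4 for IC_MULTIPLY (12), then 2 and 12 for IC_PLUS, leaving 14 as the
    // single result that is returned.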
391   };
392 
393   enum IntelExprState {
394     IES_INIT,
395     IES_OR,
396     IES_XOR,
397     IES_AND,
398     IES_EQ,
399     IES_NE,
400     IES_LT,
401     IES_LE,
402     IES_GT,
403     IES_GE,
404     IES_LSHIFT,
405     IES_RSHIFT,
406     IES_PLUS,
407     IES_MINUS,
408     IES_OFFSET,
409     IES_CAST,
410     IES_NOT,
411     IES_MULTIPLY,
412     IES_DIVIDE,
413     IES_MOD,
414     IES_LBRAC,
415     IES_RBRAC,
416     IES_LPAREN,
417     IES_RPAREN,
418     IES_REGISTER,
419     IES_INTEGER,
420     IES_IDENTIFIER,
421     IES_ERROR
422   };
423 
424   class IntelExprStateMachine {
425     IntelExprState State, PrevState;
426     unsigned BaseReg, IndexReg, TmpReg, Scale;
427     int64_t Imm;
428     const MCExpr *Sym;
429     StringRef SymName;
430     InfixCalculator IC;
431     InlineAsmIdentifierInfo Info;
432     short BracCount;
433     bool MemExpr;
434     bool OffsetOperator;
435     SMLoc OffsetOperatorLoc;
436     AsmTypeInfo CurType;
437 
438     bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
439       if (Sym) {
440         ErrMsg = "cannot use more than one symbol in memory operand";
441         return true;
442       }
443       Sym = Val;
444       SymName = ID;
445       return false;
446     }
447 
448   public:
449     IntelExprStateMachine()
450         : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
451           TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
452           MemExpr(false), OffsetOperator(false) {}
453 
454     void addImm(int64_t imm) { Imm += imm; }
455     short getBracCount() const { return BracCount; }
456     bool isMemExpr() const { return MemExpr; }
457     bool isOffsetOperator() const { return OffsetOperator; }
458     SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
459     unsigned getBaseReg() const { return BaseReg; }
460     unsigned getIndexReg() const { return IndexReg; }
461     unsigned getScale() const { return Scale; }
462     const MCExpr *getSym() const { return Sym; }
463     StringRef getSymName() const { return SymName; }
464     StringRef getType() const { return CurType.Name; }
465     unsigned getSize() const { return CurType.Size; }
466     unsigned getElementSize() const { return CurType.ElementSize; }
467     unsigned getLength() const { return CurType.Length; }
468     int64_t getImm() { return Imm + IC.execute(); }
469     bool isValidEndState() const {
470       return State == IES_RBRAC || State == IES_INTEGER;
471     }
472     bool hadError() const { return State == IES_ERROR; }
473     const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
474 
475     void onOr() {
476       IntelExprState CurrState = State;
477       switch (State) {
478       default:
479         State = IES_ERROR;
480         break;
481       case IES_INTEGER:
482       case IES_RPAREN:
483       case IES_REGISTER:
484         State = IES_OR;
485         IC.pushOperator(IC_OR);
486         break;
487       }
488       PrevState = CurrState;
489     }
490     void onXor() {
491       IntelExprState CurrState = State;
492       switch (State) {
493       default:
494         State = IES_ERROR;
495         break;
496       case IES_INTEGER:
497       case IES_RPAREN:
498       case IES_REGISTER:
499         State = IES_XOR;
500         IC.pushOperator(IC_XOR);
501         break;
502       }
503       PrevState = CurrState;
504     }
505     void onAnd() {
506       IntelExprState CurrState = State;
507       switch (State) {
508       default:
509         State = IES_ERROR;
510         break;
511       case IES_INTEGER:
512       case IES_RPAREN:
513       case IES_REGISTER:
514         State = IES_AND;
515         IC.pushOperator(IC_AND);
516         break;
517       }
518       PrevState = CurrState;
519     }
520     void onEq() {
521       IntelExprState CurrState = State;
522       switch (State) {
523       default:
524         State = IES_ERROR;
525         break;
526       case IES_INTEGER:
527       case IES_RPAREN:
528       case IES_REGISTER:
529         State = IES_EQ;
530         IC.pushOperator(IC_EQ);
531         break;
532       }
533       PrevState = CurrState;
534     }
535     void onNE() {
536       IntelExprState CurrState = State;
537       switch (State) {
538       default:
539         State = IES_ERROR;
540         break;
541       case IES_INTEGER:
542       case IES_RPAREN:
543       case IES_REGISTER:
544         State = IES_NE;
545         IC.pushOperator(IC_NE);
546         break;
547       }
548       PrevState = CurrState;
549     }
550     void onLT() {
551       IntelExprState CurrState = State;
552       switch (State) {
553       default:
554         State = IES_ERROR;
555         break;
556       case IES_INTEGER:
557       case IES_RPAREN:
558       case IES_REGISTER:
559         State = IES_LT;
560         IC.pushOperator(IC_LT);
561         break;
562       }
563       PrevState = CurrState;
564     }
565     void onLE() {
566       IntelExprState CurrState = State;
567       switch (State) {
568       default:
569         State = IES_ERROR;
570         break;
571       case IES_INTEGER:
572       case IES_RPAREN:
573       case IES_REGISTER:
574         State = IES_LE;
575         IC.pushOperator(IC_LE);
576         break;
577       }
578       PrevState = CurrState;
579     }
580     void onGT() {
581       IntelExprState CurrState = State;
582       switch (State) {
583       default:
584         State = IES_ERROR;
585         break;
586       case IES_INTEGER:
587       case IES_RPAREN:
588       case IES_REGISTER:
589         State = IES_GT;
590         IC.pushOperator(IC_GT);
591         break;
592       }
593       PrevState = CurrState;
594     }
595     void onGE() {
596       IntelExprState CurrState = State;
597       switch (State) {
598       default:
599         State = IES_ERROR;
600         break;
601       case IES_INTEGER:
602       case IES_RPAREN:
603       case IES_REGISTER:
604         State = IES_GE;
605         IC.pushOperator(IC_GE);
606         break;
607       }
608       PrevState = CurrState;
609     }
610     void onLShift() {
611       IntelExprState CurrState = State;
612       switch (State) {
613       default:
614         State = IES_ERROR;
615         break;
616       case IES_INTEGER:
617       case IES_RPAREN:
618       case IES_REGISTER:
619         State = IES_LSHIFT;
620         IC.pushOperator(IC_LSHIFT);
621         break;
622       }
623       PrevState = CurrState;
624     }
625     void onRShift() {
626       IntelExprState CurrState = State;
627       switch (State) {
628       default:
629         State = IES_ERROR;
630         break;
631       case IES_INTEGER:
632       case IES_RPAREN:
633       case IES_REGISTER:
634         State = IES_RSHIFT;
635         IC.pushOperator(IC_RSHIFT);
636         break;
637       }
638       PrevState = CurrState;
639     }
640     bool onPlus(StringRef &ErrMsg) {
641       IntelExprState CurrState = State;
642       switch (State) {
643       default:
644         State = IES_ERROR;
645         break;
646       case IES_INTEGER:
647       case IES_RPAREN:
648       case IES_REGISTER:
649       case IES_OFFSET:
650         State = IES_PLUS;
651         IC.pushOperator(IC_PLUS);
652         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
653           // If we already have a BaseReg, then assume this is the IndexReg with
654           // no explicit scale.
655           if (!BaseReg) {
656             BaseReg = TmpReg;
657           } else {
658             if (IndexReg) {
659               ErrMsg = "BaseReg/IndexReg already set!";
660               return true;
661             }
662             IndexReg = TmpReg;
663             Scale = 0;
664           }
665         }
666         break;
667       }
668       PrevState = CurrState;
669       return false;
670     }
671     bool onMinus(StringRef &ErrMsg) {
672       IntelExprState CurrState = State;
673       switch (State) {
674       default:
675         State = IES_ERROR;
676         break;
677       case IES_OR:
678       case IES_XOR:
679       case IES_AND:
680       case IES_EQ:
681       case IES_NE:
682       case IES_LT:
683       case IES_LE:
684       case IES_GT:
685       case IES_GE:
686       case IES_LSHIFT:
687       case IES_RSHIFT:
688       case IES_PLUS:
689       case IES_NOT:
690       case IES_MULTIPLY:
691       case IES_DIVIDE:
692       case IES_MOD:
693       case IES_LPAREN:
694       case IES_RPAREN:
695       case IES_LBRAC:
696       case IES_RBRAC:
697       case IES_INTEGER:
698       case IES_REGISTER:
699       case IES_INIT:
700       case IES_OFFSET:
701         State = IES_MINUS;
702         // Push a minus operator if this is a binary minus, not a unary negate.
703         if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
704             CurrState == IES_INTEGER  || CurrState == IES_RBRAC  ||
705             CurrState == IES_OFFSET)
706           IC.pushOperator(IC_MINUS);
707         else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
708           // A negate operator applied to the scale is illegal.
709           ErrMsg = "Scale can't be negative";
710           return true;
711         } else
712           IC.pushOperator(IC_NEG);
713         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
714           // If we already have a BaseReg, then assume this is the IndexReg with
715           // no explicit scale.
716           if (!BaseReg) {
717             BaseReg = TmpReg;
718           } else {
719             if (IndexReg) {
720               ErrMsg = "BaseReg/IndexReg already set!";
721               return true;
722             }
723             IndexReg = TmpReg;
724             Scale = 0;
725           }
726         }
727         break;
728       }
729       PrevState = CurrState;
730       return false;
731     }
732     void onNot() {
733       IntelExprState CurrState = State;
734       switch (State) {
735       default:
736         State = IES_ERROR;
737         break;
738       case IES_OR:
739       case IES_XOR:
740       case IES_AND:
741       case IES_EQ:
742       case IES_NE:
743       case IES_LT:
744       case IES_LE:
745       case IES_GT:
746       case IES_GE:
747       case IES_LSHIFT:
748       case IES_RSHIFT:
749       case IES_PLUS:
750       case IES_MINUS:
751       case IES_NOT:
752       case IES_MULTIPLY:
753       case IES_DIVIDE:
754       case IES_MOD:
755       case IES_LPAREN:
756       case IES_LBRAC:
757       case IES_INIT:
758         State = IES_NOT;
759         IC.pushOperator(IC_NOT);
760         break;
761       }
762       PrevState = CurrState;
763     }
764     bool onRegister(unsigned Reg, StringRef &ErrMsg) {
765       IntelExprState CurrState = State;
766       switch (State) {
767       default:
768         State = IES_ERROR;
769         break;
770       case IES_PLUS:
771       case IES_LPAREN:
772       case IES_LBRAC:
773         State = IES_REGISTER;
774         TmpReg = Reg;
775         IC.pushOperand(IC_REGISTER);
776         break;
777       case IES_MULTIPLY:
778         // Index Register - Scale * Register
779         if (PrevState == IES_INTEGER) {
780           if (IndexReg) {
781             ErrMsg = "BaseReg/IndexReg already set!";
782             return true;
783           }
784           State = IES_REGISTER;
785           IndexReg = Reg;
786           // Get the scale and replace the 'Scale * Register' with '0'.
787           Scale = IC.popOperand();
788           if (checkScale(Scale, ErrMsg))
789             return true;
790           IC.pushOperand(IC_IMM);
791           IC.popOperator();
792         } else {
793           State = IES_ERROR;
794         }
795         break;
796       }
797       PrevState = CurrState;
798       return false;
799     }
800     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
801                           const InlineAsmIdentifierInfo &IDInfo,
802                           const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
803                           StringRef &ErrMsg) {
804       // InlineAsm: Treat an enum value as an integer
805       if (ParsingMSInlineAsm)
806         if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
807           return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
808       // Treat a symbolic constant like an integer
809       if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
810         return onInteger(CE->getValue(), ErrMsg);
811       PrevState = State;
812       switch (State) {
813       default:
814         State = IES_ERROR;
815         break;
816       case IES_CAST:
817       case IES_PLUS:
818       case IES_MINUS:
819       case IES_NOT:
820       case IES_INIT:
821       case IES_LBRAC:
822       case IES_LPAREN:
823         if (setSymRef(SymRef, SymRefName, ErrMsg))
824           return true;
825         MemExpr = true;
826         State = IES_INTEGER;
827         IC.pushOperand(IC_IMM);
828         if (ParsingMSInlineAsm)
829           Info = IDInfo;
830         setTypeInfo(Type);
831         break;
832       }
833       return false;
834     }
835     bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
836       IntelExprState CurrState = State;
837       switch (State) {
838       default:
839         State = IES_ERROR;
840         break;
841       case IES_PLUS:
842       case IES_MINUS:
843       case IES_NOT:
844       case IES_OR:
845       case IES_XOR:
846       case IES_AND:
847       case IES_EQ:
848       case IES_NE:
849       case IES_LT:
850       case IES_LE:
851       case IES_GT:
852       case IES_GE:
853       case IES_LSHIFT:
854       case IES_RSHIFT:
855       case IES_DIVIDE:
856       case IES_MOD:
857       case IES_MULTIPLY:
858       case IES_LPAREN:
859       case IES_INIT:
860       case IES_LBRAC:
861         State = IES_INTEGER;
862         if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
863           // Index Register - Register * Scale
864           if (IndexReg) {
865             ErrMsg = "BaseReg/IndexReg already set!";
866             return true;
867           }
868           IndexReg = TmpReg;
869           Scale = TmpInt;
870           if (checkScale(Scale, ErrMsg))
871             return true;
872           // Get the scale and replace the 'Register * Scale' with '0'.
873           IC.popOperator();
874         } else {
875           IC.pushOperand(IC_IMM, TmpInt);
876         }
877         break;
878       }
879       PrevState = CurrState;
880       return false;
881     }
882     void onStar() {
883       PrevState = State;
884       switch (State) {
885       default:
886         State = IES_ERROR;
887         break;
888       case IES_INTEGER:
889       case IES_REGISTER:
890       case IES_RPAREN:
891         State = IES_MULTIPLY;
892         IC.pushOperator(IC_MULTIPLY);
893         break;
894       }
895     }
896     void onDivide() {
897       PrevState = State;
898       switch (State) {
899       default:
900         State = IES_ERROR;
901         break;
902       case IES_INTEGER:
903       case IES_RPAREN:
904         State = IES_DIVIDE;
905         IC.pushOperator(IC_DIVIDE);
906         break;
907       }
908     }
909     void onMod() {
910       PrevState = State;
911       switch (State) {
912       default:
913         State = IES_ERROR;
914         break;
915       case IES_INTEGER:
916       case IES_RPAREN:
917         State = IES_MOD;
918         IC.pushOperator(IC_MOD);
919         break;
920       }
921     }
922     bool onLBrac() {
923       if (BracCount)
924         return true;
925       PrevState = State;
926       switch (State) {
927       default:
928         State = IES_ERROR;
929         break;
930       case IES_RBRAC:
931       case IES_INTEGER:
932       case IES_RPAREN:
933         State = IES_PLUS;
934         IC.pushOperator(IC_PLUS);
935         CurType.Length = 1;
936         CurType.Size = CurType.ElementSize;
937         break;
938       case IES_INIT:
939       case IES_CAST:
940         assert(!BracCount && "BracCount should be zero at the start of parsing");
941         State = IES_LBRAC;
942         break;
943       }
944       MemExpr = true;
945       BracCount++;
946       return false;
947     }
948     bool onRBrac() {
949       IntelExprState CurrState = State;
950       switch (State) {
951       default:
952         State = IES_ERROR;
953         break;
954       case IES_INTEGER:
955       case IES_OFFSET:
956       case IES_REGISTER:
957       case IES_RPAREN:
958         if (BracCount-- != 1)
959           return true;
960         State = IES_RBRAC;
961         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
962           // If we already have a BaseReg, then assume this is the IndexReg with
963           // no explicit scale.
964           if (!BaseReg) {
965             BaseReg = TmpReg;
966           } else {
967             assert (!IndexReg && "BaseReg/IndexReg already set!");
968             IndexReg = TmpReg;
969             Scale = 0;
970           }
971         }
972         break;
973       }
974       PrevState = CurrState;
975       return false;
976     }
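    // Illustrative sketch: an Intel operand such as "[edx + esi*4 + 16]"
    // drives this state machine roughly as
    //   onLBrac(); onRegister(EDX); onPlus(); onRegister(ESI); onStar();
    //   onInteger(4); onPlus(); onInteger(16); onRBrac();
    // ending with BaseReg = EDX, IndexReg = ESI, Scale = 4, and the infix
    // calculator yielding the constant displacement 16 via getImm().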
977     void onLParen() {
978       IntelExprState CurrState = State;
979       switch (State) {
980       default:
981         State = IES_ERROR;
982         break;
983       case IES_PLUS:
984       case IES_MINUS:
985       case IES_NOT:
986       case IES_OR:
987       case IES_XOR:
988       case IES_AND:
989       case IES_EQ:
990       case IES_NE:
991       case IES_LT:
992       case IES_LE:
993       case IES_GT:
994       case IES_GE:
995       case IES_LSHIFT:
996       case IES_RSHIFT:
997       case IES_MULTIPLY:
998       case IES_DIVIDE:
999       case IES_MOD:
1000       case IES_LPAREN:
1001       case IES_INIT:
1002       case IES_LBRAC:
1003         State = IES_LPAREN;
1004         IC.pushOperator(IC_LPAREN);
1005         break;
1006       }
1007       PrevState = CurrState;
1008     }
1009     void onRParen() {
1010       PrevState = State;
1011       switch (State) {
1012       default:
1013         State = IES_ERROR;
1014         break;
1015       case IES_INTEGER:
1016       case IES_OFFSET:
1017       case IES_REGISTER:
1018       case IES_RBRAC:
1019       case IES_RPAREN:
1020         State = IES_RPAREN;
1021         IC.pushOperator(IC_RPAREN);
1022         break;
1023       }
1024     }
1025     bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1026                   const InlineAsmIdentifierInfo &IDInfo,
1027                   bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1028       PrevState = State;
1029       switch (State) {
1030       default:
1031         ErrMsg = "unexpected offset operator expression";
1032         return true;
1033       case IES_PLUS:
1034       case IES_INIT:
1035       case IES_LBRAC:
1036         if (setSymRef(Val, ID, ErrMsg))
1037           return true;
1038         OffsetOperator = true;
1039         OffsetOperatorLoc = OffsetLoc;
1040         State = IES_OFFSET;
1041         // As we cannot yet resolve the actual value (offset), we retain
1042         // the requested semantics by pushing a '0' onto the operand stack.
1043         IC.pushOperand(IC_IMM);
1044         if (ParsingMSInlineAsm) {
1045           Info = IDInfo;
1046         }
1047         break;
1048       }
1049       return false;
1050     }
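    // For example, in MASM-style or MS inline assembly, "mov eax, OFFSET foo"
    // routes the symbol "foo" through this handler: its value is not known at
    // parse time, so a 0 placeholder is pushed and the symbol is kept in Sym.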
1051     void onCast(AsmTypeInfo Info) {
1052       PrevState = State;
1053       switch (State) {
1054       default:
1055         State = IES_ERROR;
1056         break;
1057       case IES_LPAREN:
1058         setTypeInfo(Info);
1059         State = IES_CAST;
1060         break;
1061       }
1062     }
1063     void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1064   };
1065 
1066   bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
1067              bool MatchingInlineAsm = false) {
1068     MCAsmParser &Parser = getParser();
1069     if (MatchingInlineAsm) {
1070       if (!getLexer().isAtStartOfStatement())
1071         Parser.eatToEndOfStatement();
1072       return false;
1073     }
1074     return Parser.Error(L, Msg, Range);
1075   }
1076 
1077   bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
1078                            SMLoc EndLoc);
1079   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1080                      bool RestoreOnFailure);
1081 
1082   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1083   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1084   bool IsSIReg(unsigned Reg);
1085   unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
1086   void
1087   AddDefaultSrcDestOperands(OperandVector &Operands,
1088                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1089                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1090   bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1091                                OperandVector &FinalOperands);
1092   bool ParseOperand(OperandVector &Operands);
1093   bool ParseATTOperand(OperandVector &Operands);
1094   bool ParseIntelOperand(OperandVector &Operands);
1095   bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1096                                 InlineAsmIdentifierInfo &Info, SMLoc &End);
1097   bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1098   unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1099   unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1100   unsigned IdentifyMasmOperator(StringRef Name);
1101   bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1102   bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1103   bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1104                                bool &ParseError, SMLoc &End);
1105   bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1106                               bool &ParseError, SMLoc &End);
1107   void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1108                               SMLoc End);
1109   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1110   bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1111                                      InlineAsmIdentifierInfo &Info,
1112                                      bool IsUnevaluatedOperand, SMLoc &End,
1113                                      bool IsParsingOffsetOperator = false);
1114 
1115   bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
1116                        SMLoc EndLoc, OperandVector &Operands);
1117 
1118   X86::CondCode ParseConditionCode(StringRef CCode);
1119 
1120   bool ParseIntelMemoryOperandSize(unsigned &Size);
1121   bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1122                                unsigned BaseReg, unsigned IndexReg,
1123                                unsigned Scale, SMLoc Start, SMLoc End,
1124                                unsigned Size, StringRef Identifier,
1125                                const InlineAsmIdentifierInfo &Info,
1126                                OperandVector &Operands);
1127 
1128   bool parseDirectiveArch();
1129   bool parseDirectiveNops(SMLoc L);
1130   bool parseDirectiveEven(SMLoc L);
1131   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1132 
1133   /// CodeView FPO data directives.
1134   bool parseDirectiveFPOProc(SMLoc L);
1135   bool parseDirectiveFPOSetFrame(SMLoc L);
1136   bool parseDirectiveFPOPushReg(SMLoc L);
1137   bool parseDirectiveFPOStackAlloc(SMLoc L);
1138   bool parseDirectiveFPOStackAlign(SMLoc L);
1139   bool parseDirectiveFPOEndPrologue(SMLoc L);
1140   bool parseDirectiveFPOEndProc(SMLoc L);
1141 
1142   /// SEH directives.
1143   bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
1144   bool parseDirectiveSEHPushReg(SMLoc);
1145   bool parseDirectiveSEHSetFrame(SMLoc);
1146   bool parseDirectiveSEHSaveReg(SMLoc);
1147   bool parseDirectiveSEHSaveXMM(SMLoc);
1148   bool parseDirectiveSEHPushFrame(SMLoc);
1149 
1150   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1151 
1152   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1153   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1154 
1155   // Load Value Injection (LVI) Mitigations for machine code
1156   void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1157   void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1158   void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1159 
1160   /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1161   /// instrumentation around Inst.
1162   void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1163 
1164   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1165                                OperandVector &Operands, MCStreamer &Out,
1166                                uint64_t &ErrorInfo,
1167                                bool MatchingInlineAsm) override;
1168 
1169   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1170                          MCStreamer &Out, bool MatchingInlineAsm);
1171 
1172   bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1173                            bool MatchingInlineAsm);
1174 
1175   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
1176                                   OperandVector &Operands, MCStreamer &Out,
1177                                   uint64_t &ErrorInfo,
1178                                   bool MatchingInlineAsm);
1179 
1180   bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
1181                                     OperandVector &Operands, MCStreamer &Out,
1182                                     uint64_t &ErrorInfo,
1183                                     bool MatchingInlineAsm);
1184 
1185   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1186 
1187   /// Parses AVX512-specific operand primitives: masked registers ({%k<NUM>},
1188   /// {z}) and memory broadcasting ({1to<NUM>}), updating the Operands vector
1189   /// if required. Returns false if no parsing errors occurred, true otherwise.
1190   bool HandleAVX512Operand(OperandVector &Operands);
1191 
1192   bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1193 
1194   bool is64BitMode() const {
1195     // FIXME: Can tablegen auto-generate this?
1196     return getSTI().getFeatureBits()[X86::Mode64Bit];
1197   }
1198   bool is32BitMode() const {
1199     // FIXME: Can tablegen auto-generate this?
1200     return getSTI().getFeatureBits()[X86::Mode32Bit];
1201   }
1202   bool is16BitMode() const {
1203     // FIXME: Can tablegen auto-generate this?
1204     return getSTI().getFeatureBits()[X86::Mode16Bit];
1205   }
1206   void SwitchMode(unsigned mode) {
1207     MCSubtargetInfo &STI = copySTI();
1208     FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
1209     FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
1210     FeatureBitset FB = ComputeAvailableFeatures(
1211       STI.ToggleFeature(OldMode.flip(mode)));
1212     setAvailableFeatures(FB);
1213 
1214     assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
1215   }
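  // SwitchMode is invoked, for instance, when a .code16/.code32/.code64
  // directive is parsed (see ParseDirectiveCode) and, in MatchInstruction
  // above, to temporarily match 32-bit encodings while in Code16GCC mode.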
1216 
1217   unsigned getPointerWidth() {
1218     if (is16BitMode()) return 16;
1219     if (is32BitMode()) return 32;
1220     if (is64BitMode()) return 64;
1221     llvm_unreachable("invalid mode");
1222   }
1223 
1224   bool isParsingIntelSyntax() {
1225     return getParser().getAssemblerDialect();
1226   }
1227 
1228   /// @name Auto-generated Matcher Functions
1229   /// {
1230 
1231 #define GET_ASSEMBLER_HEADER
1232 #include "X86GenAsmMatcher.inc"
1233 
1234   /// }
1235 
1236 public:
1237   enum X86MatchResultTy {
1238     Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1239 #define GET_OPERAND_DIAGNOSTIC_TYPES
1240 #include "X86GenAsmMatcher.inc"
1241   };
1242 
1243   X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
1244                const MCInstrInfo &mii, const MCTargetOptions &Options)
1245       : MCTargetAsmParser(Options, sti, mii),  InstInfo(nullptr),
1246         Code16GCC(false) {
1247 
1248     Parser.addAliasForDirective(".word", ".2byte");
1249 
1250     // Initialize the set of available features.
1251     setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1252   }
1253 
1254   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1255   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1256                                         SMLoc &EndLoc) override;
1257 
1258   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1259 
1260   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1261                         SMLoc NameLoc, OperandVector &Operands) override;
1262 
1263   bool ParseDirective(AsmToken DirectiveID) override;
1264 };
1265 } // end anonymous namespace
1266 
1267 /// @name Auto-generated Match Functions
1268 /// {
1269 
1270 static unsigned MatchRegisterName(StringRef Name);
1271 
1272 /// }
1273 
1274 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1275                                             unsigned Scale, bool Is64BitMode,
1276                                             StringRef &ErrMsg) {
1277   // If we have both a base register and an index register, make sure they are
1278   // both 64-bit or 32-bit registers.
1279   // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1280 
1281   if (BaseReg != 0 &&
1282       !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1283         X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1284         X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1285         X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1286     ErrMsg = "invalid base+index expression";
1287     return true;
1288   }
1289 
1290   if (IndexReg != 0 &&
1291       !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1292         X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1293         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1294         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1295         X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1296         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1297         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1298     ErrMsg = "invalid base+index expression";
1299     return true;
1300   }
1301 
1302   if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1303       IndexReg == X86::EIP || IndexReg == X86::RIP ||
1304       IndexReg == X86::ESP || IndexReg == X86::RSP) {
1305     ErrMsg = "invalid base+index expression";
1306     return true;
1307   }
1308 
1309   // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1310   // and then only in non-64-bit modes.
1311   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1312       (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1313                        BaseReg != X86::SI && BaseReg != X86::DI))) {
1314     ErrMsg = "invalid 16-bit base register";
1315     return true;
1316   }
1317 
1318   if (BaseReg == 0 &&
1319       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1320     ErrMsg = "16-bit memory operand may not include only index register";
1321     return true;
1322   }
1323 
1324   if (BaseReg != 0 && IndexReg != 0) {
1325     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1326         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1327          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1328          IndexReg == X86::EIZ)) {
1329       ErrMsg = "base register is 64-bit, but index register is not";
1330       return true;
1331     }
1332     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1333         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1334          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1335          IndexReg == X86::RIZ)) {
1336       ErrMsg = "base register is 32-bit, but index register is not";
1337       return true;
1338     }
1339     if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1340       if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1341           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1342         ErrMsg = "base register is 16-bit, but index register is not";
1343         return true;
1344       }
1345       if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1346           (IndexReg != X86::SI && IndexReg != X86::DI)) {
1347         ErrMsg = "invalid 16-bit base/index register combination";
1348         return true;
1349       }
1350     }
1351   }
1352 
1353   // RIP/EIP-relative addressing is only supported in 64-bit mode.
1354   if (!Is64BitMode && BaseReg != 0 &&
1355       (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1356     ErrMsg = "IP-relative addressing requires 64-bit mode";
1357     return true;
1358   }
1359 
1360   return checkScale(Scale, ErrMsg);
1361 }
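// A few concrete cases this check accepts or rejects (illustrative):
//   (%rax,%rcx,4)  -> ok: 64-bit base and index, scale 4
//   (%rax,%ecx)    -> "base register is 64-bit, but index register is not"
//   (%bx,%si)      -> ok in 16/32-bit modes, rejected as a base in 64-bit mode
//   (%rip,%rax)    -> rejected: RIP-relative addressing cannot take an index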
1362 
1363 bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
1364                                        SMLoc StartLoc, SMLoc EndLoc) {
1365   // If we encounter a %, ignore it. This code handles registers with and
1366   // without the prefix; unprefixed registers can occur in CFI directives.
1367   RegName.consume_front("%");
1368 
1369   RegNo = MatchRegisterName(RegName);
1370 
1371   // If the match failed, try the register name as lowercase.
1372   if (RegNo == 0)
1373     RegNo = MatchRegisterName(RegName.lower());
1374 
1375   // The "flags" and "mxcsr" registers cannot be referenced directly.
1376   // Treat them as identifiers instead.
1377   if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1378       (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1379     RegNo = 0;
1380 
1381   if (!is64BitMode()) {
1382     // FIXME: This should be done using Requires<Not64BitMode> and
1383     // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1384     // checked.
1385     if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1386         X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1387         X86II::isX86_64NonExtLowByteReg(RegNo) ||
1388         X86II::isX86_64ExtendedReg(RegNo)) {
1389       return Error(StartLoc,
1390                    "register %" + RegName + " is only available in 64-bit mode",
1391                    SMRange(StartLoc, EndLoc));
1392     }
1393   }
1394 
1395   // If this is "db[0-15]", match it as an alias
1396   // for dr[0-15].
1397   if (RegNo == 0 && RegName.startswith("db")) {
1398     if (RegName.size() == 3) {
1399       switch (RegName[2]) {
1400       case '0':
1401         RegNo = X86::DR0;
1402         break;
1403       case '1':
1404         RegNo = X86::DR1;
1405         break;
1406       case '2':
1407         RegNo = X86::DR2;
1408         break;
1409       case '3':
1410         RegNo = X86::DR3;
1411         break;
1412       case '4':
1413         RegNo = X86::DR4;
1414         break;
1415       case '5':
1416         RegNo = X86::DR5;
1417         break;
1418       case '6':
1419         RegNo = X86::DR6;
1420         break;
1421       case '7':
1422         RegNo = X86::DR7;
1423         break;
1424       case '8':
1425         RegNo = X86::DR8;
1426         break;
1427       case '9':
1428         RegNo = X86::DR9;
1429         break;
1430       }
1431     } else if (RegName.size() == 4 && RegName[2] == '1') {
1432       switch (RegName[3]) {
1433       case '0':
1434         RegNo = X86::DR10;
1435         break;
1436       case '1':
1437         RegNo = X86::DR11;
1438         break;
1439       case '2':
1440         RegNo = X86::DR12;
1441         break;
1442       case '3':
1443         RegNo = X86::DR13;
1444         break;
1445       case '4':
1446         RegNo = X86::DR14;
1447         break;
1448       case '5':
1449         RegNo = X86::DR15;
1450         break;
1451       }
1452     }
1453   }
1454 
1455   if (RegNo == 0) {
1456     if (isParsingIntelSyntax())
1457       return true;
1458     return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1459   }
1460   return false;
1461 }
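// Examples of what MatchRegisterByName accepts (illustrative): "%rax" and
// "rax" both match (a leading "%" is stripped), "db7" is matched via the
// db->dr aliasing above as DR7, and "%r8d" outside of 64-bit mode produces
// the "only available in 64-bit mode" error.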
1462 
1463 bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1464                                  SMLoc &EndLoc, bool RestoreOnFailure) {
1465   MCAsmParser &Parser = getParser();
1466   MCAsmLexer &Lexer = getLexer();
1467   RegNo = 0;
1468 
1469   SmallVector<AsmToken, 5> Tokens;
1470   auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
1471     if (RestoreOnFailure) {
1472       while (!Tokens.empty()) {
1473         Lexer.UnLex(Tokens.pop_back_val());
1474       }
1475     }
1476   };
1477 
1478   const AsmToken &PercentTok = Parser.getTok();
1479   StartLoc = PercentTok.getLoc();
1480 
1481   // If we encounter a %, ignore it. This code handles registers with and
1482   // without the prefix; unprefixed registers can occur in CFI directives.
1483   if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
1484     Tokens.push_back(PercentTok);
1485     Parser.Lex(); // Eat percent token.
1486   }
1487 
1488   const AsmToken &Tok = Parser.getTok();
1489   EndLoc = Tok.getEndLoc();
1490 
1491   if (Tok.isNot(AsmToken::Identifier)) {
1492     OnFailure();
1493     if (isParsingIntelSyntax()) return true;
1494     return Error(StartLoc, "invalid register name",
1495                  SMRange(StartLoc, EndLoc));
1496   }
1497 
1498   if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
1499     OnFailure();
1500     return true;
1501   }
1502 
1503   // Parse "%st" as "%st(0)", and handle "%st(N)", which is multiple tokens.
1504   if (RegNo == X86::ST0) {
1505     Tokens.push_back(Tok);
1506     Parser.Lex(); // Eat 'st'
1507 
1508     // Check to see if we have '(4)' after %st.
1509     if (Lexer.isNot(AsmToken::LParen))
1510       return false;
1511     // Lex the paren.
1512     Tokens.push_back(Parser.getTok());
1513     Parser.Lex();
1514 
1515     const AsmToken &IntTok = Parser.getTok();
1516     if (IntTok.isNot(AsmToken::Integer)) {
1517       OnFailure();
1518       return Error(IntTok.getLoc(), "expected stack index");
1519     }
1520     switch (IntTok.getIntVal()) {
1521     case 0: RegNo = X86::ST0; break;
1522     case 1: RegNo = X86::ST1; break;
1523     case 2: RegNo = X86::ST2; break;
1524     case 3: RegNo = X86::ST3; break;
1525     case 4: RegNo = X86::ST4; break;
1526     case 5: RegNo = X86::ST5; break;
1527     case 6: RegNo = X86::ST6; break;
1528     case 7: RegNo = X86::ST7; break;
1529     default:
1530       OnFailure();
1531       return Error(IntTok.getLoc(), "invalid stack index");
1532     }
1533 
1534     // Lex IntTok
1535     Tokens.push_back(IntTok);
1536     Parser.Lex();
1537     if (Lexer.isNot(AsmToken::RParen)) {
1538       OnFailure();
1539       return Error(Parser.getTok().getLoc(), "expected ')'");
1540     }
1541 
1542     EndLoc = Parser.getTok().getEndLoc();
1543     Parser.Lex(); // Eat ')'
1544     return false;
1545   }
1546 
1547   EndLoc = Parser.getTok().getEndLoc();
1548 
1549   if (RegNo == 0) {
1550     OnFailure();
1551     if (isParsingIntelSyntax()) return true;
1552     return Error(StartLoc, "invalid register name",
1553                  SMRange(StartLoc, EndLoc));
1554   }
1555 
1556   Parser.Lex(); // Eat identifier token.
1557   return false;
1558 }
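// For example, "%st" alone is returned as ST0, while "%st(3)" also consumes
// the '(', '3' and ')' tokens and yields ST3; "%st(9)" is rejected with
// "invalid stack index".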
1559 
1560 bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1561                                  SMLoc &EndLoc) {
1562   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1563 }
1564 
1565 OperandMatchResultTy X86AsmParser::tryParseRegister(unsigned &RegNo,
1566                                                     SMLoc &StartLoc,
1567                                                     SMLoc &EndLoc) {
1568   bool Result =
1569       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1570   bool PendingErrors = getParser().hasPendingError();
1571   getParser().clearPendingErrors();
1572   if (PendingErrors)
1573     return MatchOperand_ParseFail;
1574   if (Result)
1575     return MatchOperand_NoMatch;
1576   return MatchOperand_Success;
1577 }
1578 
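// Note (illustrative, not exhaustive): the two helpers below synthesize the
// implicit (R|E)SI / (R|E)DI memory operands used by string instructions
// such as "movsb" or "lodsb" when the user does not spell them out.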
1579 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1580   bool Parse32 = is32BitMode() || Code16GCC;
1581   unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1582   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1583   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1584                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1585                                Loc, Loc, 0);
1586 }
1587 
1588 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1589   bool Parse32 = is32BitMode() || Code16GCC;
1590   unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1591   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1592   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1593                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1594                                Loc, Loc, 0);
1595 }
1596 
1597 bool X86AsmParser::IsSIReg(unsigned Reg) {
1598   switch (Reg) {
1599   default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1600   case X86::RSI:
1601   case X86::ESI:
1602   case X86::SI:
1603     return true;
1604   case X86::RDI:
1605   case X86::EDI:
1606   case X86::DI:
1607     return false;
1608   }
1609 }
1610 
1611 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1612                                           bool IsSIReg) {
1613   switch (RegClassID) {
1614   default: llvm_unreachable("Unexpected register class");
1615   case X86::GR64RegClassID:
1616     return IsSIReg ? X86::RSI : X86::RDI;
1617   case X86::GR32RegClassID:
1618     return IsSIReg ? X86::ESI : X86::EDI;
1619   case X86::GR16RegClassID:
1620     return IsSIReg ? X86::SI : X86::DI;
1621   }
1622 }
1623 
1624 void X86AsmParser::AddDefaultSrcDestOperands(
1625     OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1626     std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1627   if (isParsingIntelSyntax()) {
1628     Operands.push_back(std::move(Dst));
1629     Operands.push_back(std::move(Src));
1630   }
1631   else {
1632     Operands.push_back(std::move(Src));
1633     Operands.push_back(std::move(Dst));
1634   }
1635 }
1636 
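// Sketch of the intent (illustrative): when a string instruction is written
// with explicit memory operands, those operands only determine the operand
// size; the base registers are checked against, and rewritten to, the
// canonical (R|E)SI / (R|E)DI pair, with a warning if they differ.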
1637 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1638                                            OperandVector &FinalOperands) {
1639 
1640   if (OrigOperands.size() > 1) {
1641     // Check that sizes match; OrigOperands also contains the instruction name.
1642     assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1643            "Operand size mismatch");
1644 
1645     SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1646     // Verify types match
1647     int RegClassID = -1;
1648     for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1649       X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1650       X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1651 
1652       if (FinalOp.isReg() &&
1653           (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1654         // Return false and let a normal complaint about bogus operands happen
1655         return false;
1656 
1657       if (FinalOp.isMem()) {
1658 
1659         if (!OrigOp.isMem())
1660           // Return false and let a normal complaint about bogus operands happen
1661           return false;
1662 
1663         unsigned OrigReg = OrigOp.Mem.BaseReg;
1664         unsigned FinalReg = FinalOp.Mem.BaseReg;
1665 
1666         // If we've already encountered a register class, make sure all register
1667         // bases are of the same register class.
1668         if (RegClassID != -1 &&
1669             !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1670           return Error(OrigOp.getStartLoc(),
1671                        "mismatching source and destination index registers");
1672         }
1673 
1674         if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1675           RegClassID = X86::GR64RegClassID;
1676         else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1677           RegClassID = X86::GR32RegClassID;
1678         else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1679           RegClassID = X86::GR16RegClassID;
1680         else
1681           // Unexpected register class type
1682           // Return false and let a normal complaint about bogus operands happen
1683           return false;
1684 
1685         bool IsSI = IsSIReg(FinalReg);
1686         FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1687 
1688         if (FinalReg != OrigReg) {
1689           std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1690           Warnings.push_back(std::make_pair(
1691               OrigOp.getStartLoc(),
1692               "memory operand is only for determining the size, " + RegName +
1693                   " will be used for the location"));
1694         }
1695 
1696         FinalOp.Mem.Size = OrigOp.Mem.Size;
1697         FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1698         FinalOp.Mem.BaseReg = FinalReg;
1699       }
1700     }
1701 
1702     // Produce warnings only if all the operands passed the adjustment, to
1703     // prevent legal cases like "movsd (%rax), %xmm0" from producing warnings.
1704     for (auto &WarningMsg : Warnings) {
1705       Warning(WarningMsg.first, WarningMsg.second);
1706     }
1707 
1708     // Remove old operands
1709     for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1710       OrigOperands.pop_back();
1711   }
1712   // Append the new operands (moved individually, as they are not copyable).
1713   for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1714     OrigOperands.push_back(std::move(FinalOperands[i]));
1715 
1716   return false;
1717 }
1718 
1719 bool X86AsmParser::ParseOperand(OperandVector &Operands) {
1720   if (isParsingIntelSyntax())
1721     return ParseIntelOperand(Operands);
1722 
1723   return ParseATTOperand(Operands);
1724 }
1725 
1726 bool X86AsmParser::CreateMemForMSInlineAsm(
1727     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1728     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1729     const InlineAsmIdentifierInfo &Info, OperandVector &Operands) {
1730   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1731   // some other label reference.
1732   if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1733     // Insert an explicit size if the user didn't have one.
1734     if (!Size) {
1735       Size = getPointerWidth();
1736       InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1737                                           /*Len=*/0, Size);
1738     }
1739     // Create an absolute memory reference in order to match against
1740     // instructions taking a PC relative operand.
1741     Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1742                                              End, Size, Identifier,
1743                                              Info.Label.Decl));
1744     return false;
1745   }
1746   // We either have a direct symbol reference, or an offset from a symbol.  The
1747   // parser always puts the symbol on the LHS, so look there for size
1748   // calculation purposes.
1749   unsigned FrontendSize = 0;
1750   void *Decl = nullptr;
1751   bool IsGlobalLV = false;
1752   if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1753     // Size is in terms of bits in this context.
1754     FrontendSize = Info.Var.Type * 8;
1755     Decl = Info.Var.Decl;
1756     IsGlobalLV = Info.Var.IsGlobalLV;
1757   }
1758   // It is common for MS InlineAsm to use a global variable and one or two
1759   // registers in a memory expression, even though it is not accessible via rip/eip.
1760   if (IsGlobalLV && (BaseReg || IndexReg)) {
1761     Operands.push_back(
1762         X86Operand::CreateMem(getPointerWidth(), Disp, Start, End));
1763     return false;
1764   }
1765   // Otherwise, we set the base register to a non-zero value
1766   // if we don't know the actual value at this time.  This is necessary to
1767   // get the matching correct in some cases.
1768   BaseReg = BaseReg ? BaseReg : 1;
1769   Operands.push_back(X86Operand::CreateMem(
1770       getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
1771       Size,
1772       /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
1773   return false;
1774 }
1775 
1776 // Some binary bitwise operators have a named synonym.
1777 // Query a candidate string for being such a named operator and, if so,
1778 // invoke the appropriate handler.
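// Illustrative example: in the Intel-syntax expression "1 shl 4 and 0Fh",
// the identifiers "shl" and "and" are handled here as operators rather than
// as symbol references.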
1779 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
1780                                            IntelExprStateMachine &SM,
1781                                            bool &ParseError, SMLoc &End) {
1782   // A named operator should be either lower or upper case, but not a mix...
1783   // except in MASM, which uses full case-insensitivity.
1784   if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
1785       !getParser().isParsingMasm())
1786     return false;
1787   if (Name.equals_lower("not")) {
1788     SM.onNot();
1789   } else if (Name.equals_lower("or")) {
1790     SM.onOr();
1791   } else if (Name.equals_lower("shl")) {
1792     SM.onLShift();
1793   } else if (Name.equals_lower("shr")) {
1794     SM.onRShift();
1795   } else if (Name.equals_lower("xor")) {
1796     SM.onXor();
1797   } else if (Name.equals_lower("and")) {
1798     SM.onAnd();
1799   } else if (Name.equals_lower("mod")) {
1800     SM.onMod();
1801   } else if (Name.equals_lower("offset")) {
1802     SMLoc OffsetLoc = getTok().getLoc();
1803     const MCExpr *Val = nullptr;
1804     StringRef ID;
1805     InlineAsmIdentifierInfo Info;
1806     ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
1807     if (ParseError)
1808       return true;
1809     StringRef ErrMsg;
1810     ParseError =
1811         SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
1812     if (ParseError)
1813       return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
1814   } else {
1815     return false;
1816   }
1817   if (!Name.equals_lower("offset"))
1818     End = consumeToken();
1819   return true;
1820 }
1821 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1822                                           IntelExprStateMachine &SM,
1823                                           bool &ParseError, SMLoc &End) {
1824   if (Name.equals_lower("eq")) {
1825     SM.onEq();
1826   } else if (Name.equals_lower("ne")) {
1827     SM.onNE();
1828   } else if (Name.equals_lower("lt")) {
1829     SM.onLT();
1830   } else if (Name.equals_lower("le")) {
1831     SM.onLE();
1832   } else if (Name.equals_lower("gt")) {
1833     SM.onGT();
1834   } else if (Name.equals_lower("ge")) {
1835     SM.onGE();
1836   } else {
1837     return false;
1838   }
1839   End = consumeToken();
1840   return true;
1841 }
1842 
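// Illustrative inputs this state machine is fed (assumed, not exhaustive):
//   [rax + 4*rbx + 16]          - base/index/scale/displacement
//   offset myVar                - address-of via the OFFSET operator
//   myStruct.field + 8          - dot operator plus immediate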
1843 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1844   MCAsmParser &Parser = getParser();
1845   const AsmToken &Tok = Parser.getTok();
1846   StringRef ErrMsg;
1847 
1848   AsmToken::TokenKind PrevTK = AsmToken::Error;
1849   bool Done = false;
1850   while (!Done) {
1851     bool UpdateLocLex = true;
1852     AsmToken::TokenKind TK = getLexer().getKind();
1853 
1854     switch (TK) {
1855     default:
1856       if ((Done = SM.isValidEndState()))
1857         break;
1858       return Error(Tok.getLoc(), "unknown token in expression");
1859     case AsmToken::Error:
1860       return Error(getLexer().getErrLoc(), getLexer().getErr());
1861       break;
1862     case AsmToken::EndOfStatement:
1863       Done = true;
1864       break;
1865     case AsmToken::Real:
1866       // DotOperator: [ebx].0
1867       UpdateLocLex = false;
1868       if (ParseIntelDotOperator(SM, End))
1869         return true;
1870       break;
1871     case AsmToken::Dot:
1872       if (!Parser.isParsingMasm()) {
1873         if ((Done = SM.isValidEndState()))
1874           break;
1875         return Error(Tok.getLoc(), "unknown token in expression");
1876       }
1877       // MASM allows spaces around the dot operator (e.g., "var . x")
1878       Lex();
1879       UpdateLocLex = false;
1880       if (ParseIntelDotOperator(SM, End))
1881         return true;
1882       break;
1883     case AsmToken::Dollar:
1884       if (!Parser.isParsingMasm()) {
1885         if ((Done = SM.isValidEndState()))
1886           break;
1887         return Error(Tok.getLoc(), "unknown token in expression");
1888       }
1889       LLVM_FALLTHROUGH;
1890     case AsmToken::String: {
1891       if (Parser.isParsingMasm()) {
1892         // MASM parsers handle strings in expressions as constants.
1893         SMLoc ValueLoc = Tok.getLoc();
1894         int64_t Res;
1895         const MCExpr *Val;
1896         if (Parser.parsePrimaryExpr(Val, End, nullptr))
1897           return true;
1898         UpdateLocLex = false;
1899         if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1900           return Error(ValueLoc, "expected absolute value");
1901         if (SM.onInteger(Res, ErrMsg))
1902           return Error(ValueLoc, ErrMsg);
1903         break;
1904       }
1905       LLVM_FALLTHROUGH;
1906     }
1907     case AsmToken::At:
1908     case AsmToken::Identifier: {
1909       SMLoc IdentLoc = Tok.getLoc();
1910       StringRef Identifier = Tok.getString();
1911       UpdateLocLex = false;
1912       if (Parser.isParsingMasm()) {
1913         size_t DotOffset = Identifier.find_first_of('.');
1914         if (DotOffset != StringRef::npos) {
1915           consumeToken();
1916           StringRef LHS = Identifier.slice(0, DotOffset);
1917           StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
1918           StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
1919           if (!RHS.empty()) {
1920             getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1921           }
1922           getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1923           if (!LHS.empty()) {
1924             getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1925           }
1926           break;
1927         }
1928       }
1929       // (MASM only) <TYPE> PTR operator
1930       if (Parser.isParsingMasm()) {
1931         const AsmToken &NextTok = getLexer().peekTok();
1932         if (NextTok.is(AsmToken::Identifier) &&
1933             NextTok.getIdentifier().equals_lower("ptr")) {
1934           AsmTypeInfo Info;
1935           if (Parser.lookUpType(Identifier, Info))
1936             return Error(Tok.getLoc(), "unknown type");
1937           SM.onCast(Info);
1938           // Eat type and PTR.
1939           consumeToken();
1940           End = consumeToken();
1941           break;
1942         }
1943       }
1944       // Register, or (MASM only) <register>.<field>
1945       unsigned Reg;
1946       if (Tok.is(AsmToken::Identifier)) {
1947         if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1948           if (SM.onRegister(Reg, ErrMsg))
1949             return Error(IdentLoc, ErrMsg);
1950           break;
1951         }
1952         if (Parser.isParsingMasm()) {
1953           const std::pair<StringRef, StringRef> IDField =
1954               Tok.getString().split('.');
1955           const StringRef ID = IDField.first, Field = IDField.second;
1956           SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1957           if (!Field.empty() &&
1958               !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
1959             if (SM.onRegister(Reg, ErrMsg))
1960               return Error(IdentLoc, ErrMsg);
1961 
1962             AsmFieldInfo Info;
1963             SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
1964             if (Parser.lookUpField(Field, Info))
1965               return Error(FieldStartLoc, "unknown offset");
1966             else if (SM.onPlus(ErrMsg))
1967               return Error(getTok().getLoc(), ErrMsg);
1968             else if (SM.onInteger(Info.Offset, ErrMsg))
1969               return Error(IdentLoc, ErrMsg);
1970             SM.setTypeInfo(Info.Type);
1971 
1972             End = consumeToken();
1973             break;
1974           }
1975         }
1976       }
1977       // Operator synonym ("not", "or", etc.)
1978       bool ParseError = false;
1979       if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
1980         if (ParseError)
1981           return true;
1982         break;
1983       }
1984       if (Parser.isParsingMasm() &&
1985           ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
1986         if (ParseError)
1987           return true;
1988         break;
1989       }
1990       // Symbol reference, when parsing assembly content
1991       InlineAsmIdentifierInfo Info;
1992       AsmFieldInfo FieldInfo;
1993       const MCExpr *Val;
1994       if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
1995         // MS Dot Operator expression
1996         if (Identifier.count('.') &&
1997             (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
1998           if (ParseIntelDotOperator(SM, End))
1999             return true;
2000           break;
2001         }
2002       }
2003       if (isParsingMSInlineAsm()) {
2004         // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2005         if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2006           if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2007             if (SM.onInteger(Val, ErrMsg))
2008               return Error(IdentLoc, ErrMsg);
2009           } else {
2010             return true;
2011           }
2012           break;
2013         }
2014         // MS InlineAsm identifier
2015         // Call parseIdentifier() to combine @ with the identifier behind it.
2016         if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2017           return Error(IdentLoc, "expected identifier");
2018         if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2019           return true;
2020         else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2021                                      true, ErrMsg))
2022           return Error(IdentLoc, ErrMsg);
2023         break;
2024       }
2025       if (Parser.isParsingMasm()) {
2026         if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2027           int64_t Val;
2028           if (ParseMasmOperator(OpKind, Val))
2029             return true;
2030           if (SM.onInteger(Val, ErrMsg))
2031             return Error(IdentLoc, ErrMsg);
2032           break;
2033         }
2034         if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2035           // Field offset immediate; <TYPE>.<field specification>
2036           Lex(); // eat type
2037           bool EndDot = parseOptionalToken(AsmToken::Dot);
2038           while (EndDot || (getTok().is(AsmToken::Identifier) &&
2039                             getTok().getString().startswith("."))) {
2040             getParser().parseIdentifier(Identifier);
2041             if (!EndDot)
2042               Identifier.consume_front(".");
2043             EndDot = Identifier.consume_back(".");
2044             if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2045                                         FieldInfo)) {
2046               SMLoc IDEnd =
2047                   SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2048               return Error(IdentLoc, "Unable to lookup field reference!",
2049                            SMRange(IdentLoc, IDEnd));
2050             }
2051             if (!EndDot)
2052               EndDot = parseOptionalToken(AsmToken::Dot);
2053           }
2054           if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2055             return Error(IdentLoc, ErrMsg);
2056           break;
2057         }
2058       }
2059       if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2060         return Error(Tok.getLoc(), "Unexpected identifier!");
2061       } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2062                                      false, ErrMsg)) {
2063         return Error(IdentLoc, ErrMsg);
2064       }
2065       break;
2066     }
2067     case AsmToken::Integer: {
2068       // Look for 'b' or 'f' following an Integer as a directional label
2069       SMLoc Loc = getTok().getLoc();
2070       int64_t IntVal = getTok().getIntVal();
2071       End = consumeToken();
2072       UpdateLocLex = false;
2073       if (getLexer().getKind() == AsmToken::Identifier) {
2074         StringRef IDVal = getTok().getString();
2075         if (IDVal == "f" || IDVal == "b") {
2076           MCSymbol *Sym =
2077               getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2078           MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2079           const MCExpr *Val =
2080               MCSymbolRefExpr::create(Sym, Variant, getContext());
2081           if (IDVal == "b" && Sym->isUndefined())
2082             return Error(Loc, "invalid reference to undefined symbol");
2083           StringRef Identifier = Sym->getName();
2084           InlineAsmIdentifierInfo Info;
2085           AsmTypeInfo Type;
2086           if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2087                                   isParsingMSInlineAsm(), ErrMsg))
2088             return Error(Loc, ErrMsg);
2089           End = consumeToken();
2090         } else {
2091           if (SM.onInteger(IntVal, ErrMsg))
2092             return Error(Loc, ErrMsg);
2093         }
2094       } else {
2095         if (SM.onInteger(IntVal, ErrMsg))
2096           return Error(Loc, ErrMsg);
2097       }
2098       break;
2099     }
2100     case AsmToken::Plus:
2101       if (SM.onPlus(ErrMsg))
2102         return Error(getTok().getLoc(), ErrMsg);
2103       break;
2104     case AsmToken::Minus:
2105       if (SM.onMinus(ErrMsg))
2106         return Error(getTok().getLoc(), ErrMsg);
2107       break;
2108     case AsmToken::Tilde:   SM.onNot(); break;
2109     case AsmToken::Star:    SM.onStar(); break;
2110     case AsmToken::Slash:   SM.onDivide(); break;
2111     case AsmToken::Percent: SM.onMod(); break;
2112     case AsmToken::Pipe:    SM.onOr(); break;
2113     case AsmToken::Caret:   SM.onXor(); break;
2114     case AsmToken::Amp:     SM.onAnd(); break;
2115     case AsmToken::LessLess:
2116                             SM.onLShift(); break;
2117     case AsmToken::GreaterGreater:
2118                             SM.onRShift(); break;
2119     case AsmToken::LBrac:
2120       if (SM.onLBrac())
2121         return Error(Tok.getLoc(), "unexpected bracket encountered");
2122       break;
2123     case AsmToken::RBrac:
2124       if (SM.onRBrac())
2125         return Error(Tok.getLoc(), "unexpected bracket encountered");
2126       break;
2127     case AsmToken::LParen:  SM.onLParen(); break;
2128     case AsmToken::RParen:  SM.onRParen(); break;
2129     }
2130     if (SM.hadError())
2131       return Error(Tok.getLoc(), "unknown token in expression");
2132 
2133     if (!Done && UpdateLocLex)
2134       End = consumeToken();
2135 
2136     PrevTK = TK;
2137   }
2138   return false;
2139 }
2140 
2141 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2142                                           SMLoc Start, SMLoc End) {
2143   SMLoc Loc = Start;
2144   unsigned ExprLen = End.getPointer() - Start.getPointer();
2145   // Skip everything before a symbol displacement (if we have one)
2146   if (SM.getSym() && !SM.isOffsetOperator()) {
2147     StringRef SymName = SM.getSymName();
2148     if (unsigned Len = SymName.data() - Start.getPointer())
2149       InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2150     Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2151     ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2152     // If we have only a symbol then there's no need for a complex rewrite;
2153     // simply skip everything after it.
2154     if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2155       if (ExprLen)
2156         InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2157       return;
2158     }
2159   }
2160   // Build an Intel Expression rewrite
2161   StringRef BaseRegStr;
2162   StringRef IndexRegStr;
2163   StringRef OffsetNameStr;
2164   if (SM.getBaseReg())
2165     BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2166   if (SM.getIndexReg())
2167     IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2168   if (SM.isOffsetOperator())
2169     OffsetNameStr = SM.getSymName();
2170   // Emit it
2171   IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2172                  SM.getImm(), SM.isMemExpr());
2173   InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2174 }
2175 
2176 // Inline assembly may use variable names with namespace alias qualifiers.
2177 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2178     const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2179     bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2180   MCAsmParser &Parser = getParser();
2181   assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2182   Val = nullptr;
2183 
2184   StringRef LineBuf(Identifier.data());
2185   SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2186 
2187   const AsmToken &Tok = Parser.getTok();
2188   SMLoc Loc = Tok.getLoc();
2189 
2190   // Advance the token stream until the end of the current token is
2191   // after the end of what the frontend claimed.
2192   const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2193   do {
2194     End = Tok.getEndLoc();
2195     getLexer().Lex();
2196   } while (End.getPointer() < EndPtr);
2197   Identifier = LineBuf;
2198 
2199   // The frontend should end parsing on an assembler token boundary, unless it
2200   // failed parsing.
2201   assert((End.getPointer() == EndPtr ||
2202           Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
2203           "frontend claimed part of a token?");
2204 
2205   // If the identifier lookup was unsuccessful, assume that we are dealing with
2206   // a label.
2207   if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
2208     StringRef InternalName =
2209       SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2210                                          Loc, false);
2211     assert(InternalName.size() && "We should have an internal name here.");
2212     // Push a rewrite for replacing the identifier name with the internal name,
2213     // unless we are parsing the operand of an offset operator
2214     if (!IsParsingOffsetOperator)
2215       InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2216                                           InternalName);
2217     else
2218       Identifier = InternalName;
2219   } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2220     return false;
2221   // Create the symbol reference.
2222   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2223   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2224   Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2225   return false;
2226 }
2227 
2228 // ParseRoundingModeOp - Parse AVX-512 rounding mode operand.
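// Illustrative syntax (assumed): "{rn-sae}", "{rd-sae}", "{ru-sae}" and
// "{rz-sae}" select a static rounding mode, while "{sae}" suppresses
// exceptions, e.g. "vaddps zmm0, zmm1, zmm2, {rn-sae}" in Intel syntax.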
2229 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2230   MCAsmParser &Parser = getParser();
2231   const AsmToken &Tok = Parser.getTok();
2232   // Eat "{" and mark the current place.
2233   const SMLoc consumedToken = consumeToken();
2234   if (Tok.isNot(AsmToken::Identifier))
2235     return Error(Tok.getLoc(), "Expected an identifier after {");
2236   if (Tok.getIdentifier().startswith("r")){
2237     int rndMode = StringSwitch<int>(Tok.getIdentifier())
2238       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2239       .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2240       .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2241       .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2242       .Default(-1);
2243     if (-1 == rndMode)
2244       return Error(Tok.getLoc(), "Invalid rounding mode.");
2245      Parser.Lex();  // Eat "r*" of r*-sae
2246     if (!getLexer().is(AsmToken::Minus))
2247       return Error(Tok.getLoc(), "Expected - at this point");
2248     Parser.Lex();  // Eat "-"
2249     Parser.Lex();  // Eat the sae
2250     if (!getLexer().is(AsmToken::RCurly))
2251       return Error(Tok.getLoc(), "Expected } at this point");
2252     SMLoc End = Tok.getEndLoc();
2253     Parser.Lex();  // Eat "}"
2254     const MCExpr *RndModeOp =
2255       MCConstantExpr::create(rndMode, Parser.getContext());
2256     Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2257     return false;
2258   }
2259   if(Tok.getIdentifier().equals("sae")){
2260     Parser.Lex();  // Eat the sae
2261     if (!getLexer().is(AsmToken::RCurly))
2262       return Error(Tok.getLoc(), "Expected } at this point");
2263     Parser.Lex();  // Eat "}"
2264     Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2265     return false;
2266   }
2267   return Error(Tok.getLoc(), "unknown token in expression");
2268 }
2269 
2270 /// Parse the '.' operator.
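/// Illustrative forms (assumed): "[ebx].field" and "[ebx].4", where the
/// member name or literal after the '.' is folded into the displacement.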
2271 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2272                                          SMLoc &End) {
2273   const AsmToken &Tok = getTok();
2274   AsmFieldInfo Info;
2275 
2276   // Drop the optional '.'.
2277   StringRef DotDispStr = Tok.getString();
2278   if (DotDispStr.startswith("."))
2279     DotDispStr = DotDispStr.drop_front(1);
2280   StringRef TrailingDot;
2281 
2282   // .Imm gets lexed as a real.
2283   if (Tok.is(AsmToken::Real)) {
2284     APInt DotDisp;
2285     DotDispStr.getAsInteger(10, DotDisp);
2286     Info.Offset = DotDisp.getZExtValue();
2287   } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2288              Tok.is(AsmToken::Identifier)) {
2289     if (DotDispStr.endswith(".")) {
2290       TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2291       DotDispStr = DotDispStr.drop_back(1);
2292     }
2293     const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2294     const StringRef Base = BaseMember.first, Member = BaseMember.second;
2295     if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2296         getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2297         getParser().lookUpField(DotDispStr, Info) &&
2298         (!SemaCallback ||
2299          SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2300       return Error(Tok.getLoc(), "Unable to lookup field reference!");
2301   } else {
2302     return Error(Tok.getLoc(), "Unexpected token type!");
2303   }
2304 
2305   // Eat the DotExpression and update End
2306   End = SMLoc::getFromPointer(DotDispStr.data());
2307   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2308   while (Tok.getLoc().getPointer() < DotExprEndLoc)
2309     Lex();
2310   if (!TrailingDot.empty())
2311     getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2312   SM.addImm(Info.Offset);
2313   SM.setTypeInfo(Info.Type);
2314   return false;
2315 }
2316 
2317 /// Parse the 'offset' operator.
2318 /// This operator is used to specify the location of a given operand
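/// Illustrative use (assumed): "mov eax, offset myVar" yields the address of
/// myVar as an immediate rather than its contents.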
2319 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2320                                             InlineAsmIdentifierInfo &Info,
2321                                             SMLoc &End) {
2322   // Eat offset, mark start of identifier.
2323   SMLoc Start = Lex().getLoc();
2324   ID = getTok().getString();
2325   if (!isParsingMSInlineAsm()) {
2326     if ((getTok().isNot(AsmToken::Identifier) &&
2327          getTok().isNot(AsmToken::String)) ||
2328         getParser().parsePrimaryExpr(Val, End, nullptr))
2329       return Error(Start, "unexpected token!");
2330   } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2331     return Error(Start, "unable to lookup expression");
2332   } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2333     return Error(Start, "offset operator cannot yet handle constants");
2334   }
2335   return false;
2336 }
2337 
2338 // Query a candidate string for being an Intel assembly operator.
2339 // Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
2340 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2341   return StringSwitch<unsigned>(Name)
2342     .Cases("TYPE","type",IOK_TYPE)
2343     .Cases("SIZE","size",IOK_SIZE)
2344     .Cases("LENGTH","length",IOK_LENGTH)
2345     .Default(IOK_INVALID);
2346 }
2347 
2348 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
2349 /// returns the number of elements in an array.  It returns the value 1 for
2350 /// non-array variables.  The SIZE operator returns the size of a C or C++
2351 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
2352 /// TYPE operator returns the size of a C or C++ type or variable. If the
2353 /// variable is an array, TYPE returns the size of a single element.
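/// Illustrative values (assumed 4-byte int): for "int arr[10]",
/// LENGTH arr == 10, TYPE arr == 4, and SIZE arr == 40.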
2354 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2355   MCAsmParser &Parser = getParser();
2356   const AsmToken &Tok = Parser.getTok();
2357   Parser.Lex(); // Eat operator.
2358 
2359   const MCExpr *Val = nullptr;
2360   InlineAsmIdentifierInfo Info;
2361   SMLoc Start = Tok.getLoc(), End;
2362   StringRef Identifier = Tok.getString();
2363   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2364                                     /*IsUnevaluatedOperand=*/true, End))
2365     return 0;
2366 
2367   if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2368     Error(Start, "unable to lookup expression");
2369     return 0;
2370   }
2371 
2372   unsigned CVal = 0;
2373   switch(OpKind) {
2374   default: llvm_unreachable("Unexpected operand kind!");
2375   case IOK_LENGTH: CVal = Info.Var.Length; break;
2376   case IOK_SIZE: CVal = Info.Var.Size; break;
2377   case IOK_TYPE: CVal = Info.Var.Type; break;
2378   }
2379 
2380   return CVal;
2381 }
2382 
2383 // Query a candidate string for being a MASM assembly operator.
2384 // Report back its kind, or MOK_INVALID if it does not evaluate to a known one.
2385 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2386   return StringSwitch<unsigned>(Name.lower())
2387       .Case("type", MOK_TYPE)
2388       .Cases("size", "sizeof", MOK_SIZEOF)
2389       .Cases("length", "lengthof", MOK_LENGTHOF)
2390       .Default(MOK_INVALID);
2391 }
2392 
2393 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators.  The LENGTHOF operator
2394 /// returns the number of elements in an array.  It returns the value 1 for
2395 /// non-array variables.  The SIZEOF operator returns the size of a type or
2396 /// variable in bytes.  A variable's size is the product of its LENGTH and TYPE.
2397 /// The TYPE operator returns the size of a variable. If the variable is an
2398 /// array, TYPE returns the size of a single element.
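/// Illustrative values (assumed): for "arr DWORD 10 DUP (?)",
/// LENGTHOF arr == 10, TYPE arr == 4, and SIZEOF arr == 40.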
2399 bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2400   MCAsmParser &Parser = getParser();
2401   SMLoc OpLoc = Parser.getTok().getLoc();
2402   Parser.Lex(); // Eat operator.
2403 
2404   Val = 0;
2405   if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2406     // Check for SIZEOF(<type>) and TYPE(<type>).
2407     bool InParens = Parser.getTok().is(AsmToken::LParen);
2408     const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2409     AsmTypeInfo Type;
2410     if (IDTok.is(AsmToken::Identifier) &&
2411         !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2412       Val = Type.Size;
2413 
2414       // Eat tokens.
2415       if (InParens)
2416         parseToken(AsmToken::LParen);
2417       parseToken(AsmToken::Identifier);
2418       if (InParens)
2419         parseToken(AsmToken::RParen);
2420     }
2421   }
2422 
2423   if (!Val) {
2424     IntelExprStateMachine SM;
2425     SMLoc End, Start = Parser.getTok().getLoc();
2426     if (ParseIntelExpression(SM, End))
2427       return true;
2428 
2429     switch (OpKind) {
2430     default:
2431       llvm_unreachable("Unexpected operand kind!");
2432     case MOK_SIZEOF:
2433       Val = SM.getSize();
2434       break;
2435     case MOK_LENGTHOF:
2436       Val = SM.getLength();
2437       break;
2438     case MOK_TYPE:
2439       Val = SM.getElementSize();
2440       break;
2441     }
2442 
2443     if (!Val)
2444       return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2445   }
2446 
2447   return false;
2448 }
2449 
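// Illustrative example (assumed): for "mov eax, dword ptr [ebx]" this sets
// Size to 32 and consumes the "dword ptr" tokens.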
2450 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2451   Size = StringSwitch<unsigned>(getTok().getString())
2452     .Cases("BYTE", "byte", 8)
2453     .Cases("WORD", "word", 16)
2454     .Cases("DWORD", "dword", 32)
2455     .Cases("FLOAT", "float", 32)
2456     .Cases("LONG", "long", 32)
2457     .Cases("FWORD", "fword", 48)
2458     .Cases("DOUBLE", "double", 64)
2459     .Cases("QWORD", "qword", 64)
2460     .Cases("MMWORD","mmword", 64)
2461     .Cases("XWORD", "xword", 80)
2462     .Cases("TBYTE", "tbyte", 80)
2463     .Cases("XMMWORD", "xmmword", 128)
2464     .Cases("YMMWORD", "ymmword", 256)
2465     .Cases("ZMMWORD", "zmmword", 512)
2466     .Default(0);
2467   if (Size) {
2468     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2469     if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2470       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2471     Lex(); // Eat ptr.
2472   }
2473   return false;
2474 }
2475 
2476 bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
2477   MCAsmParser &Parser = getParser();
2478   const AsmToken &Tok = Parser.getTok();
2479   SMLoc Start, End;
2480 
2481   // Parse optional Size directive.
2482   unsigned Size;
2483   if (ParseIntelMemoryOperandSize(Size))
2484     return true;
2485   bool PtrInOperand = bool(Size);
2486 
2487   Start = Tok.getLoc();
2488 
2489   // Rounding mode operand.
2490   if (getLexer().is(AsmToken::LCurly))
2491     return ParseRoundingModeOp(Start, Operands);
2492 
2493   // Register operand.
2494   unsigned RegNo = 0;
2495   if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
2496     if (RegNo == X86::RIP)
2497       return Error(Start, "rip can only be used as a base register");
2498     // A Register followed by ':' is considered a segment override
2499     if (Tok.isNot(AsmToken::Colon)) {
2500       if (PtrInOperand)
2501         return Error(Start, "expected memory operand after 'ptr', "
2502                             "found register operand instead");
2503       Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2504       return false;
2505     }
2506     // An alleged segment override. Check if we have a valid segment register.
2507     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2508       return Error(Start, "invalid segment register");
2509     // Eat ':' and update Start location
2510     Start = Lex().getLoc();
2511   }
2512 
2513   // Immediates and Memory
2514   IntelExprStateMachine SM;
2515   if (ParseIntelExpression(SM, End))
2516     return true;
2517 
2518   if (isParsingMSInlineAsm())
2519     RewriteIntelExpression(SM, Start, Tok.getLoc());
2520 
2521   int64_t Imm = SM.getImm();
2522   const MCExpr *Disp = SM.getSym();
2523   const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2524   if (Disp && Imm)
2525     Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2526   if (!Disp)
2527     Disp = ImmDisp;
2528 
2529   // RegNo != 0 specifies a valid segment register,
2530   // and we are parsing a segment override
2531   if (!SM.isMemExpr() && !RegNo) {
2532     if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2533       const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2534       if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2535         // Disp includes the address of a variable; make sure this is recorded
2536         // for later handling.
2537         Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2538                                                  SM.getSymName(), Info.Var.Decl,
2539                                                  Info.Var.IsGlobalLV));
2540         return false;
2541       }
2542     }
2543 
2544     Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2545     return false;
2546   }
2547 
2548   StringRef ErrMsg;
2549   unsigned BaseReg = SM.getBaseReg();
2550   unsigned IndexReg = SM.getIndexReg();
2551   unsigned Scale = SM.getScale();
2552   if (!PtrInOperand)
2553     Size = SM.getElementSize() << 3;
2554 
2555   if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2556       (IndexReg == X86::ESP || IndexReg == X86::RSP))
2557     std::swap(BaseReg, IndexReg);
2558 
2559   // If BaseReg is a vector register and IndexReg is not, swap them unless
2560   // Scale was specified, in which case it would be an error.
2561   if (Scale == 0 &&
2562       !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2563         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2564         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2565       (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2566        X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2567        X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2568     std::swap(BaseReg, IndexReg);
2569 
2570   if (Scale != 0 &&
2571       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2572     return Error(Start, "16-bit addresses cannot have a scale");
2573 
2574   // If there was no explicit scale specified, change it to 1.
2575   if (Scale == 0)
2576     Scale = 1;
2577 
2578   // If this is a 16-bit addressing mode with the base and index in the wrong
2579   // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2580   // shared with AT&T syntax where order matters.
2581   if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2582       (IndexReg == X86::BX || IndexReg == X86::BP))
2583     std::swap(BaseReg, IndexReg);
2584 
2585   if ((BaseReg || IndexReg) &&
2586       CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2587                                       ErrMsg))
2588     return Error(Start, ErrMsg);
2589   if (isParsingMSInlineAsm())
2590     return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
2591                                    End, Size, SM.getSymName(),
2592                                    SM.getIdentifierInfo(), Operands);
2593 
2594   // When parsing x64 MS-style assembly, all memory operands default to
2595   // RIP-relative when interpreted as non-absolute references.
2596   if (Parser.isParsingMasm() && is64BitMode()) {
2597     Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
2598                                              BaseReg, IndexReg, Scale, Start,
2599                                              End, Size,
2600                                              /*DefaultBaseReg=*/X86::RIP));
2601     return false;
2602   }
2603 
2604   if ((BaseReg || IndexReg || RegNo))
2605     Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
2606                                              BaseReg, IndexReg, Scale, Start,
2607                                              End, Size));
2608   else
2609     Operands.push_back(
2610         X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
2611   return false;
2612 }
2613 
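// Illustrative AT&T operands dispatched below (assumed): "$42" (immediate),
// "%eax" (register), "8(%ebx,%esi,4)" (memory), "{rn-sae}" (rounding mode).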
2614 bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
2615   MCAsmParser &Parser = getParser();
2616   switch (getLexer().getKind()) {
2617   case AsmToken::Dollar: {
2618     // $42 or $ID -> immediate.
2619     SMLoc Start = Parser.getTok().getLoc(), End;
2620     Parser.Lex();
2621     const MCExpr *Val;
2622     // This is an immediate, so we should not parse a register. Do a precheck
2623     // for '%' to supersede intra-register parse errors.
2624     SMLoc L = Parser.getTok().getLoc();
2625     if (check(getLexer().is(AsmToken::Percent), L,
2626               "expected immediate expression") ||
2627         getParser().parseExpression(Val, End) ||
2628         check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2629       return true;
2630     Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2631     return false;
2632   }
2633   case AsmToken::LCurly: {
2634     SMLoc Start = Parser.getTok().getLoc();
2635     return ParseRoundingModeOp(Start, Operands);
2636   }
2637   default: {
2638     // This is a memory operand or a register. We have some parsing complications
2639     // as a '(' may be part of an immediate expression or the addressing mode
2640     // block. This is complicated by the fact that an assembler-level variable
2641     // may refer either to a register or an immediate expression.
2642 
2643     SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2644     const MCExpr *Expr = nullptr;
2645     unsigned Reg = 0;
2646     if (getLexer().isNot(AsmToken::LParen)) {
2647       // No '(' so this is either a displacement expression or a register.
2648       if (Parser.parseExpression(Expr, EndLoc))
2649         return true;
2650       if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2651         // Segment Register. Reset Expr and copy value to register.
2652         Expr = nullptr;
2653         Reg = RE->getRegNo();
2654 
2655         // Sanity check register.
2656         if (Reg == X86::EIZ || Reg == X86::RIZ)
2657           return Error(
2658               Loc, "%eiz and %riz can only be used as index registers",
2659               SMRange(Loc, EndLoc));
2660         if (Reg == X86::RIP)
2661           return Error(Loc, "%rip can only be used as a base register",
2662                        SMRange(Loc, EndLoc));
2663         // Return registers that are not segment prefixes immediately.
2664         if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2665           Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2666           return false;
2667         }
2668         if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2669           return Error(Loc, "invalid segment register");
2670         // Accept a '*' absolute memory reference after the segment. Place it
2671         // before the full memory operand.
2672         if (getLexer().is(AsmToken::Star))
2673           Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2674       }
2675     }
2676     // This is a Memory operand.
2677     return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2678   }
2679   }
2680 }
2681 
2682 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2683 // otherwise the EFLAGS Condition Code enumerator.
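// For example, "e" and "z" both map to X86::COND_E, and "ne"/"nz" both map
// to X86::COND_NE.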
2684 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2685   return StringSwitch<X86::CondCode>(CC)
2686       .Case("o", X86::COND_O)          // Overflow
2687       .Case("no", X86::COND_NO)        // No Overflow
2688       .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
2689       .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2690       .Cases("e", "z", X86::COND_E)    // Equal/Zero
2691       .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2692       .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2693       .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
2694       .Case("s", X86::COND_S)          // Sign
2695       .Case("ns", X86::COND_NS)        // No Sign
2696       .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
2697       .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2698       .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
2699       .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2700       .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2701       .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
2702       .Default(X86::COND_INVALID);
2703 }
2704 
2705 // Returns true on failure, false otherwise.
2706 // If no {z} mark was found, the parser doesn't advance.
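// Illustrative context (assumed): the "{z}" in "vmovaps zmm1 {k1}{z}, zmm2"
// (Intel syntax) or "vmovaps %zmm2, %zmm1 {%k1}{z}" (AT&T syntax).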
2707 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2708                           const SMLoc &StartLoc) {
2709   MCAsmParser &Parser = getParser();
2710   // Assuming we are just past the '{' mark, query the next token.
2711   // If no {z} mark is found, return false, as no parsing error was
2712   // encountered.
2713   if (!(getLexer().is(AsmToken::Identifier) &&
2714         (getLexer().getTok().getIdentifier() == "z")))
2715     return false;
2716   Parser.Lex(); // Eat z
2717   // Query and eat the '}' mark
2718   if (!getLexer().is(AsmToken::RCurly))
2719     return Error(getLexer().getLoc(), "Expected } at this point");
2720   Parser.Lex(); // Eat '}'
2721   // Assign Z the {z} mark operand.
2722   Z = X86Operand::CreateToken("{z}", StartLoc);
2723   return false;
2724 }
2725 
2726 // true on failure, false otherwise
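// Illustrative decorations handled here (assumed, AT&T syntax):
//   vaddps (%rax){1to16}, %zmm1, %zmm2       - embedded broadcast
//   vaddps %zmm1, %zmm2, %zmm3 {%k1}{z}      - op-mask with zeroing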
2727 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
2728   MCAsmParser &Parser = getParser();
2729   if (getLexer().is(AsmToken::LCurly)) {
2730     // Eat "{" and mark the current place.
2731     const SMLoc consumedToken = consumeToken();
2732     // Distinguish {1to<NUM>} from {%k<NUM>}.
2733     if(getLexer().is(AsmToken::Integer)) {
2734       // Parse memory broadcasting ({1to<NUM>}).
2735       if (getLexer().getTok().getIntVal() != 1)
2736         return TokError("Expected 1to<NUM> at this point");
2737       StringRef Prefix = getLexer().getTok().getString();
2738       Parser.Lex(); // Eat first token of 1to8
2739       if (!getLexer().is(AsmToken::Identifier))
2740         return TokError("Expected 1to<NUM> at this point");
2741       // Recognize only reasonable suffixes.
2742       SmallVector<char, 5> BroadcastVector;
2743       StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
2744                                       .toStringRef(BroadcastVector);
2745       if (!BroadcastString.startswith("1to"))
2746         return TokError("Expected 1to<NUM> at this point");
2747       const char *BroadcastPrimitive =
2748           StringSwitch<const char *>(BroadcastString)
2749               .Case("1to2", "{1to2}")
2750               .Case("1to4", "{1to4}")
2751               .Case("1to8", "{1to8}")
2752               .Case("1to16", "{1to16}")
2753               .Default(nullptr);
2754       if (!BroadcastPrimitive)
2755         return TokError("Invalid memory broadcast primitive.");
2756       Parser.Lex(); // Eat trailing token of 1toN
2757       if (!getLexer().is(AsmToken::RCurly))
2758         return TokError("Expected } at this point");
2759       Parser.Lex();  // Eat "}"
2760       Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2761                                                  consumedToken));
2762       // No AVX512-specific primitives can follow
2763       // memory broadcasting, so return.
2764       return false;
2765     } else {
2766       // Parse either {k}{z}, {z}{k}, {k} or {z}.
2767       // The last one has no meaning, but GCC accepts it.
2768       // Currently, we are just past a '{' mark.
2769       std::unique_ptr<X86Operand> Z;
2770       if (ParseZ(Z, consumedToken))
2771         return true;
2772       // Reaching here means that parsing of the alleged '{z}' mark yielded
2773       // no errors.
2774       // Query for the need of further parsing for a {%k<NUM>} mark
2775       if (!Z || getLexer().is(AsmToken::LCurly)) {
2776         SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2777         // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2778         // expected
2779         unsigned RegNo;
2780         SMLoc RegLoc;
2781         if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2782             X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2783           if (RegNo == X86::K0)
2784             return Error(RegLoc, "Register k0 can't be used as write mask");
2785           if (!getLexer().is(AsmToken::RCurly))
2786             return Error(getLexer().getLoc(), "Expected } at this point");
2787           Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2788           Operands.push_back(
2789               X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2790           Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2791         } else
2792           return Error(getLexer().getLoc(),
2793                         "Expected an op-mask register at this point");
2794         // A {%k<NUM>} mark was found; look for a {z} mark.
2795         if (getLexer().is(AsmToken::LCurly) && !Z) {
2796           // If we have found a parsing error, or no (expected) {z} mark,
2797           // report an error.
2798           if (ParseZ(Z, consumeToken()) || !Z)
2799             return Error(getLexer().getLoc(),
2800                          "Expected a {z} mark at this point");
2801 
2802         }
2803         // '{z}' on its own is meaningless and hence should be ignored.
2804         // On the contrary, if it is accompanied by a K register,
2805         // allow it.
2806         if (Z)
2807           Operands.push_back(std::move(Z));
2808       }
2809     }
2810   }
2811   return false;
2812 }
2813 
2814 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
2815 /// has already been parsed if present. disp may be provided as well.
2816 bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
2817                                    SMLoc StartLoc, SMLoc EndLoc,
2818                                    OperandVector &Operands) {
2819   MCAsmParser &Parser = getParser();
2820   SMLoc Loc;
2821   // Based on the values passed in, we are in one of the following cases
2822   // (with the current position marked by (*)):
2823 
2824   //   1. seg : * disp  (base-index-scale-expr)
2825   //   2. seg : *(disp) (base-index-scale-expr)
2826   //   3. seg :       *(base-index-scale-expr)
2827   //   4.        disp  *(base-index-scale-expr)
2828   //   5.      *(disp)  (base-index-scale-expr)
2829   //   6.             *(base-index-scale-expr)
2830   //   7.  disp *
2831   //   8. *(disp)
2832 
2833   // If we do not have a displacement yet, check if we're in cases 4 or 6 by
2834   // checking if the first object after the parenthesis is a register (or an
2835   // identifier referring to a register) and parse the displacement or default
2836   // to 0 as appropriate.
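  // For instance, with no displacement parsed yet, "(%ebx,%esi,2)" is already
  // the start of a memory operand (case 6), whereas "(2+2)(,%esi,2)" starts
  // with a parenthesized displacement expression (case 5).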
2837   auto isAtMemOperand = [this]() {
2838     if (this->getLexer().isNot(AsmToken::LParen))
2839       return false;
2840     AsmToken Buf[2];
2841     StringRef Id;
2842     auto TokCount = this->getLexer().peekTokens(Buf, true);
2843     if (TokCount == 0)
2844       return false;
2845     switch (Buf[0].getKind()) {
2846     case AsmToken::Percent:
2847     case AsmToken::Comma:
2848       return true;
2849     // The cases below effectively peek at an identifier.
2850     case AsmToken::At:
2851     case AsmToken::Dollar:
2852       if ((TokCount > 1) &&
2853           (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2854           (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2855         Id = StringRef(Buf[0].getLoc().getPointer(),
2856                        Buf[1].getIdentifier().size() + 1);
2857       break;
2858     case AsmToken::Identifier:
2859     case AsmToken::String:
2860       Id = Buf[0].getIdentifier();
2861       break;
2862     default:
2863       return false;
2864     }
2865     // We have an ID. Check if it is bound to a register.
2866     if (!Id.empty()) {
2867       MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2868       if (Sym->isVariable()) {
2869         auto V = Sym->getVariableValue(/*SetUsed*/ false);
2870         return isa<X86MCExpr>(V);
2871       }
2872     }
2873     return false;
2874   };
2875 
2876   if (!Disp) {
2877     // Parse immediate if we're not at a mem operand yet.
2878     if (!isAtMemOperand()) {
2879       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2880         return true;
2881       assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2882     } else {
2883       // Disp is implicitly zero if we haven't parsed it yet.
2884       Disp = MCConstantExpr::create(0, Parser.getContext());
2885     }
2886   }
2887 
2888   // We are now either at the end of the operand or at the '(' at the start of a
2889   // base-index-scale-expr.
2890 
2891   if (!parseOptionalToken(AsmToken::LParen)) {
2892     if (SegReg == 0)
2893       Operands.push_back(
2894           X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
2895     else
2896       Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
2897                                                0, 0, 1, StartLoc, EndLoc));
2898     return false;
2899   }
2900 
2901   // If we reached here, the '(' has been eaten. Process
2902   // the rest of the memory operand.
2903   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2904   SMLoc BaseLoc = getLexer().getLoc();
2905   const MCExpr *E;
2906   StringRef ErrMsg;
2907 
2908   // Parse BaseReg if one is provided.
2909   if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2910     if (Parser.parseExpression(E, EndLoc) ||
2911         check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2912       return true;
2913 
2914     // Sanity check register.
2915     BaseReg = cast<X86MCExpr>(E)->getRegNo();
2916     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2917       return Error(BaseLoc, "eiz and riz can only be used as index registers",
2918                    SMRange(BaseLoc, EndLoc));
2919   }
2920 
2921   if (parseOptionalToken(AsmToken::Comma)) {
2922     // Following the comma we should have either an index register, or a scale
2923     // value. We don't support the latter form, but we want to parse it
2924     // correctly.
2925     //
2926     // Even though it would be completely consistent to support syntax like
2927     // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2928     if (getLexer().isNot(AsmToken::RParen)) {
2929       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2930         return true;
2931 
2932       if (!isa<X86MCExpr>(E)) {
2933         // We've parsed an unexpected Scale Value instead of an index
2934         // register. Interpret it as an absolute.
2935         int64_t ScaleVal;
2936         if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
2937           return Error(Loc, "expected absolute expression");
2938         if (ScaleVal != 1)
2939           Warning(Loc, "scale factor without index register is ignored");
2940         Scale = 1;
2941       } else { // IndexReg Found.
2942         IndexReg = cast<X86MCExpr>(E)->getRegNo();
2943 
2944         if (BaseReg == X86::RIP)
2945           return Error(Loc,
2946                        "%rip as base register can not have an index register");
2947         if (IndexReg == X86::RIP)
2948           return Error(Loc, "%rip is not allowed as an index register");
2949 
2950         if (parseOptionalToken(AsmToken::Comma)) {
2951           // Parse the scale amount:
2952           //  ::= ',' [scale-expression]
2953 
2954           // A scale amount without an index is ignored.
2955           if (getLexer().isNot(AsmToken::RParen)) {
2956             int64_t ScaleVal;
2957             if (Parser.parseTokenLoc(Loc) ||
2958                 Parser.parseAbsoluteExpression(ScaleVal))
2959               return Error(Loc, "expected scale expression");
2960             Scale = (unsigned)ScaleVal;
2961             // Validate the scale amount.
2962             if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2963                 Scale != 1)
2964               return Error(Loc, "scale factor in 16-bit address must be 1");
2965             if (checkScale(Scale, ErrMsg))
2966               return Error(Loc, ErrMsg);
2967           }
2968         }
2969       }
2970     }
2971   }
2972 
2973   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2974   if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2975     return true;
2976 
2977   // This is to support the otherwise illegal operand (%dx), found in examples
2978   // in various unofficial manuals (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
2979   // therefore be supported. Mark such DX variants separately for special-case fixups.
2980   if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
2981       isa<MCConstantExpr>(Disp) &&
2982       cast<MCConstantExpr>(Disp)->getValue() == 0) {
2983     Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
2984     return false;
2985   }
2986 
2987   if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2988                                       ErrMsg))
2989     return Error(BaseLoc, ErrMsg);
2990 
2991   if (SegReg || BaseReg || IndexReg)
2992     Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
2993                                              BaseReg, IndexReg, Scale, StartLoc,
2994                                              EndLoc));
2995   else
2996     Operands.push_back(
2997         X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
2998   return false;
2999 }
3000 
3001 // Parse either a standard primary expression or a register.
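// For example, "%rbx" (or "rbx" in Intel syntax) becomes an X86MCExpr wrapping
// the register, while an expression such as "foo+4" falls through to the
// generic primary-expression parser.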
3002 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3003   MCAsmParser &Parser = getParser();
3004   // See if this is a register first.
3005   if (getTok().is(AsmToken::Percent) ||
3006       (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3007        MatchRegisterName(Parser.getTok().getString()))) {
3008     SMLoc StartLoc = Parser.getTok().getLoc();
3009     unsigned RegNo;
3010     if (ParseRegister(RegNo, StartLoc, EndLoc))
3011       return true;
3012     Res = X86MCExpr::create(RegNo, Parser.getContext());
3013     return false;
3014   }
3015   return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3016 }
3017 
3018 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
3019                                     SMLoc NameLoc, OperandVector &Operands) {
3020   MCAsmParser &Parser = getParser();
3021   InstInfo = &Info;
3022 
3023   // Reset the forced VEX encoding.
3024   ForcedVEXEncoding = VEXEncoding_Default;
3025   ForcedDispEncoding = DispEncoding_Default;
3026 
3027   // Parse pseudo prefixes.
3028   while (1) {
3029     if (Name == "{") {
3030       if (getLexer().isNot(AsmToken::Identifier))
3031         return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3032       std::string Prefix = Parser.getTok().getString().lower();
3033       Parser.Lex(); // Eat identifier.
3034       if (getLexer().isNot(AsmToken::RCurly))
3035         return Error(Parser.getTok().getLoc(), "Expected '}'");
3036       Parser.Lex(); // Eat curly.
3037 
3038       if (Prefix == "vex")
3039         ForcedVEXEncoding = VEXEncoding_VEX;
3040       else if (Prefix == "vex2")
3041         ForcedVEXEncoding = VEXEncoding_VEX2;
3042       else if (Prefix == "vex3")
3043         ForcedVEXEncoding = VEXEncoding_VEX3;
3044       else if (Prefix == "evex")
3045         ForcedVEXEncoding = VEXEncoding_EVEX;
3046       else if (Prefix == "disp8")
3047         ForcedDispEncoding = DispEncoding_Disp8;
3048       else if (Prefix == "disp32")
3049         ForcedDispEncoding = DispEncoding_Disp32;
3050       else
3051         return Error(NameLoc, "unknown prefix");
3052 
3053       NameLoc = Parser.getTok().getLoc();
3054       if (getLexer().is(AsmToken::LCurly)) {
3055         Parser.Lex();
3056         Name = "{";
3057       } else {
3058         if (getLexer().isNot(AsmToken::Identifier))
3059           return Error(Parser.getTok().getLoc(), "Expected identifier");
3060         // FIXME: The mnemonic won't match correctly if it's not in lower case.
3061         Name = Parser.getTok().getString();
3062         Parser.Lex();
3063       }
3064       continue;
3065     }
3066     // Parse MASM style pseudo prefixes.
3067     if (isParsingMSInlineAsm()) {
3068       if (Name.equals_lower("vex"))
3069         ForcedVEXEncoding = VEXEncoding_VEX;
3070       else if (Name.equals_lower("vex2"))
3071         ForcedVEXEncoding = VEXEncoding_VEX2;
3072       else if (Name.equals_lower("vex3"))
3073         ForcedVEXEncoding = VEXEncoding_VEX3;
3074       else if (Name.equals_lower("evex"))
3075         ForcedVEXEncoding = VEXEncoding_EVEX;
3076 
3077       if (ForcedVEXEncoding != VEXEncoding_Default) {
3078         if (getLexer().isNot(AsmToken::Identifier))
3079           return Error(Parser.getTok().getLoc(), "Expected identifier");
3080         // FIXME: The mnemonic won't match correctly if it's not in lower case.
3081         Name = Parser.getTok().getString();
3082         NameLoc = Parser.getTok().getLoc();
3083         Parser.Lex();
3084       }
3085     }
3086     break;
3087   }
3088 
3089   // Support the suffix syntax for overriding displacement size as well.
3090   if (Name.consume_back(".d32")) {
3091     ForcedDispEncoding = DispEncoding_Disp32;
3092   } else if (Name.consume_back(".d8")) {
3093     ForcedDispEncoding = DispEncoding_Disp8;
3094   }
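  // For example, "movl.d32 %eax, (%rbp)" forces a 32-bit displacement encoding
  // even when the displacement would fit in 8 bits; the pseudo-prefix form
  // "{disp32} movl %eax, (%rbp)" is equivalent.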
3095 
3096   StringRef PatchedName = Name;
3097 
3098   // Hack to skip "short" following Jcc.
3099   if (isParsingIntelSyntax() &&
3100       (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3101        PatchedName == "jcxz" || PatchedName == "jecxz" ||
3102        (PatchedName.startswith("j") &&
3103         ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3104     StringRef NextTok = Parser.getTok().getString();
3105     if (NextTok == "short") {
3106       SMLoc NameEndLoc =
3107           NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3108       // Eat the short keyword.
3109       Parser.Lex();
3110       // MS and GAS ignore the short keyword; they both determine the jmp type
3111       // based on the distance of the label. (NASM does emit different code with
3112       // and without "short," though.)
3113       InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3114                                           NextTok.size() + 1);
3115     }
3116   }
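  // For example, Intel-syntax "jmp short target" has the "short" keyword
  // skipped here and is then matched simply as "jmp target".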
3117 
3118   // FIXME: Hack to recognize setneb as setne.
3119   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
3120       PatchedName != "setb" && PatchedName != "setnb")
3121     PatchedName = PatchedName.substr(0, Name.size()-1);
3122 
3123   unsigned ComparisonPredicate = ~0U;
3124 
3125   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
3126   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
3127       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
3128        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
3129     bool IsVCMP = PatchedName[0] == 'v';
3130     unsigned CCIdx = IsVCMP ? 4 : 3;
3131     unsigned CC = StringSwitch<unsigned>(
3132       PatchedName.slice(CCIdx, PatchedName.size() - 2))
3133       .Case("eq",       0x00)
3134       .Case("eq_oq",    0x00)
3135       .Case("lt",       0x01)
3136       .Case("lt_os",    0x01)
3137       .Case("le",       0x02)
3138       .Case("le_os",    0x02)
3139       .Case("unord",    0x03)
3140       .Case("unord_q",  0x03)
3141       .Case("neq",      0x04)
3142       .Case("neq_uq",   0x04)
3143       .Case("nlt",      0x05)
3144       .Case("nlt_us",   0x05)
3145       .Case("nle",      0x06)
3146       .Case("nle_us",   0x06)
3147       .Case("ord",      0x07)
3148       .Case("ord_q",    0x07)
3149       /* AVX only from here */
3150       .Case("eq_uq",    0x08)
3151       .Case("nge",      0x09)
3152       .Case("nge_us",   0x09)
3153       .Case("ngt",      0x0A)
3154       .Case("ngt_us",   0x0A)
3155       .Case("false",    0x0B)
3156       .Case("false_oq", 0x0B)
3157       .Case("neq_oq",   0x0C)
3158       .Case("ge",       0x0D)
3159       .Case("ge_os",    0x0D)
3160       .Case("gt",       0x0E)
3161       .Case("gt_os",    0x0E)
3162       .Case("true",     0x0F)
3163       .Case("true_uq",  0x0F)
3164       .Case("eq_os",    0x10)
3165       .Case("lt_oq",    0x11)
3166       .Case("le_oq",    0x12)
3167       .Case("unord_s",  0x13)
3168       .Case("neq_us",   0x14)
3169       .Case("nlt_uq",   0x15)
3170       .Case("nle_uq",   0x16)
3171       .Case("ord_s",    0x17)
3172       .Case("eq_us",    0x18)
3173       .Case("nge_uq",   0x19)
3174       .Case("ngt_uq",   0x1A)
3175       .Case("false_os", 0x1B)
3176       .Case("neq_os",   0x1C)
3177       .Case("ge_oq",    0x1D)
3178       .Case("gt_oq",    0x1E)
3179       .Case("true_us",  0x1F)
3180       .Default(~0U);
3181     if (CC != ~0U && (IsVCMP || CC < 8)) {
3182       if (PatchedName.endswith("ss"))
3183         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3184       else if (PatchedName.endswith("sd"))
3185         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3186       else if (PatchedName.endswith("ps"))
3187         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3188       else if (PatchedName.endswith("pd"))
3189         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3190       else
3191         llvm_unreachable("Unexpected suffix!");
3192 
3193       ComparisonPredicate = CC;
3194     }
3195   }
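  // For example, "cmpeqps %xmm1, %xmm0" is handled as "cmpps $0, %xmm1, %xmm0"
  // and "vcmpnltsd %xmm1, %xmm2, %xmm3" as "vcmpsd $5, %xmm1, %xmm2, %xmm3";
  // the extracted predicate is pushed as an immediate operand further below.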
3196 
3197   // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3198   if (PatchedName.startswith("vpcmp") &&
3199       (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3200        PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3201     unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3202     unsigned CC = StringSwitch<unsigned>(
3203       PatchedName.slice(5, PatchedName.size() - SuffixSize))
3204       .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
3205       .Case("lt",    0x1)
3206       .Case("le",    0x2)
3207       //.Case("false", 0x3) // Not a documented alias.
3208       .Case("neq",   0x4)
3209       .Case("nlt",   0x5)
3210       .Case("nle",   0x6)
3211       //.Case("true",  0x7) // Not a documented alias.
3212       .Default(~0U);
3213     if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3214       switch (PatchedName.back()) {
3215       default: llvm_unreachable("Unexpected character!");
3216       case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3217       case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3218       case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3219       case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3220       }
3221       // Set up the immediate to push into the operands later.
3222       ComparisonPredicate = CC;
3223     }
3224   }
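  // For example, "vpcmpltud %zmm1, %zmm2, %k1" is handled as
  // "vpcmpud $1, %zmm1, %zmm2, %k1".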
3225 
3226   // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3227   if (PatchedName.startswith("vpcom") &&
3228       (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3229        PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3230     unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3231     unsigned CC = StringSwitch<unsigned>(
3232       PatchedName.slice(5, PatchedName.size() - SuffixSize))
3233       .Case("lt",    0x0)
3234       .Case("le",    0x1)
3235       .Case("gt",    0x2)
3236       .Case("ge",    0x3)
3237       .Case("eq",    0x4)
3238       .Case("neq",   0x5)
3239       .Case("false", 0x6)
3240       .Case("true",  0x7)
3241       .Default(~0U);
3242     if (CC != ~0U) {
3243       switch (PatchedName.back()) {
3244       default: llvm_unreachable("Unexpected character!");
3245       case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3246       case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3247       case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3248       case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3249       }
3250       // Set up the immediate to push into the operands later.
3251       ComparisonPredicate = CC;
3252     }
3253   }
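  // Likewise, the XOP form "vpcomgtb %xmm1, %xmm2, %xmm3" is handled as
  // "vpcomb $2, %xmm1, %xmm2, %xmm3".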
3254 
3255 
3256   // Determine whether this is an instruction prefix.
3257   // FIXME:
3258   // Enhance the robustness of prefix integrity checks. For example, the
3259   // following forms are currently tolerated:
3260   // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
3261   // lock addq %rax, %rbx ; Destination operand must be of memory type
3262   // xacquire <insn>      ; xacquire must be accompanied by 'lock'
3263   bool isPrefix = StringSwitch<bool>(Name)
3264                       .Cases("rex64", "data32", "data16", true)
3265                       .Cases("xacquire", "xrelease", true)
3266                       .Cases("acquire", "release", isParsingIntelSyntax())
3267                       .Default(false);
3268 
3269   auto isLockRepeatNtPrefix = [](StringRef N) {
3270     return StringSwitch<bool>(N)
3271         .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3272         .Default(false);
3273   };
3274 
3275   bool CurlyAsEndOfStatement = false;
3276 
3277   unsigned Flags = X86::IP_NO_PREFIX;
3278   while (isLockRepeatNtPrefix(Name.lower())) {
3279     unsigned Prefix =
3280         StringSwitch<unsigned>(Name)
3281             .Cases("lock", "lock", X86::IP_HAS_LOCK)
3282             .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3283             .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3284             .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3285             .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3286     Flags |= Prefix;
3287     if (getLexer().is(AsmToken::EndOfStatement)) {
3288       // We don't have a real instruction following the prefix,
3289       // so use the prefix itself as the instruction.
3290       // TODO: there could be several prefixes one after another.
3291       Flags = X86::IP_NO_PREFIX;
3292       break;
3293     }
3294     // FIXME: The mnemonic won't match correctly if it's not in lower case.
3295     Name = Parser.getTok().getString();
3296     Parser.Lex(); // eat the prefix
3297     // Hack: we could have something like "rep # some comment" or
3298     //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3299     while (Name.startswith(";") || Name.startswith("\n") ||
3300            Name.startswith("#") || Name.startswith("\t") ||
3301            Name.startswith("/")) {
3302       // FIXME: The mnemonic won't match correctly if it's not in lower case.
3303       Name = Parser.getTok().getString();
3304       Parser.Lex(); // go to next prefix or instr
3305     }
3306   }
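  // For example, for "lock incl (%rax)" the loop above records IP_HAS_LOCK and
  // leaves "incl" in Name so that it is parsed as the mnemonic.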
3307 
3308   if (Flags)
3309     PatchedName = Name;
3310 
3311   // Hacks to handle 'data16' and 'data32'
3312   if (PatchedName == "data16" && is16BitMode()) {
3313     return Error(NameLoc, "redundant data16 prefix");
3314   }
3315   if (PatchedName == "data32") {
3316     if (is32BitMode())
3317       return Error(NameLoc, "redundant data32 prefix");
3318     if (is64BitMode())
3319       return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3320     // Hack: switch to 'data16' for the table lookup.
3321     PatchedName = "data16";
3322 
3323     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3324       StringRef Next = Parser.getTok().getString();
3325       getLexer().Lex();
3326       // data32 effectively changes the instruction suffix.
3327       // TODO Generalize.
3328       if (Next == "callw")
3329         Next = "calll";
3330       if (Next == "ljmpw")
3331         Next = "ljmpl";
3332 
3333       Name = Next;
3334       PatchedName = Name;
3335       ForcedDataPrefix = X86::Mode32Bit;
3336       isPrefix = false;
3337     }
3338   }
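  // For example, in 16-bit mode "data32 callw foo" is handled here as "calll
  // foo" with ForcedDataPrefix set to Mode32Bit, so it is matched against the
  // 32-bit-operand form.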
3339 
3340   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3341 
3342   // Push the immediate if we extracted one from the mnemonic.
3343   if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3344     const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3345                                                  getParser().getContext());
3346     Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3347   }
3348 
3349   // This does the actual operand parsing.  Don't parse any more if we have a
3350   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3351   // just want to parse the "lock" as the first instruction and the "incl" as
3352   // the next one.
3353   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
3354     // Parse '*' modifier.
3355     if (getLexer().is(AsmToken::Star))
3356       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3357 
3358     // Read the operands.
3359     while(1) {
3360       if (ParseOperand(Operands))
3361         return true;
3362       if (HandleAVX512Operand(Operands))
3363         return true;
3364 
3365       // check for comma and eat it
3366       if (getLexer().is(AsmToken::Comma))
3367         Parser.Lex();
3368       else
3369         break;
3370      }
3371 
3372     // In MS inline asm, curly braces mark the beginning/end of a block,
3373     // so they should be interpreted as the end of the statement.
3374     CurlyAsEndOfStatement =
3375         isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3376         (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3377     if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3378       return TokError("unexpected token in argument list");
3379   }
3380 
3381   // Push the immediate if we extracted one from the mnemonic.
3382   if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3383     const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3384                                                  getParser().getContext());
3385     Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3386   }
3387 
3388   // Consume the EndOfStatement or the prefix separator Slash
3389   if (getLexer().is(AsmToken::EndOfStatement) ||
3390       (isPrefix && getLexer().is(AsmToken::Slash)))
3391     Parser.Lex();
3392   else if (CurlyAsEndOfStatement)
3393     // Add an actual EndOfStatement before the curly brace
3394     Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3395                                    getLexer().getTok().getLoc(), 0);
3396 
3397   // This is for GAS compatibility and cannot be done in the .td files.
3398   // Append "p" to certain floating point mnemonics given with no argument.
3399   // For example: fsub --> fsubp
3400   bool IsFp =
3401     Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3402   if (IsFp && Operands.size() == 1) {
3403     const char *Repl = StringSwitch<const char *>(Name)
3404       .Case("fsub", "fsubp")
3405       .Case("fdiv", "fdivp")
3406       .Case("fsubr", "fsubrp")
3407       .Case("fdivr", "fdivrp");
3408     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3409   }
3410 
3411   if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3412       (Operands.size() == 3)) {
3413     X86Operand &Op1 = (X86Operand &)*Operands[1];
3414     X86Operand &Op2 = (X86Operand &)*Operands[2];
3415     SMLoc Loc = Op1.getEndLoc();
3416     // Moving a 32- or 16-bit value into a segment register has the same
3417     // behavior. Modify such instructions to always use the shorter form.
3418     if (Op1.isReg() && Op2.isReg() &&
3419         X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3420             Op2.getReg()) &&
3421         (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3422          X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3423       // Change instruction name to match new instruction.
3424       if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3425         Name = is16BitMode() ? "movw" : "movl";
3426         Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3427       }
3428       // Select the correct equivalent 16-/32-bit source register.
3429       unsigned Reg =
3430           getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
3431       Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3432     }
3433   }
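  // For example, outside of 16-bit mode "movw %ax, %ds" is rewritten above to
  // "movl %eax, %ds", which encodes without the operand-size prefix.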
3434 
3435   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3436   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
3437   // documented form in various unofficial manuals, so a lot of code uses it.
3438   if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3439        Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3440       Operands.size() == 3) {
3441     X86Operand &Op = (X86Operand &)*Operands.back();
3442     if (Op.isDXReg())
3443       Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3444                                               Op.getEndLoc());
3445   }
3446   // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3447   if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3448        Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3449       Operands.size() == 3) {
3450     X86Operand &Op = (X86Operand &)*Operands[1];
3451     if (Op.isDXReg())
3452       Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3453                                           Op.getEndLoc());
3454   }
3455 
3456   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
3457   bool HadVerifyError = false;
3458 
3459   // Append default arguments to "ins[bwld]"
3460   if (Name.startswith("ins") &&
3461       (Operands.size() == 1 || Operands.size() == 3) &&
3462       (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3463        Name == "ins")) {
3464 
3465     AddDefaultSrcDestOperands(TmpOperands,
3466                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3467                               DefaultMemDIOperand(NameLoc));
3468     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3469   }
3470 
3471   // Append default arguments to "outs[bwld]"
3472   if (Name.startswith("outs") &&
3473       (Operands.size() == 1 || Operands.size() == 3) &&
3474       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3475        Name == "outsd" || Name == "outs")) {
3476     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3477                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3478     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3479   }
3480 
3481   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3482   // values of $SIREG according to the mode. It would be nice if this
3483   // could be achieved with InstAlias in the tables.
3484   if (Name.startswith("lods") &&
3485       (Operands.size() == 1 || Operands.size() == 2) &&
3486       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3487        Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3488     TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3489     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3490   }
3491 
3492   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3493   // values of $DIREG according to the mode. It would be nice if this
3494   // could be achieved with InstAlias in the tables.
3495   if (Name.startswith("stos") &&
3496       (Operands.size() == 1 || Operands.size() == 2) &&
3497       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3498        Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3499     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3500     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3501   }
3502 
3503   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3504   // values of $DIREG according to the mode. It would be nice if this
3505   // could be achieved with InstAlias in the tables.
3506   if (Name.startswith("scas") &&
3507       (Operands.size() == 1 || Operands.size() == 2) &&
3508       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3509        Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3510     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3511     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3512   }
3513 
3514   // Add default SI and DI operands to "cmps[bwlq]".
3515   if (Name.startswith("cmps") &&
3516       (Operands.size() == 1 || Operands.size() == 3) &&
3517       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3518        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3519     AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3520                               DefaultMemSIOperand(NameLoc));
3521     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3522   }
3523 
3524   // Add default SI and DI operands to "movs[bwlq]".
3525   if (((Name.startswith("movs") &&
3526         (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3527          Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3528        (Name.startswith("smov") &&
3529         (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3530          Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3531       (Operands.size() == 1 || Operands.size() == 3)) {
3532     if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3533       Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3534     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3535                               DefaultMemDIOperand(NameLoc));
3536     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3537   }
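  // For example, a bare "movsb" in 64-bit mode picks up the default operands
  // and is matched as "movsb (%rsi), (%rdi)".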
3538 
3539   // Check if we encountered an error for one of the string instructions.
3540   if (HadVerifyError) {
3541     return HadVerifyError;
3542   }
3543 
3544   // Transforms "xlat mem8" into "xlatb"
3545   if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3546     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3547     if (Op1.isMem8()) {
3548       Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3549                                  "size, (R|E)BX will be used for the location");
3550       Operands.pop_back();
3551       static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3552     }
3553   }
3554 
3555   if (Flags)
3556     Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3557   return false;
3558 }
3559 
3560 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3561   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3562 
3563   switch (Inst.getOpcode()) {
3564   default: return false;
3565   case X86::JMP_1:
3566     // {disp32} forces a larger displacement as if the instruction was relaxed.
3567     // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3568     // This matches GNU assembler.
3569     if (ForcedDispEncoding == DispEncoding_Disp32) {
3570       Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3571       return true;
3572     }
3573 
3574     return false;
3575   case X86::JCC_1:
3576     // {disp32} forces a larger displacement as if the instruction was relaxed.
3577     // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3578     // This matches GNU assembler.
3579     if (ForcedDispEncoding == DispEncoding_Disp32) {
3580       Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3581       return true;
3582     }
3583 
3584     return false;
3585   case X86::VMOVZPQILo2PQIrr:
3586   case X86::VMOVAPDrr:
3587   case X86::VMOVAPDYrr:
3588   case X86::VMOVAPSrr:
3589   case X86::VMOVAPSYrr:
3590   case X86::VMOVDQArr:
3591   case X86::VMOVDQAYrr:
3592   case X86::VMOVDQUrr:
3593   case X86::VMOVDQUYrr:
3594   case X86::VMOVUPDrr:
3595   case X86::VMOVUPDYrr:
3596   case X86::VMOVUPSrr:
3597   case X86::VMOVUPSYrr: {
3598     // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3599     // the registers is extended, but the other isn't.
3600     if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3601         MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3602         MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
3603       return false;
3604 
3605     unsigned NewOpc;
3606     switch (Inst.getOpcode()) {
3607     default: llvm_unreachable("Invalid opcode");
3608     case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
3609     case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
3610     case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
3611     case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
3612     case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
3613     case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
3614     case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
3615     case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
3616     case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
3617     case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
3618     case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
3619     case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
3620     case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
3621     }
3622     Inst.setOpcode(NewOpc);
3623     return true;
3624   }
3625   case X86::VMOVSDrr:
3626   case X86::VMOVSSrr: {
3627     // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3628     // the registers is extended, but the other isn't.
3629     if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3630         MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3631         MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
3632       return false;
3633 
3634     unsigned NewOpc;
3635     switch (Inst.getOpcode()) {
3636     default: llvm_unreachable("Invalid opcode");
3637     case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
3638     case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
3639     }
3640     Inst.setOpcode(NewOpc);
3641     return true;
3642   }
3643   case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
3644   case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
3645   case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
3646   case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
3647   case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
3648   case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
3649   case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
3650     // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3651     // FIXME: It would be great if we could just do this with an InstAlias.
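    // For example, "sarl $1, %eax" becomes SAR32r1 ("sarl %eax"), which is one
    // byte shorter than the immediate form.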
3652     if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
3653       return false;
3654 
3655     unsigned NewOpc;
3656     switch (Inst.getOpcode()) {
3657     default: llvm_unreachable("Invalid opcode");
3658     case X86::RCR8ri:  NewOpc = X86::RCR8r1;  break;
3659     case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
3660     case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
3661     case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
3662     case X86::RCL8ri:  NewOpc = X86::RCL8r1;  break;
3663     case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
3664     case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
3665     case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
3666     case X86::ROR8ri:  NewOpc = X86::ROR8r1;  break;
3667     case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
3668     case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
3669     case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
3670     case X86::ROL8ri:  NewOpc = X86::ROL8r1;  break;
3671     case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
3672     case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
3673     case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
3674     case X86::SAR8ri:  NewOpc = X86::SAR8r1;  break;
3675     case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
3676     case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
3677     case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
3678     case X86::SHR8ri:  NewOpc = X86::SHR8r1;  break;
3679     case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
3680     case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
3681     case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
3682     case X86::SHL8ri:  NewOpc = X86::SHL8r1;  break;
3683     case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
3684     case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
3685     case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
3686     }
3687 
3688     MCInst TmpInst;
3689     TmpInst.setOpcode(NewOpc);
3690     TmpInst.addOperand(Inst.getOperand(0));
3691     TmpInst.addOperand(Inst.getOperand(1));
3692     Inst = TmpInst;
3693     return true;
3694   }
3695   case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
3696   case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
3697   case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
3698   case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
3699   case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
3700   case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
3701   case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
3702     // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3703     // FIXME: It would be great if we could just do this with an InstAlias.
3704     if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
3705         Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
3706       return false;
3707 
3708     unsigned NewOpc;
3709     switch (Inst.getOpcode()) {
3710     default: llvm_unreachable("Invalid opcode");
3711     case X86::RCR8mi:  NewOpc = X86::RCR8m1;  break;
3712     case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
3713     case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
3714     case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
3715     case X86::RCL8mi:  NewOpc = X86::RCL8m1;  break;
3716     case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
3717     case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
3718     case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
3719     case X86::ROR8mi:  NewOpc = X86::ROR8m1;  break;
3720     case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
3721     case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
3722     case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
3723     case X86::ROL8mi:  NewOpc = X86::ROL8m1;  break;
3724     case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
3725     case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
3726     case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
3727     case X86::SAR8mi:  NewOpc = X86::SAR8m1;  break;
3728     case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
3729     case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
3730     case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
3731     case X86::SHR8mi:  NewOpc = X86::SHR8m1;  break;
3732     case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
3733     case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
3734     case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
3735     case X86::SHL8mi:  NewOpc = X86::SHL8m1;  break;
3736     case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
3737     case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
3738     case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
3739     }
3740 
3741     MCInst TmpInst;
3742     TmpInst.setOpcode(NewOpc);
3743     for (int i = 0; i != X86::AddrNumOperands; ++i)
3744       TmpInst.addOperand(Inst.getOperand(i));
3745     Inst = TmpInst;
3746     return true;
3747   }
3748   case X86::INT: {
3749     // Transform "int $3" into "int3" as a size optimization. We can't write an
3750     // InstAlias with an immediate operand yet.
3751     if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3752       return false;
3753 
3754     MCInst TmpInst;
3755     TmpInst.setOpcode(X86::INT3);
3756     Inst = TmpInst;
3757     return true;
3758   }
3759   }
3760 }
3761 
3762 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3763   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3764 
3765   switch (Inst.getOpcode()) {
3766   case X86::VGATHERDPDYrm:
3767   case X86::VGATHERDPDrm:
3768   case X86::VGATHERDPSYrm:
3769   case X86::VGATHERDPSrm:
3770   case X86::VGATHERQPDYrm:
3771   case X86::VGATHERQPDrm:
3772   case X86::VGATHERQPSYrm:
3773   case X86::VGATHERQPSrm:
3774   case X86::VPGATHERDDYrm:
3775   case X86::VPGATHERDDrm:
3776   case X86::VPGATHERDQYrm:
3777   case X86::VPGATHERDQrm:
3778   case X86::VPGATHERQDYrm:
3779   case X86::VPGATHERQDrm:
3780   case X86::VPGATHERQQYrm:
3781   case X86::VPGATHERQQrm: {
3782     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3783     unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3784     unsigned Index =
3785       MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3786     if (Dest == Mask || Dest == Index || Mask == Index)
3787       return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3788                                             "registers should be distinct");
3789     break;
3790   }
3791   case X86::VGATHERDPDZ128rm:
3792   case X86::VGATHERDPDZ256rm:
3793   case X86::VGATHERDPDZrm:
3794   case X86::VGATHERDPSZ128rm:
3795   case X86::VGATHERDPSZ256rm:
3796   case X86::VGATHERDPSZrm:
3797   case X86::VGATHERQPDZ128rm:
3798   case X86::VGATHERQPDZ256rm:
3799   case X86::VGATHERQPDZrm:
3800   case X86::VGATHERQPSZ128rm:
3801   case X86::VGATHERQPSZ256rm:
3802   case X86::VGATHERQPSZrm:
3803   case X86::VPGATHERDDZ128rm:
3804   case X86::VPGATHERDDZ256rm:
3805   case X86::VPGATHERDDZrm:
3806   case X86::VPGATHERDQZ128rm:
3807   case X86::VPGATHERDQZ256rm:
3808   case X86::VPGATHERDQZrm:
3809   case X86::VPGATHERQDZ128rm:
3810   case X86::VPGATHERQDZ256rm:
3811   case X86::VPGATHERQDZrm:
3812   case X86::VPGATHERQQZ128rm:
3813   case X86::VPGATHERQQZ256rm:
3814   case X86::VPGATHERQQZrm: {
3815     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3816     unsigned Index =
3817       MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3818     if (Dest == Index)
3819       return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3820                                             "should be distinct");
3821     break;
3822   }
3823   case X86::V4FMADDPSrm:
3824   case X86::V4FMADDPSrmk:
3825   case X86::V4FMADDPSrmkz:
3826   case X86::V4FMADDSSrm:
3827   case X86::V4FMADDSSrmk:
3828   case X86::V4FMADDSSrmkz:
3829   case X86::V4FNMADDPSrm:
3830   case X86::V4FNMADDPSrmk:
3831   case X86::V4FNMADDPSrmkz:
3832   case X86::V4FNMADDSSrm:
3833   case X86::V4FNMADDSSrmk:
3834   case X86::V4FNMADDSSrmkz:
3835   case X86::VP4DPWSSDSrm:
3836   case X86::VP4DPWSSDSrmk:
3837   case X86::VP4DPWSSDSrmkz:
3838   case X86::VP4DPWSSDrm:
3839   case X86::VP4DPWSSDrmk:
3840   case X86::VP4DPWSSDrmkz: {
3841     unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3842                                     X86::AddrNumOperands - 1).getReg();
3843     unsigned Src2Enc = MRI->getEncodingValue(Src2);
3844     if (Src2Enc % 4 != 0) {
3845       StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
3846       unsigned GroupStart = (Src2Enc / 4) * 4;
3847       unsigned GroupEnd = GroupStart + 3;
3848       return Warning(Ops[0]->getStartLoc(),
3849                      "source register '" + RegName + "' implicitly denotes '" +
3850                      RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3851                      RegName.take_front(3) + Twine(GroupEnd) +
3852                      "' source group");
3853     }
3854     break;
3855   }
3856   }
3857 
3858   const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
3859   // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3860   // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3861   if ((MCID.TSFlags & X86II::EncodingMask) == 0) {
3862     MCPhysReg HReg = X86::NoRegister;
3863     bool UsesRex = MCID.TSFlags & X86II::REX_W;
3864     unsigned NumOps = Inst.getNumOperands();
3865     for (unsigned i = 0; i != NumOps; ++i) {
3866       const MCOperand &MO = Inst.getOperand(i);
3867       if (!MO.isReg())
3868         continue;
3869       unsigned Reg = MO.getReg();
3870       if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3871         HReg = Reg;
3872       if (X86II::isX86_64NonExtLowByteReg(Reg) ||
3873           X86II::isX86_64ExtendedReg(Reg))
3874         UsesRex = true;
3875     }
3876 
3877     if (UsesRex && HReg != X86::NoRegister) {
3878       StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
3879       return Error(Ops[0]->getStartLoc(),
3880                    "can't encode '" + RegName + "' in an instruction requiring "
3881                    "REX prefix");
3882     }
3883   }
3884 
3885   return false;
3886 }
3887 
3888 static const char *getSubtargetFeatureName(uint64_t Val);
3889 
3890 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3891   Warning(Loc, "Instruction may be vulnerable to LVI and "
3892                "requires manual mitigation");
3893   Note(SMLoc(), "See https://software.intel.com/"
3894                 "security-software-guidance/insights/"
3895                 "deep-dive-load-value-injection#specialinstructions"
3896                 " for more information");
3897 }
3898 
3899 /// RET instructions, and instructions that perform indirect calls/jumps from
3900 /// memory, combine a load and a branch within a single instruction. To mitigate these
3901 /// instructions against LVI, they must be decomposed into separate load and
3902 /// branch instructions, with an LFENCE in between. For more details, see:
3903 /// - X86LoadValueInjectionRetHardening.cpp
3904 /// - X86LoadValueInjectionIndirectThunks.cpp
3905 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3906 ///
3907 /// The mitigation is emitted directly for RET instructions; a warning is emitted for the other cases.
3908 void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
3909   // Information on control-flow instructions that require manual mitigation can
3910   // be found here:
3911   // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3912   switch (Inst.getOpcode()) {
3913   case X86::RETW:
3914   case X86::RETL:
3915   case X86::RETQ:
3916   case X86::RETIL:
3917   case X86::RETIQ:
3918   case X86::RETIW: {
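    // In effect, the return is emitted as "shl $0, (%rsp); lfence; ret" (with
    // the stack pointer register appropriate for the mode): the no-op
    // read-modify-write of the return address followed by LFENCE constrains
    // speculation before the RET's load, per the LVI guidance linked above.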
3919     MCInst ShlInst, FenceInst;
3920     bool Parse32 = is32BitMode() || Code16GCC;
3921     unsigned Basereg =
3922         is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
3923     const MCExpr *Disp = MCConstantExpr::create(0, getContext());
3924     auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
3925                                           /*BaseReg=*/Basereg, /*IndexReg=*/0,
3926                                           /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
3927     ShlInst.setOpcode(X86::SHL64mi);
3928     ShlMemOp->addMemOperands(ShlInst, 5);
3929     ShlInst.addOperand(MCOperand::createImm(0));
3930     FenceInst.setOpcode(X86::LFENCE);
3931     Out.emitInstruction(ShlInst, getSTI());
3932     Out.emitInstruction(FenceInst, getSTI());
3933     return;
3934   }
3935   case X86::JMP16m:
3936   case X86::JMP32m:
3937   case X86::JMP64m:
3938   case X86::CALL16m:
3939   case X86::CALL32m:
3940   case X86::CALL64m:
3941     emitWarningForSpecialLVIInstruction(Inst.getLoc());
3942     return;
3943   }
3944 }
3945 
3946 /// To mitigate LVI, every instruction that performs a load can be followed by
3947 /// an LFENCE instruction to squash any potential mis-speculation. There are
3948 /// some instructions that require additional considerations, and may require
3949 /// manual mitigation. For more details, see:
3950 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3951 ///
3952 /// Emits an LFENCE after the instruction when appropriate, or a warning for cases that require manual mitigation.
3953 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
3954                                                    MCStreamer &Out) {
3955   auto Opcode = Inst.getOpcode();
3956   auto Flags = Inst.getFlags();
3957   if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
3958     // Information on REP string instructions that require manual mitigation can
3959     // be found here:
3960     // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3961     switch (Opcode) {
3962     case X86::CMPSB:
3963     case X86::CMPSW:
3964     case X86::CMPSL:
3965     case X86::CMPSQ:
3966     case X86::SCASB:
3967     case X86::SCASW:
3968     case X86::SCASL:
3969     case X86::SCASQ:
3970       emitWarningForSpecialLVIInstruction(Inst.getLoc());
3971       return;
3972     }
3973   } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
3974     // If a REP instruction is found on its own line, it may or may not be
3975     // followed by a vulnerable instruction. Emit a warning just in case.
3976     emitWarningForSpecialLVIInstruction(Inst.getLoc());
3977     return;
3978   }
3979 
3980   const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
3981 
3982   // Can't mitigate after terminators or calls. A control flow change may have
3983   // already occurred.
3984   if (MCID.isTerminator() || MCID.isCall())
3985     return;
3986 
3987   // LFENCE has the mayLoad property, don't double fence.
3988   if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
3989     MCInst FenceInst;
3990     FenceInst.setOpcode(X86::LFENCE);
3991     Out.emitInstruction(FenceInst, getSTI());
3992   }
3993 }
3994 
3995 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
3996                                    MCStreamer &Out) {
3997   if (LVIInlineAsmHardening &&
3998       getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
3999     applyLVICFIMitigation(Inst, Out);
4000 
4001   Out.emitInstruction(Inst, getSTI());
4002 
4003   if (LVIInlineAsmHardening &&
4004       getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
4005     applyLVILoadHardeningMitigation(Inst, Out);
4006 }
4007 
4008 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4009                                            OperandVector &Operands,
4010                                            MCStreamer &Out, uint64_t &ErrorInfo,
4011                                            bool MatchingInlineAsm) {
4012   if (isParsingIntelSyntax())
4013     return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4014                                         MatchingInlineAsm);
4015   return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4016                                     MatchingInlineAsm);
4017 }
4018 
4019 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4020                                      OperandVector &Operands, MCStreamer &Out,
4021                                      bool MatchingInlineAsm) {
4022   // FIXME: This should be replaced with a real .td file alias mechanism.
4023   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4024   // call.
4025   const char *Repl = StringSwitch<const char *>(Op.getToken())
4026                          .Case("finit", "fninit")
4027                          .Case("fsave", "fnsave")
4028                          .Case("fstcw", "fnstcw")
4029                          .Case("fstcww", "fnstcw")
4030                          .Case("fstenv", "fnstenv")
4031                          .Case("fstsw", "fnstsw")
4032                          .Case("fstsww", "fnstsw")
4033                          .Case("fclex", "fnclex")
4034                          .Default(nullptr);
4035   if (Repl) {
4036     MCInst Inst;
4037     Inst.setOpcode(X86::WAIT);
4038     Inst.setLoc(IDLoc);
4039     if (!MatchingInlineAsm)
4040       emitInstruction(Inst, Operands, Out);
4041     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4042   }
4043 }
4044 
4045 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4046                                        const FeatureBitset &MissingFeatures,
4047                                        bool MatchingInlineAsm) {
4048   assert(MissingFeatures.any() && "Unknown missing feature!");
4049   SmallString<126> Msg;
4050   raw_svector_ostream OS(Msg);
4051   OS << "instruction requires:";
4052   for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4053     if (MissingFeatures[i])
4054       OS << ' ' << getSubtargetFeatureName(i);
4055   }
4056   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4057 }
4058 
4059 static unsigned getPrefixes(OperandVector &Operands) {
4060   unsigned Result = 0;
4061   X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4062   if (Prefix.isPrefix()) {
4063     Result = Prefix.getPrefix();
4064     Operands.pop_back();
4065   }
4066   return Result;
4067 }
4068 
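// Reject matches whose encoding does not agree with an explicitly requested
// {vex}/{vex2}/{vex3}/{evex} pseudo prefix.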
4069 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4070   unsigned Opc = Inst.getOpcode();
4071   const MCInstrDesc &MCID = MII.get(Opc);
4072 
4073   if (ForcedVEXEncoding == VEXEncoding_EVEX &&
4074       (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
4075     return Match_Unsupported;
4076 
4077   if ((ForcedVEXEncoding == VEXEncoding_VEX ||
4078        ForcedVEXEncoding == VEXEncoding_VEX2 ||
4079        ForcedVEXEncoding == VEXEncoding_VEX3) &&
4080       (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
4081     return Match_Unsupported;
4082 
4083   // These instructions are only available with the {vex}, {vex2} or {vex3} prefix.
4084   if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
4085       (ForcedVEXEncoding != VEXEncoding_VEX &&
4086        ForcedVEXEncoding != VEXEncoding_VEX2 &&
4087        ForcedVEXEncoding != VEXEncoding_VEX3))
4088     return Match_Unsupported;
4089 
4090   // These instructions match ambiguously with their VEX encoded counterparts
4091   // and appear first in the matching table. Reject them unless we're forcing
4092   // EVEX encoding.
4093   // FIXME: We really need a way to break the ambiguity.
4094   switch (Opc) {
4095   case X86::VCVTSD2SIZrm_Int:
4096   case X86::VCVTSD2SI64Zrm_Int:
4097   case X86::VCVTSS2SIZrm_Int:
4098   case X86::VCVTSS2SI64Zrm_Int:
4099   case X86::VCVTTSD2SIZrm:   case X86::VCVTTSD2SIZrm_Int:
4100   case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
4101   case X86::VCVTTSS2SIZrm:   case X86::VCVTTSS2SIZrm_Int:
4102   case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
4103     if (ForcedVEXEncoding != VEXEncoding_EVEX)
4104       return Match_Unsupported;
4105     break;
4106   }
4107 
4108   return Match_Success;
4109 }
4110 
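// Match an instruction parsed with AT&T syntax. If the bare mnemonic does
// not match, retry with each size suffix appended (b/w/l/q for integer
// instructions, s/l/t for x87) to infer an unambiguous form.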
4111 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
4112                                               OperandVector &Operands,
4113                                               MCStreamer &Out,
4114                                               uint64_t &ErrorInfo,
4115                                               bool MatchingInlineAsm) {
4116   assert(!Operands.empty() && "Unexpected empty operand list!");
4117   assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4118   SMRange EmptyRange = None;
4119 
4120   // First, handle aliases that expand to multiple instructions.
4121   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4122                     Out, MatchingInlineAsm);
4123   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4124   unsigned Prefixes = getPrefixes(Operands);
4125 
4126   MCInst Inst;
4127 
4128   // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4129   // encoder and printer.
4130   if (ForcedVEXEncoding == VEXEncoding_VEX)
4131     Prefixes |= X86::IP_USE_VEX;
4132   else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4133     Prefixes |= X86::IP_USE_VEX2;
4134   else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4135     Prefixes |= X86::IP_USE_VEX3;
4136   else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4137     Prefixes |= X86::IP_USE_EVEX;
4138 
4139   // Set encoded flags for {disp8} and {disp32}.
4140   if (ForcedDispEncoding == DispEncoding_Disp8)
4141     Prefixes |= X86::IP_USE_DISP8;
4142   else if (ForcedDispEncoding == DispEncoding_Disp32)
4143     Prefixes |= X86::IP_USE_DISP32;
4144 
4145   if (Prefixes)
4146     Inst.setFlags(Prefixes);
4147 
4148   // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4149   // when matching the instruction.
4150   if (ForcedDataPrefix == X86::Mode32Bit)
4151     SwitchMode(X86::Mode32Bit);
4152   // First, try a direct match.
4153   FeatureBitset MissingFeatures;
4154   unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4155                                             MissingFeatures, MatchingInlineAsm,
4156                                             isParsingIntelSyntax());
4157   if (ForcedDataPrefix == X86::Mode32Bit) {
4158     SwitchMode(X86::Mode16Bit);
4159     ForcedDataPrefix = 0;
4160   }
4161   switch (OriginalError) {
4162   default: llvm_unreachable("Unexpected match result!");
4163   case Match_Success:
4164     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4165       return true;
4166     // Some instructions need post-processing to, for example, tweak which
4167     // encoding is selected. Loop on it while changes happen so the
4168     // individual transformations can chain off each other.
4169     if (!MatchingInlineAsm)
4170       while (processInstruction(Inst, Operands))
4171         ;
4172 
4173     Inst.setLoc(IDLoc);
4174     if (!MatchingInlineAsm)
4175       emitInstruction(Inst, Operands, Out);
4176     Opcode = Inst.getOpcode();
4177     return false;
4178   case Match_InvalidImmUnsignedi4: {
4179     SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4180     if (ErrorLoc == SMLoc())
4181       ErrorLoc = IDLoc;
4182     return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4183                  EmptyRange, MatchingInlineAsm);
4184   }
4185   case Match_MissingFeature:
4186     return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4187   case Match_InvalidOperand:
4188   case Match_MnemonicFail:
4189   case Match_Unsupported:
4190     break;
4191   }
4192   if (Op.getToken().empty()) {
4193     Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4194           MatchingInlineAsm);
4195     return true;
4196   }
4197 
4198   // FIXME: Ideally, we would only attempt suffix matches for things which are
4199   // valid prefixes, and we could just infer the right unambiguous
4200   // type. However, that requires substantially more matcher support than the
4201   // following hack.
4202 
4203   // Change the operand to point to a temporary token.
4204   StringRef Base = Op.getToken();
4205   SmallString<16> Tmp;
4206   Tmp += Base;
4207   Tmp += ' ';
4208   Op.setTokenValue(Tmp);
4209 
4210   // If this instruction starts with an 'f', then it is a floating point stack
4211   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
4212   // 80-bit floating point, which use the suffixes s,l,t respectively.
4213   //
4214   // Otherwise, we assume that this may be an integer instruction, which comes
4215   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4216   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4217   // MemSize corresponding to Suffixes.  { 8, 16, 32, 64 }    { 32, 64, 80, 0 }
4218   const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4219 
4220   // Check for the various suffix matches.
4221   uint64_t ErrorInfoIgnore;
4222   FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4223   unsigned Match[4];
4224 
4225   // Some instructions, like VPMULDQ, are NOT variants of VPMULD but new ones.
4226   // So we should make sure the suffix matcher only accepts memory variants
4227   // whose size matches the suffix.
4228   // FIXME: This flag is a workaround for legacy instructions that didn't
4229   // declare a non-suffixed assembly variant.
4230   bool HasVectorReg = false;
4231   X86Operand *MemOp = nullptr;
4232   for (const auto &Op : Operands) {
4233     X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4234     if (X86Op->isVectorReg())
4235       HasVectorReg = true;
4236     else if (X86Op->isMem()) {
4237       MemOp = X86Op;
4238       assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4239       // Once we have found an unqualified memory operand, break;
4240       // IA allows only one memory operand.
4241       break;
4242     }
4243   }
4244 
4245   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
4246     Tmp.back() = Suffixes[I];
4247     if (MemOp && HasVectorReg)
4248       MemOp->Mem.Size = MemSize[I];
4249     Match[I] = Match_MnemonicFail;
4250     if (MemOp || !HasVectorReg) {
4251       Match[I] =
4252           MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4253                            MatchingInlineAsm, isParsingIntelSyntax());
4254       // If this returned as a missing feature failure, remember that.
4255       if (Match[I] == Match_MissingFeature)
4256         ErrorInfoMissingFeatures = MissingFeatures;
4257     }
4258   }
4259 
4260   // Restore the old token.
4261   Op.setTokenValue(Base);
4262 
4263   // If exactly one matched, then we treat that as a successful match (and the
4264   // instruction will already have been filled in correctly, since the failing
4265   // matches won't have modified it).
4266   unsigned NumSuccessfulMatches =
4267       std::count(std::begin(Match), std::end(Match), Match_Success);
4268   if (NumSuccessfulMatches == 1) {
4269     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4270       return true;
4271     // Some instructions need post-processing to, for example, tweak which
4272     // encoding is selected. Loop on it while changes happen so the
4273     // individual transformations can chain off each other.
4274     if (!MatchingInlineAsm)
4275       while (processInstruction(Inst, Operands))
4276         ;
4277 
4278     Inst.setLoc(IDLoc);
4279     if (!MatchingInlineAsm)
4280       emitInstruction(Inst, Operands, Out);
4281     Opcode = Inst.getOpcode();
4282     return false;
4283   }
4284 
4285   // Otherwise, the match failed, try to produce a decent error message.
4286 
4287   // If we had multiple suffix matches, then identify this as an ambiguous
4288   // match.
4289   if (NumSuccessfulMatches > 1) {
4290     char MatchChars[4];
4291     unsigned NumMatches = 0;
4292     for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
4293       if (Match[I] == Match_Success)
4294         MatchChars[NumMatches++] = Suffixes[I];
4295 
4296     SmallString<126> Msg;
4297     raw_svector_ostream OS(Msg);
4298     OS << "ambiguous instructions require an explicit suffix (could be ";
4299     for (unsigned i = 0; i != NumMatches; ++i) {
4300       if (i != 0)
4301         OS << ", ";
4302       if (i + 1 == NumMatches)
4303         OS << "or ";
4304       OS << "'" << Base << MatchChars[i] << "'";
4305     }
4306     OS << ")";
4307     Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4308     return true;
4309   }
4310 
4311   // Okay, we know that none of the variants matched successfully.
4312 
4313   // If all of the instructions reported an invalid mnemonic, then the original
4314   // mnemonic was invalid.
4315   if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
4316     if (OriginalError == Match_MnemonicFail)
4317       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4318                    Op.getLocRange(), MatchingInlineAsm);
4319 
4320     if (OriginalError == Match_Unsupported)
4321       return Error(IDLoc, "unsupported instruction", EmptyRange,
4322                    MatchingInlineAsm);
4323 
4324     assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4325     // Recover location info for the operand if we know which was the problem.
4326     if (ErrorInfo != ~0ULL) {
4327       if (ErrorInfo >= Operands.size())
4328         return Error(IDLoc, "too few operands for instruction", EmptyRange,
4329                      MatchingInlineAsm);
4330 
4331       X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4332       if (Operand.getStartLoc().isValid()) {
4333         SMRange OperandRange = Operand.getLocRange();
4334         return Error(Operand.getStartLoc(), "invalid operand for instruction",
4335                      OperandRange, MatchingInlineAsm);
4336       }
4337     }
4338 
4339     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4340                  MatchingInlineAsm);
4341   }
4342 
4343   // If one instruction matched as unsupported, report this as unsupported.
4344   if (std::count(std::begin(Match), std::end(Match),
4345                  Match_Unsupported) == 1) {
4346     return Error(IDLoc, "unsupported instruction", EmptyRange,
4347                  MatchingInlineAsm);
4348   }
4349 
4350   // If one instruction matched with a missing feature, report this as a
4351   // missing feature.
4352   if (std::count(std::begin(Match), std::end(Match),
4353                  Match_MissingFeature) == 1) {
4354     ErrorInfo = Match_MissingFeature;
4355     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4356                                MatchingInlineAsm);
4357   }
4358 
4359   // If one instruction matched with an invalid operand, report this as an
4360   // operand failure.
4361   if (std::count(std::begin(Match), std::end(Match),
4362                  Match_InvalidOperand) == 1) {
4363     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4364                  MatchingInlineAsm);
4365   }
4366 
4367   // If all of these were an outright failure, report it in a useless way.
4368   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4369         EmptyRange, MatchingInlineAsm);
4370   return true;
4371 }
4372 
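// Match an instruction parsed with Intel syntax. Since Intel mnemonics carry
// no size suffix, an unsized memory operand is retried with each plausible
// operand size (8 through 512 bits) until exactly one form matches.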
4373 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
4374                                                 OperandVector &Operands,
4375                                                 MCStreamer &Out,
4376                                                 uint64_t &ErrorInfo,
4377                                                 bool MatchingInlineAsm) {
4378   assert(!Operands.empty() && "Unexpected empty operand list!");
4379   assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4380   StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4381   SMRange EmptyRange = None;
4382   StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4383   unsigned Prefixes = getPrefixes(Operands);
4384 
4385   // First, handle aliases that expand to multiple instructions.
4386   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
4387   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4388 
4389   MCInst Inst;
4390 
4391   // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4392   // encoder and printer.
4393   if (ForcedVEXEncoding == VEXEncoding_VEX)
4394     Prefixes |= X86::IP_USE_VEX;
4395   else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4396     Prefixes |= X86::IP_USE_VEX2;
4397   else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4398     Prefixes |= X86::IP_USE_VEX3;
4399   else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4400     Prefixes |= X86::IP_USE_EVEX;
4401 
4402   // Set encoded flags for {disp8} and {disp32}.
4403   if (ForcedDispEncoding == DispEncoding_Disp8)
4404     Prefixes |= X86::IP_USE_DISP8;
4405   else if (ForcedDispEncoding == DispEncoding_Disp32)
4406     Prefixes |= X86::IP_USE_DISP32;
4407 
4408   if (Prefixes)
4409     Inst.setFlags(Prefixes);
4410 
4411   // Find one unsized memory operand, if present.
4412   X86Operand *UnsizedMemOp = nullptr;
4413   for (const auto &Op : Operands) {
4414     X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4415     if (X86Op->isMemUnsized()) {
4416       UnsizedMemOp = X86Op;
4417       // Once we have found an unqualified memory operand, break;
4418       // IA allows only one memory operand.
4419       break;
4420     }
4421   }
4422 
4423   // Allow some instructions to have implicitly pointer-sized operands.  This is
4424   // compatible with gas.
4425   if (UnsizedMemOp) {
4426     static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4427     for (const char *Instr : PtrSizedInstrs) {
4428       if (Mnemonic == Instr) {
4429         UnsizedMemOp->Mem.Size = getPointerWidth();
4430         break;
4431       }
4432     }
4433   }
4434 
4435   SmallVector<unsigned, 8> Match;
4436   FeatureBitset ErrorInfoMissingFeatures;
4437   FeatureBitset MissingFeatures;
4438 
4439   // If an unsized push has an immediate operand, default its size to the
4440   // pointer width.
4441   if (Mnemonic == "push" && Operands.size() == 2) {
4442     auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4443     if (X86Op->isImm()) {
4444       // If it's not a constant fall through and let remainder take care of it.
4445       const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4446       unsigned Size = getPointerWidth();
4447       if (CE &&
4448           (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4449         SmallString<16> Tmp;
4450         Tmp += Base;
4451         Tmp += (is64BitMode())
4452                    ? "q"
4453                    : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4454         Op.setTokenValue(Tmp);
4455         // Do match in ATT mode to allow explicit suffix usage.
4456         Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4457                                          MissingFeatures, MatchingInlineAsm,
4458                                          false /*isParsingIntelSyntax()*/));
4459         Op.setTokenValue(Base);
4460       }
4461     }
4462   }
4463 
4464   // If an unsized memory operand is present, try to match with each memory
4465   // operand size.  In Intel assembly, the size is not part of the instruction
4466   // mnemonic.
4467   if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4468     static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4469     for (unsigned Size : MopSizes) {
4470       UnsizedMemOp->Mem.Size = Size;
4471       uint64_t ErrorInfoIgnore;
4472       unsigned LastOpcode = Inst.getOpcode();
4473       unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4474                                     MissingFeatures, MatchingInlineAsm,
4475                                     isParsingIntelSyntax());
4476       if (Match.empty() || LastOpcode != Inst.getOpcode())
4477         Match.push_back(M);
4478 
4479       // If this returned as a missing feature failure, remember that.
4480       if (Match.back() == Match_MissingFeature)
4481         ErrorInfoMissingFeatures = MissingFeatures;
4482     }
4483 
4484     // Restore the size of the unsized memory operand if we modified it.
4485     UnsizedMemOp->Mem.Size = 0;
4486   }
4487 
4488   // If we haven't matched anything yet, this is not a basic integer or FPU
4489   // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
4490   // matching with the unsized operand.
4491   if (Match.empty()) {
4492     Match.push_back(MatchInstruction(
4493         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4494         isParsingIntelSyntax()));
4495     // If this returned as a missing feature failure, remember that.
4496     if (Match.back() == Match_MissingFeature)
4497       ErrorInfoMissingFeatures = MissingFeatures;
4498   }
4499 
4500   // Restore the size of the unsized memory operand if we modified it.
4501   if (UnsizedMemOp)
4502     UnsizedMemOp->Mem.Size = 0;
4503 
4504   // If it's a bad mnemonic, all results will be the same.
4505   if (Match.back() == Match_MnemonicFail) {
4506     return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4507                  Op.getLocRange(), MatchingInlineAsm);
4508   }
4509 
4510   unsigned NumSuccessfulMatches =
4511       std::count(std::begin(Match), std::end(Match), Match_Success);
4512 
4513   // If matching was ambiguous and we had size information from the frontend,
4514   // try again with that. This handles cases like "movzx eax, m8/m16".
4515   if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4516       UnsizedMemOp->getMemFrontendSize()) {
4517     UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4518     unsigned M = MatchInstruction(
4519         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4520         isParsingIntelSyntax());
4521     if (M == Match_Success)
4522       NumSuccessfulMatches = 1;
4523 
4524     // Add a rewrite that encodes the size information we used from the
4525     // frontend.
4526     InstInfo->AsmRewrites->emplace_back(
4527         AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4528         /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4529   }
4530 
4531   // If exactly one matched, then we treat that as a successful match (and the
4532   // instruction will already have been filled in correctly, since the failing
4533   // matches won't have modified it).
4534   if (NumSuccessfulMatches == 1) {
4535     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4536       return true;
4537     // Some instructions need post-processing to, for example, tweak which
4538     // encoding is selected. Loop on it while changes happen so the individual
4539     // transformations can chain off each other.
4540     if (!MatchingInlineAsm)
4541       while (processInstruction(Inst, Operands))
4542         ;
4543     Inst.setLoc(IDLoc);
4544     if (!MatchingInlineAsm)
4545       emitInstruction(Inst, Operands, Out);
4546     Opcode = Inst.getOpcode();
4547     return false;
4548   } else if (NumSuccessfulMatches > 1) {
4549     assert(UnsizedMemOp &&
4550            "multiple matches only possible with unsized memory operands");
4551     return Error(UnsizedMemOp->getStartLoc(),
4552                  "ambiguous operand size for instruction '" + Mnemonic + "\'",
4553                  UnsizedMemOp->getLocRange());
4554   }
4555 
4556   // If one instruction matched as unsupported, report this as unsupported.
4557   if (std::count(std::begin(Match), std::end(Match),
4558                  Match_Unsupported) == 1) {
4559     return Error(IDLoc, "unsupported instruction", EmptyRange,
4560                  MatchingInlineAsm);
4561   }
4562 
4563   // If one instruction matched with a missing feature, report this as a
4564   // missing feature.
4565   if (std::count(std::begin(Match), std::end(Match),
4566                  Match_MissingFeature) == 1) {
4567     ErrorInfo = Match_MissingFeature;
4568     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4569                                MatchingInlineAsm);
4570   }
4571 
4572   // If one instruction matched with an invalid operand, report this as an
4573   // operand failure.
4574   if (std::count(std::begin(Match), std::end(Match),
4575                  Match_InvalidOperand) == 1) {
4576     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4577                  MatchingInlineAsm);
4578   }
4579 
4580   if (std::count(std::begin(Match), std::end(Match),
4581                  Match_InvalidImmUnsignedi4) == 1) {
4582     SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4583     if (ErrorLoc == SMLoc())
4584       ErrorLoc = IDLoc;
4585     return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4586                  EmptyRange, MatchingInlineAsm);
4587   }
4588 
4589   // If all of these were an outright failure, report it in a useless way.
4590   return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4591                MatchingInlineAsm);
4592 }
4593 
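// Segment registers are never added to inline-asm clobber lists.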
4594 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4595   return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4596 }
4597 
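// Handle target-specific directives: .arch, .code16/32/64, syntax switches,
// .nops, .even, and the CodeView FPO and Win64 SEH unwind directives.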
4598 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4599   MCAsmParser &Parser = getParser();
4600   StringRef IDVal = DirectiveID.getIdentifier();
4601   if (IDVal.startswith(".arch"))
4602     return parseDirectiveArch();
4603   if (IDVal.startswith(".code"))
4604     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4605   else if (IDVal.startswith(".att_syntax")) {
4606     if (getLexer().isNot(AsmToken::EndOfStatement)) {
4607       if (Parser.getTok().getString() == "prefix")
4608         Parser.Lex();
4609       else if (Parser.getTok().getString() == "noprefix")
4610         return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4611                                            "supported: registers must have a "
4612                                            "'%' prefix in .att_syntax");
4613     }
4614     getParser().setAssemblerDialect(0);
4615     return false;
4616   } else if (IDVal.startswith(".intel_syntax")) {
4617     getParser().setAssemblerDialect(1);
4618     if (getLexer().isNot(AsmToken::EndOfStatement)) {
4619       if (Parser.getTok().getString() == "noprefix")
4620         Parser.Lex();
4621       else if (Parser.getTok().getString() == "prefix")
4622         return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4623                                            "supported: registers must not have "
4624                                            "a '%' prefix in .intel_syntax");
4625     }
4626     return false;
4627   } else if (IDVal == ".nops")
4628     return parseDirectiveNops(DirectiveID.getLoc());
4629   else if (IDVal == ".even")
4630     return parseDirectiveEven(DirectiveID.getLoc());
4631   else if (IDVal == ".cv_fpo_proc")
4632     return parseDirectiveFPOProc(DirectiveID.getLoc());
4633   else if (IDVal == ".cv_fpo_setframe")
4634     return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4635   else if (IDVal == ".cv_fpo_pushreg")
4636     return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4637   else if (IDVal == ".cv_fpo_stackalloc")
4638     return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4639   else if (IDVal == ".cv_fpo_stackalign")
4640     return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4641   else if (IDVal == ".cv_fpo_endprologue")
4642     return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4643   else if (IDVal == ".cv_fpo_endproc")
4644     return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4645   else if (IDVal == ".seh_pushreg" ||
4646            (Parser.isParsingMasm() && IDVal.equals_lower(".pushreg")))
4647     return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4648   else if (IDVal == ".seh_setframe" ||
4649            (Parser.isParsingMasm() && IDVal.equals_lower(".setframe")))
4650     return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4651   else if (IDVal == ".seh_savereg" ||
4652            (Parser.isParsingMasm() && IDVal.equals_lower(".savereg")))
4653     return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4654   else if (IDVal == ".seh_savexmm" ||
4655            (Parser.isParsingMasm() && IDVal.equals_lower(".savexmm128")))
4656     return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4657   else if (IDVal == ".seh_pushframe" ||
4658            (Parser.isParsingMasm() && IDVal.equals_lower(".pushframe")))
4659     return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4660 
4661   return true;
4662 }
4663 
4664 bool X86AsmParser::parseDirectiveArch() {
4665   // Ignore .arch for now.
4666   getParser().parseStringToEndOfStatement();
4667   return false;
4668 }
4669 
4670 /// parseDirectiveNops
4671 ///  ::= .nops size[, control]
4672 bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4673   int64_t NumBytes = 0, Control = 0;
4674   SMLoc NumBytesLoc, ControlLoc;
4675   const MCSubtargetInfo STI = getSTI();
4676   NumBytesLoc = getTok().getLoc();
4677   if (getParser().checkForValidSection() ||
4678       getParser().parseAbsoluteExpression(NumBytes))
4679     return true;
4680 
4681   if (parseOptionalToken(AsmToken::Comma)) {
4682     ControlLoc = getTok().getLoc();
4683     if (getParser().parseAbsoluteExpression(Control))
4684       return true;
4685   }
4686   if (getParser().parseToken(AsmToken::EndOfStatement,
4687                              "unexpected token in '.nops' directive"))
4688     return true;
4689 
4690   if (NumBytes <= 0) {
4691     Error(NumBytesLoc, "'.nops' directive with non-positive size");
4692     return false;
4693   }
4694 
4695   if (Control < 0) {
4696     Error(ControlLoc, "'.nops' directive with negative NOP size");
4697     return false;
4698   }
4699 
4700   /// Emit nops
4701   getParser().getStreamer().emitNops(NumBytes, Control, L);
4702 
4703   return false;
4704 }
4705 
4706 /// parseDirectiveEven
4707 ///  ::= .even
4708 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4709   if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
4710     return false;
4711 
4712   const MCSection *Section = getStreamer().getCurrentSectionOnly();
4713   if (!Section) {
4714     getStreamer().InitSections(false);
4715     Section = getStreamer().getCurrentSectionOnly();
4716   }
4717   if (Section->UseCodeAlign())
4718     getStreamer().emitCodeAlignment(2, 0);
4719   else
4720     getStreamer().emitValueToAlignment(2, 0, 1, 0);
4721   return false;
4722 }
4723 
4724 /// ParseDirectiveCode
4725 ///  ::= .code16 | .code32 | .code64
4726 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4727   MCAsmParser &Parser = getParser();
4728   Code16GCC = false;
4729   if (IDVal == ".code16") {
4730     Parser.Lex();
4731     if (!is16BitMode()) {
4732       SwitchMode(X86::Mode16Bit);
4733       getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4734     }
4735   } else if (IDVal == ".code16gcc") {
4736     // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4737     Parser.Lex();
4738     Code16GCC = true;
4739     if (!is16BitMode()) {
4740       SwitchMode(X86::Mode16Bit);
4741       getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4742     }
4743   } else if (IDVal == ".code32") {
4744     Parser.Lex();
4745     if (!is32BitMode()) {
4746       SwitchMode(X86::Mode32Bit);
4747       getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4748     }
4749   } else if (IDVal == ".code64") {
4750     Parser.Lex();
4751     if (!is64BitMode()) {
4752       SwitchMode(X86::Mode64Bit);
4753       getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4754     }
4755   } else {
4756     Error(L, "unknown directive " + IDVal);
4757     return false;
4758   }
4759 
4760   return false;
4761 }
4762 
4763 // .cv_fpo_proc foo
4764 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4765   MCAsmParser &Parser = getParser();
4766   StringRef ProcName;
4767   int64_t ParamsSize;
4768   if (Parser.parseIdentifier(ProcName))
4769     return Parser.TokError("expected symbol name");
4770   if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4771     return true;
4772   if (!isUIntN(32, ParamsSize))
4773     return Parser.TokError("parameters size out of range");
4774   if (Parser.parseEOL("unexpected tokens"))
4775     return addErrorSuffix(" in '.cv_fpo_proc' directive");
4776   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4777   return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4778 }
4779 
4780 // .cv_fpo_setframe ebp
4781 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4782   MCAsmParser &Parser = getParser();
4783   unsigned Reg;
4784   SMLoc DummyLoc;
4785   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
4786       Parser.parseEOL("unexpected tokens"))
4787     return addErrorSuffix(" in '.cv_fpo_setframe' directive");
4788   return getTargetStreamer().emitFPOSetFrame(Reg, L);
4789 }
4790 
4791 // .cv_fpo_pushreg ebx
4792 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4793   MCAsmParser &Parser = getParser();
4794   unsigned Reg;
4795   SMLoc DummyLoc;
4796   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
4797       Parser.parseEOL("unexpected tokens"))
4798     return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
4799   return getTargetStreamer().emitFPOPushReg(Reg, L);
4800 }
4801 
4802 // .cv_fpo_stackalloc 20
4803 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4804   MCAsmParser &Parser = getParser();
4805   int64_t Offset;
4806   if (Parser.parseIntToken(Offset, "expected offset") ||
4807       Parser.parseEOL("unexpected tokens"))
4808     return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
4809   return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4810 }
4811 
4812 // .cv_fpo_stackalign 8
4813 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4814   MCAsmParser &Parser = getParser();
4815   int64_t Offset;
4816   if (Parser.parseIntToken(Offset, "expected offset") ||
4817       Parser.parseEOL("unexpected tokens"))
4818     return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
4819   return getTargetStreamer().emitFPOStackAlign(Offset, L);
4820 }
4821 
4822 // .cv_fpo_endprologue
4823 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4824   MCAsmParser &Parser = getParser();
4825   if (Parser.parseEOL("unexpected tokens"))
4826     return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
4827   return getTargetStreamer().emitFPOEndPrologue(L);
4828 }
4829 
4830 // .cv_fpo_endproc
4831 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4832   MCAsmParser &Parser = getParser();
4833   if (Parser.parseEOL("unexpected tokens"))
4834     return addErrorSuffix(" in '.cv_fpo_endproc' directive");
4835   return getTargetStreamer().emitFPOEndProc(L);
4836 }
4837 
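// Parse a register argument for an SEH directive, given either by name or by
// its numeric encoding, and map it back to an LLVM register number.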
4838 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4839                                           unsigned &RegNo) {
4840   SMLoc startLoc = getLexer().getLoc();
4841   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4842 
4843   // Try parsing the argument as a register first.
4844   if (getLexer().getTok().isNot(AsmToken::Integer)) {
4845     SMLoc endLoc;
4846     if (ParseRegister(RegNo, startLoc, endLoc))
4847       return true;
4848 
4849     if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4850       return Error(startLoc,
4851                    "register is not supported for use with this directive");
4852     }
4853   } else {
4854     // Otherwise, an integer number matching the encoding of the desired
4855     // register may appear.
4856     int64_t EncodedReg;
4857     if (getParser().parseAbsoluteExpression(EncodedReg))
4858       return true;
4859 
4860     // The SEH register number is the same as the encoding register number. Map
4861     // from the encoding back to the LLVM register number.
4862     RegNo = 0;
4863     for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4864       if (MRI->getEncodingValue(Reg) == EncodedReg) {
4865         RegNo = Reg;
4866         break;
4867       }
4868     }
4869     if (RegNo == 0) {
4870       return Error(startLoc,
4871                    "incorrect register number for use with this directive");
4872     }
4873   }
4874 
4875   return false;
4876 }
4877 
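// .seh_pushreg <reg>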
4878 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4879   unsigned Reg = 0;
4880   if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4881     return true;
4882 
4883   if (getLexer().isNot(AsmToken::EndOfStatement))
4884     return TokError("unexpected token in directive");
4885 
4886   getParser().Lex();
4887   getStreamer().EmitWinCFIPushReg(Reg, Loc);
4888   return false;
4889 }
4890 
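// .seh_setframe <reg>, <offset>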
4891 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4892   unsigned Reg = 0;
4893   int64_t Off;
4894   if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4895     return true;
4896   if (getLexer().isNot(AsmToken::Comma))
4897     return TokError("you must specify a stack pointer offset");
4898 
4899   getParser().Lex();
4900   if (getParser().parseAbsoluteExpression(Off))
4901     return true;
4902 
4903   if (getLexer().isNot(AsmToken::EndOfStatement))
4904     return TokError("unexpected token in directive");
4905 
4906   getParser().Lex();
4907   getStreamer().EmitWinCFISetFrame(Reg, Off, Loc);
4908   return false;
4909 }
4910 
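// .seh_savereg <reg>, <offset>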
4911 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4912   unsigned Reg = 0;
4913   int64_t Off;
4914   if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4915     return true;
4916   if (getLexer().isNot(AsmToken::Comma))
4917     return TokError("you must specify an offset on the stack");
4918 
4919   getParser().Lex();
4920   if (getParser().parseAbsoluteExpression(Off))
4921     return true;
4922 
4923   if (getLexer().isNot(AsmToken::EndOfStatement))
4924     return TokError("unexpected token in directive");
4925 
4926   getParser().Lex();
4927   getStreamer().EmitWinCFISaveReg(Reg, Off, Loc);
4928   return false;
4929 }
4930 
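// .seh_savexmm <xmm reg>, <offset>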
4931 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4932   unsigned Reg = 0;
4933   int64_t Off;
4934   if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4935     return true;
4936   if (getLexer().isNot(AsmToken::Comma))
4937     return TokError("you must specify an offset on the stack");
4938 
4939   getParser().Lex();
4940   if (getParser().parseAbsoluteExpression(Off))
4941     return true;
4942 
4943   if (getLexer().isNot(AsmToken::EndOfStatement))
4944     return TokError("unexpected token in directive");
4945 
4946   getParser().Lex();
4947   getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc);
4948   return false;
4949 }
4950 
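// .seh_pushframe [@code]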
4951 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4952   bool Code = false;
4953   StringRef CodeID;
4954   if (getLexer().is(AsmToken::At)) {
4955     SMLoc startLoc = getLexer().getLoc();
4956     getParser().Lex();
4957     if (!getParser().parseIdentifier(CodeID)) {
4958       if (CodeID != "code")
4959         return Error(startLoc, "expected @code");
4960       Code = true;
4961     }
4962   }
4963 
4964   if (getLexer().isNot(AsmToken::EndOfStatement))
4965     return TokError("unexpected token in directive");
4966 
4967   getParser().Lex();
4968   getStreamer().EmitWinCFIPushFrame(Code, Loc);
4969   return false;
4970 }
4971 
4972 // Force static initialization.
4973 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() {
4974   RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
4975   RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
4976 }
4977 
4978 #define GET_REGISTER_MATCHER
4979 #define GET_MATCHER_IMPLEMENTATION
4980 #define GET_SUBTARGET_FEATURE_NAME
4981 #include "X86GenAsmMatcher.inc"
4982