1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <memory>
36
37 using namespace llvm;
38
39 namespace {
40
/// Operator precedence table for the infix calculator. Entry i belongs to the
/// InfixCalculatorTok whose enumerator value is i; an operator with a larger
/// entry is pushed on top of one with a smaller entry instead of displacing it.
static const char OpPrecedence[] = {
  /* IC_OR       */ 0,
  /* IC_AND      */ 1,
  /* IC_LSHIFT   */ 2,
  /* IC_RSHIFT   */ 2,
  /* IC_PLUS     */ 3,
  /* IC_MINUS    */ 3,
  /* IC_MULTIPLY */ 4,
  /* IC_DIVIDE   */ 4,
  /* IC_RPAREN   */ 5,
  /* IC_LPAREN   */ 6,
  /* IC_IMM      */ 0,
  /* IC_REGISTER */ 0
};
55
56 class X86AsmParser : public MCTargetAsmParser {
57 MCSubtargetInfo &STI;
58 MCAsmParser &Parser;
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
62 private:
consumeToken()63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
65 Parser.Lex();
66 return Result;
67 }
68
/// Token kinds understood by InfixCalculator. The numeric values are used as
/// indices into OpPrecedence, so the order here must match that table.
enum InfixCalculatorTok {
  // Binary operators.
  IC_OR = 0,
  IC_AND = 1,
  IC_LSHIFT = 2,
  IC_RSHIFT = 3,
  IC_PLUS = 4,
  IC_MINUS = 5,
  IC_MULTIPLY = 6,
  IC_DIVIDE = 7,
  // Grouping.
  IC_RPAREN = 8,
  IC_LPAREN = 9,
  // Operands.
  IC_IMM = 10,
  IC_REGISTER = 11
};
83
84 class InfixCalculator {
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87 SmallVector<ICToken, 4> PostfixStack;
88
89 public:
popOperand()90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
95 return Op.second;
96 }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
101 }
102
popOperator()103 void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
108 return;
109 }
110
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
113 // left parentheses.
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
118 return;
119 }
120
121 // The operator on the top of the stack has higher precedence than the
122 // new operator.
123 unsigned ParenCount = 0;
124 while (1) {
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
127 break;
128
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
132 break;
133
134 // If we have an even parentheses count and we see a left parentheses,
135 // then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
137 break;
138
139 if (StackOp == IC_RPAREN) {
140 ++ParenCount;
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
143 --ParenCount;
144 InfixOperatorStack.pop_back();
145 } else {
146 InfixOperatorStack.pop_back();
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
148 }
149 }
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
152 }
execute()153 int64_t execute() {
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
159 }
160
161 if (PostfixStack.empty())
162 return 0;
163
164 SmallVector<ICToken, 16> OperandStack;
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
169 } else {
170 assert (OperandStack.size() > 1 && "Too few operands.");
171 int64_t Val;
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
174 switch (Op.first) {
175 default:
176 report_fatal_error("Unexpected operator!");
177 break;
178 case IC_PLUS:
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
181 break;
182 case IC_MINUS:
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
185 break;
186 case IC_MULTIPLY:
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
191 break;
192 case IC_DIVIDE:
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
198 break;
199 case IC_OR:
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 break;
205 case IC_AND:
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
210 break;
211 case IC_LSHIFT:
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
216 break;
217 case IC_RSHIFT:
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
222 break;
223 }
224 }
225 }
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
228 }
229 };
230
/// States of IntelExprStateMachine. Each state is named after the kind of
/// token that was accepted last; IES_ERROR marks a rejected token sequence.
enum IntelExprState {
  // Operators.
  IES_OR,
  IES_AND,
  IES_LSHIFT,
  IES_RSHIFT,
  IES_PLUS,
  IES_MINUS,
  IES_NOT,
  IES_MULTIPLY,
  IES_DIVIDE,
  // Brackets and parentheses.
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,
  // Operands.
  IES_REGISTER,
  IES_INTEGER,
  IES_IDENTIFIER,
  // Failure.
  IES_ERROR
};
250
251 class IntelExprStateMachine {
252 IntelExprState State, PrevState;
253 unsigned BaseReg, IndexReg, TmpReg, Scale;
254 int64_t Imm;
255 const MCExpr *Sym;
256 StringRef SymName;
257 bool StopOnLBrac, AddImmPrefix;
258 InfixCalculator IC;
259 InlineAsmIdentifierInfo Info;
260 public:
IntelExprStateMachine(int64_t imm,bool stoponlbrac,bool addimmprefix)261 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
262 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
263 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
264 AddImmPrefix(addimmprefix) { Info.clear(); }
265
getBaseReg()266 unsigned getBaseReg() { return BaseReg; }
getIndexReg()267 unsigned getIndexReg() { return IndexReg; }
getScale()268 unsigned getScale() { return Scale; }
getSym()269 const MCExpr *getSym() { return Sym; }
getSymName()270 StringRef getSymName() { return SymName; }
getImm()271 int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()272 bool isValidEndState() {
273 return State == IES_RBRAC || State == IES_INTEGER;
274 }
getStopOnLBrac()275 bool getStopOnLBrac() { return StopOnLBrac; }
getAddImmPrefix()276 bool getAddImmPrefix() { return AddImmPrefix; }
hadError()277 bool hadError() { return State == IES_ERROR; }
278
getIdentifierInfo()279 InlineAsmIdentifierInfo &getIdentifierInfo() {
280 return Info;
281 }
282
onOr()283 void onOr() {
284 IntelExprState CurrState = State;
285 switch (State) {
286 default:
287 State = IES_ERROR;
288 break;
289 case IES_INTEGER:
290 case IES_RPAREN:
291 case IES_REGISTER:
292 State = IES_OR;
293 IC.pushOperator(IC_OR);
294 break;
295 }
296 PrevState = CurrState;
297 }
onAnd()298 void onAnd() {
299 IntelExprState CurrState = State;
300 switch (State) {
301 default:
302 State = IES_ERROR;
303 break;
304 case IES_INTEGER:
305 case IES_RPAREN:
306 case IES_REGISTER:
307 State = IES_AND;
308 IC.pushOperator(IC_AND);
309 break;
310 }
311 PrevState = CurrState;
312 }
onLShift()313 void onLShift() {
314 IntelExprState CurrState = State;
315 switch (State) {
316 default:
317 State = IES_ERROR;
318 break;
319 case IES_INTEGER:
320 case IES_RPAREN:
321 case IES_REGISTER:
322 State = IES_LSHIFT;
323 IC.pushOperator(IC_LSHIFT);
324 break;
325 }
326 PrevState = CurrState;
327 }
onRShift()328 void onRShift() {
329 IntelExprState CurrState = State;
330 switch (State) {
331 default:
332 State = IES_ERROR;
333 break;
334 case IES_INTEGER:
335 case IES_RPAREN:
336 case IES_REGISTER:
337 State = IES_RSHIFT;
338 IC.pushOperator(IC_RSHIFT);
339 break;
340 }
341 PrevState = CurrState;
342 }
onPlus()343 void onPlus() {
344 IntelExprState CurrState = State;
345 switch (State) {
346 default:
347 State = IES_ERROR;
348 break;
349 case IES_INTEGER:
350 case IES_RPAREN:
351 case IES_REGISTER:
352 State = IES_PLUS;
353 IC.pushOperator(IC_PLUS);
354 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
355 // If we already have a BaseReg, then assume this is the IndexReg with
356 // a scale of 1.
357 if (!BaseReg) {
358 BaseReg = TmpReg;
359 } else {
360 assert (!IndexReg && "BaseReg/IndexReg already set!");
361 IndexReg = TmpReg;
362 Scale = 1;
363 }
364 }
365 break;
366 }
367 PrevState = CurrState;
368 }
onMinus()369 void onMinus() {
370 IntelExprState CurrState = State;
371 switch (State) {
372 default:
373 State = IES_ERROR;
374 break;
375 case IES_PLUS:
376 case IES_NOT:
377 case IES_MULTIPLY:
378 case IES_DIVIDE:
379 case IES_LPAREN:
380 case IES_RPAREN:
381 case IES_LBRAC:
382 case IES_RBRAC:
383 case IES_INTEGER:
384 case IES_REGISTER:
385 State = IES_MINUS;
386 // Only push the minus operator if it is not a unary operator.
387 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
388 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
389 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
390 IC.pushOperator(IC_MINUS);
391 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
392 // If we already have a BaseReg, then assume this is the IndexReg with
393 // a scale of 1.
394 if (!BaseReg) {
395 BaseReg = TmpReg;
396 } else {
397 assert (!IndexReg && "BaseReg/IndexReg already set!");
398 IndexReg = TmpReg;
399 Scale = 1;
400 }
401 }
402 break;
403 }
404 PrevState = CurrState;
405 }
onNot()406 void onNot() {
407 IntelExprState CurrState = State;
408 switch (State) {
409 default:
410 State = IES_ERROR;
411 break;
412 case IES_PLUS:
413 case IES_NOT:
414 State = IES_NOT;
415 break;
416 }
417 PrevState = CurrState;
418 }
onRegister(unsigned Reg)419 void onRegister(unsigned Reg) {
420 IntelExprState CurrState = State;
421 switch (State) {
422 default:
423 State = IES_ERROR;
424 break;
425 case IES_PLUS:
426 case IES_LPAREN:
427 State = IES_REGISTER;
428 TmpReg = Reg;
429 IC.pushOperand(IC_REGISTER);
430 break;
431 case IES_MULTIPLY:
432 // Index Register - Scale * Register
433 if (PrevState == IES_INTEGER) {
434 assert (!IndexReg && "IndexReg already set!");
435 State = IES_REGISTER;
436 IndexReg = Reg;
437 // Get the scale and replace the 'Scale * Register' with '0'.
438 Scale = IC.popOperand();
439 IC.pushOperand(IC_IMM);
440 IC.popOperator();
441 } else {
442 State = IES_ERROR;
443 }
444 break;
445 }
446 PrevState = CurrState;
447 }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName)448 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
449 PrevState = State;
450 switch (State) {
451 default:
452 State = IES_ERROR;
453 break;
454 case IES_PLUS:
455 case IES_MINUS:
456 case IES_NOT:
457 State = IES_INTEGER;
458 Sym = SymRef;
459 SymName = SymRefName;
460 IC.pushOperand(IC_IMM);
461 break;
462 }
463 }
onInteger(int64_t TmpInt,StringRef & ErrMsg)464 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
465 IntelExprState CurrState = State;
466 switch (State) {
467 default:
468 State = IES_ERROR;
469 break;
470 case IES_PLUS:
471 case IES_MINUS:
472 case IES_NOT:
473 case IES_OR:
474 case IES_AND:
475 case IES_LSHIFT:
476 case IES_RSHIFT:
477 case IES_DIVIDE:
478 case IES_MULTIPLY:
479 case IES_LPAREN:
480 State = IES_INTEGER;
481 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
482 // Index Register - Register * Scale
483 assert (!IndexReg && "IndexReg already set!");
484 IndexReg = TmpReg;
485 Scale = TmpInt;
486 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
487 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
488 return true;
489 }
490 // Get the scale and replace the 'Register * Scale' with '0'.
491 IC.popOperator();
492 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
493 PrevState == IES_OR || PrevState == IES_AND ||
494 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
495 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
496 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
497 PrevState == IES_NOT) &&
498 CurrState == IES_MINUS) {
499 // Unary minus. No need to pop the minus operand because it was never
500 // pushed.
501 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
502 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
503 PrevState == IES_OR || PrevState == IES_AND ||
504 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
505 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
506 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
507 PrevState == IES_NOT) &&
508 CurrState == IES_NOT) {
509 // Unary not. No need to pop the not operand because it was never
510 // pushed.
511 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
512 } else {
513 IC.pushOperand(IC_IMM, TmpInt);
514 }
515 break;
516 }
517 PrevState = CurrState;
518 return false;
519 }
onStar()520 void onStar() {
521 PrevState = State;
522 switch (State) {
523 default:
524 State = IES_ERROR;
525 break;
526 case IES_INTEGER:
527 case IES_REGISTER:
528 case IES_RPAREN:
529 State = IES_MULTIPLY;
530 IC.pushOperator(IC_MULTIPLY);
531 break;
532 }
533 }
onDivide()534 void onDivide() {
535 PrevState = State;
536 switch (State) {
537 default:
538 State = IES_ERROR;
539 break;
540 case IES_INTEGER:
541 case IES_RPAREN:
542 State = IES_DIVIDE;
543 IC.pushOperator(IC_DIVIDE);
544 break;
545 }
546 }
onLBrac()547 void onLBrac() {
548 PrevState = State;
549 switch (State) {
550 default:
551 State = IES_ERROR;
552 break;
553 case IES_RBRAC:
554 State = IES_PLUS;
555 IC.pushOperator(IC_PLUS);
556 break;
557 }
558 }
onRBrac()559 void onRBrac() {
560 IntelExprState CurrState = State;
561 switch (State) {
562 default:
563 State = IES_ERROR;
564 break;
565 case IES_INTEGER:
566 case IES_REGISTER:
567 case IES_RPAREN:
568 State = IES_RBRAC;
569 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
570 // If we already have a BaseReg, then assume this is the IndexReg with
571 // a scale of 1.
572 if (!BaseReg) {
573 BaseReg = TmpReg;
574 } else {
575 assert (!IndexReg && "BaseReg/IndexReg already set!");
576 IndexReg = TmpReg;
577 Scale = 1;
578 }
579 }
580 break;
581 }
582 PrevState = CurrState;
583 }
onLParen()584 void onLParen() {
585 IntelExprState CurrState = State;
586 switch (State) {
587 default:
588 State = IES_ERROR;
589 break;
590 case IES_PLUS:
591 case IES_MINUS:
592 case IES_NOT:
593 case IES_OR:
594 case IES_AND:
595 case IES_LSHIFT:
596 case IES_RSHIFT:
597 case IES_MULTIPLY:
598 case IES_DIVIDE:
599 case IES_LPAREN:
600 // FIXME: We don't handle this type of unary minus or not, yet.
601 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
602 PrevState == IES_OR || PrevState == IES_AND ||
603 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
604 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
605 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
606 PrevState == IES_NOT) &&
607 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
608 State = IES_ERROR;
609 break;
610 }
611 State = IES_LPAREN;
612 IC.pushOperator(IC_LPAREN);
613 break;
614 }
615 PrevState = CurrState;
616 }
onRParen()617 void onRParen() {
618 PrevState = State;
619 switch (State) {
620 default:
621 State = IES_ERROR;
622 break;
623 case IES_INTEGER:
624 case IES_REGISTER:
625 case IES_RPAREN:
626 State = IES_RPAREN;
627 IC.pushOperator(IC_RPAREN);
628 break;
629 }
630 }
631 };
632
getParser() const633 MCAsmParser &getParser() const { return Parser; }
634
getLexer() const635 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
636
Error(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)637 bool Error(SMLoc L, const Twine &Msg,
638 ArrayRef<SMRange> Ranges = None,
639 bool MatchingInlineAsm = false) {
640 if (MatchingInlineAsm) return true;
641 return Parser.Error(L, Msg, Ranges);
642 }
643
ErrorAndEatStatement(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)644 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
645 ArrayRef<SMRange> Ranges = None,
646 bool MatchingInlineAsm = false) {
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
649 }
650
ErrorOperand(SMLoc Loc,StringRef Msg)651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
652 Error(Loc, Msg);
653 return nullptr;
654 }
655
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
669 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
670 SMLoc Start,
671 int64_t ImmDisp,
672 unsigned Size);
673 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
674 InlineAsmIdentifierInfo &Info,
675 bool IsUnevaluatedOperand, SMLoc &End);
676
677 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
678
679 std::unique_ptr<X86Operand>
680 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
681 unsigned IndexReg, unsigned Scale, SMLoc Start,
682 SMLoc End, unsigned Size, StringRef Identifier,
683 InlineAsmIdentifierInfo &Info);
684
685 bool ParseDirectiveWord(unsigned Size, SMLoc L);
686 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
687
688 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
689
690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691 /// instrumentation around Inst.
692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
693
694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695 OperandVector &Operands, MCStreamer &Out,
696 unsigned &ErrorInfo,
697 bool MatchingInlineAsm) override;
698
699 /// doSrcDstMatch - Returns true if operands are matching in their
700 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
701 /// the parsing mode (Intel vs. AT&T).
702 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
703
704 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
705 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
706 /// \return \c true if no parsing errors occurred, \c false otherwise.
707 bool HandleAVX512Operand(OperandVector &Operands,
708 const MCParsedAsmOperand &Op);
709
is64BitMode() const710 bool is64BitMode() const {
711 // FIXME: Can tablegen auto-generate this?
712 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
713 }
is32BitMode() const714 bool is32BitMode() const {
715 // FIXME: Can tablegen auto-generate this?
716 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
717 }
is16BitMode() const718 bool is16BitMode() const {
719 // FIXME: Can tablegen auto-generate this?
720 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
721 }
SwitchMode(uint64_t mode)722 void SwitchMode(uint64_t mode) {
723 uint64_t oldMode = STI.getFeatureBits() &
724 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
725 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
726 setAvailableFeatures(FB);
727 assert(mode == (STI.getFeatureBits() &
728 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
729 }
730
isParsingIntelSyntax()731 bool isParsingIntelSyntax() {
732 return getParser().getAssemblerDialect();
733 }
734
735 /// @name Auto-generated Matcher Functions
736 /// {
737
738 #define GET_ASSEMBLER_HEADER
739 #include "X86GenAsmMatcher.inc"
740
741 /// }
742
743 public:
/// Binds the parser to its subtarget, generic parser and instruction info,
/// computes the initially available features, and creates the
/// instrumentation object selected by \p Options.
X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
             const MCInstrInfo &mii,
             const MCTargetOptions &Options)
    : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
      InstInfo(nullptr) {

  // Initialize the set of available features.
  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));

  MCContext &Ctx = Parser.getContext();
  Instrumentation.reset(CreateX86AsmInstrumentation(Options, Ctx, STI));
}
755
756 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
757
758 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
759 SMLoc NameLoc, OperandVector &Operands) override;
760
761 bool ParseDirective(AsmToken DirectiveID) override;
762 };
763 } // end anonymous namespace
764
765 /// @name Auto-generated Match Functions
766 /// {
767
768 static unsigned MatchRegisterName(StringRef Name);
769
770 /// }
771
CheckBaseRegAndIndexReg(unsigned BaseReg,unsigned IndexReg,StringRef & ErrMsg)772 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
773 StringRef &ErrMsg) {
774 // If we have both a base register and an index register make sure they are
775 // both 64-bit or 32-bit registers.
776 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
777 if (BaseReg != 0 && IndexReg != 0) {
778 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
779 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
780 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
781 IndexReg != X86::RIZ) {
782 ErrMsg = "base register is 64-bit, but index register is not";
783 return true;
784 }
785 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
786 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
787 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
788 IndexReg != X86::EIZ){
789 ErrMsg = "base register is 32-bit, but index register is not";
790 return true;
791 }
792 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
793 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
794 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
795 ErrMsg = "base register is 16-bit, but index register is not";
796 return true;
797 }
798 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
799 IndexReg != X86::SI && IndexReg != X86::DI) ||
800 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
801 IndexReg != X86::BX && IndexReg != X86::BP)) {
802 ErrMsg = "invalid 16-bit base/index register combination";
803 return true;
804 }
805 }
806 }
807 return false;
808 }
809
doSrcDstMatch(X86Operand & Op1,X86Operand & Op2)810 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
811 {
812 // Return true and let a normal complaint about bogus operands happen.
813 if (!Op1.isMem() || !Op2.isMem())
814 return true;
815
816 // Actually these might be the other way round if Intel syntax is
817 // being used. It doesn't matter.
818 unsigned diReg = Op1.Mem.BaseReg;
819 unsigned siReg = Op2.Mem.BaseReg;
820
821 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
822 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
823 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
824 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
825 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
826 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
827 // Again, return true and let another error happen.
828 return true;
829 }
830
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)831 bool X86AsmParser::ParseRegister(unsigned &RegNo,
832 SMLoc &StartLoc, SMLoc &EndLoc) {
833 RegNo = 0;
834 const AsmToken &PercentTok = Parser.getTok();
835 StartLoc = PercentTok.getLoc();
836
837 // If we encounter a %, ignore it. This code handles registers with and
838 // without the prefix, unprefixed registers can occur in cfi directives.
839 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
840 Parser.Lex(); // Eat percent token.
841
842 const AsmToken &Tok = Parser.getTok();
843 EndLoc = Tok.getEndLoc();
844
845 if (Tok.isNot(AsmToken::Identifier)) {
846 if (isParsingIntelSyntax()) return true;
847 return Error(StartLoc, "invalid register name",
848 SMRange(StartLoc, EndLoc));
849 }
850
851 RegNo = MatchRegisterName(Tok.getString());
852
853 // If the match failed, try the register name as lowercase.
854 if (RegNo == 0)
855 RegNo = MatchRegisterName(Tok.getString().lower());
856
857 if (!is64BitMode()) {
858 // FIXME: This should be done using Requires<Not64BitMode> and
859 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
860 // checked.
861 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
862 // REX prefix.
863 if (RegNo == X86::RIZ ||
864 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
865 X86II::isX86_64NonExtLowByteReg(RegNo) ||
866 X86II::isX86_64ExtendedReg(RegNo))
867 return Error(StartLoc, "register %"
868 + Tok.getString() + " is only available in 64-bit mode",
869 SMRange(StartLoc, EndLoc));
870 }
871
872 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
873 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
874 RegNo = X86::ST0;
875 Parser.Lex(); // Eat 'st'
876
877 // Check to see if we have '(4)' after %st.
878 if (getLexer().isNot(AsmToken::LParen))
879 return false;
880 // Lex the paren.
881 getParser().Lex();
882
883 const AsmToken &IntTok = Parser.getTok();
884 if (IntTok.isNot(AsmToken::Integer))
885 return Error(IntTok.getLoc(), "expected stack index");
886 switch (IntTok.getIntVal()) {
887 case 0: RegNo = X86::ST0; break;
888 case 1: RegNo = X86::ST1; break;
889 case 2: RegNo = X86::ST2; break;
890 case 3: RegNo = X86::ST3; break;
891 case 4: RegNo = X86::ST4; break;
892 case 5: RegNo = X86::ST5; break;
893 case 6: RegNo = X86::ST6; break;
894 case 7: RegNo = X86::ST7; break;
895 default: return Error(IntTok.getLoc(), "invalid stack index");
896 }
897
898 if (getParser().Lex().isNot(AsmToken::RParen))
899 return Error(Parser.getTok().getLoc(), "expected ')'");
900
901 EndLoc = Parser.getTok().getEndLoc();
902 Parser.Lex(); // Eat ')'
903 return false;
904 }
905
906 EndLoc = Parser.getTok().getEndLoc();
907
908 // If this is "db[0-7]", match it as an alias
909 // for dr[0-7].
910 if (RegNo == 0 && Tok.getString().size() == 3 &&
911 Tok.getString().startswith("db")) {
912 switch (Tok.getString()[2]) {
913 case '0': RegNo = X86::DR0; break;
914 case '1': RegNo = X86::DR1; break;
915 case '2': RegNo = X86::DR2; break;
916 case '3': RegNo = X86::DR3; break;
917 case '4': RegNo = X86::DR4; break;
918 case '5': RegNo = X86::DR5; break;
919 case '6': RegNo = X86::DR6; break;
920 case '7': RegNo = X86::DR7; break;
921 }
922
923 if (RegNo != 0) {
924 EndLoc = Parser.getTok().getEndLoc();
925 Parser.Lex(); // Eat it.
926 return false;
927 }
928 }
929
930 if (RegNo == 0) {
931 if (isParsingIntelSyntax()) return true;
932 return Error(StartLoc, "invalid register name",
933 SMRange(StartLoc, EndLoc));
934 }
935
936 Parser.Lex(); // Eat identifier token.
937 return false;
938 }
939
DefaultMemSIOperand(SMLoc Loc)940 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
941 unsigned basereg =
942 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
943 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
944 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
945 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
946 }
947
DefaultMemDIOperand(SMLoc Loc)948 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
949 unsigned basereg =
950 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
951 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
952 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
953 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
954 }
955
ParseOperand()956 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
957 if (isParsingIntelSyntax())
958 return ParseIntelOperand();
959 return ParseATTOperand();
960 }
961
962 /// getIntelMemOperandSize - Return intel memory operand size.
getIntelMemOperandSize(StringRef OpStr)963 static unsigned getIntelMemOperandSize(StringRef OpStr) {
964 unsigned Size = StringSwitch<unsigned>(OpStr)
965 .Cases("BYTE", "byte", 8)
966 .Cases("WORD", "word", 16)
967 .Cases("DWORD", "dword", 32)
968 .Cases("QWORD", "qword", 64)
969 .Cases("XWORD", "xword", 80)
970 .Cases("XMMWORD", "xmmword", 128)
971 .Cases("YMMWORD", "ymmword", 256)
972 .Cases("ZMMWORD", "zmmword", 512)
973 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
974 .Default(0);
975 return Size;
976 }
977
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,InlineAsmIdentifierInfo & Info)978 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
979 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
980 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
981 InlineAsmIdentifierInfo &Info) {
982 // If this is not a VarDecl then assume it is a FuncDecl or some other label
983 // reference. We need an 'r' constraint here, so we need to create register
984 // operand to ensure proper matching. Just pick a GPR based on the size of
985 // a pointer.
986 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
987 unsigned RegNo =
988 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
989 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
990 SMLoc(), Identifier, Info.OpDecl);
991 }
992
993 // We either have a direct symbol reference, or an offset from a symbol. The
994 // parser always puts the symbol on the LHS, so look there for size
995 // calculation purposes.
996 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
997 bool IsSymRef =
998 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
999 if (IsSymRef) {
1000 if (!Size) {
1001 Size = Info.Type * 8; // Size is in terms of bits in this context.
1002 if (Size)
1003 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1004 /*Len=*/0, Size));
1005 }
1006 }
1007
1008 // When parsing inline assembly we set the base register to a non-zero value
1009 // if we don't know the actual value at this time. This is necessary to
1010 // get the matching correct in some cases.
1011 BaseReg = BaseReg ? BaseReg : 1;
1012 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1013 End, Size, Identifier, Info.OpDecl);
1014 }
1015
1016 static void
RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> * AsmRewrites,StringRef SymName,int64_t ImmDisp,int64_t FinalImmDisp,SMLoc & BracLoc,SMLoc & StartInBrac,SMLoc & End)1017 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1018 StringRef SymName, int64_t ImmDisp,
1019 int64_t FinalImmDisp, SMLoc &BracLoc,
1020 SMLoc &StartInBrac, SMLoc &End) {
1021 // Remove the '[' and ']' from the IR string.
1022 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1023 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1024
1025 // If ImmDisp is non-zero, then we parsed a displacement before the
1026 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1027 // If ImmDisp doesn't match the displacement computed by the state machine
1028 // then we have an additional displacement in the bracketed expression.
1029 if (ImmDisp != FinalImmDisp) {
1030 if (ImmDisp) {
1031 // We have an immediate displacement before the bracketed expression.
1032 // Adjust this to match the final immediate displacement.
1033 bool Found = false;
1034 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1035 E = AsmRewrites->end(); I != E; ++I) {
1036 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1037 continue;
1038 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1039 assert (!Found && "ImmDisp already rewritten.");
1040 (*I).Kind = AOK_Imm;
1041 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1042 (*I).Val = FinalImmDisp;
1043 Found = true;
1044 break;
1045 }
1046 }
1047 assert (Found && "Unable to rewrite ImmDisp.");
1048 (void)Found;
1049 } else {
1050 // We have a symbolic and an immediate displacement, but no displacement
1051 // before the bracketed expression. Put the immediate displacement
1052 // before the bracketed expression.
1053 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1054 }
1055 }
1056 // Remove all the ImmPrefix rewrites within the brackets.
1057 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1058 E = AsmRewrites->end(); I != E; ++I) {
1059 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1060 continue;
1061 if ((*I).Kind == AOK_ImmPrefix)
1062 (*I).Kind = AOK_Delete;
1063 }
1064 const char *SymLocPtr = SymName.data();
1065 // Skip everything before the symbol.
1066 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1067 assert(Len > 0 && "Expected a non-negative length.");
1068 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1069 }
1070 // Skip everything after the symbol.
1071 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1072 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1073 assert(Len > 0 && "Expected a non-negative length.");
1074 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1075 }
1076 }
1077
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1078 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1079 const AsmToken &Tok = Parser.getTok();
1080
1081 bool Done = false;
1082 while (!Done) {
1083 bool UpdateLocLex = true;
1084
1085 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1086 // identifier. Don't try an parse it as a register.
1087 if (Tok.getString().startswith("."))
1088 break;
1089
1090 // If we're parsing an immediate expression, we don't expect a '['.
1091 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1092 break;
1093
1094 AsmToken::TokenKind TK = getLexer().getKind();
1095 switch (TK) {
1096 default: {
1097 if (SM.isValidEndState()) {
1098 Done = true;
1099 break;
1100 }
1101 return Error(Tok.getLoc(), "unknown token in expression");
1102 }
1103 case AsmToken::EndOfStatement: {
1104 Done = true;
1105 break;
1106 }
1107 case AsmToken::String:
1108 case AsmToken::Identifier: {
1109 // This could be a register or a symbolic displacement.
1110 unsigned TmpReg;
1111 const MCExpr *Val;
1112 SMLoc IdentLoc = Tok.getLoc();
1113 StringRef Identifier = Tok.getString();
1114 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1115 SM.onRegister(TmpReg);
1116 UpdateLocLex = false;
1117 break;
1118 } else {
1119 if (!isParsingInlineAsm()) {
1120 if (getParser().parsePrimaryExpr(Val, End))
1121 return Error(Tok.getLoc(), "Unexpected identifier!");
1122 } else {
1123 // This is a dot operator, not an adjacent identifier.
1124 if (Identifier.find('.') != StringRef::npos) {
1125 return false;
1126 } else {
1127 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1128 if (ParseIntelIdentifier(Val, Identifier, Info,
1129 /*Unevaluated=*/false, End))
1130 return true;
1131 }
1132 }
1133 SM.onIdentifierExpr(Val, Identifier);
1134 UpdateLocLex = false;
1135 break;
1136 }
1137 return Error(Tok.getLoc(), "Unexpected identifier!");
1138 }
1139 case AsmToken::Integer: {
1140 StringRef ErrMsg;
1141 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1142 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1143 Tok.getLoc()));
1144 // Look for 'b' or 'f' following an Integer as a directional label
1145 SMLoc Loc = getTok().getLoc();
1146 int64_t IntVal = getTok().getIntVal();
1147 End = consumeToken();
1148 UpdateLocLex = false;
1149 if (getLexer().getKind() == AsmToken::Identifier) {
1150 StringRef IDVal = getTok().getString();
1151 if (IDVal == "f" || IDVal == "b") {
1152 MCSymbol *Sym =
1153 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1154 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1155 const MCExpr *Val =
1156 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1157 if (IDVal == "b" && Sym->isUndefined())
1158 return Error(Loc, "invalid reference to undefined symbol");
1159 StringRef Identifier = Sym->getName();
1160 SM.onIdentifierExpr(Val, Identifier);
1161 End = consumeToken();
1162 } else {
1163 if (SM.onInteger(IntVal, ErrMsg))
1164 return Error(Loc, ErrMsg);
1165 }
1166 } else {
1167 if (SM.onInteger(IntVal, ErrMsg))
1168 return Error(Loc, ErrMsg);
1169 }
1170 break;
1171 }
1172 case AsmToken::Plus: SM.onPlus(); break;
1173 case AsmToken::Minus: SM.onMinus(); break;
1174 case AsmToken::Tilde: SM.onNot(); break;
1175 case AsmToken::Star: SM.onStar(); break;
1176 case AsmToken::Slash: SM.onDivide(); break;
1177 case AsmToken::Pipe: SM.onOr(); break;
1178 case AsmToken::Amp: SM.onAnd(); break;
1179 case AsmToken::LessLess:
1180 SM.onLShift(); break;
1181 case AsmToken::GreaterGreater:
1182 SM.onRShift(); break;
1183 case AsmToken::LBrac: SM.onLBrac(); break;
1184 case AsmToken::RBrac: SM.onRBrac(); break;
1185 case AsmToken::LParen: SM.onLParen(); break;
1186 case AsmToken::RParen: SM.onRParen(); break;
1187 }
1188 if (SM.hadError())
1189 return Error(Tok.getLoc(), "unknown token in expression");
1190
1191 if (!Done && UpdateLocLex)
1192 End = consumeToken();
1193 }
1194 return false;
1195 }
1196
1197 std::unique_ptr<X86Operand>
ParseIntelBracExpression(unsigned SegReg,SMLoc Start,int64_t ImmDisp,unsigned Size)1198 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1199 int64_t ImmDisp, unsigned Size) {
1200 const AsmToken &Tok = Parser.getTok();
1201 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1202 if (getLexer().isNot(AsmToken::LBrac))
1203 return ErrorOperand(BracLoc, "Expected '[' token!");
1204 Parser.Lex(); // Eat '['
1205
1206 SMLoc StartInBrac = Tok.getLoc();
1207 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1208 // may have already parsed an immediate displacement before the bracketed
1209 // expression.
1210 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1211 if (ParseIntelExpression(SM, End))
1212 return nullptr;
1213
1214 const MCExpr *Disp = nullptr;
1215 if (const MCExpr *Sym = SM.getSym()) {
1216 // A symbolic displacement.
1217 Disp = Sym;
1218 if (isParsingInlineAsm())
1219 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1220 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1221 End);
1222 }
1223
1224 if (SM.getImm() || !Disp) {
1225 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1226 if (Disp)
1227 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1228 else
1229 Disp = Imm; // An immediate displacement only.
1230 }
1231
1232 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1233 // will in fact do global lookup the field name inside all global typedefs,
1234 // but we don't emulate that.
1235 if (Tok.getString().find('.') != StringRef::npos) {
1236 const MCExpr *NewDisp;
1237 if (ParseIntelDotOperator(Disp, NewDisp))
1238 return nullptr;
1239
1240 End = Tok.getEndLoc();
1241 Parser.Lex(); // Eat the field.
1242 Disp = NewDisp;
1243 }
1244
1245 int BaseReg = SM.getBaseReg();
1246 int IndexReg = SM.getIndexReg();
1247 int Scale = SM.getScale();
1248 if (!isParsingInlineAsm()) {
1249 // handle [-42]
1250 if (!BaseReg && !IndexReg) {
1251 if (!SegReg)
1252 return X86Operand::CreateMem(Disp, Start, End, Size);
1253 else
1254 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1255 }
1256 StringRef ErrMsg;
1257 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1258 Error(StartInBrac, ErrMsg);
1259 return nullptr;
1260 }
1261 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1262 End, Size);
1263 }
1264
1265 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1266 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1267 End, Size, SM.getSymName(), Info);
1268 }
1269
1270 // Inline assembly may use variable names with namespace alias qualifiers.
ParseIntelIdentifier(const MCExpr * & Val,StringRef & Identifier,InlineAsmIdentifierInfo & Info,bool IsUnevaluatedOperand,SMLoc & End)1271 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1272 StringRef &Identifier,
1273 InlineAsmIdentifierInfo &Info,
1274 bool IsUnevaluatedOperand, SMLoc &End) {
1275 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1276 Val = nullptr;
1277
1278 StringRef LineBuf(Identifier.data());
1279 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1280
1281 const AsmToken &Tok = Parser.getTok();
1282
1283 // Advance the token stream until the end of the current token is
1284 // after the end of what the frontend claimed.
1285 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1286 while (true) {
1287 End = Tok.getEndLoc();
1288 getLexer().Lex();
1289
1290 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1291 if (End.getPointer() == EndPtr) break;
1292 }
1293
1294 // Create the symbol reference.
1295 Identifier = LineBuf;
1296 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1297 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1298 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1299 return false;
1300 }
1301
1302 /// \brief Parse intel style segment override.
1303 std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg,SMLoc Start,unsigned Size)1304 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1305 unsigned Size) {
1306 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1307 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1308 if (Tok.isNot(AsmToken::Colon))
1309 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1310 Parser.Lex(); // Eat ':'
1311
1312 int64_t ImmDisp = 0;
1313 if (getLexer().is(AsmToken::Integer)) {
1314 ImmDisp = Tok.getIntVal();
1315 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1316
1317 if (isParsingInlineAsm())
1318 InstInfo->AsmRewrites->push_back(
1319 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1320
1321 if (getLexer().isNot(AsmToken::LBrac)) {
1322 // An immediate following a 'segment register', 'colon' token sequence can
1323 // be followed by a bracketed expression. If it isn't we know we have our
1324 // final segment override.
1325 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1326 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1327 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1328 Size);
1329 }
1330 }
1331
1332 if (getLexer().is(AsmToken::LBrac))
1333 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1334
1335 const MCExpr *Val;
1336 SMLoc End;
1337 if (!isParsingInlineAsm()) {
1338 if (getParser().parsePrimaryExpr(Val, End))
1339 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1340
1341 return X86Operand::CreateMem(Val, Start, End, Size);
1342 }
1343
1344 InlineAsmIdentifierInfo Info;
1345 StringRef Identifier = Tok.getString();
1346 if (ParseIntelIdentifier(Val, Identifier, Info,
1347 /*Unevaluated=*/false, End))
1348 return nullptr;
1349 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1350 /*Scale=*/1, Start, End, Size, Identifier, Info);
1351 }
1352
1353 /// ParseIntelMemOperand - Parse intel style memory operand.
ParseIntelMemOperand(int64_t ImmDisp,SMLoc Start,unsigned Size)1354 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1355 SMLoc Start,
1356 unsigned Size) {
1357 const AsmToken &Tok = Parser.getTok();
1358 SMLoc End;
1359
1360 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1361 if (getLexer().is(AsmToken::LBrac))
1362 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1363 assert(ImmDisp == 0);
1364
1365 const MCExpr *Val;
1366 if (!isParsingInlineAsm()) {
1367 if (getParser().parsePrimaryExpr(Val, End))
1368 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1369
1370 return X86Operand::CreateMem(Val, Start, End, Size);
1371 }
1372
1373 InlineAsmIdentifierInfo Info;
1374 StringRef Identifier = Tok.getString();
1375 if (ParseIntelIdentifier(Val, Identifier, Info,
1376 /*Unevaluated=*/false, End))
1377 return nullptr;
1378
1379 if (!getLexer().is(AsmToken::LBrac))
1380 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1381 /*Scale=*/1, Start, End, Size, Identifier, Info);
1382
1383 Parser.Lex(); // Eat '['
1384
1385 // Parse Identifier [ ImmDisp ]
1386 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1387 /*AddImmPrefix=*/false);
1388 if (ParseIntelExpression(SM, End))
1389 return nullptr;
1390
1391 if (SM.getSym()) {
1392 Error(Start, "cannot use more than one symbol in memory operand");
1393 return nullptr;
1394 }
1395 if (SM.getBaseReg()) {
1396 Error(Start, "cannot use base register with variable reference");
1397 return nullptr;
1398 }
1399 if (SM.getIndexReg()) {
1400 Error(Start, "cannot use index register with variable reference");
1401 return nullptr;
1402 }
1403
1404 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1405 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1406 // we're pointing to a local variable in memory, so the base register is
1407 // really the frame or stack pointer.
1408 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1409 /*Scale=*/1, Start, End, Size, Identifier,
1410 Info.OpDecl);
1411 }
1412
1413 /// Parse the '.' operator.
ParseIntelDotOperator(const MCExpr * Disp,const MCExpr * & NewDisp)1414 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1415 const MCExpr *&NewDisp) {
1416 const AsmToken &Tok = Parser.getTok();
1417 int64_t OrigDispVal, DotDispVal;
1418
1419 // FIXME: Handle non-constant expressions.
1420 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1421 OrigDispVal = OrigDisp->getValue();
1422 else
1423 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1424
1425 // Drop the optional '.'.
1426 StringRef DotDispStr = Tok.getString();
1427 if (DotDispStr.startswith("."))
1428 DotDispStr = DotDispStr.drop_front(1);
1429
1430 // .Imm gets lexed as a real.
1431 if (Tok.is(AsmToken::Real)) {
1432 APInt DotDisp;
1433 DotDispStr.getAsInteger(10, DotDisp);
1434 DotDispVal = DotDisp.getZExtValue();
1435 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1436 unsigned DotDisp;
1437 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1438 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1439 DotDisp))
1440 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1441 DotDispVal = DotDisp;
1442 } else
1443 return Error(Tok.getLoc(), "Unexpected token type!");
1444
1445 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1446 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1447 unsigned Len = DotDispStr.size();
1448 unsigned Val = OrigDispVal + DotDispVal;
1449 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1450 Val));
1451 }
1452
1453 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1454 return false;
1455 }
1456
1457 /// Parse the 'offset' operator. This operator is used to specify the
1458 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1459 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1460 const AsmToken &Tok = Parser.getTok();
1461 SMLoc OffsetOfLoc = Tok.getLoc();
1462 Parser.Lex(); // Eat offset.
1463
1464 const MCExpr *Val;
1465 InlineAsmIdentifierInfo Info;
1466 SMLoc Start = Tok.getLoc(), End;
1467 StringRef Identifier = Tok.getString();
1468 if (ParseIntelIdentifier(Val, Identifier, Info,
1469 /*Unevaluated=*/false, End))
1470 return nullptr;
1471
1472 // Don't emit the offset operator.
1473 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1474
1475 // The offset operator will have an 'r' constraint, thus we need to create
1476 // register operand to ensure proper matching. Just pick a GPR based on
1477 // the size of a pointer.
1478 unsigned RegNo =
1479 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1480 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1481 OffsetOfLoc, Identifier, Info.OpDecl);
1482 }
1483
/// Selects which Intel inline-assembly operator is being parsed.
enum IntelOperatorKind {
  IOK_LENGTH = 0, ///< The 'LENGTH' operator.
  IOK_SIZE = 1,   ///< The 'SIZE' operator.
  IOK_TYPE = 2    ///< The 'TYPE' operator.
};
1489
1490 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1491 /// returns the number of elements in an array. It returns the value 1 for
1492 /// non-array variables. The SIZE operator returns the size of a C or C++
1493 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1494 /// TYPE operator returns the size of a C or C++ type or variable. If the
1495 /// variable is an array, TYPE returns the size of a single element.
ParseIntelOperator(unsigned OpKind)1496 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1497 const AsmToken &Tok = Parser.getTok();
1498 SMLoc TypeLoc = Tok.getLoc();
1499 Parser.Lex(); // Eat operator.
1500
1501 const MCExpr *Val = nullptr;
1502 InlineAsmIdentifierInfo Info;
1503 SMLoc Start = Tok.getLoc(), End;
1504 StringRef Identifier = Tok.getString();
1505 if (ParseIntelIdentifier(Val, Identifier, Info,
1506 /*Unevaluated=*/true, End))
1507 return nullptr;
1508
1509 if (!Info.OpDecl)
1510 return ErrorOperand(Start, "unable to lookup expression");
1511
1512 unsigned CVal = 0;
1513 switch(OpKind) {
1514 default: llvm_unreachable("Unexpected operand kind!");
1515 case IOK_LENGTH: CVal = Info.Length; break;
1516 case IOK_SIZE: CVal = Info.Size; break;
1517 case IOK_TYPE: CVal = Info.Type; break;
1518 }
1519
1520 // Rewrite the type operator and the C or C++ type or variable in terms of an
1521 // immediate. E.g. TYPE foo -> $$4
1522 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1523 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1524
1525 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1526 return X86Operand::CreateImm(Imm, Start, End);
1527 }
1528
ParseIntelOperand()1529 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1530 const AsmToken &Tok = Parser.getTok();
1531 SMLoc Start, End;
1532
1533 // Offset, length, type and size operators.
1534 if (isParsingInlineAsm()) {
1535 StringRef AsmTokStr = Tok.getString();
1536 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1537 return ParseIntelOffsetOfOperator();
1538 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1539 return ParseIntelOperator(IOK_LENGTH);
1540 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1541 return ParseIntelOperator(IOK_SIZE);
1542 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1543 return ParseIntelOperator(IOK_TYPE);
1544 }
1545
1546 unsigned Size = getIntelMemOperandSize(Tok.getString());
1547 if (Size) {
1548 Parser.Lex(); // Eat operand size (e.g., byte, word).
1549 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1550 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1551 Parser.Lex(); // Eat ptr.
1552 }
1553 Start = Tok.getLoc();
1554
1555 // Immediate.
1556 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1557 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1558 AsmToken StartTok = Tok;
1559 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1560 /*AddImmPrefix=*/false);
1561 if (ParseIntelExpression(SM, End))
1562 return nullptr;
1563
1564 int64_t Imm = SM.getImm();
1565 if (isParsingInlineAsm()) {
1566 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1567 if (StartTok.getString().size() == Len)
1568 // Just add a prefix if this wasn't a complex immediate expression.
1569 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1570 else
1571 // Otherwise, rewrite the complex expression as a single immediate.
1572 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1573 }
1574
1575 if (getLexer().isNot(AsmToken::LBrac)) {
1576 // If a directional label (ie. 1f or 2b) was parsed above from
1577 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1578 // to the MCExpr with the directional local symbol and this is a
1579 // memory operand not an immediate operand.
1580 if (SM.getSym())
1581 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1582
1583 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1584 return X86Operand::CreateImm(ImmExpr, Start, End);
1585 }
1586
1587 // Only positive immediates are valid.
1588 if (Imm < 0)
1589 return ErrorOperand(Start, "expected a positive immediate displacement "
1590 "before bracketed expr.");
1591
1592 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1593 return ParseIntelMemOperand(Imm, Start, Size);
1594 }
1595
1596 // Register.
1597 unsigned RegNo = 0;
1598 if (!ParseRegister(RegNo, Start, End)) {
1599 // If this is a segment register followed by a ':', then this is the start
1600 // of a segment override, otherwise this is a normal register reference.
1601 if (getLexer().isNot(AsmToken::Colon))
1602 return X86Operand::CreateReg(RegNo, Start, End);
1603
1604 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1605 }
1606
1607 // Memory operand.
1608 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1609 }
1610
ParseATTOperand()1611 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1612 switch (getLexer().getKind()) {
1613 default:
1614 // Parse a memory operand with no segment register.
1615 return ParseMemOperand(0, Parser.getTok().getLoc());
1616 case AsmToken::Percent: {
1617 // Read the register.
1618 unsigned RegNo;
1619 SMLoc Start, End;
1620 if (ParseRegister(RegNo, Start, End)) return nullptr;
1621 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1622 Error(Start, "%eiz and %riz can only be used as index registers",
1623 SMRange(Start, End));
1624 return nullptr;
1625 }
1626
1627 // If this is a segment register followed by a ':', then this is the start
1628 // of a memory reference, otherwise this is a normal register reference.
1629 if (getLexer().isNot(AsmToken::Colon))
1630 return X86Operand::CreateReg(RegNo, Start, End);
1631
1632 getParser().Lex(); // Eat the colon.
1633 return ParseMemOperand(RegNo, Start);
1634 }
1635 case AsmToken::Dollar: {
1636 // $42 -> immediate.
1637 SMLoc Start = Parser.getTok().getLoc(), End;
1638 Parser.Lex();
1639 const MCExpr *Val;
1640 if (getParser().parseExpression(Val, End))
1641 return nullptr;
1642 return X86Operand::CreateImm(Val, Start, End);
1643 }
1644 }
1645 }
1646
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)1647 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1648 const MCParsedAsmOperand &Op) {
1649 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1650 if (getLexer().is(AsmToken::LCurly)) {
1651 // Eat "{" and mark the current place.
1652 const SMLoc consumedToken = consumeToken();
1653 // Distinguish {1to<NUM>} from {%k<NUM>}.
1654 if(getLexer().is(AsmToken::Integer)) {
1655 // Parse memory broadcasting ({1to<NUM>}).
1656 if (getLexer().getTok().getIntVal() != 1)
1657 return !ErrorAndEatStatement(getLexer().getLoc(),
1658 "Expected 1to<NUM> at this point");
1659 Parser.Lex(); // Eat "1" of 1to8
1660 if (!getLexer().is(AsmToken::Identifier) ||
1661 !getLexer().getTok().getIdentifier().startswith("to"))
1662 return !ErrorAndEatStatement(getLexer().getLoc(),
1663 "Expected 1to<NUM> at this point");
1664 // Recognize only reasonable suffixes.
1665 const char *BroadcastPrimitive =
1666 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1667 .Case("to8", "{1to8}")
1668 .Case("to16", "{1to16}")
1669 .Default(nullptr);
1670 if (!BroadcastPrimitive)
1671 return !ErrorAndEatStatement(getLexer().getLoc(),
1672 "Invalid memory broadcast primitive.");
1673 Parser.Lex(); // Eat "toN" of 1toN
1674 if (!getLexer().is(AsmToken::RCurly))
1675 return !ErrorAndEatStatement(getLexer().getLoc(),
1676 "Expected } at this point");
1677 Parser.Lex(); // Eat "}"
1678 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1679 consumedToken));
1680 // No AVX512 specific primitives can pass
1681 // after memory broadcasting, so return.
1682 return true;
1683 } else {
1684 // Parse mask register {%k1}
1685 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1686 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1687 Operands.push_back(std::move(Op));
1688 if (!getLexer().is(AsmToken::RCurly))
1689 return !ErrorAndEatStatement(getLexer().getLoc(),
1690 "Expected } at this point");
1691 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1692
1693 // Parse "zeroing non-masked" semantic {z}
1694 if (getLexer().is(AsmToken::LCurly)) {
1695 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1696 if (!getLexer().is(AsmToken::Identifier) ||
1697 getLexer().getTok().getIdentifier() != "z")
1698 return !ErrorAndEatStatement(getLexer().getLoc(),
1699 "Expected z at this point");
1700 Parser.Lex(); // Eat the z
1701 if (!getLexer().is(AsmToken::RCurly))
1702 return !ErrorAndEatStatement(getLexer().getLoc(),
1703 "Expected } at this point");
1704 Parser.Lex(); // Eat the }
1705 }
1706 }
1707 }
1708 }
1709 }
1710 return true;
1711 }
1712
1713 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1714 /// has already been parsed if present.
ParseMemOperand(unsigned SegReg,SMLoc MemStart)1715 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1716 SMLoc MemStart) {
1717
1718 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1719 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1720 // only way to do this without lookahead is to eat the '(' and see what is
1721 // after it.
1722 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1723 if (getLexer().isNot(AsmToken::LParen)) {
1724 SMLoc ExprEnd;
1725 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1726
1727 // After parsing the base expression we could either have a parenthesized
1728 // memory address or not. If not, return now. If so, eat the (.
1729 if (getLexer().isNot(AsmToken::LParen)) {
1730 // Unless we have a segment register, treat this as an immediate.
1731 if (SegReg == 0)
1732 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1733 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1734 }
1735
1736 // Eat the '('.
1737 Parser.Lex();
1738 } else {
1739 // Okay, we have a '('. We don't know if this is an expression or not, but
1740 // so we have to eat the ( to see beyond it.
1741 SMLoc LParenLoc = Parser.getTok().getLoc();
1742 Parser.Lex(); // Eat the '('.
1743
1744 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1745 // Nothing to do here, fall into the code below with the '(' part of the
1746 // memory operand consumed.
1747 } else {
1748 SMLoc ExprEnd;
1749
1750 // It must be an parenthesized expression, parse it now.
1751 if (getParser().parseParenExpression(Disp, ExprEnd))
1752 return nullptr;
1753
1754 // After parsing the base expression we could either have a parenthesized
1755 // memory address or not. If not, return now. If so, eat the (.
1756 if (getLexer().isNot(AsmToken::LParen)) {
1757 // Unless we have a segment register, treat this as an immediate.
1758 if (SegReg == 0)
1759 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1760 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1761 }
1762
1763 // Eat the '('.
1764 Parser.Lex();
1765 }
1766 }
1767
1768 // If we reached here, then we just ate the ( of the memory operand. Process
1769 // the rest of the memory operand.
1770 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1771 SMLoc IndexLoc, BaseLoc;
1772
1773 if (getLexer().is(AsmToken::Percent)) {
1774 SMLoc StartLoc, EndLoc;
1775 BaseLoc = Parser.getTok().getLoc();
1776 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1777 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1778 Error(StartLoc, "eiz and riz can only be used as index registers",
1779 SMRange(StartLoc, EndLoc));
1780 return nullptr;
1781 }
1782 }
1783
1784 if (getLexer().is(AsmToken::Comma)) {
1785 Parser.Lex(); // Eat the comma.
1786 IndexLoc = Parser.getTok().getLoc();
1787
1788 // Following the comma we should have either an index register, or a scale
1789 // value. We don't support the later form, but we want to parse it
1790 // correctly.
1791 //
1792 // Not that even though it would be completely consistent to support syntax
1793 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1794 if (getLexer().is(AsmToken::Percent)) {
1795 SMLoc L;
1796 if (ParseRegister(IndexReg, L, L)) return nullptr;
1797
1798 if (getLexer().isNot(AsmToken::RParen)) {
1799 // Parse the scale amount:
1800 // ::= ',' [scale-expression]
1801 if (getLexer().isNot(AsmToken::Comma)) {
1802 Error(Parser.getTok().getLoc(),
1803 "expected comma in scale expression");
1804 return nullptr;
1805 }
1806 Parser.Lex(); // Eat the comma.
1807
1808 if (getLexer().isNot(AsmToken::RParen)) {
1809 SMLoc Loc = Parser.getTok().getLoc();
1810
1811 int64_t ScaleVal;
1812 if (getParser().parseAbsoluteExpression(ScaleVal)){
1813 Error(Loc, "expected scale expression");
1814 return nullptr;
1815 }
1816
1817 // Validate the scale amount.
1818 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1819 ScaleVal != 1) {
1820 Error(Loc, "scale factor in 16-bit address must be 1");
1821 return nullptr;
1822 }
1823 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1824 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1825 return nullptr;
1826 }
1827 Scale = (unsigned)ScaleVal;
1828 }
1829 }
1830 } else if (getLexer().isNot(AsmToken::RParen)) {
1831 // A scale amount that is not preceded by an index register is parsed but
1832 // otherwise ignored (a warning is issued unless the value is 1).
1833 SMLoc Loc = Parser.getTok().getLoc();
1834
1835 int64_t Value;
1836 if (getParser().parseAbsoluteExpression(Value))
1837 return nullptr;
1838
1839 if (Value != 1)
1840 Warning(Loc, "scale factor without index register is ignored");
1841 Scale = 1;
1842 }
1843 }
1844
1845 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1846 if (getLexer().isNot(AsmToken::RParen)) {
1847 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1848 return nullptr;
1849 }
1850 SMLoc MemEnd = Parser.getTok().getEndLoc();
1851 Parser.Lex(); // Eat the ')'.
1852
1853 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1854 // and then only in non-64-bit modes. Except for DX, which is a special case
1855 // because an unofficial form of in/out instructions uses it.
1856 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1857 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1858 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1859 BaseReg != X86::DX) {
1860 Error(BaseLoc, "invalid 16-bit base register");
1861 return nullptr;
1862 }
1863 if (BaseReg == 0 &&
1864 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1865 Error(IndexLoc, "16-bit memory operand may not include only index register");
1866 return nullptr;
1867 }
1868
1869 StringRef ErrMsg;
1870 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1871 Error(BaseLoc, ErrMsg);
1872 return nullptr;
1873 }
1874
1875 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1876 MemStart, MemEnd);
1877 }
1878
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)1879 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1880 SMLoc NameLoc, OperandVector &Operands) {
1881 InstInfo = &Info;
1882 StringRef PatchedName = Name;
1883
1884 // FIXME: Hack to recognize setneb as setne.
1885 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1886 PatchedName != "setb" && PatchedName != "setnb")
1887 PatchedName = PatchedName.substr(0, Name.size()-1);
1888
1889 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1890 const MCExpr *ExtraImmOp = nullptr;
1891 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1892 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1893 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1894 bool IsVCMP = PatchedName[0] == 'v';
1895 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1896 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1897 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1898 .Case("eq", 0x00)
1899 .Case("lt", 0x01)
1900 .Case("le", 0x02)
1901 .Case("unord", 0x03)
1902 .Case("neq", 0x04)
1903 .Case("nlt", 0x05)
1904 .Case("nle", 0x06)
1905 .Case("ord", 0x07)
1906 /* AVX only from here */
1907 .Case("eq_uq", 0x08)
1908 .Case("nge", 0x09)
1909 .Case("ngt", 0x0A)
1910 .Case("false", 0x0B)
1911 .Case("neq_oq", 0x0C)
1912 .Case("ge", 0x0D)
1913 .Case("gt", 0x0E)
1914 .Case("true", 0x0F)
1915 .Case("eq_os", 0x10)
1916 .Case("lt_oq", 0x11)
1917 .Case("le_oq", 0x12)
1918 .Case("unord_s", 0x13)
1919 .Case("neq_us", 0x14)
1920 .Case("nlt_uq", 0x15)
1921 .Case("nle_uq", 0x16)
1922 .Case("ord_s", 0x17)
1923 .Case("eq_us", 0x18)
1924 .Case("nge_uq", 0x19)
1925 .Case("ngt_uq", 0x1A)
1926 .Case("false_os", 0x1B)
1927 .Case("neq_os", 0x1C)
1928 .Case("ge_oq", 0x1D)
1929 .Case("gt_oq", 0x1E)
1930 .Case("true_us", 0x1F)
1931 .Default(~0U);
1932 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1933 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1934 getParser().getContext());
1935 if (PatchedName.endswith("ss")) {
1936 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1937 } else if (PatchedName.endswith("sd")) {
1938 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1939 } else if (PatchedName.endswith("ps")) {
1940 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1941 } else {
1942 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1943 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1944 }
1945 }
1946 }
1947
1948 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
1949
1950 if (ExtraImmOp && !isParsingIntelSyntax())
1951 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1952
1953 // Determine whether this is an instruction prefix.
1954 bool isPrefix =
1955 Name == "lock" || Name == "rep" ||
1956 Name == "repe" || Name == "repz" ||
1957 Name == "repne" || Name == "repnz" ||
1958 Name == "rex64" || Name == "data16";
1959
1960
1961 // This does the actual operand parsing. Don't parse any more if we have a
1962 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1963 // just want to parse the "lock" as the first instruction and the "incl" as
1964 // the next one.
1965 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1966
1967 // Parse '*' modifier.
1968 if (getLexer().is(AsmToken::Star))
1969 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1970
1971 // Read the operands.
1972 while(1) {
1973 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1974 Operands.push_back(std::move(Op));
1975 if (!HandleAVX512Operand(Operands, *Operands.back()))
1976 return true;
1977 } else {
1978 Parser.eatToEndOfStatement();
1979 return true;
1980 }
1981 // check for comma and eat it
1982 if (getLexer().is(AsmToken::Comma))
1983 Parser.Lex();
1984 else
1985 break;
1986 }
1987
1988 if (getLexer().isNot(AsmToken::EndOfStatement))
1989 return ErrorAndEatStatement(getLexer().getLoc(),
1990 "unexpected token in argument list");
1991 }
1992
1993 // Consume the EndOfStatement or the prefix separator Slash
1994 if (getLexer().is(AsmToken::EndOfStatement) ||
1995 (isPrefix && getLexer().is(AsmToken::Slash)))
1996 Parser.Lex();
1997
1998 if (ExtraImmOp && isParsingIntelSyntax())
1999 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2000
2001 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2002 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2003 // documented form in various unofficial manuals, so a lot of code uses it.
2004 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2005 Operands.size() == 3) {
2006 X86Operand &Op = (X86Operand &)*Operands.back();
2007 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2008 isa<MCConstantExpr>(Op.Mem.Disp) &&
2009 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2010 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2011 SMLoc Loc = Op.getEndLoc();
2012 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2013 }
2014 }
2015 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2016 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2017 Operands.size() == 3) {
2018 X86Operand &Op = (X86Operand &)*Operands[1];
2019 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2020 isa<MCConstantExpr>(Op.Mem.Disp) &&
2021 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2022 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2023 SMLoc Loc = Op.getEndLoc();
2024 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2025 }
2026 }
2027
2028 // Append default arguments to "ins[bwld]"
2029 if (Name.startswith("ins") && Operands.size() == 1 &&
2030 (Name == "insb" || Name == "insw" || Name == "insl" ||
2031 Name == "insd" )) {
2032 if (isParsingIntelSyntax()) {
2033 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2034 Operands.push_back(DefaultMemDIOperand(NameLoc));
2035 } else {
2036 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2037 Operands.push_back(DefaultMemDIOperand(NameLoc));
2038 }
2039 }
2040
2041 // Append default arguments to "outs[bwld]"
2042 if (Name.startswith("outs") && Operands.size() == 1 &&
2043 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2044 Name == "outsd" )) {
2045 if (isParsingIntelSyntax()) {
2046 Operands.push_back(DefaultMemSIOperand(NameLoc));
2047 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2048 } else {
2049 Operands.push_back(DefaultMemSIOperand(NameLoc));
2050 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2051 }
2052 }
2053
2054 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2055 // values of $SIREG according to the mode. It would be nice if this
2056 // could be achieved with InstAlias in the tables.
2057 if (Name.startswith("lods") && Operands.size() == 1 &&
2058 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2059 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2060 Operands.push_back(DefaultMemSIOperand(NameLoc));
2061
2062 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2063 // values of $DIREG according to the mode. It would be nice if this
2064 // could be achieved with InstAlias in the tables.
2065 if (Name.startswith("stos") && Operands.size() == 1 &&
2066 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2067 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2068 Operands.push_back(DefaultMemDIOperand(NameLoc));
2069
2070 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2071 // values of $DIREG according to the mode. It would be nice if this
2072 // could be achieved with InstAlias in the tables.
2073 if (Name.startswith("scas") && Operands.size() == 1 &&
2074 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2075 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2076 Operands.push_back(DefaultMemDIOperand(NameLoc));
2077
2078 // Add default SI and DI operands to "cmps[bwlq]".
2079 if (Name.startswith("cmps") &&
2080 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2081 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2082 if (Operands.size() == 1) {
2083 if (isParsingIntelSyntax()) {
2084 Operands.push_back(DefaultMemSIOperand(NameLoc));
2085 Operands.push_back(DefaultMemDIOperand(NameLoc));
2086 } else {
2087 Operands.push_back(DefaultMemDIOperand(NameLoc));
2088 Operands.push_back(DefaultMemSIOperand(NameLoc));
2089 }
2090 } else if (Operands.size() == 3) {
2091 X86Operand &Op = (X86Operand &)*Operands[1];
2092 X86Operand &Op2 = (X86Operand &)*Operands[2];
2093 if (!doSrcDstMatch(Op, Op2))
2094 return Error(Op.getStartLoc(),
2095 "mismatching source and destination index registers");
2096 }
2097 }
2098
2099 // Add default SI and DI operands to "movs[bwlq]".
2100 if ((Name.startswith("movs") &&
2101 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2102 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2103 (Name.startswith("smov") &&
2104 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2105 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2106 if (Operands.size() == 1) {
2107 if (Name == "movsd")
2108 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2109 if (isParsingIntelSyntax()) {
2110 Operands.push_back(DefaultMemDIOperand(NameLoc));
2111 Operands.push_back(DefaultMemSIOperand(NameLoc));
2112 } else {
2113 Operands.push_back(DefaultMemSIOperand(NameLoc));
2114 Operands.push_back(DefaultMemDIOperand(NameLoc));
2115 }
2116 } else if (Operands.size() == 3) {
2117 X86Operand &Op = (X86Operand &)*Operands[1];
2118 X86Operand &Op2 = (X86Operand &)*Operands[2];
2119 if (!doSrcDstMatch(Op, Op2))
2120 return Error(Op.getStartLoc(),
2121 "mismatching source and destination index registers");
2122 }
2123 }
2124
2125 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2126 // "shift <op>".
2127 if ((Name.startswith("shr") || Name.startswith("sar") ||
2128 Name.startswith("shl") || Name.startswith("sal") ||
2129 Name.startswith("rcl") || Name.startswith("rcr") ||
2130 Name.startswith("rol") || Name.startswith("ror")) &&
2131 Operands.size() == 3) {
2132 if (isParsingIntelSyntax()) {
2133 // Intel syntax
2134 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2135 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2136 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2137 Operands.pop_back();
2138 } else {
2139 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2140 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2141 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2142 Operands.erase(Operands.begin() + 1);
2143 }
2144 }
2145
2146 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2147 // instalias with an immediate operand yet.
2148 if (Name == "int" && Operands.size() == 2) {
2149 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2150 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2151 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2152 Operands.erase(Operands.begin() + 1);
2153 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2154 }
2155 }
2156
2157 return false;
2158 }
2159
convertToSExti8(MCInst & Inst,unsigned Opcode,unsigned Reg,bool isCmp)2160 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2161 bool isCmp) {
2162 MCInst TmpInst;
2163 TmpInst.setOpcode(Opcode);
2164 if (!isCmp)
2165 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2166 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2167 TmpInst.addOperand(Inst.getOperand(0));
2168 Inst = TmpInst;
2169 return true;
2170 }
2171
convert16i16to16ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2172 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2173 bool isCmp = false) {
2174 if (!Inst.getOperand(0).isImm() ||
2175 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2176 return false;
2177
2178 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
2179 }
2180
convert32i32to32ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2181 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2182 bool isCmp = false) {
2183 if (!Inst.getOperand(0).isImm() ||
2184 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2185 return false;
2186
2187 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
2188 }
2189
convert64i32to64ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2190 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2191 bool isCmp = false) {
2192 if (!Inst.getOperand(0).isImm() ||
2193 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2194 return false;
2195
2196 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2197 }
2198
processInstruction(MCInst & Inst,const OperandVector & Ops)2199 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2200 switch (Inst.getOpcode()) {
2201 default: return false;
2202 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2203 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2204 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2205 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2206 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2207 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2208 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2209 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2210 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2211 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2212 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2213 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2214 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2215 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2216 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2217 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2218 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2219 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2220 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2221 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2222 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2223 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2224 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2225 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2226 case X86::VMOVAPDrr:
2227 case X86::VMOVAPDYrr:
2228 case X86::VMOVAPSrr:
2229 case X86::VMOVAPSYrr:
2230 case X86::VMOVDQArr:
2231 case X86::VMOVDQAYrr:
2232 case X86::VMOVDQUrr:
2233 case X86::VMOVDQUYrr:
2234 case X86::VMOVUPDrr:
2235 case X86::VMOVUPDYrr:
2236 case X86::VMOVUPSrr:
2237 case X86::VMOVUPSYrr: {
2238 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2239 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2240 return false;
2241
2242 unsigned NewOpc;
2243 switch (Inst.getOpcode()) {
2244 default: llvm_unreachable("Invalid opcode");
2245 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2246 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2247 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2248 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2249 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2250 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2251 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2252 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2253 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2254 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2255 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2256 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2257 }
2258 Inst.setOpcode(NewOpc);
2259 return true;
2260 }
2261 case X86::VMOVSDrr:
2262 case X86::VMOVSSrr: {
2263 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2264 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2265 return false;
2266 unsigned NewOpc;
2267 switch (Inst.getOpcode()) {
2268 default: llvm_unreachable("Invalid opcode");
2269 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2270 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2271 }
2272 Inst.setOpcode(NewOpc);
2273 return true;
2274 }
2275 }
2276 }
2277
2278 static const char *getSubtargetFeatureName(unsigned Val);
2279
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2280 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2281 MCStreamer &Out) {
2282 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2283 Out);
2284 Out.EmitInstruction(Inst, STI);
2285 }
2286
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,unsigned & ErrorInfo,bool MatchingInlineAsm)2287 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2288 OperandVector &Operands,
2289 MCStreamer &Out, unsigned &ErrorInfo,
2290 bool MatchingInlineAsm) {
2291 assert(!Operands.empty() && "Unexpect empty operand list!");
2292 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2293 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2294 ArrayRef<SMRange> EmptyRanges = None;
2295
2296 // First, handle aliases that expand to multiple instructions.
2297 // FIXME: This should be replaced with a real .td file alias mechanism.
2298 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2299 // call.
2300 if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
2301 Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
2302 Op.getToken() == "finit" || Op.getToken() == "fsave" ||
2303 Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
2304 MCInst Inst;
2305 Inst.setOpcode(X86::WAIT);
2306 Inst.setLoc(IDLoc);
2307 if (!MatchingInlineAsm)
2308 EmitInstruction(Inst, Operands, Out);
2309
2310 const char *Repl = StringSwitch<const char *>(Op.getToken())
2311 .Case("finit", "fninit")
2312 .Case("fsave", "fnsave")
2313 .Case("fstcw", "fnstcw")
2314 .Case("fstcww", "fnstcw")
2315 .Case("fstenv", "fnstenv")
2316 .Case("fstsw", "fnstsw")
2317 .Case("fstsww", "fnstsw")
2318 .Case("fclex", "fnclex")
2319 .Default(nullptr);
2320 assert(Repl && "Unknown wait-prefixed instruction");
2321 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2322 }
2323
2324 bool WasOriginallyInvalidOperand = false;
2325 MCInst Inst;
2326
2327 // First, try a direct match.
2328 switch (MatchInstructionImpl(Operands, Inst,
2329 ErrorInfo, MatchingInlineAsm,
2330 isParsingIntelSyntax())) {
2331 default: break;
2332 case Match_Success:
2333 // Some instructions need post-processing to, for example, tweak which
2334 // encoding is selected. Loop on it while changes happen so the
2335 // individual transformations can chain off each other.
2336 if (!MatchingInlineAsm)
2337 while (processInstruction(Inst, Operands))
2338 ;
2339
2340 Inst.setLoc(IDLoc);
2341 if (!MatchingInlineAsm)
2342 EmitInstruction(Inst, Operands, Out);
2343 Opcode = Inst.getOpcode();
2344 return false;
2345 case Match_MissingFeature: {
2346 assert(ErrorInfo && "Unknown missing feature!");
2347 // Special case the error message for the very common case where only
2348 // a single subtarget feature is missing.
2349 std::string Msg = "instruction requires:";
2350 unsigned Mask = 1;
2351 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2352 if (ErrorInfo & Mask) {
2353 Msg += " ";
2354 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2355 }
2356 Mask <<= 1;
2357 }
2358 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2359 }
2360 case Match_InvalidOperand:
2361 WasOriginallyInvalidOperand = true;
2362 break;
2363 case Match_MnemonicFail:
2364 break;
2365 }
2366
2367 // FIXME: Ideally, we would only attempt suffix matches for things which are
2368 // valid prefixes, and we could just infer the right unambiguous
2369 // type. However, that requires substantially more matcher support than the
2370 // following hack.
2371
2372 // Change the operand to point to a temporary token.
2373 StringRef Base = Op.getToken();
2374 SmallString<16> Tmp;
2375 Tmp += Base;
2376 Tmp += ' ';
2377 Op.setTokenValue(Tmp.str());
2378
2379 // If this instruction starts with an 'f', then it is a floating point stack
2380 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2381 // 80-bit floating point, which use the suffixes s,l,t respectively.
2382 //
2383 // Otherwise, we assume that this may be an integer instruction, which comes
2384 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2385 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2386
2387 // Check for the various suffix matches.
2388 Tmp[Base.size()] = Suffixes[0];
2389 unsigned ErrorInfoIgnore;
2390 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2391 unsigned Match1, Match2, Match3, Match4;
2392
2393 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2394 MatchingInlineAsm, isParsingIntelSyntax());
2395 // If this returned as a missing feature failure, remember that.
2396 if (Match1 == Match_MissingFeature)
2397 ErrorInfoMissingFeature = ErrorInfoIgnore;
2398 Tmp[Base.size()] = Suffixes[1];
2399 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2400 MatchingInlineAsm, isParsingIntelSyntax());
2401 // If this returned as a missing feature failure, remember that.
2402 if (Match2 == Match_MissingFeature)
2403 ErrorInfoMissingFeature = ErrorInfoIgnore;
2404 Tmp[Base.size()] = Suffixes[2];
2405 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2406 MatchingInlineAsm, isParsingIntelSyntax());
2407 // If this returned as a missing feature failure, remember that.
2408 if (Match3 == Match_MissingFeature)
2409 ErrorInfoMissingFeature = ErrorInfoIgnore;
2410 Tmp[Base.size()] = Suffixes[3];
2411 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2412 MatchingInlineAsm, isParsingIntelSyntax());
2413 // If this returned as a missing feature failure, remember that.
2414 if (Match4 == Match_MissingFeature)
2415 ErrorInfoMissingFeature = ErrorInfoIgnore;
2416
2417 // Restore the old token.
2418 Op.setTokenValue(Base);
2419
2420 // If exactly one matched, then we treat that as a successful match (and the
2421 // instruction will already have been filled in correctly, since the failing
2422 // matches won't have modified it).
2423 unsigned NumSuccessfulMatches =
2424 (Match1 == Match_Success) + (Match2 == Match_Success) +
2425 (Match3 == Match_Success) + (Match4 == Match_Success);
2426 if (NumSuccessfulMatches == 1) {
2427 Inst.setLoc(IDLoc);
2428 if (!MatchingInlineAsm)
2429 EmitInstruction(Inst, Operands, Out);
2430 Opcode = Inst.getOpcode();
2431 return false;
2432 }
2433
2434 // Otherwise, the match failed, try to produce a decent error message.
2435
2436 // If we had multiple suffix matches, then identify this as an ambiguous
2437 // match.
2438 if (NumSuccessfulMatches > 1) {
2439 char MatchChars[4];
2440 unsigned NumMatches = 0;
2441 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2442 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2443 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2444 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2445
2446 SmallString<126> Msg;
2447 raw_svector_ostream OS(Msg);
2448 OS << "ambiguous instructions require an explicit suffix (could be ";
2449 for (unsigned i = 0; i != NumMatches; ++i) {
2450 if (i != 0)
2451 OS << ", ";
2452 if (i + 1 == NumMatches)
2453 OS << "or ";
2454 OS << "'" << Base << MatchChars[i] << "'";
2455 }
2456 OS << ")";
2457 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2458 return true;
2459 }
2460
2461 // Okay, we know that none of the variants matched successfully.
2462
2463 // If all of the instructions reported an invalid mnemonic, then the original
2464 // mnemonic was invalid.
2465 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2466 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2467 if (!WasOriginallyInvalidOperand) {
2468 ArrayRef<SMRange> Ranges =
2469 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2470 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2471 Ranges, MatchingInlineAsm);
2472 }
2473
2474 // Recover location info for the operand if we know which was the problem.
2475 if (ErrorInfo != ~0U) {
2476 if (ErrorInfo >= Operands.size())
2477 return Error(IDLoc, "too few operands for instruction",
2478 EmptyRanges, MatchingInlineAsm);
2479
2480 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2481 if (Operand.getStartLoc().isValid()) {
2482 SMRange OperandRange = Operand.getLocRange();
2483 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2484 OperandRange, MatchingInlineAsm);
2485 }
2486 }
2487
2488 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2489 MatchingInlineAsm);
2490 }
2491
2492 // If one instruction matched with a missing feature, report this as a
2493 // missing feature.
2494 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2495 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2496 std::string Msg = "instruction requires:";
2497 unsigned Mask = 1;
2498 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2499 if (ErrorInfoMissingFeature & Mask) {
2500 Msg += " ";
2501 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2502 }
2503 Mask <<= 1;
2504 }
2505 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2506 }
2507
2508 // If one instruction matched with an invalid operand, report this as an
2509 // operand failure.
2510 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2511 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2512 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2513 MatchingInlineAsm);
2514 return true;
2515 }
2516
2517 // If all of these were an outright failure, report it in a useless way.
2518 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2519 EmptyRanges, MatchingInlineAsm);
2520 return true;
2521 }
2522
2523
ParseDirective(AsmToken DirectiveID)2524 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2525 StringRef IDVal = DirectiveID.getIdentifier();
2526 if (IDVal == ".word")
2527 return ParseDirectiveWord(2, DirectiveID.getLoc());
2528 else if (IDVal.startswith(".code"))
2529 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2530 else if (IDVal.startswith(".att_syntax")) {
2531 getParser().setAssemblerDialect(0);
2532 return false;
2533 } else if (IDVal.startswith(".intel_syntax")) {
2534 getParser().setAssemblerDialect(1);
2535 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2536 // FIXME: Handle noprefix
2537 if (Parser.getTok().getString() == "noprefix")
2538 Parser.Lex();
2539 }
2540 return false;
2541 }
2542 return true;
2543 }
2544
2545 /// ParseDirectiveWord
2546 /// ::= .word [ expression (, expression)* ]
ParseDirectiveWord(unsigned Size,SMLoc L)2547 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2548 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2549 for (;;) {
2550 const MCExpr *Value;
2551 if (getParser().parseExpression(Value))
2552 return false;
2553
2554 getParser().getStreamer().EmitValue(Value, Size);
2555
2556 if (getLexer().is(AsmToken::EndOfStatement))
2557 break;
2558
2559 // FIXME: Improve diagnostic.
2560 if (getLexer().isNot(AsmToken::Comma)) {
2561 Error(L, "unexpected token in directive");
2562 return false;
2563 }
2564 Parser.Lex();
2565 }
2566 }
2567
2568 Parser.Lex();
2569 return false;
2570 }
2571
2572 /// ParseDirectiveCode
2573 /// ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)2574 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2575 if (IDVal == ".code16") {
2576 Parser.Lex();
2577 if (!is16BitMode()) {
2578 SwitchMode(X86::Mode16Bit);
2579 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2580 }
2581 } else if (IDVal == ".code32") {
2582 Parser.Lex();
2583 if (!is32BitMode()) {
2584 SwitchMode(X86::Mode32Bit);
2585 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2586 }
2587 } else if (IDVal == ".code64") {
2588 Parser.Lex();
2589 if (!is64BitMode()) {
2590 SwitchMode(X86::Mode64Bit);
2591 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2592 }
2593 } else {
2594 Error(L, "unknown directive " + IDVal);
2595 return false;
2596 }
2597
2598 return false;
2599 }
2600
2601 // Force static initialization.
LLVMInitializeX86AsmParser()2602 extern "C" void LLVMInitializeX86AsmParser() {
2603 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2604 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2605 }
2606
2607 #define GET_REGISTER_MATCHER
2608 #define GET_MATCHER_IMPLEMENTATION
2609 #define GET_SUBTARGET_FEATURE_NAME
2610 #include "X86GenAsmMatcher.inc"
2611