//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

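  // Exactly one union member is active at a time, selected by Kind:
  // Tok for Token, Imm for Immediate, Reg for Register, Expr for Expression.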
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

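  // Naming convention for the source-operand predicates below: "SCSrc" allows
  // an SGPR or an inline constant, "SSrc" additionally allows a literal;
  // "VCSrc" allows a VGPR, SGPR or inline constant, "VSrc" additionally
  // allows a literal; "VISrc" and "AISrc" restrict the register to a VGPR or
  // AGPR, respectively, plus inline constants.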
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

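  // Record that SGPR/VGPR index 'i' is used: bump the first-unused index and
  // publish it through the .kernel.sgpr_count / .kernel.vgpr_count symbols.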
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

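// Target assembly parser for AMDGPU: parses instructions, operands and
// AMDGPU-specific assembler directives into MCInsts and streamer calls.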
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth);
  unsigned ParseRegularReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseSpecialReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseRegList(RegisterKind &RegKind,
                        unsigned &RegNum,
                        unsigned &RegWidth);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

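// Describes an optional instruction operand (e.g. "offset:" or "glc"):
// its name, immediate kind, whether it is a single-bit flag, and an
// optional callback used to convert the parsed value.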
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

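// Map an operand type (from SIDefines.h) to the floating-point semantics
// used when encoding a literal for that operand.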
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

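// Return true if FPLiteral can be converted to the given type without
// overflow or underflow; precision loss is allowed.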
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the target type's semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

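// Check that the value fits into Size bits when interpreted as either a
// signed or an unsigned integer.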
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

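// Check whether this operand can be encoded as an inline constant (rather
// than requiring a literal) for an operand of the given type.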
isInlinableImm(MVT type) const1527 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1528
1529 // This is a hack to enable named inline values like
1530 // shared_base with both 32-bit and 64-bit operands.
1531 // Note that these values are defined as
1532 // 32-bit operands only.
1533 if (isInlineValue()) {
1534 return true;
1535 }
1536
1537 if (!isImmTy(ImmTyNone)) {
1538 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1539 return false;
1540 }
1541 // TODO: We should avoid using host float here. It would be better to
1542 // check the float bit values which is what a few other places do.
1543 // We've had bot failures before due to weird NaN support on mips hosts.
1544
1545 APInt Literal(64, Imm.Val);
1546
1547 if (Imm.IsFPImm) { // We got fp literal token
1548 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1549 return AMDGPU::isInlinableLiteral64(Imm.Val,
1550 AsmParser->hasInv2PiInlineImm());
1551 }
1552
1553 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1554 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1555 return false;
1556
1557 if (type.getScalarSizeInBits() == 16) {
1558 return AMDGPU::isInlinableLiteral16(
1559 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1560 AsmParser->hasInv2PiInlineImm());
1561 }
1562
1563 // Check if single precision literal is inlinable
1564 return AMDGPU::isInlinableLiteral32(
1565 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1566 AsmParser->hasInv2PiInlineImm());
1567 }
1568
1569 // We got int literal token.
1570 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1571 return AMDGPU::isInlinableLiteral64(Imm.Val,
1572 AsmParser->hasInv2PiInlineImm());
1573 }
1574
1575 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1576 return false;
1577 }
1578
1579 if (type.getScalarSizeInBits() == 16) {
1580 return AMDGPU::isInlinableLiteral16(
1581 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1582 AsmParser->hasInv2PiInlineImm());
1583 }
1584
1585 return AMDGPU::isInlinableLiteral32(
1586 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1587 AsmParser->hasInv2PiInlineImm());
1588 }
1589
1590 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1591 // Check that this immediate can be added as literal
1592 if (!isImmTy(ImmTyNone)) {
1593 return false;
1594 }
1595
1596 if (!Imm.IsFPImm) {
1597 // We got int literal token.
1598
1599 if (type == MVT::f64 && hasFPModifiers()) {
1600       // FP modifiers cannot be applied to int literals while preserving the
1601       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1602       // To avoid ambiguity, disable these cases.
1603 return false;
1604 }
1605
1606 unsigned Size = type.getSizeInBits();
1607 if (Size == 64)
1608 Size = 32;
1609
1610 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1611 // types.
1612 return isSafeTruncation(Imm.Val, Size);
1613 }
1614
1615 // We got fp literal token
1616 if (type == MVT::f64) { // Expected 64-bit fp operand
1617     // We would set the low 64 bits of the literal to zeroes, but such literals are accepted.
1618 return true;
1619 }
1620
1621 if (type == MVT::i64) { // Expected 64-bit int operand
1622 // We don't allow fp literals in 64-bit integer instructions. It is
1623 // unclear how we should encode them.
1624 return false;
1625 }
1626
1627 // We allow fp literals with f16x2 operands assuming that the specified
1628 // literal goes into the lower half and the upper half is zero. We also
1629   // require that the literal may be losslessly converted to f16.
1630 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1631 (type == MVT::v2i16)? MVT::i16 : type;
1632
1633 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1634 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1635 }
1636
1637 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1638 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1639 }
1640
1641 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1642 if (AsmParser->isVI())
1643 return isVReg32();
1644 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1645 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1646 else
1647 return false;
1648 }
1649
1650 bool AMDGPUOperand::isSDWAFP16Operand() const {
1651 return isSDWAOperand(MVT::f16);
1652 }
1653
1654 bool AMDGPUOperand::isSDWAFP32Operand() const {
1655 return isSDWAOperand(MVT::f32);
1656 }
1657
1658 bool AMDGPUOperand::isSDWAInt16Operand() const {
1659 return isSDWAOperand(MVT::i16);
1660 }
1661
1662 bool AMDGPUOperand::isSDWAInt32Operand() const {
1663 return isSDWAOperand(MVT::i32);
1664 }
1665
1666 bool AMDGPUOperand::isBoolReg() const {
1667 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1668 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1669 }
1670
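// Apply the parsed abs/neg FP input modifiers to a raw FP literal of the
// given byte Size: abs clears the sign bit, neg flips it.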
1671 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1672 {
1673 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1674 assert(Size == 2 || Size == 4 || Size == 8);
1675
1676 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1677
1678 if (Imm.Mods.Abs) {
1679 Val &= ~FpSignMask;
1680 }
1681 if (Imm.Mods.Neg) {
1682 Val ^= FpSignMask;
1683 }
1684
1685 return Val;
1686 }
1687
1688 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1689 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1690 Inst.getNumOperands())) {
1691 addLiteralImmOperand(Inst, Imm.Val,
1692 ApplyModifiers &
1693 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1694 } else {
1695 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1696 Inst.addOperand(MCOperand::createImm(Imm.Val));
1697 }
1698 }
1699
1700 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1701 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1702 auto OpNum = Inst.getNumOperands();
1703 // Check that this operand accepts literals
1704 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1705
1706 if (ApplyModifiers) {
1707 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1708 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1709 Val = applyInputFPModifiers(Val, Size);
1710 }
1711
1712 APInt Literal(64, Val);
1713 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1714
1715 if (Imm.IsFPImm) { // We got fp literal token
1716 switch (OpTy) {
1717 case AMDGPU::OPERAND_REG_IMM_INT64:
1718 case AMDGPU::OPERAND_REG_IMM_FP64:
1719 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1720 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1721 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1722 AsmParser->hasInv2PiInlineImm())) {
1723 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1724 return;
1725 }
1726
1727 // Non-inlineable
1728 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1729 // For fp operands we check if low 32 bits are zeros
1730 if (Literal.getLoBits(32) != 0) {
1731 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1732 "Can't encode literal as exact 64-bit floating-point operand. "
1733 "Low 32-bits will be set to zero");
1734 }
1735
1736 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1737 return;
1738 }
1739
1740 // We don't allow fp literals in 64-bit integer instructions. It is
1741 // unclear how we should encode them. This case should be checked earlier
1742 // in predicate methods (isLiteralImm())
1743 llvm_unreachable("fp literal in 64-bit integer instruction.");
1744
1745 case AMDGPU::OPERAND_REG_IMM_INT32:
1746 case AMDGPU::OPERAND_REG_IMM_FP32:
1747 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1748 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1749 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1750 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1751 case AMDGPU::OPERAND_REG_IMM_INT16:
1752 case AMDGPU::OPERAND_REG_IMM_FP16:
1753 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1754 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1755 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1756 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1757 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1758 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1759 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1760 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1761 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1762 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1763 bool lost;
1764 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1765 // Convert literal to single precision
1766 FPLiteral.convert(*getOpFltSemantics(OpTy),
1767 APFloat::rmNearestTiesToEven, &lost);
1768       // We allow precision loss but not overflow or underflow. This should
1769       // have been checked earlier in isLiteralImm()
1770
1771 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1772 Inst.addOperand(MCOperand::createImm(ImmVal));
1773 return;
1774 }
1775 default:
1776 llvm_unreachable("invalid operand size");
1777 }
1778
1779 return;
1780 }
1781
1782 // We got int literal token.
1783 // Only sign extend inline immediates.
1784 switch (OpTy) {
1785 case AMDGPU::OPERAND_REG_IMM_INT32:
1786 case AMDGPU::OPERAND_REG_IMM_FP32:
1787 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1788 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1789 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1790 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1791 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1792 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1793 if (isSafeTruncation(Val, 32) &&
1794 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1795 AsmParser->hasInv2PiInlineImm())) {
1796 Inst.addOperand(MCOperand::createImm(Val));
1797 return;
1798 }
1799
1800 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1801 return;
1802
1803 case AMDGPU::OPERAND_REG_IMM_INT64:
1804 case AMDGPU::OPERAND_REG_IMM_FP64:
1805 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1806 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1807 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1808 Inst.addOperand(MCOperand::createImm(Val));
1809 return;
1810 }
1811
1812 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1813 return;
1814
1815 case AMDGPU::OPERAND_REG_IMM_INT16:
1816 case AMDGPU::OPERAND_REG_IMM_FP16:
1817 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1818 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1819 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1820 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1821 if (isSafeTruncation(Val, 16) &&
1822 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1823 AsmParser->hasInv2PiInlineImm())) {
1824 Inst.addOperand(MCOperand::createImm(Val));
1825 return;
1826 }
1827
1828 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1829 return;
1830
1831 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1832 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1833 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1834 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1835 assert(isSafeTruncation(Val, 16));
1836 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1837 AsmParser->hasInv2PiInlineImm()));
1838
1839 Inst.addOperand(MCOperand::createImm(Val));
1840 return;
1841 }
1842 default:
1843 llvm_unreachable("invalid operand size");
1844 }
1845 }
1846
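// Add a k-imm FP operand of the given Bitwidth: integer literals are
// truncated to Bitwidth bits, FP literals are first converted to the
// matching FP semantics.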
1847 template <unsigned Bitwidth>
1848 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1849 APInt Literal(64, Imm.Val);
1850
1851 if (!Imm.IsFPImm) {
1852 // We got int literal token.
1853 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1854 return;
1855 }
1856
1857 bool Lost;
1858 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1859 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1860 APFloat::rmNearestTiesToEven, &Lost);
1861 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1862 }
1863
1864 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1865 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1866 }
1867
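// Check whether Reg is one of the special registers that can be used as a
// named inline constant (shared/private aperture registers, vccz, execz,
// scc, pops_exiting_wave_id, null).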
1868 static bool isInlineValue(unsigned Reg) {
1869 switch (Reg) {
1870 case AMDGPU::SRC_SHARED_BASE:
1871 case AMDGPU::SRC_SHARED_LIMIT:
1872 case AMDGPU::SRC_PRIVATE_BASE:
1873 case AMDGPU::SRC_PRIVATE_LIMIT:
1874 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1875 return true;
1876 case AMDGPU::SRC_VCCZ:
1877 case AMDGPU::SRC_EXECZ:
1878 case AMDGPU::SRC_SCC:
1879 return true;
1880 case AMDGPU::SGPR_NULL:
1881 return true;
1882 default:
1883 return false;
1884 }
1885 }
1886
1887 bool AMDGPUOperand::isInlineValue() const {
1888 return isRegKind() && ::isInlineValue(getReg());
1889 }
1890
1891 //===----------------------------------------------------------------------===//
1892 // AsmParser
1893 //===----------------------------------------------------------------------===//
1894
1895 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1896 if (Is == IS_VGPR) {
1897 switch (RegWidth) {
1898 default: return -1;
1899 case 1: return AMDGPU::VGPR_32RegClassID;
1900 case 2: return AMDGPU::VReg_64RegClassID;
1901 case 3: return AMDGPU::VReg_96RegClassID;
1902 case 4: return AMDGPU::VReg_128RegClassID;
1903 case 5: return AMDGPU::VReg_160RegClassID;
1904 case 8: return AMDGPU::VReg_256RegClassID;
1905 case 16: return AMDGPU::VReg_512RegClassID;
1906 case 32: return AMDGPU::VReg_1024RegClassID;
1907 }
1908 } else if (Is == IS_TTMP) {
1909 switch (RegWidth) {
1910 default: return -1;
1911 case 1: return AMDGPU::TTMP_32RegClassID;
1912 case 2: return AMDGPU::TTMP_64RegClassID;
1913 case 4: return AMDGPU::TTMP_128RegClassID;
1914 case 8: return AMDGPU::TTMP_256RegClassID;
1915 case 16: return AMDGPU::TTMP_512RegClassID;
1916 }
1917 } else if (Is == IS_SGPR) {
1918 switch (RegWidth) {
1919 default: return -1;
1920 case 1: return AMDGPU::SGPR_32RegClassID;
1921 case 2: return AMDGPU::SGPR_64RegClassID;
1922 case 4: return AMDGPU::SGPR_128RegClassID;
1923 case 8: return AMDGPU::SGPR_256RegClassID;
1924 case 16: return AMDGPU::SGPR_512RegClassID;
1925 }
1926 } else if (Is == IS_AGPR) {
1927 switch (RegWidth) {
1928 default: return -1;
1929 case 1: return AMDGPU::AGPR_32RegClassID;
1930 case 2: return AMDGPU::AReg_64RegClassID;
1931 case 4: return AMDGPU::AReg_128RegClassID;
1932 case 16: return AMDGPU::AReg_512RegClassID;
1933 case 32: return AMDGPU::AReg_1024RegClassID;
1934 }
1935 }
1936 return -1;
1937 }
1938
1939 static unsigned getSpecialRegForName(StringRef RegName) {
1940 return StringSwitch<unsigned>(RegName)
1941 .Case("exec", AMDGPU::EXEC)
1942 .Case("vcc", AMDGPU::VCC)
1943 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1944 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1945 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1946 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1947 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1948 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1949 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1950 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1951 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1952 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1953 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1954 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1955 .Case("lds_direct", AMDGPU::LDS_DIRECT)
1956 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1957 .Case("m0", AMDGPU::M0)
1958 .Case("vccz", AMDGPU::SRC_VCCZ)
1959 .Case("src_vccz", AMDGPU::SRC_VCCZ)
1960 .Case("execz", AMDGPU::SRC_EXECZ)
1961 .Case("src_execz", AMDGPU::SRC_EXECZ)
1962 .Case("scc", AMDGPU::SRC_SCC)
1963 .Case("src_scc", AMDGPU::SRC_SCC)
1964 .Case("tba", AMDGPU::TBA)
1965 .Case("tma", AMDGPU::TMA)
1966 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1967 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1968 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1969 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1970 .Case("vcc_lo", AMDGPU::VCC_LO)
1971 .Case("vcc_hi", AMDGPU::VCC_HI)
1972 .Case("exec_lo", AMDGPU::EXEC_LO)
1973 .Case("exec_hi", AMDGPU::EXEC_HI)
1974 .Case("tma_lo", AMDGPU::TMA_LO)
1975 .Case("tma_hi", AMDGPU::TMA_HI)
1976 .Case("tba_lo", AMDGPU::TBA_LO)
1977 .Case("tba_hi", AMDGPU::TBA_HI)
1978 .Case("null", AMDGPU::SGPR_NULL)
1979 .Default(AMDGPU::NoRegister);
1980 }
1981
1982 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1983 SMLoc &EndLoc) {
1984 auto R = parseRegister();
1985 if (!R) return true;
1986 assert(R->isReg());
1987 RegNo = R->getReg();
1988 StartLoc = R->getStartLoc();
1989 EndLoc = R->getEndLoc();
1990 return false;
1991 }
1992
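// Try to extend the register list being parsed with Reg1. Reg and RegWidth
// describe the registers collected so far; on success they are updated to
// include Reg1 and true is returned.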
1993 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1994 RegisterKind RegKind, unsigned Reg1) {
1995 switch (RegKind) {
1996 case IS_SPECIAL:
1997 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1998 Reg = AMDGPU::EXEC;
1999 RegWidth = 2;
2000 return true;
2001 }
2002 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2003 Reg = AMDGPU::FLAT_SCR;
2004 RegWidth = 2;
2005 return true;
2006 }
2007 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2008 Reg = AMDGPU::XNACK_MASK;
2009 RegWidth = 2;
2010 return true;
2011 }
2012 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2013 Reg = AMDGPU::VCC;
2014 RegWidth = 2;
2015 return true;
2016 }
2017 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2018 Reg = AMDGPU::TBA;
2019 RegWidth = 2;
2020 return true;
2021 }
2022 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2023 Reg = AMDGPU::TMA;
2024 RegWidth = 2;
2025 return true;
2026 }
2027 return false;
2028 case IS_VGPR:
2029 case IS_SGPR:
2030 case IS_AGPR:
2031 case IS_TTMP:
2032 if (Reg1 != Reg + RegWidth) {
2033 return false;
2034 }
2035 RegWidth++;
2036 return true;
2037 default:
2038 llvm_unreachable("unexpected register kind");
2039 }
2040 }
2041
2042 struct RegInfo {
2043 StringLiteral Name;
2044 RegisterKind Kind;
2045 };
2046
2047 static constexpr RegInfo RegularRegisters[] = {
2048 {{"v"}, IS_VGPR},
2049 {{"s"}, IS_SGPR},
2050 {{"ttmp"}, IS_TTMP},
2051 {{"acc"}, IS_AGPR},
2052 {{"a"}, IS_AGPR},
2053 };
2054
2055 static bool isRegularReg(RegisterKind Kind) {
2056 return Kind == IS_VGPR ||
2057 Kind == IS_SGPR ||
2058 Kind == IS_TTMP ||
2059 Kind == IS_AGPR;
2060 }
2061
2062 static const RegInfo* getRegularRegInfo(StringRef Str) {
2063 for (const RegInfo &Reg : RegularRegisters)
2064 if (Str.startswith(Reg.Name))
2065 return &Reg;
2066 return nullptr;
2067 }
2068
2069 static bool getRegNum(StringRef Str, unsigned& Num) {
2070 return !Str.getAsInteger(10, Num);
2071 }
2072
2073 bool
2074 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2075 const AsmToken &NextToken) const {
2076
2077 // A list of consecutive registers: [s0,s1,s2,s3]
2078 if (Token.is(AsmToken::LBrac))
2079 return true;
2080
2081 if (!Token.is(AsmToken::Identifier))
2082 return false;
2083
2084 // A single register like s0 or a range of registers like s[0:1]
2085
2086 StringRef Str = Token.getString();
2087 const RegInfo *Reg = getRegularRegInfo(Str);
2088 if (Reg) {
2089 StringRef RegName = Reg->Name;
2090 StringRef RegSuffix = Str.substr(RegName.size());
2091 if (!RegSuffix.empty()) {
2092 unsigned Num;
2093 // A single register with an index: rXX
2094 if (getRegNum(RegSuffix, Num))
2095 return true;
2096 } else {
2097 // A range of registers: r[XX:YY].
2098 if (NextToken.is(AsmToken::LBrac))
2099 return true;
2100 }
2101 }
2102
2103 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2104 }
2105
2106 bool
2107 AMDGPUAsmParser::isRegister()
2108 {
2109 return isRegister(getToken(), peekToken());
2110 }
2111
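// Map a regular register kind, first register index and width (in 32-bit
// registers) to the corresponding MC register, honoring the SGPR/TTMP
// alignment requirements. Returns AMDGPU::NoRegister on failure.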
2112 unsigned
2113 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2114 unsigned RegNum,
2115 unsigned RegWidth) {
2116
2117 assert(isRegularReg(RegKind));
2118
2119 unsigned AlignSize = 1;
2120 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2121 // SGPR and TTMP registers must be aligned.
2122 // Max required alignment is 4 dwords.
2123 AlignSize = std::min(RegWidth, 4u);
2124 }
2125
2126 if (RegNum % AlignSize != 0)
2127 return AMDGPU::NoRegister;
2128
2129 unsigned RegIdx = RegNum / AlignSize;
2130 int RCID = getRegClass(RegKind, RegWidth);
2131 if (RCID == -1)
2132 return AMDGPU::NoRegister;
2133
2134 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2135 const MCRegisterClass RC = TRI->getRegClass(RCID);
2136 if (RegIdx >= RC.getNumRegs())
2137 return AMDGPU::NoRegister;
2138
2139 return RC.getRegister(RegIdx);
2140 }
2141
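// Parse a bracketed register index or range such as "[4]" or "[4:7]".
// On success, Num holds the first index and Width the number of registers;
// for "[4:7]" that is Num = 4 and Width = 4.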
2142 bool
2143 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2144 int64_t RegLo, RegHi;
2145 if (!trySkipToken(AsmToken::LBrac))
2146 return false;
2147
2148 if (!parseExpr(RegLo))
2149 return false;
2150
2151 if (trySkipToken(AsmToken::Colon)) {
2152 if (!parseExpr(RegHi))
2153 return false;
2154 } else {
2155 RegHi = RegLo;
2156 }
2157
2158 if (!trySkipToken(AsmToken::RBrac))
2159 return false;
2160
2161 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2162 return false;
2163
2164 Num = static_cast<unsigned>(RegLo);
2165 Width = (RegHi - RegLo) + 1;
2166 return true;
2167 }
2168
2169 unsigned
2170 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2171 unsigned &RegNum,
2172 unsigned &RegWidth) {
2173 assert(isToken(AsmToken::Identifier));
2174 unsigned Reg = getSpecialRegForName(getTokenStr());
2175 if (Reg) {
2176 RegNum = 0;
2177 RegWidth = 1;
2178 RegKind = IS_SPECIAL;
2179 lex(); // skip register name
2180 }
2181 return Reg;
2182 }
2183
2184 unsigned
2185 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2186 unsigned &RegNum,
2187 unsigned &RegWidth) {
2188 assert(isToken(AsmToken::Identifier));
2189 StringRef RegName = getTokenStr();
2190
2191 const RegInfo *RI = getRegularRegInfo(RegName);
2192 if (!RI)
2193 return AMDGPU::NoRegister;
2194 lex(); // skip register name
2195
2196 RegKind = RI->Kind;
2197 StringRef RegSuffix = RegName.substr(RI->Name.size());
2198 if (!RegSuffix.empty()) {
2199 // Single 32-bit register: vXX.
2200 if (!getRegNum(RegSuffix, RegNum))
2201 return AMDGPU::NoRegister;
2202 RegWidth = 1;
2203 } else {
2204 // Range of registers: v[XX:YY]. ":YY" is optional.
2205 if (!ParseRegRange(RegNum, RegWidth))
2206 return AMDGPU::NoRegister;
2207 }
2208
2209 return getRegularReg(RegKind, RegNum, RegWidth);
2210 }
2211
2212 unsigned
2213 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2214 unsigned &RegNum,
2215 unsigned &RegWidth) {
2216 unsigned Reg = AMDGPU::NoRegister;
2217
2218 if (!trySkipToken(AsmToken::LBrac))
2219 return AMDGPU::NoRegister;
2220
2221 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2222
2223 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2224 return AMDGPU::NoRegister;
2225 if (RegWidth != 1)
2226 return AMDGPU::NoRegister;
2227
2228 for (; trySkipToken(AsmToken::Comma); ) {
2229 RegisterKind NextRegKind;
2230 unsigned NextReg, NextRegNum, NextRegWidth;
2231
2232 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2233 return AMDGPU::NoRegister;
2234 if (NextRegWidth != 1)
2235 return AMDGPU::NoRegister;
2236 if (NextRegKind != RegKind)
2237 return AMDGPU::NoRegister;
2238 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2239 return AMDGPU::NoRegister;
2240 }
2241
2242 if (!trySkipToken(AsmToken::RBrac))
2243 return AMDGPU::NoRegister;
2244
2245 if (isRegularReg(RegKind))
2246 Reg = getRegularReg(RegKind, RegNum, RegWidth);
2247
2248 return Reg;
2249 }
2250
2251 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2252 unsigned &Reg,
2253 unsigned &RegNum,
2254 unsigned &RegWidth) {
2255 Reg = AMDGPU::NoRegister;
2256
2257 if (isToken(AsmToken::Identifier)) {
2258 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2259 if (Reg == AMDGPU::NoRegister)
2260 Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2261 } else {
2262 Reg = ParseRegList(RegKind, RegNum, RegWidth);
2263 }
2264
2265 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2266 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2267 }
2268
2269 Optional<StringRef>
2270 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2271 switch (RegKind) {
2272 case IS_VGPR:
2273 return StringRef(".amdgcn.next_free_vgpr");
2274 case IS_SGPR:
2275 return StringRef(".amdgcn.next_free_sgpr");
2276 default:
2277 return None;
2278 }
2279 }
2280
2281 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2282 auto SymbolName = getGprCountSymbolName(RegKind);
2283 assert(SymbolName && "initializing invalid register kind");
2284 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2285 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2286 }
2287
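// Record the highest VGPR/SGPR index used so far by bumping the
// .amdgcn.next_free_{v,s}gpr symbol. Returns false (after reporting an
// error) if the symbol was defined in an unexpected way.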
2288 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2289 unsigned DwordRegIndex,
2290 unsigned RegWidth) {
2291 // Symbols are only defined for GCN targets
2292 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2293 return true;
2294
2295 auto SymbolName = getGprCountSymbolName(RegKind);
2296 if (!SymbolName)
2297 return true;
2298 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2299
2300 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2301 int64_t OldCount;
2302
2303 if (!Sym->isVariable())
2304 return !Error(getParser().getTok().getLoc(),
2305 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2306 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2307 return !Error(
2308 getParser().getTok().getLoc(),
2309 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2310
2311 if (OldCount <= NewMax)
2312 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2313
2314 return true;
2315 }
2316
2317 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2318 const auto &Tok = Parser.getTok();
2319 SMLoc StartLoc = Tok.getLoc();
2320 SMLoc EndLoc = Tok.getEndLoc();
2321 RegisterKind RegKind;
2322 unsigned Reg, RegNum, RegWidth;
2323
2324 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2325 //FIXME: improve error messages (bug 41303).
2326 Error(StartLoc, "not a valid operand.");
2327 return nullptr;
2328 }
2329 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2330 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2331 return nullptr;
2332 } else
2333 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2334 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2335 }
2336
2337 OperandMatchResultTy
2338 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2339 // TODO: add syntactic sugar for 1/(2*PI)
2340
2341 assert(!isRegister());
2342 assert(!isModifier());
2343
2344 const auto& Tok = getToken();
2345 const auto& NextTok = peekToken();
2346 bool IsReal = Tok.is(AsmToken::Real);
2347 SMLoc S = getLoc();
2348 bool Negate = false;
2349
2350 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2351 lex();
2352 IsReal = true;
2353 Negate = true;
2354 }
2355
2356 if (IsReal) {
2357 // Floating-point expressions are not supported.
2358 // Can only allow floating-point literals with an
2359 // optional sign.
2360
2361 StringRef Num = getTokenStr();
2362 lex();
2363
2364 APFloat RealVal(APFloat::IEEEdouble());
2365 auto roundMode = APFloat::rmNearestTiesToEven;
2366 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2367 return MatchOperand_ParseFail;
2368 }
2369 if (Negate)
2370 RealVal.changeSign();
2371
2372 Operands.push_back(
2373 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2374 AMDGPUOperand::ImmTyNone, true));
2375
2376 return MatchOperand_Success;
2377
2378 } else {
2379 int64_t IntVal;
2380 const MCExpr *Expr;
2381 SMLoc S = getLoc();
2382
2383 if (HasSP3AbsModifier) {
2384 // This is a workaround for handling expressions
2385 // as arguments of SP3 'abs' modifier, for example:
2386 // |1.0|
2387 // |-1|
2388 // |1+x|
2389 // This syntax is not compatible with syntax of standard
2390 // MC expressions (due to the trailing '|').
2391 SMLoc EndLoc;
2392 if (getParser().parsePrimaryExpr(Expr, EndLoc))
2393 return MatchOperand_ParseFail;
2394 } else {
2395 if (Parser.parseExpression(Expr))
2396 return MatchOperand_ParseFail;
2397 }
2398
2399 if (Expr->evaluateAsAbsolute(IntVal)) {
2400 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2401 } else {
2402 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2403 }
2404
2405 return MatchOperand_Success;
2406 }
2407
2408 return MatchOperand_NoMatch;
2409 }
2410
2411 OperandMatchResultTy
2412 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2413 if (!isRegister())
2414 return MatchOperand_NoMatch;
2415
2416 if (auto R = parseRegister()) {
2417 assert(R->isReg());
2418 Operands.push_back(std::move(R));
2419 return MatchOperand_Success;
2420 }
2421 return MatchOperand_ParseFail;
2422 }
2423
2424 OperandMatchResultTy
2425 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2426 auto res = parseReg(Operands);
2427 if (res != MatchOperand_NoMatch) {
2428 return res;
2429 } else if (isModifier()) {
2430 return MatchOperand_NoMatch;
2431 } else {
2432 return parseImm(Operands, HasSP3AbsMod);
2433 }
2434 }
2435
2436 bool
2437 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2438 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2439 const auto &str = Token.getString();
2440 return str == "abs" || str == "neg" || str == "sext";
2441 }
2442 return false;
2443 }
2444
2445 bool
2446 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2447 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2448 }
2449
2450 bool
2451 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2452 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2453 }
2454
2455 bool
2456 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2457 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2458 }
2459
2460 // Check if this is an operand modifier or an opcode modifier
2461 // which may look like an expression but is not. We should
2462 // avoid parsing these modifiers as expressions. Currently
2463 // recognized sequences are:
2464 // |...|
2465 // abs(...)
2466 // neg(...)
2467 // sext(...)
2468 // -reg
2469 // -|...|
2470 // -abs(...)
2471 // name:...
2472 // Note that simple opcode modifiers like 'gds' may be parsed as
2473 // expressions; this is a special case. See getExpressionAsToken.
2474 //
2475 bool
2476 AMDGPUAsmParser::isModifier() {
2477
2478 AsmToken Tok = getToken();
2479 AsmToken NextToken[2];
2480 peekTokens(NextToken);
2481
2482 return isOperandModifier(Tok, NextToken[0]) ||
2483 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2484 isOpcodeModifierWithVal(Tok, NextToken[0]);
2485 }
2486
2487 // Check if the current token is an SP3 'neg' modifier.
2488 // Currently this modifier is allowed in the following contexts:
2489 //
2490 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2491 // 2. Before an 'abs' modifier: -abs(...)
2492 // 3. Before an SP3 'abs' modifier: -|...|
2493 //
2494 // In all other cases "-" is handled as a part
2495 // of an expression that follows the sign.
2496 //
2497 // Note: When "-" is followed by an integer literal N,
2498 // this is interpreted as integer negation rather
2499 // than a floating-point NEG modifier applied to N.
2500 // Besides being counter-intuitive, such use of a floating-point
2501 // NEG modifier would have resulted in different meanings
2502 // of integer literals used with VOP1/2/C and VOP3,
2503 // for example:
2504 //     v_exp_f32_e32 v5, -1  // VOP1: src0 = 0xFFFFFFFF
2505 //     v_exp_f32_e64 v5, -1  // VOP3: src0 = 0x80000001
2506 // Negative fp literals with a preceding "-" are
2507 // handled likewise for uniformity.
2508 //
2509 bool
2510 AMDGPUAsmParser::parseSP3NegModifier() {
2511
2512 AsmToken NextToken[2];
2513 peekTokens(NextToken);
2514
2515 if (isToken(AsmToken::Minus) &&
2516 (isRegister(NextToken[0], NextToken[1]) ||
2517 NextToken[0].is(AsmToken::Pipe) ||
2518 isId(NextToken[0], "abs"))) {
2519 lex();
2520 return true;
2521 }
2522
2523 return false;
2524 }
2525
2526 OperandMatchResultTy
2527 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2528 bool AllowImm) {
2529 bool Neg, SP3Neg;
2530 bool Abs, SP3Abs;
2531 SMLoc Loc;
2532
2533 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2534 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2535 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2536 return MatchOperand_ParseFail;
2537 }
2538
2539 SP3Neg = parseSP3NegModifier();
2540
2541 Loc = getLoc();
2542 Neg = trySkipId("neg");
2543 if (Neg && SP3Neg) {
2544 Error(Loc, "expected register or immediate");
2545 return MatchOperand_ParseFail;
2546 }
2547 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2548 return MatchOperand_ParseFail;
2549
2550 Abs = trySkipId("abs");
2551 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2552 return MatchOperand_ParseFail;
2553
2554 Loc = getLoc();
2555 SP3Abs = trySkipToken(AsmToken::Pipe);
2556 if (Abs && SP3Abs) {
2557 Error(Loc, "expected register or immediate");
2558 return MatchOperand_ParseFail;
2559 }
2560
2561 OperandMatchResultTy Res;
2562 if (AllowImm) {
2563 Res = parseRegOrImm(Operands, SP3Abs);
2564 } else {
2565 Res = parseReg(Operands);
2566 }
2567 if (Res != MatchOperand_Success) {
2568 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2569 }
2570
2571 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2572 return MatchOperand_ParseFail;
2573 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2574 return MatchOperand_ParseFail;
2575 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2576 return MatchOperand_ParseFail;
2577
2578 AMDGPUOperand::Modifiers Mods;
2579 Mods.Abs = Abs || SP3Abs;
2580 Mods.Neg = Neg || SP3Neg;
2581
2582 if (Mods.hasFPModifiers()) {
2583 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2584 if (Op.isExpr()) {
2585 Error(Op.getStartLoc(), "expected an absolute expression");
2586 return MatchOperand_ParseFail;
2587 }
2588 Op.setModifiers(Mods);
2589 }
2590 return MatchOperand_Success;
2591 }
2592
2593 OperandMatchResultTy
2594 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2595 bool AllowImm) {
2596 bool Sext = trySkipId("sext");
2597 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2598 return MatchOperand_ParseFail;
2599
2600 OperandMatchResultTy Res;
2601 if (AllowImm) {
2602 Res = parseRegOrImm(Operands);
2603 } else {
2604 Res = parseReg(Operands);
2605 }
2606 if (Res != MatchOperand_Success) {
2607 return Sext? MatchOperand_ParseFail : Res;
2608 }
2609
2610 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2611 return MatchOperand_ParseFail;
2612
2613 AMDGPUOperand::Modifiers Mods;
2614 Mods.Sext = Sext;
2615
2616 if (Mods.hasIntModifiers()) {
2617 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2618 if (Op.isExpr()) {
2619 Error(Op.getStartLoc(), "expected an absolute expression");
2620 return MatchOperand_ParseFail;
2621 }
2622 Op.setModifiers(Mods);
2623 }
2624
2625 return MatchOperand_Success;
2626 }
2627
2628 OperandMatchResultTy
2629 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2630 return parseRegOrImmWithFPInputMods(Operands, false);
2631 }
2632
2633 OperandMatchResultTy
2634 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2635 return parseRegOrImmWithIntInputMods(Operands, false);
2636 }
2637
2638 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2639 auto Loc = getLoc();
2640 if (trySkipId("off")) {
2641 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2642 AMDGPUOperand::ImmTyOff, false));
2643 return MatchOperand_Success;
2644 }
2645
2646 if (!isRegister())
2647 return MatchOperand_NoMatch;
2648
2649 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2650 if (Reg) {
2651 Operands.push_back(std::move(Reg));
2652 return MatchOperand_Success;
2653 }
2654
2655 return MatchOperand_ParseFail;
2656
2657 }
2658
2659 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2660 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2661
2662 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2663 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2664 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2665 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2666 return Match_InvalidOperand;
2667
2668 if ((TSFlags & SIInstrFlags::VOP3) &&
2669 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2670 getForcedEncodingSize() != 64)
2671 return Match_PreferE32;
2672
2673 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2674 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2675     // v_mac_f32/16 allow only dst_sel == DWORD.
2676 auto OpNum =
2677 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2678 const auto &Op = Inst.getOperand(OpNum);
2679 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2680 return Match_InvalidOperand;
2681 }
2682 }
2683
2684 return Match_Success;
2685 }
2686
2687 // What asm variants we should check
2688 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2689 if (getForcedEncodingSize() == 32) {
2690 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2691 return makeArrayRef(Variants);
2692 }
2693
2694 if (isForcedVOP3()) {
2695 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2696 return makeArrayRef(Variants);
2697 }
2698
2699 if (isForcedSDWA()) {
2700 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2701 AMDGPUAsmVariants::SDWA9};
2702 return makeArrayRef(Variants);
2703 }
2704
2705 if (isForcedDPP()) {
2706 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2707 return makeArrayRef(Variants);
2708 }
2709
2710 static const unsigned Variants[] = {
2711 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2712 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2713 };
2714
2715 return makeArrayRef(Variants);
2716 }
2717
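// Return the SGPR (FLAT_SCR, VCC, VCC_LO/HI or M0) implicitly read by this
// VOP instruction, or AMDGPU::NoRegister if there is none.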
2718 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2719 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2720 const unsigned Num = Desc.getNumImplicitUses();
2721 for (unsigned i = 0; i < Num; ++i) {
2722 unsigned Reg = Desc.ImplicitUses[i];
2723 switch (Reg) {
2724 case AMDGPU::FLAT_SCR:
2725 case AMDGPU::VCC:
2726 case AMDGPU::VCC_LO:
2727 case AMDGPU::VCC_HI:
2728 case AMDGPU::M0:
2729 return Reg;
2730 default:
2731 break;
2732 }
2733 }
2734 return AMDGPU::NoRegister;
2735 }
2736
2737 // NB: This code is correct only when used to check constant
2738 // bus limitations because GFX7 supports no f16 inline constants.
2739 // Note that there are no cases when a GFX7 opcode violates
2740 // constant bus limitations due to the use of an f16 constant.
2741 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2742 unsigned OpIdx) const {
2743 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2744
2745 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2746 return false;
2747 }
2748
2749 const MCOperand &MO = Inst.getOperand(OpIdx);
2750
2751 int64_t Val = MO.getImm();
2752 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2753
2754 switch (OpSize) { // expected operand size
2755 case 8:
2756 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2757 case 4:
2758 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2759 case 2: {
2760 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2761 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2762 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2763 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2764 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2765 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2766 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2767 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2768 } else {
2769 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2770 }
2771 }
2772 default:
2773 llvm_unreachable("invalid operand size");
2774 }
2775 }
2776
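// Return how many scalar (constant bus) values this opcode may read:
// one before GFX10; on GFX10, one for 64-bit shifts and two for everything
// else.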
2777 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2778 if (!isGFX10())
2779 return 1;
2780
2781 switch (Opcode) {
2782 // 64-bit shift instructions can use only one scalar value input
2783 case AMDGPU::V_LSHLREV_B64:
2784 case AMDGPU::V_LSHLREV_B64_gfx10:
2785 case AMDGPU::V_LSHL_B64:
2786 case AMDGPU::V_LSHRREV_B64:
2787 case AMDGPU::V_LSHRREV_B64_gfx10:
2788 case AMDGPU::V_LSHR_B64:
2789 case AMDGPU::V_ASHRREV_I64:
2790 case AMDGPU::V_ASHRREV_I64_gfx10:
2791 case AMDGPU::V_ASHR_I64:
2792 return 1;
2793 default:
2794 return 2;
2795 }
2796 }
2797
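// Check whether the operand at OpIdx consumes the constant bus: an SGPR
// other than null, or a literal/expression that is not an inline constant.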
2798 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2799 const MCOperand &MO = Inst.getOperand(OpIdx);
2800 if (MO.isImm()) {
2801 return !isInlineConstant(Inst, OpIdx);
2802 } else if (MO.isReg()) {
2803 auto Reg = MO.getReg();
2804 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2805 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2806 } else {
2807 return true;
2808 }
2809 }
2810
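// Count the distinct SGPRs and literal values read by a VALU instruction
// and verify that the total does not exceed the constant bus limit for this
// opcode.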
2811 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2812 const unsigned Opcode = Inst.getOpcode();
2813 const MCInstrDesc &Desc = MII.get(Opcode);
2814 unsigned ConstantBusUseCount = 0;
2815 unsigned NumLiterals = 0;
2816 unsigned LiteralSize;
2817
2818 if (Desc.TSFlags &
2819 (SIInstrFlags::VOPC |
2820 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2821 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2822 SIInstrFlags::SDWA)) {
2823 // Check special imm operands (used by madmk, etc)
2824 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2825 ++ConstantBusUseCount;
2826 }
2827
2828 SmallDenseSet<unsigned> SGPRsUsed;
2829 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2830 if (SGPRUsed != AMDGPU::NoRegister) {
2831 SGPRsUsed.insert(SGPRUsed);
2832 ++ConstantBusUseCount;
2833 }
2834
2835 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2836 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2837 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2838
2839 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2840
2841 for (int OpIdx : OpIndices) {
2842 if (OpIdx == -1) break;
2843
2844 const MCOperand &MO = Inst.getOperand(OpIdx);
2845 if (usesConstantBus(Inst, OpIdx)) {
2846 if (MO.isReg()) {
2847 const unsigned Reg = mc2PseudoReg(MO.getReg());
2848           // Pairs of registers with a partial intersection like these
2849 // s0, s[0:1]
2850 // flat_scratch_lo, flat_scratch
2851 // flat_scratch_lo, flat_scratch_hi
2852 // are theoretically valid but they are disabled anyway.
2853 // Note that this code mimics SIInstrInfo::verifyInstruction
2854 if (!SGPRsUsed.count(Reg)) {
2855 SGPRsUsed.insert(Reg);
2856 ++ConstantBusUseCount;
2857 }
2858 } else { // Expression or a literal
2859
2860 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2861 continue; // special operand like VINTERP attr_chan
2862
2863 // An instruction may use only one literal.
2864 // This has been validated on the previous step.
2865 // See validateVOP3Literal.
2866 // This literal may be used as more than one operand.
2867 // If all these operands are of the same size,
2868 // this literal counts as one scalar value.
2869 // Otherwise it counts as 2 scalar values.
2870 // See "GFX10 Shader Programming", section 3.6.2.3.
2871
2872 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2873 if (Size < 4) Size = 4;
2874
2875 if (NumLiterals == 0) {
2876 NumLiterals = 1;
2877 LiteralSize = Size;
2878 } else if (LiteralSize != Size) {
2879 NumLiterals = 2;
2880 }
2881 }
2882 }
2883 }
2884 }
2885 ConstantBusUseCount += NumLiterals;
2886
2887 return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2888 }
2889
2890 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2891 const unsigned Opcode = Inst.getOpcode();
2892 const MCInstrDesc &Desc = MII.get(Opcode);
2893
2894 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2895 if (DstIdx == -1 ||
2896 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2897 return true;
2898 }
2899
2900 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2901
2902 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2903 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2904 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2905
2906 assert(DstIdx != -1);
2907 const MCOperand &Dst = Inst.getOperand(DstIdx);
2908 assert(Dst.isReg());
2909 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2910
2911 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2912
2913 for (int SrcIdx : SrcIndices) {
2914 if (SrcIdx == -1) break;
2915 const MCOperand &Src = Inst.getOperand(SrcIdx);
2916 if (Src.isReg()) {
2917 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2918 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2919 return false;
2920 }
2921 }
2922 }
2923
2924 return true;
2925 }
2926
2927 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2928
2929 const unsigned Opc = Inst.getOpcode();
2930 const MCInstrDesc &Desc = MII.get(Opc);
2931
2932 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2933 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2934 assert(ClampIdx != -1);
2935 return Inst.getOperand(ClampIdx).getImm() == 0;
2936 }
2937
2938 return true;
2939 }
2940
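// Verify that the size of the MIMG vdata operand (in dwords) matches the
// number of components implied by dmask plus tfe, taking packed d16 into
// account.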
2941 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2942
2943 const unsigned Opc = Inst.getOpcode();
2944 const MCInstrDesc &Desc = MII.get(Opc);
2945
2946 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2947 return true;
2948
2949 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2950 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2951 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2952
2953 assert(VDataIdx != -1);
2954 assert(DMaskIdx != -1);
2955 assert(TFEIdx != -1);
2956
2957 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2958 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2959 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2960 if (DMask == 0)
2961 DMask = 1;
2962
2963 unsigned DataSize =
2964 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2965 if (hasPackedD16()) {
2966 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2967 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2968 DataSize = (DataSize + 1) / 2;
2969 }
2970
2971 return (VDataSize / 4) == DataSize + TFESize;
2972 }
2973
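// Verify that the GFX10 MIMG address operands match the number of address
// components required by the base opcode and dim, accounting for NSA vs.
// packed (rounded-up) addressing.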
2974 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2975 const unsigned Opc = Inst.getOpcode();
2976 const MCInstrDesc &Desc = MII.get(Opc);
2977
2978 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2979 return true;
2980
2981 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2982 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2983 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2984 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2985 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2986 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2987
2988 assert(VAddr0Idx != -1);
2989 assert(SrsrcIdx != -1);
2990 assert(DimIdx != -1);
2991 assert(SrsrcIdx > VAddr0Idx);
2992
2993 unsigned Dim = Inst.getOperand(DimIdx).getImm();
2994 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2995 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2996 unsigned VAddrSize =
2997 IsNSA ? SrsrcIdx - VAddr0Idx
2998 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2999
3000 unsigned AddrSize = BaseOpcode->NumExtraArgs +
3001 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3002 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3003 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3004 if (!IsNSA) {
3005 if (AddrSize > 8)
3006 AddrSize = 16;
3007 else if (AddrSize > 4)
3008 AddrSize = 8;
3009 }
3010
3011 return VAddrSize == AddrSize;
3012 }
3013
3014 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3015
3016 const unsigned Opc = Inst.getOpcode();
3017 const MCInstrDesc &Desc = MII.get(Opc);
3018
3019 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3020 return true;
3021 if (!Desc.mayLoad() || !Desc.mayStore())
3022 return true; // Not atomic
3023
3024 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3025 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3026
3027 // This is an incomplete check because image_atomic_cmpswap
3028 // may only use 0x3 and 0xf while other atomic operations
3029 // may use 0x1 and 0x3. However these limitations are
3030 // verified when we check that dmask matches dst size.
3031 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3032 }
3033
3034 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3035
3036 const unsigned Opc = Inst.getOpcode();
3037 const MCInstrDesc &Desc = MII.get(Opc);
3038
3039 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3040 return true;
3041
3042 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3043 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3044
3045 // GATHER4 instructions use dmask in a different fashion compared to
3046 // other MIMG instructions. The only useful DMASK values are
3047 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3048 // (red,red,red,red) etc.) The ISA document doesn't mention
3049 // this.
3050 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3051 }
3052
3053 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3054 {
3055 switch (Opcode) {
3056 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3057 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3058 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3059 return true;
3060 default:
3061 return false;
3062 }
3063 }
3064
3065 // movrels* opcodes should only allow VGPRS as src0.
3066 // This is specified in .td description for vop1/vop3,
3067 // but sdwa is handled differently. See isSDWAOperand.
3068 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3069
3070 const unsigned Opc = Inst.getOpcode();
3071 const MCInstrDesc &Desc = MII.get(Opc);
3072
3073 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3074 return true;
3075
3076 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3077 assert(Src0Idx != -1);
3078
3079 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3080 if (!Src0.isReg())
3081 return false;
3082
3083 auto Reg = Src0.getReg();
3084 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3085 return !isSGPR(mc2PseudoReg(Reg), TRI);
3086 }
3087
3088 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3089
3090 const unsigned Opc = Inst.getOpcode();
3091 const MCInstrDesc &Desc = MII.get(Opc);
3092
3093 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3094 return true;
3095
3096 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3097 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3098 if (isCI() || isSI())
3099 return false;
3100 }
3101
3102 return true;
3103 }
3104
3105 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3106 const unsigned Opc = Inst.getOpcode();
3107 const MCInstrDesc &Desc = MII.get(Opc);
3108
3109 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3110 return true;
3111
3112 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3113 if (DimIdx < 0)
3114 return true;
3115
3116 long Imm = Inst.getOperand(DimIdx).getImm();
3117 if (Imm < 0 || Imm >= 8)
3118 return false;
3119
3120 return true;
3121 }
3122
3123 static bool IsRevOpcode(const unsigned Opcode)
3124 {
3125 switch (Opcode) {
3126 case AMDGPU::V_SUBREV_F32_e32:
3127 case AMDGPU::V_SUBREV_F32_e64:
3128 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3129 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3130 case AMDGPU::V_SUBREV_F32_e32_vi:
3131 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3132 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3133 case AMDGPU::V_SUBREV_F32_e64_vi:
3134
3135 case AMDGPU::V_SUBREV_I32_e32:
3136 case AMDGPU::V_SUBREV_I32_e64:
3137 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3138 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3139
3140 case AMDGPU::V_SUBBREV_U32_e32:
3141 case AMDGPU::V_SUBBREV_U32_e64:
3142 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3143 case AMDGPU::V_SUBBREV_U32_e32_vi:
3144 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3145 case AMDGPU::V_SUBBREV_U32_e64_vi:
3146
3147 case AMDGPU::V_SUBREV_U32_e32:
3148 case AMDGPU::V_SUBREV_U32_e64:
3149 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3150 case AMDGPU::V_SUBREV_U32_e32_vi:
3151 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3152 case AMDGPU::V_SUBREV_U32_e64_vi:
3153
3154 case AMDGPU::V_SUBREV_F16_e32:
3155 case AMDGPU::V_SUBREV_F16_e64:
3156 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3157 case AMDGPU::V_SUBREV_F16_e32_vi:
3158 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3159 case AMDGPU::V_SUBREV_F16_e64_vi:
3160
3161 case AMDGPU::V_SUBREV_U16_e32:
3162 case AMDGPU::V_SUBREV_U16_e64:
3163 case AMDGPU::V_SUBREV_U16_e32_vi:
3164 case AMDGPU::V_SUBREV_U16_e64_vi:
3165
3166 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3167 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3168 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3169
3170 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3171 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3172
3173 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3174 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3175
3176 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3177 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3178
3179 case AMDGPU::V_LSHRREV_B32_e32:
3180 case AMDGPU::V_LSHRREV_B32_e64:
3181 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3182 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3183 case AMDGPU::V_LSHRREV_B32_e32_vi:
3184 case AMDGPU::V_LSHRREV_B32_e64_vi:
3185 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3186 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3187
3188 case AMDGPU::V_ASHRREV_I32_e32:
3189 case AMDGPU::V_ASHRREV_I32_e64:
3190 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3191 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3192 case AMDGPU::V_ASHRREV_I32_e32_vi:
3193 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3194 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3195 case AMDGPU::V_ASHRREV_I32_e64_vi:
3196
3197 case AMDGPU::V_LSHLREV_B32_e32:
3198 case AMDGPU::V_LSHLREV_B32_e64:
3199 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3200 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3201 case AMDGPU::V_LSHLREV_B32_e32_vi:
3202 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3203 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3204 case AMDGPU::V_LSHLREV_B32_e64_vi:
3205
3206 case AMDGPU::V_LSHLREV_B16_e32:
3207 case AMDGPU::V_LSHLREV_B16_e64:
3208 case AMDGPU::V_LSHLREV_B16_e32_vi:
3209 case AMDGPU::V_LSHLREV_B16_e64_vi:
3210 case AMDGPU::V_LSHLREV_B16_gfx10:
3211
3212 case AMDGPU::V_LSHRREV_B16_e32:
3213 case AMDGPU::V_LSHRREV_B16_e64:
3214 case AMDGPU::V_LSHRREV_B16_e32_vi:
3215 case AMDGPU::V_LSHRREV_B16_e64_vi:
3216 case AMDGPU::V_LSHRREV_B16_gfx10:
3217
3218 case AMDGPU::V_ASHRREV_I16_e32:
3219 case AMDGPU::V_ASHRREV_I16_e64:
3220 case AMDGPU::V_ASHRREV_I16_e32_vi:
3221 case AMDGPU::V_ASHRREV_I16_e64_vi:
3222 case AMDGPU::V_ASHRREV_I16_gfx10:
3223
3224 case AMDGPU::V_LSHLREV_B64:
3225 case AMDGPU::V_LSHLREV_B64_gfx10:
3226 case AMDGPU::V_LSHLREV_B64_vi:
3227
3228 case AMDGPU::V_LSHRREV_B64:
3229 case AMDGPU::V_LSHRREV_B64_gfx10:
3230 case AMDGPU::V_LSHRREV_B64_vi:
3231
3232 case AMDGPU::V_ASHRREV_I64:
3233 case AMDGPU::V_ASHRREV_I64_gfx10:
3234 case AMDGPU::V_ASHRREV_I64_vi:
3235
3236 case AMDGPU::V_PK_LSHLREV_B16:
3237 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3238 case AMDGPU::V_PK_LSHLREV_B16_vi:
3239
3240 case AMDGPU::V_PK_LSHRREV_B16:
3241 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3242 case AMDGPU::V_PK_LSHRREV_B16_vi:
3243 case AMDGPU::V_PK_ASHRREV_I16:
3244 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3245 case AMDGPU::V_PK_ASHRREV_I16_vi:
3246 return true;
3247 default:
3248 return false;
3249 }
3250 }
3251
3252 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3253
3254 using namespace SIInstrFlags;
3255 const unsigned Opcode = Inst.getOpcode();
3256 const MCInstrDesc &Desc = MII.get(Opcode);
3257
3258 // lds_direct register is defined so that it can be used
3259 // with 9-bit operands only. Ignore encodings which do not accept these.
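  // For example (illustrative), "v_mov_b32 v0, lds_direct" is accepted here
  // because lds_direct appears as src0 of a plain VOP encoding, while the
  // checks below reject lds_direct as src1/src2 and as src0 of SDWA or
  // "*rev" forms.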
3260 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3261 return true;
3262
3263 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266
3267 const int SrcIndices[] = { Src1Idx, Src2Idx };
3268
3269 // lds_direct cannot be specified as either src1 or src2.
3270 for (int SrcIdx : SrcIndices) {
3271 if (SrcIdx == -1) break;
3272 const MCOperand &Src = Inst.getOperand(SrcIdx);
3273 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3274 return false;
3275 }
3276 }
3277
3278 if (Src0Idx == -1)
3279 return true;
3280
3281 const MCOperand &Src = Inst.getOperand(Src0Idx);
3282 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3283 return true;
3284
3285 // lds_direct is specified as src0. Check additional limitations.
3286 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3287 }
3288
3289 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3290 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3291 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3292 if (Op.isFlatOffset())
3293 return Op.getStartLoc();
3294 }
3295 return getLoc();
3296 }
3297
3298 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3299 const OperandVector &Operands) {
3300 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3301 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3302 return true;
3303
3304 auto Opcode = Inst.getOpcode();
3305 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3306 assert(OpNum != -1);
3307
3308 const auto &Op = Inst.getOperand(OpNum);
3309 if (!hasFlatOffsets() && Op.getImm() != 0) {
3310 Error(getFlatOffsetLoc(Operands),
3311 "flat offset modifier is not supported on this GPU");
3312 return false;
3313 }
3314
3315 // The address offset is 13-bit signed for GFX9 and 12-bit signed for GFX10.
3316 // For FLAT segment the offset must be positive;
3317 // MSB is ignored and forced to zero.
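  // E.g. (illustrative): on GFX9 an IsNonFlatSeg instruction accepts
  // offset:-4096..4095, while a FLAT-segment access is limited to
  // offset:0..4095; the corresponding GFX10 ranges are -2048..2047 and 0..2047.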
3318 unsigned OffsetSize = isGFX9() ? 13 : 12;
3319 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3320 if (!isIntN(OffsetSize, Op.getImm())) {
3321 Error(getFlatOffsetLoc(Operands),
3322 isGFX9() ? "expected a 13-bit signed offset" :
3323 "expected a 12-bit signed offset");
3324 return false;
3325 }
3326 } else {
3327 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3328 Error(getFlatOffsetLoc(Operands),
3329 isGFX9() ? "expected a 12-bit unsigned offset" :
3330 "expected an 11-bit unsigned offset");
3331 return false;
3332 }
3333 }
3334
3335 return true;
3336 }
3337
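// SOP2/SOPC instructions can encode at most one 32-bit literal, so count the
// distinct literal values and relocatable expressions used by src0/src1 and
// reject the instruction if there is more than one.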
3338 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3339 unsigned Opcode = Inst.getOpcode();
3340 const MCInstrDesc &Desc = MII.get(Opcode);
3341 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3342 return true;
3343
3344 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3345 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3346
3347 const int OpIndices[] = { Src0Idx, Src1Idx };
3348
3349 unsigned NumExprs = 0;
3350 unsigned NumLiterals = 0;
3351 uint32_t LiteralValue;
3352
3353 for (int OpIdx : OpIndices) {
3354 if (OpIdx == -1) break;
3355
3356 const MCOperand &MO = Inst.getOperand(OpIdx);
3357 // Exclude special imm operands (like those used by s_set_gpr_idx_on).
3358 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3359 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3360 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3361 if (NumLiterals == 0 || LiteralValue != Value) {
3362 LiteralValue = Value;
3363 ++NumLiterals;
3364 }
3365 } else if (MO.isExpr()) {
3366 ++NumExprs;
3367 }
3368 }
3369 }
3370
3371 return NumLiterals + NumExprs <= 1;
3372 }
3373
3374 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3375 const unsigned Opc = Inst.getOpcode();
3376 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3377 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3378 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3379 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3380
3381 if (OpSel & ~3)
3382 return false;
3383 }
3384 return true;
3385 }
3386
3387 // Check if VCC register matches wavefront size
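// E.g. (illustrative) wave64 code must name the full "vcc" pair, while wave32
// code must use "vcc_lo" instead.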
3388 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3389 auto FB = getFeatureBits();
3390 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3391 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3392 }
3393
3394 // VOP3 literal is only allowed in GFX10+ and only one can be used
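// E.g. (illustrative) "v_fma_f32 v0, v1, v2, 0x42f60000" is only accepted on
// targets with FeatureVOP3Literal, and on targets with
// FeatureMFMAInlineLiteralBug an immediate src2 of an MAI instruction is
// rejected outright.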
3395 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3396 unsigned Opcode = Inst.getOpcode();
3397 const MCInstrDesc &Desc = MII.get(Opcode);
3398 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3399 return true;
3400
3401 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3402 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3403 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3404
3405 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3406
3407 unsigned NumExprs = 0;
3408 unsigned NumLiterals = 0;
3409 uint32_t LiteralValue;
3410
3411 for (int OpIdx : OpIndices) {
3412 if (OpIdx == -1) break;
3413
3414 const MCOperand &MO = Inst.getOperand(OpIdx);
3415 if (!MO.isImm() && !MO.isExpr())
3416 continue;
3417 if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3418 continue;
3419
3420 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3421 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3422 return false;
3423
3424 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3425 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3426 if (NumLiterals == 0 || LiteralValue != Value) {
3427 LiteralValue = Value;
3428 ++NumLiterals;
3429 }
3430 } else if (MO.isExpr()) {
3431 ++NumExprs;
3432 }
3433 }
3434 NumLiterals += NumExprs;
3435
3436 return !NumLiterals ||
3437 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3438 }
3439
3440 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3441 const SMLoc &IDLoc,
3442 const OperandVector &Operands) {
3443 if (!validateLdsDirect(Inst)) {
3444 Error(IDLoc,
3445 "invalid use of lds_direct");
3446 return false;
3447 }
3448 if (!validateSOPLiteral(Inst)) {
3449 Error(IDLoc,
3450 "only one literal operand is allowed");
3451 return false;
3452 }
3453 if (!validateVOP3Literal(Inst)) {
3454 Error(IDLoc,
3455 "invalid literal operand");
3456 return false;
3457 }
3458 if (!validateConstantBusLimitations(Inst)) {
3459 Error(IDLoc,
3460 "invalid operand (violates constant bus restrictions)");
3461 return false;
3462 }
3463 if (!validateEarlyClobberLimitations(Inst)) {
3464 Error(IDLoc,
3465 "destination must be different than all sources");
3466 return false;
3467 }
3468 if (!validateIntClampSupported(Inst)) {
3469 Error(IDLoc,
3470 "integer clamping is not supported on this GPU");
3471 return false;
3472 }
3473 if (!validateOpSel(Inst)) {
3474 Error(IDLoc,
3475 "invalid op_sel operand");
3476 return false;
3477 }
3478 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3479 if (!validateMIMGD16(Inst)) {
3480 Error(IDLoc,
3481 "d16 modifier is not supported on this GPU");
3482 return false;
3483 }
3484 if (!validateMIMGDim(Inst)) {
3485 Error(IDLoc, "dim modifier is required on this GPU");
3486 return false;
3487 }
3488 if (!validateMIMGDataSize(Inst)) {
3489 Error(IDLoc,
3490 "image data size does not match dmask and tfe");
3491 return false;
3492 }
3493 if (!validateMIMGAddrSize(Inst)) {
3494 Error(IDLoc,
3495 "image address size does not match dim and a16");
3496 return false;
3497 }
3498 if (!validateMIMGAtomicDMask(Inst)) {
3499 Error(IDLoc,
3500 "invalid atomic image dmask");
3501 return false;
3502 }
3503 if (!validateMIMGGatherDMask(Inst)) {
3504 Error(IDLoc,
3505 "invalid image_gather dmask: only one bit must be set");
3506 return false;
3507 }
3508 if (!validateMovrels(Inst)) {
3509 Error(IDLoc, "source operand must be a VGPR");
3510 return false;
3511 }
3512 if (!validateFlatOffset(Inst, Operands)) {
3513 return false;
3514 }
3515
3516 return true;
3517 }
3518
3519 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3520 const FeatureBitset &FBS,
3521 unsigned VariantID = 0);
3522
3523 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3524 OperandVector &Operands,
3525 MCStreamer &Out,
3526 uint64_t &ErrorInfo,
3527 bool MatchingInlineAsm) {
3528 MCInst Inst;
3529 unsigned Result = Match_Success;
3530 for (auto Variant : getMatchedVariants()) {
3531 uint64_t EI;
3532 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3533 Variant);
3534 // We order match statuses from least to most specific. We use the most
3535 // specific status as the result:
3536 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3537 if ((R == Match_Success) ||
3538 (R == Match_PreferE32) ||
3539 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3540 (R == Match_InvalidOperand && Result != Match_MissingFeature
3541 && Result != Match_PreferE32) ||
3542 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3543 && Result != Match_MissingFeature
3544 && Result != Match_PreferE32)) {
3545 Result = R;
3546 ErrorInfo = EI;
3547 }
3548 if (R == Match_Success)
3549 break;
3550 }
3551
3552 switch (Result) {
3553 default: break;
3554 case Match_Success:
3555 if (!validateInstruction(Inst, IDLoc, Operands)) {
3556 return true;
3557 }
3558 Inst.setLoc(IDLoc);
3559 Out.EmitInstruction(Inst, getSTI());
3560 return false;
3561
3562 case Match_MissingFeature:
3563 return Error(IDLoc, "instruction not supported on this GPU");
3564
3565 case Match_MnemonicFail: {
3566 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3567 std::string Suggestion = AMDGPUMnemonicSpellCheck(
3568 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3569 return Error(IDLoc, "invalid instruction" + Suggestion,
3570 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3571 }
3572
3573 case Match_InvalidOperand: {
3574 SMLoc ErrorLoc = IDLoc;
3575 if (ErrorInfo != ~0ULL) {
3576 if (ErrorInfo >= Operands.size()) {
3577 return Error(IDLoc, "too few operands for instruction");
3578 }
3579 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3580 if (ErrorLoc == SMLoc())
3581 ErrorLoc = IDLoc;
3582 }
3583 return Error(ErrorLoc, "invalid operand for instruction");
3584 }
3585
3586 case Match_PreferE32:
3587 return Error(IDLoc, "internal error: instruction without _e64 suffix "
3588 "should be encoded as e32");
3589 }
3590 llvm_unreachable("Implement any new match types added!");
3591 }
3592
3593 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3594 int64_t Tmp = -1;
3595 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3596 return true;
3597 }
3598 if (getParser().parseAbsoluteExpression(Tmp)) {
3599 return true;
3600 }
3601 Ret = static_cast<uint32_t>(Tmp);
3602 return false;
3603 }
3604
3605 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3606 uint32_t &Minor) {
3607 if (ParseAsAbsoluteExpression(Major))
3608 return TokError("invalid major version");
3609
3610 if (getLexer().isNot(AsmToken::Comma))
3611 return TokError("minor version number required, comma expected");
3612 Lex();
3613
3614 if (ParseAsAbsoluteExpression(Minor))
3615 return TokError("invalid minor version");
3616
3617 return false;
3618 }
3619
3620 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3621 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3622 return TokError("directive only supported for amdgcn architecture");
3623
3624 std::string Target;
3625
3626 SMLoc TargetStart = getTok().getLoc();
3627 if (getParser().parseEscapedString(Target))
3628 return true;
3629 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3630
3631 std::string ExpectedTarget;
3632 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3633 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3634
3635 if (Target != ExpectedTargetOS.str())
3636 return getParser().Error(TargetRange.Start, "target must match options",
3637 TargetRange);
3638
3639 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3640 return false;
3641 }
3642
3643 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3644 return getParser().Error(Range.Start, "value out of range", Range);
3645 }
3646
3647 bool AMDGPUAsmParser::calculateGPRBlocks(
3648 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3649 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3650 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3651 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3652 // TODO(scott.linder): These calculations are duplicated from
3653 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3654 IsaVersion Version = getIsaVersion(getSTI().getCPU());
3655
3656 unsigned NumVGPRs = NextFreeVGPR;
3657 unsigned NumSGPRs = NextFreeSGPR;
3658
3659 if (Version.Major >= 10)
3660 NumSGPRs = 0;
3661 else {
3662 unsigned MaxAddressableNumSGPRs =
3663 IsaInfo::getAddressableNumSGPRs(&getSTI());
3664
3665 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3666 NumSGPRs > MaxAddressableNumSGPRs)
3667 return OutOfRangeError(SGPRRange);
3668
3669 NumSGPRs +=
3670 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3671
3672 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3673 NumSGPRs > MaxAddressableNumSGPRs)
3674 return OutOfRangeError(SGPRRange);
3675
3676 if (Features.test(FeatureSGPRInitBug))
3677 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3678 }
3679
3680 VGPRBlocks =
3681 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3682 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3683
3684 return false;
3685 }
3686
3687 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3688 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3689 return TokError("directive only supported for amdgcn architecture");
3690
3691 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3692 return TokError("directive only supported for amdhsa OS");
3693
3694 StringRef KernelName;
3695 if (getParser().parseIdentifier(KernelName))
3696 return true;
3697
3698 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3699
3700 StringSet<> Seen;
3701
3702 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3703
3704 SMRange VGPRRange;
3705 uint64_t NextFreeVGPR = 0;
3706 SMRange SGPRRange;
3707 uint64_t NextFreeSGPR = 0;
3708 unsigned UserSGPRCount = 0;
3709 bool ReserveVCC = true;
3710 bool ReserveFlatScr = true;
3711 bool ReserveXNACK = hasXNACK();
3712 Optional<bool> EnableWavefrontSize32;
3713
3714 while (true) {
3715 while (getLexer().is(AsmToken::EndOfStatement))
3716 Lex();
3717
3718 if (getLexer().isNot(AsmToken::Identifier))
3719 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3720
3721 StringRef ID = getTok().getIdentifier();
3722 SMRange IDRange = getTok().getLocRange();
3723 Lex();
3724
3725 if (ID == ".end_amdhsa_kernel")
3726 break;
3727
3728 if (Seen.find(ID) != Seen.end())
3729 return TokError(".amdhsa_ directives cannot be repeated");
3730 Seen.insert(ID);
3731
3732 SMLoc ValStart = getTok().getLoc();
3733 int64_t IVal;
3734 if (getParser().parseAbsoluteExpression(IVal))
3735 return true;
3736 SMLoc ValEnd = getTok().getLoc();
3737 SMRange ValRange = SMRange(ValStart, ValEnd);
3738
3739 if (IVal < 0)
3740 return OutOfRangeError(ValRange);
3741
3742 uint64_t Val = IVal;
3743
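// PARSE_BITS_ENTRY range-checks VALUE against the bit width of ENTRY and, if
// it fits, packs it into FIELD via AMDHSA_BITS_SET; otherwise the directive
// value is reported as out of range.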
3744 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3745 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3746 return OutOfRangeError(RANGE); \
3747 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3748
3749 if (ID == ".amdhsa_group_segment_fixed_size") {
3750 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3751 return OutOfRangeError(ValRange);
3752 KD.group_segment_fixed_size = Val;
3753 } else if (ID == ".amdhsa_private_segment_fixed_size") {
3754 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3755 return OutOfRangeError(ValRange);
3756 KD.private_segment_fixed_size = Val;
3757 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3758 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3760 Val, ValRange);
3761 if (Val)
3762 UserSGPRCount += 4;
3763 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3764 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3765 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3766 ValRange);
3767 if (Val)
3768 UserSGPRCount += 2;
3769 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3770 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3771 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3772 ValRange);
3773 if (Val)
3774 UserSGPRCount += 2;
3775 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3776 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3777 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3778 Val, ValRange);
3779 if (Val)
3780 UserSGPRCount += 2;
3781 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3782 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3783 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3784 ValRange);
3785 if (Val)
3786 UserSGPRCount += 2;
3787 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3788 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3789 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3790 ValRange);
3791 if (Val)
3792 UserSGPRCount += 2;
3793 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3794 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3795 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3796 Val, ValRange);
3797 if (Val)
3798 UserSGPRCount += 1;
3799 } else if (ID == ".amdhsa_wavefront_size32") {
3800 if (IVersion.Major < 10)
3801 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3802 IDRange);
3803 EnableWavefrontSize32 = Val;
3804 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3805 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3806 Val, ValRange);
3807 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3808 PARSE_BITS_ENTRY(
3809 KD.compute_pgm_rsrc2,
3810 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3811 ValRange);
3812 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3814 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3815 ValRange);
3816 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3818 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3819 ValRange);
3820 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3821 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3822 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3823 ValRange);
3824 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3825 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3826 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3827 ValRange);
3828 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3830 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3831 ValRange);
3832 } else if (ID == ".amdhsa_next_free_vgpr") {
3833 VGPRRange = ValRange;
3834 NextFreeVGPR = Val;
3835 } else if (ID == ".amdhsa_next_free_sgpr") {
3836 SGPRRange = ValRange;
3837 NextFreeSGPR = Val;
3838 } else if (ID == ".amdhsa_reserve_vcc") {
3839 if (!isUInt<1>(Val))
3840 return OutOfRangeError(ValRange);
3841 ReserveVCC = Val;
3842 } else if (ID == ".amdhsa_reserve_flat_scratch") {
3843 if (IVersion.Major < 7)
3844 return getParser().Error(IDRange.Start, "directive requires gfx7+",
3845 IDRange);
3846 if (!isUInt<1>(Val))
3847 return OutOfRangeError(ValRange);
3848 ReserveFlatScr = Val;
3849 } else if (ID == ".amdhsa_reserve_xnack_mask") {
3850 if (IVersion.Major < 8)
3851 return getParser().Error(IDRange.Start, "directive requires gfx8+",
3852 IDRange);
3853 if (!isUInt<1>(Val))
3854 return OutOfRangeError(ValRange);
3855 ReserveXNACK = Val;
3856 } else if (ID == ".amdhsa_float_round_mode_32") {
3857 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3858 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3859 } else if (ID == ".amdhsa_float_round_mode_16_64") {
3860 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3861 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3862 } else if (ID == ".amdhsa_float_denorm_mode_32") {
3863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3864 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3865 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3866 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3867 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3868 ValRange);
3869 } else if (ID == ".amdhsa_dx10_clamp") {
3870 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3871 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3872 } else if (ID == ".amdhsa_ieee_mode") {
3873 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3874 Val, ValRange);
3875 } else if (ID == ".amdhsa_fp16_overflow") {
3876 if (IVersion.Major < 9)
3877 return getParser().Error(IDRange.Start, "directive requires gfx9+",
3878 IDRange);
3879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3880 ValRange);
3881 } else if (ID == ".amdhsa_workgroup_processor_mode") {
3882 if (IVersion.Major < 10)
3883 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3884 IDRange);
3885 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3886 ValRange);
3887 } else if (ID == ".amdhsa_memory_ordered") {
3888 if (IVersion.Major < 10)
3889 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3890 IDRange);
3891 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3892 ValRange);
3893 } else if (ID == ".amdhsa_forward_progress") {
3894 if (IVersion.Major < 10)
3895 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3896 IDRange);
3897 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3898 ValRange);
3899 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3900 PARSE_BITS_ENTRY(
3901 KD.compute_pgm_rsrc2,
3902 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3903 ValRange);
3904 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3905 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3906 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3907 Val, ValRange);
3908 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3909 PARSE_BITS_ENTRY(
3910 KD.compute_pgm_rsrc2,
3911 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3912 ValRange);
3913 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3914 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3915 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3916 Val, ValRange);
3917 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3918 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3919 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3920 Val, ValRange);
3921 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3922 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3923 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3924 Val, ValRange);
3925 } else if (ID == ".amdhsa_exception_int_div_zero") {
3926 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3927 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3928 Val, ValRange);
3929 } else {
3930 return getParser().Error(IDRange.Start,
3931 "unknown .amdhsa_kernel directive", IDRange);
3932 }
3933
3934 #undef PARSE_BITS_ENTRY
3935 }
3936
3937 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3938 return TokError(".amdhsa_next_free_vgpr directive is required");
3939
3940 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3941 return TokError(".amdhsa_next_free_sgpr directive is required");
3942
3943 unsigned VGPRBlocks;
3944 unsigned SGPRBlocks;
3945 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3946 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3947 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3948 SGPRBlocks))
3949 return true;
3950
3951 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3952 VGPRBlocks))
3953 return OutOfRangeError(VGPRRange);
3954 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3955 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3956
3957 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3958 SGPRBlocks))
3959 return OutOfRangeError(SGPRRange);
3960 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3961 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3962 SGPRBlocks);
3963
3964 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3965 return TokError("too many user SGPRs enabled");
3966 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3967 UserSGPRCount);
3968
3969 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3970 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3971 ReserveFlatScr, ReserveXNACK);
3972 return false;
3973 }
3974
3975 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3976 uint32_t Major;
3977 uint32_t Minor;
3978
3979 if (ParseDirectiveMajorMinor(Major, Minor))
3980 return true;
3981
3982 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3983 return false;
3984 }
3985
3986 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3987 uint32_t Major;
3988 uint32_t Minor;
3989 uint32_t Stepping;
3990 StringRef VendorName;
3991 StringRef ArchName;
3992
3993 // If this directive has no arguments, then use the ISA version for the
3994 // targeted GPU.
3995 if (getLexer().is(AsmToken::EndOfStatement)) {
3996 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3997 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3998 ISA.Stepping,
3999 "AMD", "AMDGPU");
4000 return false;
4001 }
4002
4003 if (ParseDirectiveMajorMinor(Major, Minor))
4004 return true;
4005
4006 if (getLexer().isNot(AsmToken::Comma))
4007 return TokError("stepping version number required, comma expected");
4008 Lex();
4009
4010 if (ParseAsAbsoluteExpression(Stepping))
4011 return TokError("invalid stepping version");
4012
4013 if (getLexer().isNot(AsmToken::Comma))
4014 return TokError("vendor name required, comma expected");
4015 Lex();
4016
4017 if (getLexer().isNot(AsmToken::String))
4018 return TokError("invalid vendor name");
4019
4020 VendorName = getLexer().getTok().getStringContents();
4021 Lex();
4022
4023 if (getLexer().isNot(AsmToken::Comma))
4024 return TokError("arch name required, comma expected");
4025 Lex();
4026
4027 if (getLexer().isNot(AsmToken::String))
4028 return TokError("invalid arch name");
4029
4030 ArchName = getLexer().getTok().getStringContents();
4031 Lex();
4032
4033 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4034 VendorName, ArchName);
4035 return false;
4036 }
4037
4038 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4039 amd_kernel_code_t &Header) {
4040 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4041 // assembly for backwards compatibility.
4042 if (ID == "max_scratch_backing_memory_byte_size") {
4043 Parser.eatToEndOfStatement();
4044 return false;
4045 }
4046
4047 SmallString<40> ErrStr;
4048 raw_svector_ostream Err(ErrStr);
4049 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4050 return TokError(Err.str());
4051 }
4052 Lex();
4053
4054 if (ID == "enable_wavefront_size32") {
4055 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4056 if (!isGFX10())
4057 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4058 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4059 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4060 } else {
4061 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4062 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4063 }
4064 }
4065
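  // wavefront_size is stored as a log2 value: 5 selects wave32 and 6 selects
  // wave64.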
4066 if (ID == "wavefront_size") {
4067 if (Header.wavefront_size == 5) {
4068 if (!isGFX10())
4069 return TokError("wavefront_size=5 is only allowed on GFX10+");
4070 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4071 return TokError("wavefront_size=5 requires +WavefrontSize32");
4072 } else if (Header.wavefront_size == 6) {
4073 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4074 return TokError("wavefront_size=6 requires +WavefrontSize64");
4075 }
4076 }
4077
4078 if (ID == "enable_wgp_mode") {
4079 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4080 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4081 }
4082
4083 if (ID == "enable_mem_ordered") {
4084 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4085 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4086 }
4087
4088 if (ID == "enable_fwd_progress") {
4089 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4090 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4091 }
4092
4093 return false;
4094 }
4095
4096 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4097 amd_kernel_code_t Header;
4098 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4099
4100 while (true) {
4101 // Lex EndOfStatement. This is in a while loop, because lexing a comment
4102 // will set the current token to EndOfStatement.
4103 while (getLexer().is(AsmToken::EndOfStatement))
4104 Lex();
4105
4106 if (getLexer().isNot(AsmToken::Identifier))
4107 return TokError("expected value identifier or .end_amd_kernel_code_t");
4108
4109 StringRef ID = getLexer().getTok().getIdentifier();
4110 Lex();
4111
4112 if (ID == ".end_amd_kernel_code_t")
4113 break;
4114
4115 if (ParseAMDKernelCodeTValue(ID, Header))
4116 return true;
4117 }
4118
4119 getTargetStreamer().EmitAMDKernelCodeT(Header);
4120
4121 return false;
4122 }
4123
4124 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4125 if (getLexer().isNot(AsmToken::Identifier))
4126 return TokError("expected symbol name");
4127
4128 StringRef KernelName = Parser.getTok().getString();
4129
4130 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4131 ELF::STT_AMDGPU_HSA_KERNEL);
4132 Lex();
4133 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4134 KernelScope.initialize(getContext());
4135 return false;
4136 }
4137
4138 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4139 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4140 return Error(getParser().getTok().getLoc(),
4141 ".amd_amdgpu_isa directive is not available on non-amdgcn "
4142 "architectures");
4143 }
4144
4145 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4146
4147 std::string ISAVersionStringFromSTI;
4148 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4149 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4150
4151 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4152 return Error(getParser().getTok().getLoc(),
4153 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4154 "arguments specified through the command line");
4155 }
4156
4157 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4158 Lex();
4159
4160 return false;
4161 }
4162
4163 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4164 const char *AssemblerDirectiveBegin;
4165 const char *AssemblerDirectiveEnd;
4166 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4167 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4168 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4169 HSAMD::V3::AssemblerDirectiveEnd)
4170 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4171 HSAMD::AssemblerDirectiveEnd);
4172
4173 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4174 return Error(getParser().getTok().getLoc(),
4175 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4176 "not available on non-amdhsa OSes")).str());
4177 }
4178
4179 std::string HSAMetadataString;
4180 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4181 HSAMetadataString))
4182 return true;
4183
4184 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4185 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4186 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4187 } else {
4188 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4189 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4190 }
4191
4192 return false;
4193 }
4194
4195 /// Common code to parse out a block of text (typically YAML) between start and
4196 /// end directives.
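/// The collected text is returned verbatim (with statement separators) so that
/// callers such as ParseDirectiveHSAMetadata and ParseDirectivePALMetadataBegin
/// can hand it to the target streamer for parsing.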
4197 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4198 const char *AssemblerDirectiveEnd,
4199 std::string &CollectString) {
4200
4201 raw_string_ostream CollectStream(CollectString);
4202
4203 getLexer().setSkipSpace(false);
4204
4205 bool FoundEnd = false;
4206 while (!getLexer().is(AsmToken::Eof)) {
4207 while (getLexer().is(AsmToken::Space)) {
4208 CollectStream << getLexer().getTok().getString();
4209 Lex();
4210 }
4211
4212 if (getLexer().is(AsmToken::Identifier)) {
4213 StringRef ID = getLexer().getTok().getIdentifier();
4214 if (ID == AssemblerDirectiveEnd) {
4215 Lex();
4216 FoundEnd = true;
4217 break;
4218 }
4219 }
4220
4221 CollectStream << Parser.parseStringToEndOfStatement()
4222 << getContext().getAsmInfo()->getSeparatorString();
4223
4224 Parser.eatToEndOfStatement();
4225 }
4226
4227 getLexer().setSkipSpace(true);
4228
4229 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4230 return TokError(Twine("expected directive ") +
4231 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4232 }
4233
4234 CollectStream.flush();
4235 return false;
4236 }
4237
4238 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4239 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4240 std::string String;
4241 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4242 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4243 return true;
4244
4245 auto PALMetadata = getTargetStreamer().getPALMetadata();
4246 if (!PALMetadata->setFromString(String))
4247 return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4248 return false;
4249 }
4250
4251 /// Parse the assembler directive for old linear-format PAL metadata.
4252 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4253 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4254 return Error(getParser().getTok().getLoc(),
4255 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4256 "not available on non-amdpal OSes")).str());
4257 }
4258
4259 auto PALMetadata = getTargetStreamer().getPALMetadata();
4260 PALMetadata->setLegacy();
4261 for (;;) {
4262 uint32_t Key, Value;
4263 if (ParseAsAbsoluteExpression(Key)) {
4264 return TokError(Twine("invalid value in ") +
4265 Twine(PALMD::AssemblerDirective));
4266 }
4267 if (getLexer().isNot(AsmToken::Comma)) {
4268 return TokError(Twine("expected an even number of values in ") +
4269 Twine(PALMD::AssemblerDirective));
4270 }
4271 Lex();
4272 if (ParseAsAbsoluteExpression(Value)) {
4273 return TokError(Twine("invalid value in ") +
4274 Twine(PALMD::AssemblerDirective));
4275 }
4276 PALMetadata->setRegister(Key, Value);
4277 if (getLexer().isNot(AsmToken::Comma))
4278 break;
4279 Lex();
4280 }
4281 return false;
4282 }
4283
4284 /// ParseDirectiveAMDGPULDS
4285 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
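/// e.g. (illustrative):  .amdgpu_lds my_lds_var, 4096, 16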
4286 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4287 if (getParser().checkForValidSection())
4288 return true;
4289
4290 StringRef Name;
4291 SMLoc NameLoc = getLexer().getLoc();
4292 if (getParser().parseIdentifier(Name))
4293 return TokError("expected identifier in directive");
4294
4295 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4296 if (parseToken(AsmToken::Comma, "expected ','"))
4297 return true;
4298
4299 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4300
4301 int64_t Size;
4302 SMLoc SizeLoc = getLexer().getLoc();
4303 if (getParser().parseAbsoluteExpression(Size))
4304 return true;
4305 if (Size < 0)
4306 return Error(SizeLoc, "size must be non-negative");
4307 if (Size > LocalMemorySize)
4308 return Error(SizeLoc, "size is too large");
4309
4310 int64_t Align = 4;
4311 if (getLexer().is(AsmToken::Comma)) {
4312 Lex();
4313 SMLoc AlignLoc = getLexer().getLoc();
4314 if (getParser().parseAbsoluteExpression(Align))
4315 return true;
4316 if (Align < 0 || !isPowerOf2_64(Align))
4317 return Error(AlignLoc, "alignment must be a power of two");
4318
4319 // Alignment larger than the size of LDS is possible in theory, as long
4320 // as the linker manages to place the symbol at address 0, but we do want
4321 // to make sure the alignment fits nicely into a 32-bit integer.
4322 if (Align >= 1u << 31)
4323 return Error(AlignLoc, "alignment is too large");
4324 }
4325
4326 if (parseToken(AsmToken::EndOfStatement,
4327 "unexpected token in '.amdgpu_lds' directive"))
4328 return true;
4329
4330 Symbol->redefineIfPossible();
4331 if (!Symbol->isUndefined())
4332 return Error(NameLoc, "invalid symbol redefinition");
4333
4334 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4335 return false;
4336 }
4337
4338 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4339 StringRef IDVal = DirectiveID.getString();
4340
4341 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4342 if (IDVal == ".amdgcn_target")
4343 return ParseDirectiveAMDGCNTarget();
4344
4345 if (IDVal == ".amdhsa_kernel")
4346 return ParseDirectiveAMDHSAKernel();
4347
4348 // TODO: Restructure/combine with PAL metadata directive.
4349 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4350 return ParseDirectiveHSAMetadata();
4351 } else {
4352 if (IDVal == ".hsa_code_object_version")
4353 return ParseDirectiveHSACodeObjectVersion();
4354
4355 if (IDVal == ".hsa_code_object_isa")
4356 return ParseDirectiveHSACodeObjectISA();
4357
4358 if (IDVal == ".amd_kernel_code_t")
4359 return ParseDirectiveAMDKernelCodeT();
4360
4361 if (IDVal == ".amdgpu_hsa_kernel")
4362 return ParseDirectiveAMDGPUHsaKernel();
4363
4364 if (IDVal == ".amd_amdgpu_isa")
4365 return ParseDirectiveISAVersion();
4366
4367 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4368 return ParseDirectiveHSAMetadata();
4369 }
4370
4371 if (IDVal == ".amdgpu_lds")
4372 return ParseDirectiveAMDGPULDS();
4373
4374 if (IDVal == PALMD::AssemblerDirectiveBegin)
4375 return ParseDirectivePALMetadataBegin();
4376
4377 if (IDVal == PALMD::AssemblerDirective)
4378 return ParseDirectivePALMetadata();
4379
4380 return true;
4381 }
4382
4383 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4384 unsigned RegNo) const {
4385
4386 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4387 R.isValid(); ++R) {
4388 if (*R == RegNo)
4389 return isGFX9() || isGFX10();
4390 }
4391
4392 // GFX10 has 2 more SGPRs 104 and 105.
4393 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4394 R.isValid(); ++R) {
4395 if (*R == RegNo)
4396 return hasSGPR104_SGPR105();
4397 }
4398
4399 switch (RegNo) {
4400 case AMDGPU::SRC_SHARED_BASE:
4401 case AMDGPU::SRC_SHARED_LIMIT:
4402 case AMDGPU::SRC_PRIVATE_BASE:
4403 case AMDGPU::SRC_PRIVATE_LIMIT:
4404 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4405 return !isCI() && !isSI() && !isVI();
4406 case AMDGPU::TBA:
4407 case AMDGPU::TBA_LO:
4408 case AMDGPU::TBA_HI:
4409 case AMDGPU::TMA:
4410 case AMDGPU::TMA_LO:
4411 case AMDGPU::TMA_HI:
4412 return !isGFX9() && !isGFX10();
4413 case AMDGPU::XNACK_MASK:
4414 case AMDGPU::XNACK_MASK_LO:
4415 case AMDGPU::XNACK_MASK_HI:
4416 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4417 case AMDGPU::SGPR_NULL:
4418 return isGFX10();
4419 default:
4420 break;
4421 }
4422
4423 if (isCI())
4424 return true;
4425
4426 if (isSI() || isGFX10()) {
4427 // No flat_scr on SI.
4428 // On GFX10 flat scratch is not a valid register operand and can only be
4429 // accessed with s_setreg/s_getreg.
4430 switch (RegNo) {
4431 case AMDGPU::FLAT_SCR:
4432 case AMDGPU::FLAT_SCR_LO:
4433 case AMDGPU::FLAT_SCR_HI:
4434 return false;
4435 default:
4436 return true;
4437 }
4438 }
4439
4440 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4441 // SI/CI have.
4442 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4443 R.isValid(); ++R) {
4444 if (*R == RegNo)
4445 return hasSGPR102_SGPR103();
4446 }
4447
4448 return true;
4449 }
4450
4451 OperandMatchResultTy
4452 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4453 OperandMode Mode) {
4454 // Try to parse with a custom parser
4455 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4456
4457 // If we successfully parsed the operand or if there was an error parsing,
4458 // we are done.
4459 //
4460 // If we are parsing after we reach EndOfStatement then this means we
4461 // are appending default values to the Operands list. This is only done
4462 // by custom parser, so we shouldn't continue on to the generic parsing.
4463 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4464 getLexer().is(AsmToken::EndOfStatement))
4465 return ResTy;
4466
4467 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4468 unsigned Prefix = Operands.size();
4469 SMLoc LBraceLoc = getTok().getLoc();
4470 Parser.Lex(); // eat the '['
4471
4472 for (;;) {
4473 ResTy = parseReg(Operands);
4474 if (ResTy != MatchOperand_Success)
4475 return ResTy;
4476
4477 if (getLexer().is(AsmToken::RBrac))
4478 break;
4479
4480 if (getLexer().isNot(AsmToken::Comma))
4481 return MatchOperand_ParseFail;
4482 Parser.Lex();
4483 }
4484
4485 if (Operands.size() - Prefix > 1) {
4486 Operands.insert(Operands.begin() + Prefix,
4487 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4488 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4489 getTok().getLoc()));
4490 }
4491
4492 Parser.Lex(); // eat the ']'
4493 return MatchOperand_Success;
4494 }
4495
4496 return parseRegOrImm(Operands);
4497 }
4498
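// Strip a trailing _e32/_e64/_dpp/_sdwa encoding suffix from the mnemonic and
// record the forced encoding; e.g. (illustrative) "v_add_f32_e64" forces the
// 64-bit encoding and is matched as "v_add_f32".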
4499 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4500 // Clear any forced encodings from the previous instruction.
4501 setForcedEncodingSize(0);
4502 setForcedDPP(false);
4503 setForcedSDWA(false);
4504
4505 if (Name.endswith("_e64")) {
4506 setForcedEncodingSize(64);
4507 return Name.substr(0, Name.size() - 4);
4508 } else if (Name.endswith("_e32")) {
4509 setForcedEncodingSize(32);
4510 return Name.substr(0, Name.size() - 4);
4511 } else if (Name.endswith("_dpp")) {
4512 setForcedDPP(true);
4513 return Name.substr(0, Name.size() - 4);
4514 } else if (Name.endswith("_sdwa")) {
4515 setForcedSDWA(true);
4516 return Name.substr(0, Name.size() - 5);
4517 }
4518 return Name;
4519 }
4520
4521 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4522 StringRef Name,
4523 SMLoc NameLoc, OperandVector &Operands) {
4524 // Add the instruction mnemonic
4525 Name = parseMnemonicSuffix(Name);
4526 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4527
4528 bool IsMIMG = Name.startswith("image_");
4529
4530 while (!getLexer().is(AsmToken::EndOfStatement)) {
4531 OperandMode Mode = OperandMode_Default;
4532 if (IsMIMG && isGFX10() && Operands.size() == 2)
4533 Mode = OperandMode_NSA;
4534 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4535
4536 // Eat the comma or space if there is one.
4537 if (getLexer().is(AsmToken::Comma))
4538 Parser.Lex();
4539
4540 switch (Res) {
4541 case MatchOperand_Success: break;
4542 case MatchOperand_ParseFail:
4543 // FIXME: use real operand location rather than the current location.
4544 Error(getLexer().getLoc(), "failed parsing operand.");
4545 while (!getLexer().is(AsmToken::EndOfStatement)) {
4546 Parser.Lex();
4547 }
4548 return true;
4549 case MatchOperand_NoMatch:
4550 // FIXME: use real operand location rather than the current location.
4551 Error(getLexer().getLoc(), "not a valid operand.");
4552 while (!getLexer().is(AsmToken::EndOfStatement)) {
4553 Parser.Lex();
4554 }
4555 return true;
4556 }
4557 }
4558
4559 return false;
4560 }
4561
4562 //===----------------------------------------------------------------------===//
4563 // Utility functions
4564 //===----------------------------------------------------------------------===//
4565
4566 OperandMatchResultTy
4567 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4568
4569 if (!trySkipId(Prefix, AsmToken::Colon))
4570 return MatchOperand_NoMatch;
4571
4572 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4573 }
4574
4575 OperandMatchResultTy
4576 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4577 AMDGPUOperand::ImmTy ImmTy,
4578 bool (*ConvertResult)(int64_t&)) {
4579 SMLoc S = getLoc();
4580 int64_t Value = 0;
4581
4582 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4583 if (Res != MatchOperand_Success)
4584 return Res;
4585
4586 if (ConvertResult && !ConvertResult(Value)) {
4587 Error(S, "invalid " + StringRef(Prefix) + " value.");
4588 }
4589
4590 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4591 return MatchOperand_Success;
4592 }
4593
4594 OperandMatchResultTy
4595 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4596 OperandVector &Operands,
4597 AMDGPUOperand::ImmTy ImmTy,
4598 bool (*ConvertResult)(int64_t&)) {
4599 SMLoc S = getLoc();
4600 if (!trySkipId(Prefix, AsmToken::Colon))
4601 return MatchOperand_NoMatch;
4602
4603 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4604 return MatchOperand_ParseFail;
4605
4606 unsigned Val = 0;
4607 const unsigned MaxSize = 4;
4608
4609 // FIXME: How to verify the number of elements matches the number of src
4610 // operands?
4611 for (int I = 0; ; ++I) {
4612 int64_t Op;
4613 SMLoc Loc = getLoc();
4614 if (!parseExpr(Op))
4615 return MatchOperand_ParseFail;
4616
4617 if (Op != 0 && Op != 1) {
4618 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4619 return MatchOperand_ParseFail;
4620 }
4621
4622 Val |= (Op << I);
4623
4624 if (trySkipToken(AsmToken::RBrac))
4625 break;
4626
4627 if (I + 1 == MaxSize) {
4628 Error(getLoc(), "expected a closing square bracket");
4629 return MatchOperand_ParseFail;
4630 }
4631
4632 if (!skipToken(AsmToken::Comma, "expected a comma"))
4633 return MatchOperand_ParseFail;
4634 }
4635
4636 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4637 return MatchOperand_Success;
4638 }
4639
4640 OperandMatchResultTy
4641 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4642 AMDGPUOperand::ImmTy ImmTy) {
4643 int64_t Bit = 0;
4644 SMLoc S = Parser.getTok().getLoc();
4645
4646 // If we are at the end of the statement, this is a default argument, so
4647 // use a default value.
4648 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4649 switch(getLexer().getKind()) {
4650 case AsmToken::Identifier: {
4651 StringRef Tok = Parser.getTok().getString();
4652 if (Tok == Name) {
4653 if (Tok == "r128" && isGFX9())
4654 Error(S, "r128 modifier is not supported on this GPU");
4655 if (Tok == "a16" && !isGFX9() && !isGFX10())
4656 Error(S, "a16 modifier is not supported on this GPU");
4657 Bit = 1;
4658 Parser.Lex();
4659 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4660 Bit = 0;
4661 Parser.Lex();
4662 } else {
4663 return MatchOperand_NoMatch;
4664 }
4665 break;
4666 }
4667 default:
4668 return MatchOperand_NoMatch;
4669 }
4670 }
4671
4672 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4673 return MatchOperand_ParseFail;
4674
4675 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4676 return MatchOperand_Success;
4677 }
4678
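// If the optional operand ImmT was parsed, append its value to Inst from
// Operands; otherwise append the provided default immediate.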
4679 static void addOptionalImmOperand(
4680 MCInst& Inst, const OperandVector& Operands,
4681 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4682 AMDGPUOperand::ImmTy ImmT,
4683 int64_t Default = 0) {
4684 auto i = OptionalIdx.find(ImmT);
4685 if (i != OptionalIdx.end()) {
4686 unsigned Idx = i->second;
4687 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4688 } else {
4689 Inst.addOperand(MCOperand::createImm(Default));
4690 }
4691 }
4692
4693 OperandMatchResultTy
4694 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4695 if (getLexer().isNot(AsmToken::Identifier)) {
4696 return MatchOperand_NoMatch;
4697 }
4698 StringRef Tok = Parser.getTok().getString();
4699 if (Tok != Prefix) {
4700 return MatchOperand_NoMatch;
4701 }
4702
4703 Parser.Lex();
4704 if (getLexer().isNot(AsmToken::Colon)) {
4705 return MatchOperand_ParseFail;
4706 }
4707
4708 Parser.Lex();
4709 if (getLexer().isNot(AsmToken::Identifier)) {
4710 return MatchOperand_ParseFail;
4711 }
4712
4713 Value = Parser.getTok().getString();
4714 return MatchOperand_Success;
4715 }
4716
4717 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4718 // values to live in a joint format operand in the MCInst encoding.
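// E.g. (illustrative) "dfmt:1 nfmt:7" and "nfmt:7 dfmt:1" both produce the
// packed format value (1 | 7 << 4).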
4719 OperandMatchResultTy
4720 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4721 SMLoc S = Parser.getTok().getLoc();
4722 int64_t Dfmt = 0, Nfmt = 0;
4723 // dfmt and nfmt can appear in either order, and each is optional.
4724 bool GotDfmt = false, GotNfmt = false;
4725 while (!GotDfmt || !GotNfmt) {
4726 if (!GotDfmt) {
4727 auto Res = parseIntWithPrefix("dfmt", Dfmt);
4728 if (Res != MatchOperand_NoMatch) {
4729 if (Res != MatchOperand_Success)
4730 return Res;
4731 if (Dfmt >= 16) {
4732 Error(Parser.getTok().getLoc(), "out of range dfmt");
4733 return MatchOperand_ParseFail;
4734 }
4735 GotDfmt = true;
4736 Parser.Lex();
4737 continue;
4738 }
4739 }
4740 if (!GotNfmt) {
4741 auto Res = parseIntWithPrefix("nfmt", Nfmt);
4742 if (Res != MatchOperand_NoMatch) {
4743 if (Res != MatchOperand_Success)
4744 return Res;
4745 if (Nfmt >= 8) {
4746 Error(Parser.getTok().getLoc(), "out of range nfmt");
4747 return MatchOperand_ParseFail;
4748 }
4749 GotNfmt = true;
4750 Parser.Lex();
4751 continue;
4752 }
4753 }
4754 break;
4755 }
4756 if (!GotDfmt && !GotNfmt)
4757 return MatchOperand_NoMatch;
4758 auto Format = Dfmt | Nfmt << 4;
4759 Operands.push_back(
4760 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4761 return MatchOperand_Success;
4762 }
4763
4764 //===----------------------------------------------------------------------===//
4765 // ds
4766 //===----------------------------------------------------------------------===//
4767
4768 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4769 const OperandVector &Operands) {
4770 OptionalImmIndexMap OptionalIdx;
4771
4772 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4773 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4774
4775 // Add the register arguments
4776 if (Op.isReg()) {
4777 Op.addRegOperands(Inst, 1);
4778 continue;
4779 }
4780
4781 // Handle optional arguments
4782 OptionalIdx[Op.getImmTy()] = i;
4783 }
4784
4785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4788
4789 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4790 }
4791
4792 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4793 bool IsGdsHardcoded) {
4794 OptionalImmIndexMap OptionalIdx;
4795
4796 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4797 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4798
4799 // Add the register arguments
4800 if (Op.isReg()) {
4801 Op.addRegOperands(Inst, 1);
4802 continue;
4803 }
4804
4805 if (Op.isToken() && Op.getToken() == "gds") {
4806 IsGdsHardcoded = true;
4807 continue;
4808 }
4809
4810 // Handle optional arguments
4811 OptionalIdx[Op.getImmTy()] = i;
4812 }
4813
4814 AMDGPUOperand::ImmTy OffsetType =
4815 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4816 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4817 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4818 AMDGPUOperand::ImmTyOffset;
4819
4820 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4821
4822 if (!IsGdsHardcoded) {
4823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4824 }
4825 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4826 }
4827
4828 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4829 OptionalImmIndexMap OptionalIdx;
4830
4831 unsigned OperandIdx[4];
4832 unsigned EnMask = 0;
4833 int SrcIdx = 0;
4834
4835 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4836 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4837
4838 // Add the register arguments
4839 if (Op.isReg()) {
4840 assert(SrcIdx < 4);
4841 OperandIdx[SrcIdx] = Inst.size();
4842 Op.addRegOperands(Inst, 1);
4843 ++SrcIdx;
4844 continue;
4845 }
4846
4847 if (Op.isOff()) {
4848 assert(SrcIdx < 4);
4849 OperandIdx[SrcIdx] = Inst.size();
4850 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4851 ++SrcIdx;
4852 continue;
4853 }
4854
4855 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4856 Op.addImmOperands(Inst, 1);
4857 continue;
4858 }
4859
4860 if (Op.isToken() && Op.getToken() == "done")
4861 continue;
4862
4863 // Handle optional arguments
4864 OptionalIdx[Op.getImmTy()] = i;
4865 }
4866
4867 assert(SrcIdx == 4);
4868
4869 bool Compr = false;
4870 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4871 Compr = true;
4872 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4873 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4874 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4875 }
4876
4877 for (auto i = 0; i < SrcIdx; ++i) {
4878 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4879 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4880 }
4881 }
4882
4883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4885
4886 Inst.addOperand(MCOperand::createImm(EnMask));
4887 }
4888
4889 //===----------------------------------------------------------------------===//
4890 // s_waitcnt
4891 //===----------------------------------------------------------------------===//
4892
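// encodeCnt packs CntVal into the counter's bit-field of IntVal and reports
// failure when the value does not survive a decode round-trip (i.e. it is too
// large for the field); with the "_sat" forms the value saturates instead.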
4893 static bool
4894 encodeCnt(
4895 const AMDGPU::IsaVersion ISA,
4896 int64_t &IntVal,
4897 int64_t CntVal,
4898 bool Saturate,
4899 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4900 unsigned (*decode)(const IsaVersion &Version, unsigned))
4901 {
4902 bool Failed = false;
4903
4904 IntVal = encode(ISA, IntVal, CntVal);
4905 if (CntVal != decode(ISA, IntVal)) {
4906 if (Saturate) {
4907 IntVal = encode(ISA, IntVal, -1);
4908 } else {
4909 Failed = true;
4910 }
4911 }
4912 return Failed;
4913 }
4914
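// Parses one counter term of an s_waitcnt expression, e.g. (illustrative):
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
// Terms may be separated by '&' or ','; each term writes its value into the
// corresponding bit-field of the combined wait count, which starts out as the
// all-ones bitmask.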
4915 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4916
4917 SMLoc CntLoc = getLoc();
4918 StringRef CntName = getTokenStr();
4919
4920 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4921 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4922 return false;
4923
4924 int64_t CntVal;
4925 SMLoc ValLoc = getLoc();
4926 if (!parseExpr(CntVal))
4927 return false;
4928
4929 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4930
4931 bool Failed = true;
4932 bool Sat = CntName.endswith("_sat");
4933
4934 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4935 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4936 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4937 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4938 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4939 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4940 } else {
4941 Error(CntLoc, "invalid counter name " + CntName);
4942 return false;
4943 }
4944
4945 if (Failed) {
4946 Error(ValLoc, "too large value for " + CntName);
4947 return false;
4948 }
4949
4950 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4951 return false;
4952
4953 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4954 if (isToken(AsmToken::EndOfStatement)) {
4955 Error(getLoc(), "expected a counter name");
4956 return false;
4957 }
4958 }
4959
4960 return true;
4961 }
4962
4963 OperandMatchResultTy
4964 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4965 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4966 int64_t Waitcnt = getWaitcntBitMask(ISA);
4967 SMLoc S = getLoc();
4968
4969 // If parse failed, do not return error code
4970 // to avoid excessive error messages.
4971 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4972 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4973 } else {
4974 parseExpr(Waitcnt);
4975 }
4976
4977 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4978 return MatchOperand_Success;
4979 }
4980
4981 bool
4982 AMDGPUOperand::isSWaitCnt() const {
4983 return isImm();
4984 }
4985
4986 //===----------------------------------------------------------------------===//
4987 // hwreg
4988 //===----------------------------------------------------------------------===//
4989
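// Parses the body of a hwreg(...) operand, e.g. (illustrative):
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 3, 5)
// The register may be given by symbolic name or numeric code; the bit offset
// and width fields are optional.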
4990 bool
4991 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4992 int64_t &Offset,
4993 int64_t &Width) {
4994 using namespace llvm::AMDGPU::Hwreg;
4995
4996 // The register may be specified by name or using a numeric code
4997 if (isToken(AsmToken::Identifier) &&
4998 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4999 HwReg.IsSymbolic = true;
5000 lex(); // skip message name
5001 } else if (!parseExpr(HwReg.Id)) {
5002 return false;
5003 }
5004
5005 if (trySkipToken(AsmToken::RParen))
5006 return true;
5007
5008 // parse optional params
5009 return
5010 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5011 parseExpr(Offset) &&
5012 skipToken(AsmToken::Comma, "expected a comma") &&
5013 parseExpr(Width) &&
5014 skipToken(AsmToken::RParen, "expected a closing parenthesis");
5015 }
5016
5017 bool
5018 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5019 const int64_t Offset,
5020 const int64_t Width,
5021 const SMLoc Loc) {
5022
5023 using namespace llvm::AMDGPU::Hwreg;
5024
5025 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5026 Error(Loc, "specified hardware register is not supported on this GPU");
5027 return false;
5028 } else if (!isValidHwreg(HwReg.Id)) {
5029 Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5030 return false;
5031 } else if (!isValidHwregOffset(Offset)) {
5032 Error(Loc, "invalid bit offset: only 5-bit values are legal");
5033 return false;
5034 } else if (!isValidHwregWidth(Width)) {
5035 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5036 return false;
5037 }
5038 return true;
5039 }
5040
5041 OperandMatchResultTy
5042 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5043 using namespace llvm::AMDGPU::Hwreg;
5044
5045 int64_t ImmVal = 0;
5046 SMLoc Loc = getLoc();
5047
5048 // If parse failed, do not return error code
5049 // to avoid excessive error messages.
5050 if (trySkipId("hwreg", AsmToken::LParen)) {
5051 OperandInfoTy HwReg(ID_UNKNOWN_);
5052 int64_t Offset = OFFSET_DEFAULT_;
5053 int64_t Width = WIDTH_DEFAULT_;
5054 if (parseHwregBody(HwReg, Offset, Width) &&
5055 validateHwreg(HwReg, Offset, Width, Loc)) {
5056 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5057 }
5058 } else if (parseExpr(ImmVal)) {
5059 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5060 Error(Loc, "invalid immediate: only 16-bit values are legal");
5061 }
5062
5063 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5064 return MatchOperand_Success;
5065 }
5066
5067 bool AMDGPUOperand::isHwreg() const {
5068 return isImmTy(ImmTyHwreg);
5069 }
5070
5071 //===----------------------------------------------------------------------===//
5072 // sendmsg
5073 //===----------------------------------------------------------------------===//
5074
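// Parses the body of a sendmsg(...) operand, e.g. (illustrative):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The message and operation may be given symbolically or as numeric codes;
// the operation and stream id are optional.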
5075 bool
5076 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5077 OperandInfoTy &Op,
5078 OperandInfoTy &Stream) {
5079 using namespace llvm::AMDGPU::SendMsg;
5080
5081 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5082 Msg.IsSymbolic = true;
5083 lex(); // skip message name
5084 } else if (!parseExpr(Msg.Id)) {
5085 return false;
5086 }
5087
5088 if (trySkipToken(AsmToken::Comma)) {
5089 Op.IsDefined = true;
5090 if (isToken(AsmToken::Identifier) &&
5091 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5092 lex(); // skip operation name
5093 } else if (!parseExpr(Op.Id)) {
5094 return false;
5095 }
5096
5097 if (trySkipToken(AsmToken::Comma)) {
5098 Stream.IsDefined = true;
5099 if (!parseExpr(Stream.Id))
5100 return false;
5101 }
5102 }
5103
5104 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5105 }
5106
5107 bool
5108 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5109 const OperandInfoTy &Op,
5110 const OperandInfoTy &Stream,
5111 const SMLoc S) {
5112 using namespace llvm::AMDGPU::SendMsg;
5113
5114 // Validation strictness depends on whether the message is specified
5115 // in a symbolic or in a numeric form. In the latter case
5116 // only the encoding possibility is checked.
5117 bool Strict = Msg.IsSymbolic;
5118
5119 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5120 Error(S, "invalid message id");
5121 return false;
5122 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5123 Error(S, Op.IsDefined ?
5124 "message does not support operations" :
5125 "missing message operation");
5126 return false;
5127 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5128 Error(S, "invalid operation id");
5129 return false;
5130 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5131 Error(S, "message operation does not support streams");
5132 return false;
5133 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5134 Error(S, "invalid message stream id");
5135 return false;
5136 }
5137 return true;
5138 }
5139
5140 OperandMatchResultTy
5141 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5142 using namespace llvm::AMDGPU::SendMsg;
5143
5144 int64_t ImmVal = 0;
5145 SMLoc Loc = getLoc();
5146
5147 // If parse failed, do not return error code
5148 // to avoid excessive error messages.
5149 if (trySkipId("sendmsg", AsmToken::LParen)) {
5150 OperandInfoTy Msg(ID_UNKNOWN_);
5151 OperandInfoTy Op(OP_NONE_);
5152 OperandInfoTy Stream(STREAM_ID_NONE_);
5153 if (parseSendMsgBody(Msg, Op, Stream) &&
5154 validateSendMsg(Msg, Op, Stream, Loc)) {
5155 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5156 }
5157 } else if (parseExpr(ImmVal)) {
5158 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5159 Error(Loc, "invalid immediate: only 16-bit values are legal");
5160 }
5161
5162 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5163 return MatchOperand_Success;
5164 }
5165
5166 bool AMDGPUOperand::isSendMsg() const {
5167 return isImmTy(ImmTySendMsg);
5168 }
5169
5170 //===----------------------------------------------------------------------===//
5171 // v_interp
5172 //===----------------------------------------------------------------------===//
5173
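// Interpolation operands, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
// The slot is one of p10/p20/p0 and the attribute channel is .x/.y/.z/.w.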
5174 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5175 if (getLexer().getKind() != AsmToken::Identifier)
5176 return MatchOperand_NoMatch;
5177
5178 StringRef Str = Parser.getTok().getString();
5179 int Slot = StringSwitch<int>(Str)
5180 .Case("p10", 0)
5181 .Case("p20", 1)
5182 .Case("p0", 2)
5183 .Default(-1);
5184
5185 SMLoc S = Parser.getTok().getLoc();
5186 if (Slot == -1)
5187 return MatchOperand_ParseFail;
5188
5189 Parser.Lex();
5190 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5191 AMDGPUOperand::ImmTyInterpSlot));
5192 return MatchOperand_Success;
5193 }
5194
5195 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5196 if (getLexer().getKind() != AsmToken::Identifier)
5197 return MatchOperand_NoMatch;
5198
5199 StringRef Str = Parser.getTok().getString();
5200 if (!Str.startswith("attr"))
5201 return MatchOperand_NoMatch;
5202
5203 StringRef Chan = Str.take_back(2);
5204 int AttrChan = StringSwitch<int>(Chan)
5205 .Case(".x", 0)
5206 .Case(".y", 1)
5207 .Case(".z", 2)
5208 .Case(".w", 3)
5209 .Default(-1);
5210 if (AttrChan == -1)
5211 return MatchOperand_ParseFail;
5212
5213 Str = Str.drop_back(2).drop_front(4);
5214
5215 uint8_t Attr;
5216 if (Str.getAsInteger(10, Attr))
5217 return MatchOperand_ParseFail;
5218
5219 SMLoc S = Parser.getTok().getLoc();
5220 Parser.Lex();
5221 if (Attr > 63) {
5222 Error(S, "out of bounds attr");
5223 return MatchOperand_Success;
5224 }
5225
5226 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5227
5228 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5229 AMDGPUOperand::ImmTyInterpAttr));
5230 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5231 AMDGPUOperand::ImmTyAttrChan));
5232 return MatchOperand_Success;
5233 }
5234
5235 //===----------------------------------------------------------------------===//
5236 // exp
5237 //===----------------------------------------------------------------------===//
5238
5239 void AMDGPUAsmParser::errorExpTgt() {
5240 Error(Parser.getTok().getLoc(), "invalid exp target");
5241 }
5242
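// Maps an export target name to its hardware encoding. Based on the cases
// below: mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15
// (pos4 -> 16 on gfx10), prim -> 20 (gfx10 only), param0..param31 -> 32..63.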
5243 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5244 uint8_t &Val) {
5245 if (Str == "null") {
5246 Val = 9;
5247 return MatchOperand_Success;
5248 }
5249
5250 if (Str.startswith("mrt")) {
5251 Str = Str.drop_front(3);
5252 if (Str == "z") { // == mrtz
5253 Val = 8;
5254 return MatchOperand_Success;
5255 }
5256
5257 if (Str.getAsInteger(10, Val))
5258 return MatchOperand_ParseFail;
5259
5260 if (Val > 7)
5261 errorExpTgt();
5262
5263 return MatchOperand_Success;
5264 }
5265
5266 if (Str.startswith("pos")) {
5267 Str = Str.drop_front(3);
5268 if (Str.getAsInteger(10, Val))
5269 return MatchOperand_ParseFail;
5270
5271 if (Val > 4 || (Val == 4 && !isGFX10()))
5272 errorExpTgt();
5273
5274 Val += 12;
5275 return MatchOperand_Success;
5276 }
5277
5278 if (isGFX10() && Str == "prim") {
5279 Val = 20;
5280 return MatchOperand_Success;
5281 }
5282
5283 if (Str.startswith("param")) {
5284 Str = Str.drop_front(5);
5285 if (Str.getAsInteger(10, Val))
5286 return MatchOperand_ParseFail;
5287
5288 if (Val >= 32)
5289 errorExpTgt();
5290
5291 Val += 32;
5292 return MatchOperand_Success;
5293 }
5294
5295 if (Str.startswith("invalid_target_")) {
5296 Str = Str.drop_front(15);
5297 if (Str.getAsInteger(10, Val))
5298 return MatchOperand_ParseFail;
5299
5300 errorExpTgt();
5301 return MatchOperand_Success;
5302 }
5303
5304 return MatchOperand_NoMatch;
5305 }
5306
5307 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5308 uint8_t Val;
5309 StringRef Str = Parser.getTok().getString();
5310
5311 auto Res = parseExpTgtImpl(Str, Val);
5312 if (Res != MatchOperand_Success)
5313 return Res;
5314
5315 SMLoc S = Parser.getTok().getLoc();
5316 Parser.Lex();
5317
5318 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5319 AMDGPUOperand::ImmTyExpTgt));
5320 return MatchOperand_Success;
5321 }
5322
5323 //===----------------------------------------------------------------------===//
5324 // parser helpers
5325 //===----------------------------------------------------------------------===//
5326
5327 bool
5328 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5329 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5330 }
5331
5332 bool
5333 AMDGPUAsmParser::isId(const StringRef Id) const {
5334 return isId(getToken(), Id);
5335 }
5336
5337 bool
5338 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5339 return getTokenKind() == Kind;
5340 }
5341
5342 bool
5343 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5344 if (isId(Id)) {
5345 lex();
5346 return true;
5347 }
5348 return false;
5349 }
5350
5351 bool
5352 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5353 if (isId(Id) && peekToken().is(Kind)) {
5354 lex();
5355 lex();
5356 return true;
5357 }
5358 return false;
5359 }
5360
5361 bool
5362 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5363 if (isToken(Kind)) {
5364 lex();
5365 return true;
5366 }
5367 return false;
5368 }
5369
5370 bool
5371 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5372 const StringRef ErrMsg) {
5373 if (!trySkipToken(Kind)) {
5374 Error(getLoc(), ErrMsg);
5375 return false;
5376 }
5377 return true;
5378 }
5379
5380 bool
5381 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5382 return !getParser().parseAbsoluteExpression(Imm);
5383 }
5384
5385 bool
5386 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5387 SMLoc S = getLoc();
5388
5389 const MCExpr *Expr;
5390 if (Parser.parseExpression(Expr))
5391 return false;
5392
5393 int64_t IntVal;
5394 if (Expr->evaluateAsAbsolute(IntVal)) {
5395 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5396 } else {
5397 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5398 }
5399 return true;
5400 }
5401
5402 bool
5403 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5404 if (isToken(AsmToken::String)) {
5405 Val = getToken().getStringContents();
5406 lex();
5407 return true;
5408 } else {
5409 Error(getLoc(), ErrMsg);
5410 return false;
5411 }
5412 }
5413
5414 AsmToken
5415 AMDGPUAsmParser::getToken() const {
5416 return Parser.getTok();
5417 }
5418
5419 AsmToken
5420 AMDGPUAsmParser::peekToken() {
5421 return getLexer().peekTok();
5422 }
5423
5424 void
5425 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5426 auto TokCount = getLexer().peekTokens(Tokens);
5427
5428 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5429 Tokens[Idx] = AsmToken(AsmToken::Error, "");
5430 }
5431
5432 AsmToken::TokenKind
5433 AMDGPUAsmParser::getTokenKind() const {
5434 return getLexer().getKind();
5435 }
5436
5437 SMLoc
5438 AMDGPUAsmParser::getLoc() const {
5439 return getToken().getLoc();
5440 }
5441
5442 StringRef
5443 AMDGPUAsmParser::getTokenStr() const {
5444 return getToken().getString();
5445 }
5446
5447 void
5448 AMDGPUAsmParser::lex() {
5449 Parser.Lex();
5450 }
5451
5452 //===----------------------------------------------------------------------===//
5453 // swizzle
5454 //===----------------------------------------------------------------------===//
5455
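// ds_swizzle offsets may be given numerically or via the swizzle() macro,
// e.g. (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")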
5456 LLVM_READNONE
5457 static unsigned
5458 encodeBitmaskPerm(const unsigned AndMask,
5459 const unsigned OrMask,
5460 const unsigned XorMask) {
5461 using namespace llvm::AMDGPU::Swizzle;
5462
5463 return BITMASK_PERM_ENC |
5464 (AndMask << BITMASK_AND_SHIFT) |
5465 (OrMask << BITMASK_OR_SHIFT) |
5466 (XorMask << BITMASK_XOR_SHIFT);
5467 }
5468
5469 bool
5470 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5471 const unsigned MinVal,
5472 const unsigned MaxVal,
5473 const StringRef ErrMsg) {
5474 for (unsigned i = 0; i < OpNum; ++i) {
5475 if (!skipToken(AsmToken::Comma, "expected a comma")){
5476 return false;
5477 }
5478 SMLoc ExprLoc = Parser.getTok().getLoc();
5479 if (!parseExpr(Op[i])) {
5480 return false;
5481 }
5482 if (Op[i] < MinVal || Op[i] > MaxVal) {
5483 Error(ExprLoc, ErrMsg);
5484 return false;
5485 }
5486 }
5487
5488 return true;
5489 }
5490
5491 bool
5492 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5493 using namespace llvm::AMDGPU::Swizzle;
5494
5495 int64_t Lane[LANE_NUM];
5496 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5497 "expected a 2-bit lane id")) {
5498 Imm = QUAD_PERM_ENC;
5499 for (unsigned I = 0; I < LANE_NUM; ++I) {
5500 Imm |= Lane[I] << (LANE_SHIFT * I);
5501 }
5502 return true;
5503 }
5504 return false;
5505 }
5506
5507 bool
5508 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5509 using namespace llvm::AMDGPU::Swizzle;
5510
5511 SMLoc S = Parser.getTok().getLoc();
5512 int64_t GroupSize;
5513 int64_t LaneIdx;
5514
5515 if (!parseSwizzleOperands(1, &GroupSize,
5516 2, 32,
5517 "group size must be in the interval [2,32]")) {
5518 return false;
5519 }
5520 if (!isPowerOf2_64(GroupSize)) {
5521 Error(S, "group size must be a power of two");
5522 return false;
5523 }
5524 if (parseSwizzleOperands(1, &LaneIdx,
5525 0, GroupSize - 1,
5526 "lane id must be in the interval [0,group size - 1]")) {
5527 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5528 return true;
5529 }
5530 return false;
5531 }
5532
5533 bool
5534 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5535 using namespace llvm::AMDGPU::Swizzle;
5536
5537 SMLoc S = Parser.getTok().getLoc();
5538 int64_t GroupSize;
5539
5540 if (!parseSwizzleOperands(1, &GroupSize,
5541 2, 32, "group size must be in the interval [2,32]")) {
5542 return false;
5543 }
5544 if (!isPowerOf2_64(GroupSize)) {
5545 Error(S, "group size must be a power of two");
5546 return false;
5547 }
5548
5549 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5550 return true;
5551 }
5552
5553 bool
5554 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5555 using namespace llvm::AMDGPU::Swizzle;
5556
5557 SMLoc S = Parser.getTok().getLoc();
5558 int64_t GroupSize;
5559
5560 if (!parseSwizzleOperands(1, &GroupSize,
5561 1, 16, "group size must be in the interval [1,16]")) {
5562 return false;
5563 }
5564 if (!isPowerOf2_64(GroupSize)) {
5565 Error(S, "group size must be a power of two");
5566 return false;
5567 }
5568
5569 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5570 return true;
5571 }
5572
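// In BITMASK_PERM mode each lane reads from lane ((id & AndMask) | OrMask) ^ XorMask
// within its group of 32 lanes. The 5-character control string parsed below sets
// each of the five lane-id bits: '0' -> 0, '1' -> 1, 'p' -> preserve, 'i' -> invert.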
5573 bool
5574 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5575 using namespace llvm::AMDGPU::Swizzle;
5576
5577 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5578 return false;
5579 }
5580
5581 StringRef Ctl;
5582 SMLoc StrLoc = Parser.getTok().getLoc();
5583 if (!parseString(Ctl)) {
5584 return false;
5585 }
5586 if (Ctl.size() != BITMASK_WIDTH) {
5587 Error(StrLoc, "expected a 5-character mask");
5588 return false;
5589 }
5590
5591 unsigned AndMask = 0;
5592 unsigned OrMask = 0;
5593 unsigned XorMask = 0;
5594
5595 for (size_t i = 0; i < Ctl.size(); ++i) {
5596 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5597 switch(Ctl[i]) {
5598 default:
5599 Error(StrLoc, "invalid mask");
5600 return false;
5601 case '0':
5602 break;
5603 case '1':
5604 OrMask |= Mask;
5605 break;
5606 case 'p':
5607 AndMask |= Mask;
5608 break;
5609 case 'i':
5610 AndMask |= Mask;
5611 XorMask |= Mask;
5612 break;
5613 }
5614 }
5615
5616 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5617 return true;
5618 }
5619
5620 bool
5621 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5622
5623 SMLoc OffsetLoc = Parser.getTok().getLoc();
5624
5625 if (!parseExpr(Imm)) {
5626 return false;
5627 }
5628 if (!isUInt<16>(Imm)) {
5629 Error(OffsetLoc, "expected a 16-bit offset");
5630 return false;
5631 }
5632 return true;
5633 }
5634
5635 bool
5636 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5637 using namespace llvm::AMDGPU::Swizzle;
5638
5639 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5640
5641 SMLoc ModeLoc = Parser.getTok().getLoc();
5642 bool Ok = false;
5643
5644 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5645 Ok = parseSwizzleQuadPerm(Imm);
5646 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5647 Ok = parseSwizzleBitmaskPerm(Imm);
5648 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5649 Ok = parseSwizzleBroadcast(Imm);
5650 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5651 Ok = parseSwizzleSwap(Imm);
5652 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5653 Ok = parseSwizzleReverse(Imm);
5654 } else {
5655 Error(ModeLoc, "expected a swizzle mode");
5656 }
5657
5658 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5659 }
5660
5661 return false;
5662 }
5663
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5666 SMLoc S = Parser.getTok().getLoc();
5667 int64_t Imm = 0;
5668
5669 if (trySkipId("offset")) {
5670
5671 bool Ok = false;
5672 if (skipToken(AsmToken::Colon, "expected a colon")) {
5673 if (trySkipId("swizzle")) {
5674 Ok = parseSwizzleMacro(Imm);
5675 } else {
5676 Ok = parseSwizzleOffset(Imm);
5677 }
5678 }
5679
5680 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5681
5682 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5683 } else {
5684 // Swizzle "offset" operand is optional.
5685 // If it is omitted, try parsing other optional operands.
5686 return parseOptionalOpr(Operands);
5687 }
5688 }
5689
5690 bool
5691 AMDGPUOperand::isSwizzle() const {
5692 return isImmTy(ImmTySwizzle);
5693 }
5694
5695 //===----------------------------------------------------------------------===//
5696 // VGPR Index Mode
5697 //===----------------------------------------------------------------------===//
5698
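// Parses the gpr_idx(...) operand of s_set_gpr_idx_on, e.g. (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
// Any combination of the SRC0/SRC1/SRC2/DST modes may be listed, or the
// parenthesis may be closed immediately to select none.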
5699 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5700
5701 using namespace llvm::AMDGPU::VGPRIndexMode;
5702
5703 if (trySkipToken(AsmToken::RParen)) {
5704 return OFF;
5705 }
5706
5707 int64_t Imm = 0;
5708
5709 while (true) {
5710 unsigned Mode = 0;
5711 SMLoc S = Parser.getTok().getLoc();
5712
5713 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5714 if (trySkipId(IdSymbolic[ModeId])) {
5715 Mode = 1 << ModeId;
5716 break;
5717 }
5718 }
5719
5720 if (Mode == 0) {
5721 Error(S, (Imm == 0)?
5722 "expected a VGPR index mode or a closing parenthesis" :
5723 "expected a VGPR index mode");
5724 break;
5725 }
5726
5727 if (Imm & Mode) {
5728 Error(S, "duplicate VGPR index mode");
5729 break;
5730 }
5731 Imm |= Mode;
5732
5733 if (trySkipToken(AsmToken::RParen))
5734 break;
5735 if (!skipToken(AsmToken::Comma,
5736 "expected a comma or a closing parenthesis"))
5737 break;
5738 }
5739
5740 return Imm;
5741 }
5742
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5745
5746 int64_t Imm = 0;
5747 SMLoc S = Parser.getTok().getLoc();
5748
5749 if (getLexer().getKind() == AsmToken::Identifier &&
5750 Parser.getTok().getString() == "gpr_idx" &&
5751 getLexer().peekTok().is(AsmToken::LParen)) {
5752
5753 Parser.Lex();
5754 Parser.Lex();
5755
5756 // If parse failed, trigger an error but do not return error code
5757 // to avoid excessive error messages.
5758 Imm = parseGPRIdxMacro();
5759
5760 } else {
5761 if (getParser().parseAbsoluteExpression(Imm))
5762 return MatchOperand_NoMatch;
5763 if (Imm < 0 || !isUInt<4>(Imm)) {
5764 Error(S, "invalid immediate: only 4-bit values are legal");
5765 }
5766 }
5767
5768 Operands.push_back(
5769 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5770 return MatchOperand_Success;
5771 }
5772
5773 bool AMDGPUOperand::isGPRIdxMode() const {
5774 return isImmTy(ImmTyGprIdxMode);
5775 }
5776
5777 //===----------------------------------------------------------------------===//
5778 // sopp branch targets
5779 //===----------------------------------------------------------------------===//
5780
5781 OperandMatchResultTy
5782 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5783
5784 // Make sure we are not parsing something
5785 // that looks like a label or an expression but is not.
5786 // This will improve error messages.
5787 if (isRegister() || isModifier())
5788 return MatchOperand_NoMatch;
5789
5790 if (parseExpr(Operands)) {
5791
5792 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5793 assert(Opr.isImm() || Opr.isExpr());
5794 SMLoc Loc = Opr.getStartLoc();
5795
5796 // Currently we do not support arbitrary expressions as branch targets.
5797 // Only labels and absolute expressions are accepted.
5798 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5799 Error(Loc, "expected an absolute expression or a label");
5800 } else if (Opr.isImm() && !Opr.isS16Imm()) {
5801 Error(Loc, "expected a 16-bit signed jump offset");
5802 }
5803 }
5804
5805 return MatchOperand_Success; // avoid excessive error messages
5806 }
5807
5808 //===----------------------------------------------------------------------===//
5809 // Boolean holding registers
5810 //===----------------------------------------------------------------------===//
5811
5812 OperandMatchResultTy
5813 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5814 return parseReg(Operands);
5815 }
5816
5817 //===----------------------------------------------------------------------===//
5818 // mubuf
5819 //===----------------------------------------------------------------------===//
5820
5821 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5822 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5823 }
5824
5825 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5826 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5827 }
5828
5829 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5830 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5831 }
5832
5833 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5834 const OperandVector &Operands,
5835 bool IsAtomic,
5836 bool IsAtomicReturn,
5837 bool IsLds) {
5838 bool IsLdsOpcode = IsLds;
5839 bool HasLdsModifier = false;
5840 OptionalImmIndexMap OptionalIdx;
5841 assert(IsAtomicReturn ? IsAtomic : true);
5842 unsigned FirstOperandIdx = 1;
5843
5844 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5845 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5846
5847 // Add the register arguments
5848 if (Op.isReg()) {
5849 Op.addRegOperands(Inst, 1);
5850 // Insert a tied src for atomic return dst.
5851 // This cannot be postponed as subsequent calls to
5852 // addImmOperands rely on correct number of MC operands.
5853 if (IsAtomicReturn && i == FirstOperandIdx)
5854 Op.addRegOperands(Inst, 1);
5855 continue;
5856 }
5857
5858 // Handle the case where soffset is an immediate
5859 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5860 Op.addImmOperands(Inst, 1);
5861 continue;
5862 }
5863
5864 HasLdsModifier |= Op.isLDS();
5865
5866 // Handle tokens like 'offen' which are sometimes hard-coded into the
5867 // asm string. There are no MCInst operands for these.
5868 if (Op.isToken()) {
5869 continue;
5870 }
5871 assert(Op.isImm());
5872
5873 // Handle optional arguments
5874 OptionalIdx[Op.getImmTy()] = i;
5875 }
5876
5877 // This is a workaround for an llvm quirk which may result in an
5878 // incorrect instruction selection. Lds and non-lds versions of
5879 // MUBUF instructions are identical except that lds versions
5880 // have a mandatory 'lds' modifier. However, this modifier follows the
5881 // optional modifiers, and the llvm asm matcher regards this 'lds'
5882 // modifier as an optional one. As a result, an lds version
5883 // of the opcode may be selected even if it has no 'lds' modifier.
5884 if (IsLdsOpcode && !HasLdsModifier) {
5885 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5886 if (NoLdsOpcode != -1) { // Got lds version - correct it.
5887 Inst.setOpcode(NoLdsOpcode);
5888 IsLdsOpcode = false;
5889 }
5890 }
5891
5892 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5893 if (!IsAtomic) { // glc is hard-coded.
5894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5895 }
5896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5897
5898 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5899 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5900 }
5901
5902 if (isGFX10())
5903 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5904 }
5905
5906 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5907 OptionalImmIndexMap OptionalIdx;
5908
5909 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5910 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5911
5912 // Add the register arguments
5913 if (Op.isReg()) {
5914 Op.addRegOperands(Inst, 1);
5915 continue;
5916 }
5917
5918 // Handle the case where soffset is an immediate
5919 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5920 Op.addImmOperands(Inst, 1);
5921 continue;
5922 }
5923
5924 // Handle tokens like 'offen' which are sometimes hard-coded into the
5925 // asm string. There are no MCInst operands for these.
5926 if (Op.isToken()) {
5927 continue;
5928 }
5929 assert(Op.isImm());
5930
5931 // Handle optional arguments
5932 OptionalIdx[Op.getImmTy()] = i;
5933 }
5934
5935 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5936 AMDGPUOperand::ImmTyOffset);
5937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5941
5942 if (isGFX10())
5943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5944 }
5945
5946 //===----------------------------------------------------------------------===//
5947 // mimg
5948 //===----------------------------------------------------------------------===//
5949
5950 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5951 bool IsAtomic) {
5952 unsigned I = 1;
5953 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5954 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5955 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5956 }
5957
5958 if (IsAtomic) {
5959 // Add src, same as dst
5960 assert(Desc.getNumDefs() == 1);
5961 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5962 }
5963
5964 OptionalImmIndexMap OptionalIdx;
5965
5966 for (unsigned E = Operands.size(); I != E; ++I) {
5967 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5968
5969 // Add the register arguments
5970 if (Op.isReg()) {
5971 Op.addRegOperands(Inst, 1);
5972 } else if (Op.isImmModifier()) {
5973 OptionalIdx[Op.getImmTy()] = I;
5974 } else if (!Op.isToken()) {
5975 llvm_unreachable("unexpected operand type");
5976 }
5977 }
5978
5979 bool IsGFX10 = isGFX10();
5980
5981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5982 if (IsGFX10)
5983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5984 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5985 if (IsGFX10)
5986 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5988 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5989 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5992 if (!IsGFX10)
5993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5995 }
5996
5997 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5998 cvtMIMG(Inst, Operands, true);
5999 }
6000
6001 //===----------------------------------------------------------------------===//
6002 // smrd
6003 //===----------------------------------------------------------------------===//
6004
6005 bool AMDGPUOperand::isSMRDOffset8() const {
6006 return isImm() && isUInt<8>(getImm());
6007 }
6008
6009 bool AMDGPUOperand::isSMRDOffset20() const {
6010 return isImm() && isUInt<20>(getImm());
6011 }
6012
6013 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6014 // 32-bit literals are only supported on CI, and we only want to use them
6015 // when the offset is larger than 8 bits.
6016 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6017 }
6018
6019 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6020 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6021 }
6022
6023 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6024 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6025 }
6026
6027 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6028 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6029 }
6030
6031 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6032 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6033 }
6034
6035 //===----------------------------------------------------------------------===//
6036 // vop3
6037 //===----------------------------------------------------------------------===//
6038
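// The omod (output modifier) field encodes 0 = none, 1 = *2, 2 = *4, 3 = /2.
// The conversions below translate the assembly forms mul:2 / mul:4 / div:2
// into that encoding.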
6039 static bool ConvertOmodMul(int64_t &Mul) {
6040 if (Mul != 1 && Mul != 2 && Mul != 4)
6041 return false;
6042
6043 Mul >>= 1;
6044 return true;
6045 }
6046
6047 static bool ConvertOmodDiv(int64_t &Div) {
6048 if (Div == 1) {
6049 Div = 0;
6050 return true;
6051 }
6052
6053 if (Div == 2) {
6054 Div = 3;
6055 return true;
6056 }
6057
6058 return false;
6059 }
6060
6061 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6062 if (BoundCtrl == 0) {
6063 BoundCtrl = 1;
6064 return true;
6065 }
6066
6067 if (BoundCtrl == -1) {
6068 BoundCtrl = 0;
6069 return true;
6070 }
6071
6072 return false;
6073 }
6074
6075 // Note: the order in this table matches the order of operands in AsmString.
6076 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6077 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
6078 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
6079 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
6080 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6081 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6082 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
6083 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
6084 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
6085 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6086 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
6087 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6088 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
6089 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
6090 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
6091 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
6092 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
6093 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
6094 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
6095 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6096 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
6097 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
6098 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
6099 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
6100 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
6101 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
6102 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
6103 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
6104 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6105 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6106 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6107 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
6108 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6109 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6110 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6111 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6112 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6113 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6114 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6115 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6116 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6117 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6118 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6119 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6120 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6121 };
6122
6123 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6124
6125 OperandMatchResultTy res = parseOptionalOpr(Operands);
6126
6127 // This is a hack to enable hardcoded mandatory operands which follow
6128 // optional operands.
6129 //
6130 // The current design assumes that all operands after the first optional operand
6131 // are also optional. However, the implementation of some instructions violates
6132 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6133 //
6134 // To alleviate this problem, we have to (implicitly) parse extra operands
6135 // to make sure the autogenerated parser of custom operands never hits hardcoded
6136 // mandatory operands.
6137
6138 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6139 if (res != MatchOperand_Success ||
6140 isToken(AsmToken::EndOfStatement))
6141 break;
6142
6143 trySkipToken(AsmToken::Comma);
6144 res = parseOptionalOpr(Operands);
6145 }
6146
6147 return res;
6148 }
6149
6150 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6151 OperandMatchResultTy res;
6152 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6153 // try to parse any optional operand here
6154 if (Op.IsBit) {
6155 res = parseNamedBit(Op.Name, Operands, Op.Type);
6156 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6157 res = parseOModOperand(Operands);
6158 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6159 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6160 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6161 res = parseSDWASel(Operands, Op.Name, Op.Type);
6162 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6163 res = parseSDWADstUnused(Operands);
6164 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6165 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6166 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6167 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6168 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6169 Op.ConvertResult);
6170 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6171 res = parseDim(Operands);
6172 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6173 res = parseDfmtNfmt(Operands);
6174 } else {
6175 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6176 }
6177 if (res != MatchOperand_NoMatch) {
6178 return res;
6179 }
6180 }
6181 return MatchOperand_NoMatch;
6182 }
6183
6184 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6185 StringRef Name = Parser.getTok().getString();
6186 if (Name == "mul") {
6187 return parseIntWithPrefix("mul", Operands,
6188 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6189 }
6190
6191 if (Name == "div") {
6192 return parseIntWithPrefix("div", Operands,
6193 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6194 }
6195
6196 return MatchOperand_NoMatch;
6197 }
6198
6199 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6200 cvtVOP3P(Inst, Operands);
6201
6202 int Opc = Inst.getOpcode();
6203
6204 int SrcNum;
6205 const int Ops[] = { AMDGPU::OpName::src0,
6206 AMDGPU::OpName::src1,
6207 AMDGPU::OpName::src2 };
6208 for (SrcNum = 0;
6209 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6210 ++SrcNum);
6211 assert(SrcNum > 0);
6212
6213 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6214 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6215
6216 if ((OpSel & (1 << SrcNum)) != 0) {
6217 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6218 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6219 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6220 }
6221 }
6222
6223 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6224 // 1. This operand is an input modifiers operand
6225 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6226 // 2. This is not the last operand
6227 && Desc.NumOperands > (OpNum + 1)
6228 // 3. Next operand is register class
6229 && Desc.OpInfo[OpNum + 1].RegClass != -1
6230 // 4. Next register is not tied to any other operand
6231 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6232 }
6233
6234 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6235 {
6236 OptionalImmIndexMap OptionalIdx;
6237 unsigned Opc = Inst.getOpcode();
6238
6239 unsigned I = 1;
6240 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6241 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6242 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6243 }
6244
6245 for (unsigned E = Operands.size(); I != E; ++I) {
6246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6247 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6248 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6249 } else if (Op.isInterpSlot() ||
6250 Op.isInterpAttr() ||
6251 Op.isAttrChan()) {
6252 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6253 } else if (Op.isImmModifier()) {
6254 OptionalIdx[Op.getImmTy()] = I;
6255 } else {
6256 llvm_unreachable("unhandled operand type");
6257 }
6258 }
6259
6260 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6262 }
6263
6264 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6266 }
6267
6268 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6270 }
6271 }
6272
6273 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6274 OptionalImmIndexMap &OptionalIdx) {
6275 unsigned Opc = Inst.getOpcode();
6276
6277 unsigned I = 1;
6278 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6279 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6280 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6281 }
6282
6283 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6284 // This instruction has src modifiers
6285 for (unsigned E = Operands.size(); I != E; ++I) {
6286 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6287 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6288 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6289 } else if (Op.isImmModifier()) {
6290 OptionalIdx[Op.getImmTy()] = I;
6291 } else if (Op.isRegOrImm()) {
6292 Op.addRegOrImmOperands(Inst, 1);
6293 } else {
6294 llvm_unreachable("unhandled operand type");
6295 }
6296 }
6297 } else {
6298 // No src modifiers
6299 for (unsigned E = Operands.size(); I != E; ++I) {
6300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6301 if (Op.isMod()) {
6302 OptionalIdx[Op.getImmTy()] = I;
6303 } else {
6304 Op.addRegOrImmOperands(Inst, 1);
6305 }
6306 }
6307 }
6308
6309 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6310 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6311 }
6312
6313 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6314 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6315 }
6316
6317 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6318 // these have a src2 register operand that is tied to the dst operand.
6319 // We do not allow modifiers for this operand in the assembler, so
6320 // src2_modifiers should be 0.
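// For example (illustrative), 'v_mac_f32 v0, v1, v2' computes
// v0 = v1 * v2 + v0, so v0 appears both as dst and as the tied src2.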
6321 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6322 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6323 Opc == AMDGPU::V_MAC_F32_e64_vi ||
6324 Opc == AMDGPU::V_MAC_F16_e64_vi ||
6325 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6326 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6327 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6328 auto it = Inst.begin();
6329 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6330 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6331 ++it;
6332 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6333 }
6334 }
6335
6336 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6337 OptionalImmIndexMap OptionalIdx;
6338 cvtVOP3(Inst, Operands, OptionalIdx);
6339 }
6340
6341 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6342 const OperandVector &Operands) {
6343 OptionalImmIndexMap OptIdx;
6344 const int Opc = Inst.getOpcode();
6345 const MCInstrDesc &Desc = MII.get(Opc);
6346
6347 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6348
6349 cvtVOP3(Inst, Operands, OptIdx);
6350
6351 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6352 assert(!IsPacked);
6353 Inst.addOperand(Inst.getOperand(0));
6354 }
6355
6356 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6357 // instruction, and then figure out where to actually put the modifiers
6358
6359 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6360
6361 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6362 if (OpSelHiIdx != -1) {
6363 int DefaultVal = IsPacked ? -1 : 0;
6364 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6365 DefaultVal);
6366 }
6367
6368 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6369 if (NegLoIdx != -1) {
6370 assert(IsPacked);
6371 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6372 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6373 }
6374
6375 const int Ops[] = { AMDGPU::OpName::src0,
6376 AMDGPU::OpName::src1,
6377 AMDGPU::OpName::src2 };
6378 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6379 AMDGPU::OpName::src1_modifiers,
6380 AMDGPU::OpName::src2_modifiers };
6381
6382 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6383
6384 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6385 unsigned OpSelHi = 0;
6386 unsigned NegLo = 0;
6387 unsigned NegHi = 0;
6388
6389 if (OpSelHiIdx != -1) {
6390 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6391 }
6392
6393 if (NegLoIdx != -1) {
6394 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6395 NegLo = Inst.getOperand(NegLoIdx).getImm();
6396 NegHi = Inst.getOperand(NegHiIdx).getImm();
6397 }
6398
6399 for (int J = 0; J < 3; ++J) {
6400 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6401 if (OpIdx == -1)
6402 break;
6403
6404 uint32_t ModVal = 0;
6405
6406 if ((OpSel & (1 << J)) != 0)
6407 ModVal |= SISrcMods::OP_SEL_0;
6408
6409 if ((OpSelHi & (1 << J)) != 0)
6410 ModVal |= SISrcMods::OP_SEL_1;
6411
6412 if ((NegLo & (1 << J)) != 0)
6413 ModVal |= SISrcMods::NEG;
6414
6415 if ((NegHi & (1 << J)) != 0)
6416 ModVal |= SISrcMods::NEG_HI;
6417
6418 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6419
6420 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6421 }
6422 }
6423
6424 //===----------------------------------------------------------------------===//
6425 // dpp
6426 //===----------------------------------------------------------------------===//
6427
6428 bool AMDGPUOperand::isDPP8() const {
6429 return isImmTy(ImmTyDPP8);
6430 }
6431
6432 bool AMDGPUOperand::isDPPCtrl() const {
6433 using namespace AMDGPU::DPP;
6434
6435 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6436 if (result) {
6437 int64_t Imm = getImm();
6438 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6439 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6440 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6441 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6442 (Imm == DppCtrl::WAVE_SHL1) ||
6443 (Imm == DppCtrl::WAVE_ROL1) ||
6444 (Imm == DppCtrl::WAVE_SHR1) ||
6445 (Imm == DppCtrl::WAVE_ROR1) ||
6446 (Imm == DppCtrl::ROW_MIRROR) ||
6447 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6448 (Imm == DppCtrl::BCAST15) ||
6449 (Imm == DppCtrl::BCAST31) ||
6450 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6451 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6452 }
6453 return false;
6454 }
6455
6456 //===----------------------------------------------------------------------===//
6457 // mAI
6458 //===----------------------------------------------------------------------===//
6459
6460 bool AMDGPUOperand::isBLGP() const {
6461 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6462 }
6463
6464 bool AMDGPUOperand::isCBSZ() const {
6465 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6466 }
6467
6468 bool AMDGPUOperand::isABID() const {
6469 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6470 }
6471
isS16Imm() const6472 bool AMDGPUOperand::isS16Imm() const {
6473 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6474 }
6475
isU16Imm() const6476 bool AMDGPUOperand::isU16Imm() const {
6477 return isImm() && isUInt<16>(getImm());
6478 }
6479
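// Parse the MIMG dimension operand, e.g. "dim:SQ_RSRC_IMG_2D"; the shorthand
// form without the SQ_RSRC_IMG_ prefix (e.g. "dim:2D") is also accepted.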
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

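// Parse the dpp8 lane-selection operand, e.g. "dpp8:[7,6,5,4,3,2,1,0]"
// (illustrative). Anything other than a "dpp8" prefix is forwarded to
// parseDPPCtrl so the two DPP syntaxes can share one custom parser hook.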
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

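  // Pack the eight 3-bit lane selectors into a single immediate; selector i
  // occupies bits [3*i+2 : 3*i].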
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

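      // The first selector was parsed into bits [1:0]; the remaining three
      // occupy successive 2-bit fields of the quad_perm encoding.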
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
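      // e.g. row_shl:1, row_ror:15, wave_shr:1, row_bcast:31 (illustrative).
      // The numeric argument is range-checked and mapped onto the
      // corresponding DppCtrl encoding below.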
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old/src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

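  // dpp8 carries an explicit FI (fetch inactive) flag; the classic dpp form
  // instead takes optional row_mask/bank_mask/bound_ctrl/fi immediates, added
  // here with their defaults when they were not written in the source.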
  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

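// Parse an SDWA sub-dword select, e.g. "dst_sel:WORD_1" or "src0_sel:BYTE_2"
// (illustrative); Prefix names the operand and Type selects the immediate
// kind to create.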
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

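// Thin wrappers that route the different SDWA encodings (VOP1/VOP2/VOP2b/
// VOP2e/VOPC) to cvtSDWA with the appropriate vcc-skipping behaviour.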
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // The v_nop_sdwa variants (vi/gfx9/gfx10) have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: their src2 register operand is tied to
  // the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable with
    // 64-bit operands. The code below enables it for SReg_64 operands used as
    // source and destination; remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

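// Parse the optional immediate that may follow s_endpgm, e.g. "s_endpgm 1"
// (illustrative); when the operand is omitted it defaults to 0.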
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }