1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file is part of the WebAssembly Assembler.
11 ///
12 /// It contains code to translate a parsed .s file into MCInsts.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
17 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
18 #include "TargetInfo/WebAssemblyTargetInfo.h"
19 #include "WebAssembly.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
25 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
26 #include "llvm/MC/MCSectionWasm.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/MC/MCSymbolWasm.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/TargetRegistry.h"
33
34 using namespace llvm;
35
36 #define DEBUG_TYPE "wasm-asm-parser"
37
38 static const char *getSubtargetFeatureName(uint64_t Val);
39
40 namespace {
41
42 /// WebAssemblyOperand - Instances of this class represent the operands in a
43 /// parsed Wasm machine instruction.
44 struct WebAssemblyOperand : public MCParsedAsmOperand {
45 enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
46
47 SMLoc StartLoc, EndLoc;
48
49 struct TokOp {
50 StringRef Tok;
51 };
52
53 struct IntOp {
54 int64_t Val;
55 };
56
57 struct FltOp {
58 double Val;
59 };
60
61 struct SymOp {
62 const MCExpr *Exp;
63 };
64
65 struct BrLOp {
66 std::vector<unsigned> List;
67 };
68
69 union {
70 struct TokOp Tok;
71 struct IntOp Int;
72 struct FltOp Flt;
73 struct SymOp Sym;
74 struct BrLOp BrL;
75 };
76
WebAssemblyOperand__anonf4bcd6730111::WebAssemblyOperand77 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
78 : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
WebAssemblyOperand__anonf4bcd6730111::WebAssemblyOperand79 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
80 : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
WebAssemblyOperand__anonf4bcd6730111::WebAssemblyOperand81 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
82 : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand__anonf4bcd6730111::WebAssemblyOperand83 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
84 : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
WebAssemblyOperand__anonf4bcd6730111::WebAssemblyOperand85 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
86 : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
87
~WebAssemblyOperand__anonf4bcd6730111::WebAssemblyOperand88 ~WebAssemblyOperand() {
89 if (isBrList())
90 BrL.~BrLOp();
91 }
92
isToken__anonf4bcd6730111::WebAssemblyOperand93 bool isToken() const override { return Kind == Token; }
isImm__anonf4bcd6730111::WebAssemblyOperand94 bool isImm() const override { return Kind == Integer || Kind == Symbol; }
isFPImm__anonf4bcd6730111::WebAssemblyOperand95 bool isFPImm() const { return Kind == Float; }
isMem__anonf4bcd6730111::WebAssemblyOperand96 bool isMem() const override { return false; }
isReg__anonf4bcd6730111::WebAssemblyOperand97 bool isReg() const override { return false; }
isBrList__anonf4bcd6730111::WebAssemblyOperand98 bool isBrList() const { return Kind == BrList; }
99
getReg__anonf4bcd6730111::WebAssemblyOperand100 unsigned getReg() const override {
101 llvm_unreachable("Assembly inspects a register operand");
102 return 0;
103 }
104
getToken__anonf4bcd6730111::WebAssemblyOperand105 StringRef getToken() const {
106 assert(isToken());
107 return Tok.Tok;
108 }
109
getStartLoc__anonf4bcd6730111::WebAssemblyOperand110 SMLoc getStartLoc() const override { return StartLoc; }
getEndLoc__anonf4bcd6730111::WebAssemblyOperand111 SMLoc getEndLoc() const override { return EndLoc; }
112
addRegOperands__anonf4bcd6730111::WebAssemblyOperand113 void addRegOperands(MCInst &, unsigned) const {
114 // Required by the assembly matcher.
115 llvm_unreachable("Assembly matcher creates register operands");
116 }
117
addImmOperands__anonf4bcd6730111::WebAssemblyOperand118 void addImmOperands(MCInst &Inst, unsigned N) const {
119 assert(N == 1 && "Invalid number of operands!");
120 if (Kind == Integer)
121 Inst.addOperand(MCOperand::createImm(Int.Val));
122 else if (Kind == Symbol)
123 Inst.addOperand(MCOperand::createExpr(Sym.Exp));
124 else
125 llvm_unreachable("Should be integer immediate or symbol!");
126 }
127
addFPImmOperands__anonf4bcd6730111::WebAssemblyOperand128 void addFPImmOperands(MCInst &Inst, unsigned N) const {
129 assert(N == 1 && "Invalid number of operands!");
130 if (Kind == Float)
131 Inst.addOperand(MCOperand::createFPImm(Flt.Val));
132 else
133 llvm_unreachable("Should be float immediate!");
134 }
135
addBrListOperands__anonf4bcd6730111::WebAssemblyOperand136 void addBrListOperands(MCInst &Inst, unsigned N) const {
137 assert(N == 1 && isBrList() && "Invalid BrList!");
138 for (auto Br : BrL.List)
139 Inst.addOperand(MCOperand::createImm(Br));
140 }
141
print__anonf4bcd6730111::WebAssemblyOperand142 void print(raw_ostream &OS) const override {
143 switch (Kind) {
144 case Token:
145 OS << "Tok:" << Tok.Tok;
146 break;
147 case Integer:
148 OS << "Int:" << Int.Val;
149 break;
150 case Float:
151 OS << "Flt:" << Flt.Val;
152 break;
153 case Symbol:
154 OS << "Sym:" << Sym.Exp;
155 break;
156 case BrList:
157 OS << "BrList:" << BrL.List.size();
158 break;
159 }
160 }
161 };
162
163 class WebAssemblyAsmParser final : public MCTargetAsmParser {
// The generic assembly parser this target parser sits on top of; used below to
// advance tokens, parse expressions and report errors.
164 MCAsmParser &Parser;
// The lexer obtained from Parser (see the constructor); used to inspect the
// current token.
165 MCAsmLexer &Lexer;
166
167 // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
// Signatures is appended to by addSignature(), Names by storeName().
168 std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
169 std::vector<std::unique_ptr<std::string>> Names;
170
171 // Order of labels, directives and instructions in a .s file have no
172 // syntactical enforcement. This class is a callback from the actual parser,
173 // and yet we have to be feeding data to the streamer in a very particular
174 // order to ensure a correct binary encoding that matches the regular backend
175 // (the streamer does not enforce this). This "state machine" enum helps
176 // guarantee that correct order.
177 enum ParserState {
// Initial state.
178 FileStart,
// Entered from onLabelParsed().
179 Label,
// Entered when a .functype names the most recently parsed label.
180 FunctionStart,
// Entered after a .local directive, or from ensureLocals().
181 FunctionLocals,
// NOTE(review): not assigned anywhere in the visible part of this file.
182 Instructions,
// Entered when an end_function instruction is parsed.
183 EndFunction,
// Entered from CheckDataSection().
184 DataSection,
185 } CurrentState = FileStart;
186
187 // For ensuring blocks are properly nested.
188 enum NestingType {
189 Function,
190 Block,
191 Loop,
192 Try,
193 If,
194 Else,
// Used as the "no second alternative" default argument of pop();
// nestingString() has no name for it.
195 Undefined,
196 };
// Currently open constructs, innermost last (see push() / pop()).
197 std::vector<NestingType> NestingStack;
198
199 // We track this to see if a .functype following a label is the same,
200 // as this is how we recognize the start of a function.
201 MCSymbol *LastLabel = nullptr;
// Label of the most recently started function; set together with the
// FunctionStart state.
202 MCSymbol *LastFunctionLabel = nullptr;
203
204 public:
WebAssemblyAsmParser(const MCSubtargetInfo & STI,MCAsmParser & Parser,const MCInstrInfo & MII,const MCTargetOptions & Options)205 WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
206 const MCInstrInfo &MII, const MCTargetOptions &Options)
207 : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
208 Lexer(Parser.getLexer()) {
209 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
210 }
211
212 #define GET_ASSEMBLER_HEADER
213 #include "WebAssemblyGenAsmMatcher.inc"
214
215 // TODO: This is required to be implemented, but appears unused.
ParseRegister(unsigned &,SMLoc &,SMLoc &)216 bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
217 SMLoc & /*EndLoc*/) override {
218 llvm_unreachable("ParseRegister is not implemented.");
219 }
tryParseRegister(unsigned &,SMLoc &,SMLoc &)220 OperandMatchResultTy tryParseRegister(unsigned & /*RegNo*/,
221 SMLoc & /*StartLoc*/,
222 SMLoc & /*EndLoc*/) override {
223 llvm_unreachable("tryParseRegister is not implemented.");
224 }
225
error(const Twine & Msg,const AsmToken & Tok)226 bool error(const Twine &Msg, const AsmToken &Tok) {
227 return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
228 }
229
error(const Twine & Msg)230 bool error(const Twine &Msg) {
231 return Parser.Error(Lexer.getTok().getLoc(), Msg);
232 }
233
addSignature(std::unique_ptr<wasm::WasmSignature> && Sig)234 void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
235 Signatures.push_back(std::move(Sig));
236 }
237
storeName(StringRef Name)238 StringRef storeName(StringRef Name) {
239 std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
240 Names.push_back(std::move(N));
241 return *Names.back();
242 }
243
nestingString(NestingType NT)244 std::pair<StringRef, StringRef> nestingString(NestingType NT) {
245 switch (NT) {
246 case Function:
247 return {"function", "end_function"};
248 case Block:
249 return {"block", "end_block"};
250 case Loop:
251 return {"loop", "end_loop"};
252 case Try:
253 return {"try", "end_try"};
254 case If:
255 return {"if", "end_if"};
256 case Else:
257 return {"else", "end_if"};
258 default:
259 llvm_unreachable("unknown NestingType");
260 }
261 }
262
push(NestingType NT)263 void push(NestingType NT) { NestingStack.push_back(NT); }
264
pop(StringRef Ins,NestingType NT1,NestingType NT2=Undefined)265 bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
266 if (NestingStack.empty())
267 return error(Twine("End of block construct with no start: ") + Ins);
268 auto Top = NestingStack.back();
269 if (Top != NT1 && Top != NT2)
270 return error(Twine("Block construct type mismatch, expected: ") +
271 nestingString(Top).second + ", instead got: " + Ins);
272 NestingStack.pop_back();
273 return false;
274 }
275
ensureEmptyNestingStack()276 bool ensureEmptyNestingStack() {
277 auto Err = !NestingStack.empty();
278 while (!NestingStack.empty()) {
279 error(Twine("Unmatched block construct(s) at function end: ") +
280 nestingString(NestingStack.back()).first);
281 NestingStack.pop_back();
282 }
283 return Err;
284 }
285
isNext(AsmToken::TokenKind Kind)286 bool isNext(AsmToken::TokenKind Kind) {
287 auto Ok = Lexer.is(Kind);
288 if (Ok)
289 Parser.Lex();
290 return Ok;
291 }
292
expect(AsmToken::TokenKind Kind,const char * KindName)293 bool expect(AsmToken::TokenKind Kind, const char *KindName) {
294 if (!isNext(Kind))
295 return error(std::string("Expected ") + KindName + ", instead got: ",
296 Lexer.getTok());
297 return false;
298 }
299
expectIdent()300 StringRef expectIdent() {
301 if (!Lexer.is(AsmToken::Identifier)) {
302 error("Expected identifier, got: ", Lexer.getTok());
303 return StringRef();
304 }
305 auto Name = Lexer.getTok().getString();
306 Parser.Lex();
307 return Name;
308 }
309
parseType(const StringRef & Type)310 Optional<wasm::ValType> parseType(const StringRef &Type) {
311 // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
312 // "invalid" value.
313 if (Type == "i32")
314 return wasm::ValType::I32;
315 if (Type == "i64")
316 return wasm::ValType::I64;
317 if (Type == "f32")
318 return wasm::ValType::F32;
319 if (Type == "f64")
320 return wasm::ValType::F64;
321 if (Type == "v128" || Type == "i8x16" || Type == "i16x8" ||
322 Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
323 Type == "f64x2")
324 return wasm::ValType::V128;
325 if (Type == "exnref")
326 return wasm::ValType::EXNREF;
327 if (Type == "funcref")
328 return wasm::ValType::FUNCREF;
329 if (Type == "externref")
330 return wasm::ValType::EXTERNREF;
331 return Optional<wasm::ValType>();
332 }
333
parseBlockType(StringRef ID)334 WebAssembly::BlockType parseBlockType(StringRef ID) {
335 // Multivalue block types are handled separately in parseSignature
336 return StringSwitch<WebAssembly::BlockType>(ID)
337 .Case("i32", WebAssembly::BlockType::I32)
338 .Case("i64", WebAssembly::BlockType::I64)
339 .Case("f32", WebAssembly::BlockType::F32)
340 .Case("f64", WebAssembly::BlockType::F64)
341 .Case("v128", WebAssembly::BlockType::V128)
342 .Case("funcref", WebAssembly::BlockType::Funcref)
343 .Case("externref", WebAssembly::BlockType::Externref)
344 .Case("exnref", WebAssembly::BlockType::Exnref)
345 .Case("void", WebAssembly::BlockType::Void)
346 .Default(WebAssembly::BlockType::Invalid);
347 }
348
parseRegTypeList(SmallVectorImpl<wasm::ValType> & Types)349 bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
350 while (Lexer.is(AsmToken::Identifier)) {
351 auto Type = parseType(Lexer.getTok().getString());
352 if (!Type)
353 return error("unknown type: ", Lexer.getTok());
354 Types.push_back(Type.getValue());
355 Parser.Lex();
356 if (!isNext(AsmToken::Comma))
357 break;
358 }
359 return false;
360 }
361
parseSingleInteger(bool IsNegative,OperandVector & Operands)362 void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
363 auto &Int = Lexer.getTok();
364 int64_t Val = Int.getIntVal();
365 if (IsNegative)
366 Val = -Val;
367 Operands.push_back(std::make_unique<WebAssemblyOperand>(
368 WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
369 WebAssemblyOperand::IntOp{Val}));
370 Parser.Lex();
371 }
372
parseSingleFloat(bool IsNegative,OperandVector & Operands)373 bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
374 auto &Flt = Lexer.getTok();
375 double Val;
376 if (Flt.getString().getAsDouble(Val, false))
377 return error("Cannot parse real: ", Flt);
378 if (IsNegative)
379 Val = -Val;
380 Operands.push_back(std::make_unique<WebAssemblyOperand>(
381 WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
382 WebAssemblyOperand::FltOp{Val}));
383 Parser.Lex();
384 return false;
385 }
386
parseSpecialFloatMaybe(bool IsNegative,OperandVector & Operands)387 bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
388 if (Lexer.isNot(AsmToken::Identifier))
389 return true;
390 auto &Flt = Lexer.getTok();
391 auto S = Flt.getString();
392 double Val;
393 if (S.compare_lower("infinity") == 0) {
394 Val = std::numeric_limits<double>::infinity();
395 } else if (S.compare_lower("nan") == 0) {
396 Val = std::numeric_limits<double>::quiet_NaN();
397 } else {
398 return true;
399 }
400 if (IsNegative)
401 Val = -Val;
402 Operands.push_back(std::make_unique<WebAssemblyOperand>(
403 WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
404 WebAssemblyOperand::FltOp{Val}));
405 Parser.Lex();
406 return false;
407 }
408
checkForP2AlignIfLoadStore(OperandVector & Operands,StringRef InstName)409 bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
410 // FIXME: there is probably a cleaner way to do this.
411 auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
412 InstName.find(".store") != StringRef::npos;
413 auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
414 if (IsLoadStore || IsAtomic) {
415 // Parse load/store operands of the form: offset:p2align=align
416 if (IsLoadStore && isNext(AsmToken::Colon)) {
417 auto Id = expectIdent();
418 if (Id != "p2align")
419 return error("Expected p2align, instead got: " + Id);
420 if (expect(AsmToken::Equal, "="))
421 return true;
422 if (!Lexer.is(AsmToken::Integer))
423 return error("Expected integer constant");
424 parseSingleInteger(false, Operands);
425 } else {
426 // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
427 // index. We need to avoid parsing an extra alignment operand for the
428 // lane index.
429 auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos;
430 if (IsLoadStoreLane && Operands.size() == 4)
431 return false;
432 // Alignment not specified (or atomics, must use default alignment).
433 // We can't just call WebAssembly::GetDefaultP2Align since we don't have
434 // an opcode until after the assembly matcher, so set a default to fix
435 // up later.
436 auto Tok = Lexer.getTok();
437 Operands.push_back(std::make_unique<WebAssemblyOperand>(
438 WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
439 WebAssemblyOperand::IntOp{-1}));
440 }
441 }
442 return false;
443 }
444
parseHeapType(StringRef Id)445 WebAssembly::HeapType parseHeapType(StringRef Id) {
446 return StringSwitch<WebAssembly::HeapType>(Id)
447 .Case("extern", WebAssembly::HeapType::Externref)
448 .Case("func", WebAssembly::HeapType::Funcref)
449 .Default(WebAssembly::HeapType::Invalid);
450 }
451
addBlockTypeOperand(OperandVector & Operands,SMLoc NameLoc,WebAssembly::BlockType BT)452 void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
453 WebAssembly::BlockType BT) {
454 Operands.push_back(std::make_unique<WebAssemblyOperand>(
455 WebAssemblyOperand::Integer, NameLoc, NameLoc,
456 WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
457 }
458
ParseInstruction(ParseInstructionInfo &,StringRef Name,SMLoc NameLoc,OperandVector & Operands)459 bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
460 SMLoc NameLoc, OperandVector &Operands) override {
461 // Note: Name does NOT point into the sourcecode, but to a local, so
462 // use NameLoc instead.
463 Name = StringRef(NameLoc.getPointer(), Name.size());
464
465 // WebAssembly has instructions with / in them, which AsmLexer parses
466 // as separate tokens, so if we find such tokens immediately adjacent (no
467 // whitespace), expand the name to include them:
468 for (;;) {
469 auto &Sep = Lexer.getTok();
470 if (Sep.getLoc().getPointer() != Name.end() ||
471 Sep.getKind() != AsmToken::Slash)
472 break;
473 // Extend name with /
474 Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
475 Parser.Lex();
476 // We must now find another identifier, or error.
477 auto &Id = Lexer.getTok();
478 if (Id.getKind() != AsmToken::Identifier ||
479 Id.getLoc().getPointer() != Name.end())
480 return error("Incomplete instruction name: ", Id);
481 Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
482 Parser.Lex();
483 }
484
485 // Now construct the name as first operand.
486 Operands.push_back(std::make_unique<WebAssemblyOperand>(
487 WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
488 WebAssemblyOperand::TokOp{Name}));
489
490 // If this instruction is part of a control flow structure, ensure
491 // proper nesting.
492 bool ExpectBlockType = false;
493 bool ExpectFuncType = false;
494 bool ExpectHeapType = false;
495 if (Name == "block") {
496 push(Block);
497 ExpectBlockType = true;
498 } else if (Name == "loop") {
499 push(Loop);
500 ExpectBlockType = true;
501 } else if (Name == "try") {
502 push(Try);
503 ExpectBlockType = true;
504 } else if (Name == "if") {
505 push(If);
506 ExpectBlockType = true;
507 } else if (Name == "else") {
508 if (pop(Name, If))
509 return true;
510 push(Else);
511 } else if (Name == "catch") {
512 if (pop(Name, Try))
513 return true;
514 push(Try);
515 } else if (Name == "end_if") {
516 if (pop(Name, If, Else))
517 return true;
518 } else if (Name == "end_try") {
519 if (pop(Name, Try))
520 return true;
521 } else if (Name == "end_loop") {
522 if (pop(Name, Loop))
523 return true;
524 } else if (Name == "end_block") {
525 if (pop(Name, Block))
526 return true;
527 } else if (Name == "end_function") {
528 ensureLocals(getStreamer());
529 CurrentState = EndFunction;
530 if (pop(Name, Function) || ensureEmptyNestingStack())
531 return true;
532 } else if (Name == "call_indirect" || Name == "return_call_indirect") {
533 ExpectFuncType = true;
534 } else if (Name == "ref.null") {
535 ExpectHeapType = true;
536 }
537
538 if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
539 // This has a special TYPEINDEX operand which in text we
540 // represent as a signature, such that we can re-build this signature,
541 // attach it to an anonymous symbol, which is what WasmObjectWriter
542 // expects to be able to recreate the actual unique-ified type indices.
543 auto Loc = Parser.getTok();
544 auto Signature = std::make_unique<wasm::WasmSignature>();
545 if (parseSignature(Signature.get()))
546 return true;
547 // Got signature as block type, don't need more
548 ExpectBlockType = false;
549 auto &Ctx = getStreamer().getContext();
550 // The "true" here will cause this to be a nameless symbol.
551 MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
552 auto *WasmSym = cast<MCSymbolWasm>(Sym);
553 WasmSym->setSignature(Signature.get());
554 addSignature(std::move(Signature));
555 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
556 const MCExpr *Expr = MCSymbolRefExpr::create(
557 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
558 Operands.push_back(std::make_unique<WebAssemblyOperand>(
559 WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(),
560 WebAssemblyOperand::SymOp{Expr}));
561 }
562
563 while (Lexer.isNot(AsmToken::EndOfStatement)) {
564 auto &Tok = Lexer.getTok();
565 switch (Tok.getKind()) {
566 case AsmToken::Identifier: {
567 if (!parseSpecialFloatMaybe(false, Operands))
568 break;
569 auto &Id = Lexer.getTok();
570 if (ExpectBlockType) {
571 // Assume this identifier is a block_type.
572 auto BT = parseBlockType(Id.getString());
573 if (BT == WebAssembly::BlockType::Invalid)
574 return error("Unknown block type: ", Id);
575 addBlockTypeOperand(Operands, NameLoc, BT);
576 Parser.Lex();
577 } else if (ExpectHeapType) {
578 auto HeapType = parseHeapType(Id.getString());
579 if (HeapType == WebAssembly::HeapType::Invalid) {
580 return error("Expected a heap type: ", Id);
581 }
582 Operands.push_back(std::make_unique<WebAssemblyOperand>(
583 WebAssemblyOperand::Integer, Id.getLoc(), Id.getEndLoc(),
584 WebAssemblyOperand::IntOp{static_cast<int64_t>(HeapType)}));
585 Parser.Lex();
586 } else {
587 // Assume this identifier is a label.
588 const MCExpr *Val;
589 SMLoc End;
590 if (Parser.parseExpression(Val, End))
591 return error("Cannot parse symbol: ", Lexer.getTok());
592 Operands.push_back(std::make_unique<WebAssemblyOperand>(
593 WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
594 WebAssemblyOperand::SymOp{Val}));
595 if (checkForP2AlignIfLoadStore(Operands, Name))
596 return true;
597 }
598 break;
599 }
600 case AsmToken::Minus:
601 Parser.Lex();
602 if (Lexer.is(AsmToken::Integer)) {
603 parseSingleInteger(true, Operands);
604 if (checkForP2AlignIfLoadStore(Operands, Name))
605 return true;
606 } else if(Lexer.is(AsmToken::Real)) {
607 if (parseSingleFloat(true, Operands))
608 return true;
609 } else if (!parseSpecialFloatMaybe(true, Operands)) {
610 } else {
611 return error("Expected numeric constant instead got: ",
612 Lexer.getTok());
613 }
614 break;
615 case AsmToken::Integer:
616 parseSingleInteger(false, Operands);
617 if (checkForP2AlignIfLoadStore(Operands, Name))
618 return true;
619 break;
620 case AsmToken::Real: {
621 if (parseSingleFloat(false, Operands))
622 return true;
623 break;
624 }
625 case AsmToken::LCurly: {
626 Parser.Lex();
627 auto Op = std::make_unique<WebAssemblyOperand>(
628 WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
629 if (!Lexer.is(AsmToken::RCurly))
630 for (;;) {
631 Op->BrL.List.push_back(Lexer.getTok().getIntVal());
632 expect(AsmToken::Integer, "integer");
633 if (!isNext(AsmToken::Comma))
634 break;
635 }
636 expect(AsmToken::RCurly, "}");
637 Operands.push_back(std::move(Op));
638 break;
639 }
640 default:
641 return error("Unexpected token in operand: ", Tok);
642 }
643 if (Lexer.isNot(AsmToken::EndOfStatement)) {
644 if (expect(AsmToken::Comma, ","))
645 return true;
646 }
647 }
648 if (ExpectBlockType && Operands.size() == 1) {
649 // Support blocks with no operands as default to void.
650 addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
651 }
652 Parser.Lex();
653 return false;
654 }
655
onLabelParsed(MCSymbol * Symbol)656 void onLabelParsed(MCSymbol *Symbol) override {
657 LastLabel = Symbol;
658 CurrentState = Label;
659 }
660
parseSignature(wasm::WasmSignature * Signature)661 bool parseSignature(wasm::WasmSignature *Signature) {
662 if (expect(AsmToken::LParen, "("))
663 return true;
664 if (parseRegTypeList(Signature->Params))
665 return true;
666 if (expect(AsmToken::RParen, ")"))
667 return true;
668 if (expect(AsmToken::MinusGreater, "->"))
669 return true;
670 if (expect(AsmToken::LParen, "("))
671 return true;
672 if (parseRegTypeList(Signature->Returns))
673 return true;
674 if (expect(AsmToken::RParen, ")"))
675 return true;
676 return false;
677 }
678
CheckDataSection()679 bool CheckDataSection() {
680 if (CurrentState != DataSection) {
681 auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
682 if (WS && WS->getKind().isText())
683 return error("data directive must occur in a data segment: ",
684 Lexer.getTok());
685 }
686 CurrentState = DataSection;
687 return false;
688 }
689
690 // This function processes wasm-specific directives streamed to
691 // WebAssemblyTargetStreamer, all others go to the generic parser
692 // (see WasmAsmParser).
ParseDirective(AsmToken DirectiveID)693 bool ParseDirective(AsmToken DirectiveID) override {
694 // This function has a really weird return value behavior that is different
695 // from all the other parsing functions:
696 // - return true && no tokens consumed -> don't know this directive / let
697 // the generic parser handle it.
698 // - return true && tokens consumed -> a parsing error occurred.
699 // - return false -> processed this directive successfully.
700 assert(DirectiveID.getKind() == AsmToken::Identifier);
701 auto &Out = getStreamer();
702 auto &TOut =
703 reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
704 auto &Ctx = Out.getContext();
705
706 // TODO: any time we return an error, at least one token must have been
707 // consumed, otherwise this will not signal an error to the caller.
708 if (DirectiveID.getString() == ".globaltype") {
709 auto SymName = expectIdent();
710 if (SymName.empty())
711 return true;
712 if (expect(AsmToken::Comma, ","))
713 return true;
714 auto TypeTok = Lexer.getTok();
715 auto TypeName = expectIdent();
716 if (TypeName.empty())
717 return true;
718 auto Type = parseType(TypeName);
719 if (!Type)
720 return error("Unknown type in .globaltype directive: ", TypeTok);
721 // Optional mutable modifier. Default to mutable for historical reasons.
722 // Ideally we would have gone with immutable as the default and used `mut`
723 // as the modifier to match the `.wat` format.
724 bool Mutable = true;
725 if (isNext(AsmToken::Comma)) {
726 TypeTok = Lexer.getTok();
727 auto Id = expectIdent();
728 if (Id == "immutable")
729 Mutable = false;
730 else
731 // Should we also allow `mutable` and `mut` here for clarity?
732 return error("Unknown type in .globaltype modifier: ", TypeTok);
733 }
734 // Now set this symbol with the correct type.
735 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
736 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
737 WasmSym->setGlobalType(
738 wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable});
739 // And emit the directive again.
740 TOut.emitGlobalType(WasmSym);
741 return expect(AsmToken::EndOfStatement, "EOL");
742 }
743
744 if (DirectiveID.getString() == ".tabletype") {
745 auto SymName = expectIdent();
746 if (SymName.empty())
747 return true;
748 if (expect(AsmToken::Comma, ","))
749 return true;
750 auto TypeTok = Lexer.getTok();
751 auto TypeName = expectIdent();
752 if (TypeName.empty())
753 return true;
754 auto Type = parseType(TypeName);
755 if (!Type)
756 return error("Unknown type in .tabletype directive: ", TypeTok);
757
758 // Now that we have the name and table type, we can actually create the
759 // symbol
760 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
761 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
762 WasmSym->setTableType(Type.getValue());
763 TOut.emitTableType(WasmSym);
764 return expect(AsmToken::EndOfStatement, "EOL");
765 }
766
767 if (DirectiveID.getString() == ".functype") {
768 // This code has to send things to the streamer similar to
769 // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
770 // TODO: would be good to factor this into a common function, but the
771 // assembler and backend really don't share any common code, and this code
772 // parses the locals separately.
773 auto SymName = expectIdent();
774 if (SymName.empty())
775 return true;
776 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
777 if (CurrentState == Label && WasmSym == LastLabel) {
778 // This .functype indicates a start of a function.
779 if (ensureEmptyNestingStack())
780 return true;
781 CurrentState = FunctionStart;
782 LastFunctionLabel = LastLabel;
783 push(Function);
784 }
785 auto Signature = std::make_unique<wasm::WasmSignature>();
786 if (parseSignature(Signature.get()))
787 return true;
788 WasmSym->setSignature(Signature.get());
789 addSignature(std::move(Signature));
790 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
791 TOut.emitFunctionType(WasmSym);
792 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
793 return expect(AsmToken::EndOfStatement, "EOL");
794 }
795
796 if (DirectiveID.getString() == ".export_name") {
797 auto SymName = expectIdent();
798 if (SymName.empty())
799 return true;
800 if (expect(AsmToken::Comma, ","))
801 return true;
802 auto ExportName = expectIdent();
803 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
804 WasmSym->setExportName(storeName(ExportName));
805 TOut.emitExportName(WasmSym, ExportName);
806 }
807
808 if (DirectiveID.getString() == ".import_module") {
809 auto SymName = expectIdent();
810 if (SymName.empty())
811 return true;
812 if (expect(AsmToken::Comma, ","))
813 return true;
814 auto ImportModule = expectIdent();
815 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
816 WasmSym->setImportModule(storeName(ImportModule));
817 TOut.emitImportModule(WasmSym, ImportModule);
818 }
819
820 if (DirectiveID.getString() == ".import_name") {
821 auto SymName = expectIdent();
822 if (SymName.empty())
823 return true;
824 if (expect(AsmToken::Comma, ","))
825 return true;
826 auto ImportName = expectIdent();
827 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
828 WasmSym->setImportName(storeName(ImportName));
829 TOut.emitImportName(WasmSym, ImportName);
830 }
831
832 if (DirectiveID.getString() == ".eventtype") {
833 auto SymName = expectIdent();
834 if (SymName.empty())
835 return true;
836 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
837 auto Signature = std::make_unique<wasm::WasmSignature>();
838 if (parseRegTypeList(Signature->Params))
839 return true;
840 WasmSym->setSignature(Signature.get());
841 addSignature(std::move(Signature));
842 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
843 TOut.emitEventType(WasmSym);
844 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
845 return expect(AsmToken::EndOfStatement, "EOL");
846 }
847
848 if (DirectiveID.getString() == ".local") {
849 if (CurrentState != FunctionStart)
850 return error(".local directive should follow the start of a function",
851 Lexer.getTok());
852 SmallVector<wasm::ValType, 4> Locals;
853 if (parseRegTypeList(Locals))
854 return true;
855 TOut.emitLocal(Locals);
856 CurrentState = FunctionLocals;
857 return expect(AsmToken::EndOfStatement, "EOL");
858 }
859
860 if (DirectiveID.getString() == ".int8" ||
861 DirectiveID.getString() == ".int16" ||
862 DirectiveID.getString() == ".int32" ||
863 DirectiveID.getString() == ".int64") {
864 if (CheckDataSection()) return true;
865 const MCExpr *Val;
866 SMLoc End;
867 if (Parser.parseExpression(Val, End))
868 return error("Cannot parse .int expression: ", Lexer.getTok());
869 size_t NumBits = 0;
870 DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
871 Out.emitValue(Val, NumBits / 8, End);
872 return expect(AsmToken::EndOfStatement, "EOL");
873 }
874
875 if (DirectiveID.getString() == ".asciz") {
876 if (CheckDataSection()) return true;
877 std::string S;
878 if (Parser.parseEscapedString(S))
879 return error("Cannot parse string constant: ", Lexer.getTok());
880 Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
881 return expect(AsmToken::EndOfStatement, "EOL");
882 }
883
884 return true; // We didn't process this directive.
885 }
886
// Called either when the first instruction is parsed or when the function
// ends.
ensureLocals(MCStreamer & Out)888 void ensureLocals(MCStreamer &Out) {
889 if (CurrentState == FunctionStart) {
890 // We haven't seen a .local directive yet. The streamer requires locals to
891 // be encoded as a prelude to the instructions, so emit an empty list of
892 // locals here.
893 auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
894 *Out.getTargetStreamer());
895 TOut.emitLocal(SmallVector<wasm::ValType, 0>());
896 CurrentState = FunctionLocals;
897 }
898 }
899
MatchAndEmitInstruction(SMLoc IDLoc,unsigned &,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)900 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
901 OperandVector &Operands, MCStreamer &Out,
902 uint64_t &ErrorInfo,
903 bool MatchingInlineAsm) override {
904 MCInst Inst;
905 Inst.setLoc(IDLoc);
906 FeatureBitset MissingFeatures;
907 unsigned MatchResult = MatchInstructionImpl(
908 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm);
909 switch (MatchResult) {
910 case Match_Success: {
911 ensureLocals(Out);
912 // Fix unknown p2align operands.
913 auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
914 if (Align != -1U) {
915 auto &Op0 = Inst.getOperand(0);
916 if (Op0.getImm() == -1)
917 Op0.setImm(Align);
918 }
919 if (getSTI().getTargetTriple().isArch64Bit()) {
920 // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
921 // an offset64 arg instead of offset32, but to the assembler matcher
922 // they're both immediates so don't get selected for.
923 auto Opc64 = WebAssembly::getWasm64Opcode(
924 static_cast<uint16_t>(Inst.getOpcode()));
925 if (Opc64 >= 0) {
926 Inst.setOpcode(Opc64);
927 }
928 }
929 Out.emitInstruction(Inst, getSTI());
930 if (CurrentState == EndFunction) {
931 onEndOfFunction();
932 } else {
933 CurrentState = Instructions;
934 }
935 return false;
936 }
937 case Match_MissingFeature: {
938 assert(MissingFeatures.count() > 0 && "Expected missing features");
939 SmallString<128> Message;
940 raw_svector_ostream OS(Message);
941 OS << "instruction requires:";
942 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
943 if (MissingFeatures.test(i))
944 OS << ' ' << getSubtargetFeatureName(i);
945 return Parser.Error(IDLoc, Message);
946 }
947 case Match_MnemonicFail:
948 return Parser.Error(IDLoc, "invalid instruction");
949 case Match_NearMisses:
950 return Parser.Error(IDLoc, "ambiguous instruction");
951 case Match_InvalidTiedOperand:
952 case Match_InvalidOperand: {
953 SMLoc ErrorLoc = IDLoc;
954 if (ErrorInfo != ~0ULL) {
955 if (ErrorInfo >= Operands.size())
956 return Parser.Error(IDLoc, "too few operands for instruction");
957 ErrorLoc = Operands[ErrorInfo]->getStartLoc();
958 if (ErrorLoc == SMLoc())
959 ErrorLoc = IDLoc;
960 }
961 return Parser.Error(ErrorLoc, "invalid operand for instruction");
962 }
963 }
964 llvm_unreachable("Implement any new match types added!");
965 }
966
doBeforeLabelEmit(MCSymbol * Symbol)967 void doBeforeLabelEmit(MCSymbol *Symbol) override {
968 // Start a new section for the next function automatically, since our
969 // object writer expects each function to have its own section. This way
970 // The user can't forget this "convention".
971 auto SymName = Symbol->getName();
972 if (SymName.startswith(".L"))
973 return; // Local Symbol.
974 // Only create a new text section if we're already in one.
975 auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
976 if (!CWS || !CWS->getKind().isText())
977 return;
978 auto SecName = ".text." + SymName;
979 auto WS = getContext().getWasmSection(SecName, SectionKind::getText());
980 getStreamer().SwitchSection(WS);
981 // Also generate DWARF for this section if requested.
982 if (getContext().getGenDwarfForAssembly())
983 getContext().addGenDwarfSection(WS);
984 }
985
onEndOfFunction()986 void onEndOfFunction() {
987 // Automatically output a .size directive, so it becomes optional for the
988 // user.
989 if (!LastFunctionLabel) return;
990 auto TempSym = getContext().createLinkerPrivateTempSymbol();
991 getStreamer().emitLabel(TempSym);
992 auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext());
993 auto End = MCSymbolRefExpr::create(TempSym, getContext());
994 auto Expr =
995 MCBinaryExpr::create(MCBinaryExpr::Sub, End, Start, getContext());
996 getStreamer().emitELFSize(LastFunctionLabel, Expr);
997 }
998
onEndOfFile()999 void onEndOfFile() override { ensureEmptyNestingStack(); }
1000 };
1001 } // end anonymous namespace
1002
1003 // Force static initialization.
LLVMInitializeWebAssemblyAsmParser()1004 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
1005 RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
1006 RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
1007 }
1008
1009 #define GET_REGISTER_MATCHER
1010 #define GET_SUBTARGET_FEATURE_NAME
1011 #define GET_MATCHER_IMPLEMENTATION
1012 #include "WebAssemblyGenAsmMatcher.inc"
1013