• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/BinaryFormat/Dwarf.h"
27 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCCodeView.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCDirectives.h"
32 #include "llvm/MC/MCDwarf.h"
33 #include "llvm/MC/MCExpr.h"
34 #include "llvm/MC/MCInstPrinter.h"
35 #include "llvm/MC/MCInstrDesc.h"
36 #include "llvm/MC/MCInstrInfo.h"
37 #include "llvm/MC/MCObjectFileInfo.h"
38 #include "llvm/MC/MCParser/AsmCond.h"
39 #include "llvm/MC/MCParser/AsmLexer.h"
40 #include "llvm/MC/MCParser/MCAsmLexer.h"
41 #include "llvm/MC/MCParser/MCAsmParser.h"
42 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
43 #include "llvm/MC/MCParser/MCAsmParserUtils.h"
44 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
45 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
46 #include "llvm/MC/MCRegisterInfo.h"
47 #include "llvm/MC/MCSection.h"
48 #include "llvm/MC/MCStreamer.h"
49 #include "llvm/MC/MCSymbol.h"
50 #include "llvm/MC/MCTargetOptions.h"
51 #include "llvm/MC/MCValue.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/Format.h"
56 #include "llvm/Support/MD5.h"
57 #include "llvm/Support/MathExtras.h"
58 #include "llvm/Support/MemoryBuffer.h"
59 #include "llvm/Support/SMLoc.h"
60 #include "llvm/Support/SourceMgr.h"
61 #include "llvm/Support/raw_ostream.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cctype>
65 #include <climits>
66 #include <cstddef>
67 #include <cstdint>
68 #include <deque>
69 #include <memory>
70 #include <sstream>
71 #include <string>
72 #include <tuple>
73 #include <utility>
74 #include <vector>
75 
76 using namespace llvm;
77 
78 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
79 
80 namespace {
81 
82 /// Helper types for tracking macro definitions.
83 typedef std::vector<AsmToken> MCAsmMacroArgument;
84 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
85 
86 /// Helper class for storing information about an active macro instantiation.
87 struct MacroInstantiation {
88   /// The location of the instantiation.
89   SMLoc InstantiationLoc;
90 
91   /// The buffer where parsing should resume upon instantiation completion.
92   unsigned ExitBuffer;
93 
94   /// The location where parsing should resume upon instantiation completion.
95   SMLoc ExitLoc;
96 
97   /// The depth of TheCondStack at the start of the instantiation.
98   size_t CondStackDepth;
99 };
100 
101 struct ParseStatementInfo {
102   /// The parsed operands from the last parsed statement.
103   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
104 
105   /// The opcode from the last parsed instruction.
106   unsigned Opcode = ~0U;
107 
108   /// Was there an error parsing the inline assembly?
109   bool ParseError = false;
110 
111   /// The value associated with a macro exit.
112   Optional<std::string> ExitValue;
113 
114   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
115 
116   ParseStatementInfo() = delete;
ParseStatementInfo__anond7a80e8d0111::ParseStatementInfo117   ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
118       : AsmRewrites(rewrites) {}
119 };
120 
/// Kinds of initializer that a struct field can carry.
enum FieldType {
  FT_INTEGRAL = 0, // Integer-expression initializer, kept as an MCExpr.
  FT_REAL = 1,     // Real-number initializer, kept as an APInt.
  FT_STRUCT = 2    // Struct initializer, kept recursively.
};
126 
127 struct FieldInfo;
128 struct StructInfo {
129   StringRef Name;
130   bool IsUnion = false;
131   unsigned Alignment = 0;
132   unsigned Size = 0;
133   unsigned AlignmentSize = 0;
134   std::vector<FieldInfo> Fields;
135   StringMap<size_t> FieldsByName;
136 
137   FieldInfo &addField(StringRef FieldName, FieldType FT,
138                       unsigned FieldAlignmentSize);
139 
140   StructInfo() = default;
141 
StructInfo__anond7a80e8d0111::StructInfo142   StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue)
143       : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
144 };
145 
146 // FIXME: This should probably use a class hierarchy, raw pointers between the
147 // objects, and dynamic type resolution instead of a union. On the other hand,
148 // ownership then becomes much more complicated; the obvious thing would be to
149 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
150 
151 struct StructInitializer;
152 struct IntFieldInfo {
153   SmallVector<const MCExpr *, 1> Values;
154 
155   IntFieldInfo() = default;
IntFieldInfo__anond7a80e8d0111::IntFieldInfo156   IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anond7a80e8d0111::IntFieldInfo157   IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
158 };
159 struct RealFieldInfo {
160   SmallVector<APInt, 1> AsIntValues;
161 
162   RealFieldInfo() = default;
RealFieldInfo__anond7a80e8d0111::RealFieldInfo163   RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anond7a80e8d0111::RealFieldInfo164   RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
165 };
166 struct StructFieldInfo {
167   std::vector<StructInitializer> Initializers;
168   StructInfo Structure;
169 
170   StructFieldInfo() = default;
StructFieldInfo__anond7a80e8d0111::StructFieldInfo171   StructFieldInfo(const std::vector<StructInitializer> &V, StructInfo S) {
172     Initializers = V;
173     Structure = S;
174   }
StructFieldInfo__anond7a80e8d0111::StructFieldInfo175   StructFieldInfo(std::vector<StructInitializer> &&V, StructInfo S) {
176     Initializers = V;
177     Structure = S;
178   }
179 };
180 
181 class FieldInitializer {
182 public:
183   FieldType FT;
184   union {
185     IntFieldInfo IntInfo;
186     RealFieldInfo RealInfo;
187     StructFieldInfo StructInfo;
188   };
189 
~FieldInitializer()190   ~FieldInitializer() {
191     switch (FT) {
192     case FT_INTEGRAL:
193       IntInfo.~IntFieldInfo();
194       break;
195     case FT_REAL:
196       RealInfo.~RealFieldInfo();
197       break;
198     case FT_STRUCT:
199       StructInfo.~StructFieldInfo();
200       break;
201     }
202   }
203 
FieldInitializer(FieldType FT)204   FieldInitializer(FieldType FT) : FT(FT) {
205     switch (FT) {
206     case FT_INTEGRAL:
207       new (&IntInfo) IntFieldInfo();
208       break;
209     case FT_REAL:
210       new (&RealInfo) RealFieldInfo();
211       break;
212     case FT_STRUCT:
213       new (&StructInfo) StructFieldInfo();
214       break;
215     }
216   }
217 
FieldInitializer(SmallVector<const MCExpr *,1> && Values)218   FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) : FT(FT_INTEGRAL) {
219     new (&IntInfo) IntFieldInfo(Values);
220   }
221 
FieldInitializer(SmallVector<APInt,1> && AsIntValues)222   FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) : FT(FT_REAL) {
223     new (&RealInfo) RealFieldInfo(AsIntValues);
224   }
225 
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)226   FieldInitializer(std::vector<StructInitializer> &&Initializers,
227                    struct StructInfo Structure)
228       : FT(FT_STRUCT) {
229     new (&StructInfo) StructFieldInfo(Initializers, Structure);
230   }
231 
FieldInitializer(const FieldInitializer & Initializer)232   FieldInitializer(const FieldInitializer &Initializer) : FT(Initializer.FT) {
233     switch (FT) {
234     case FT_INTEGRAL:
235       new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
236       break;
237     case FT_REAL:
238       new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
239       break;
240     case FT_STRUCT:
241       new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
242       break;
243     }
244   }
245 
FieldInitializer(FieldInitializer && Initializer)246   FieldInitializer(FieldInitializer &&Initializer) : FT(Initializer.FT) {
247     switch (FT) {
248     case FT_INTEGRAL:
249       new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
250       break;
251     case FT_REAL:
252       new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
253       break;
254     case FT_STRUCT:
255       new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
256       break;
257     }
258   }
259 
operator =(const FieldInitializer & Initializer)260   FieldInitializer &operator=(const FieldInitializer &Initializer) {
261     if (FT != Initializer.FT) {
262       switch (FT) {
263       case FT_INTEGRAL:
264         IntInfo.~IntFieldInfo();
265         break;
266       case FT_REAL:
267         RealInfo.~RealFieldInfo();
268         break;
269       case FT_STRUCT:
270         StructInfo.~StructFieldInfo();
271         break;
272       }
273     }
274     FT = Initializer.FT;
275     switch (FT) {
276     case FT_INTEGRAL:
277       IntInfo = Initializer.IntInfo;
278       break;
279     case FT_REAL:
280       RealInfo = Initializer.RealInfo;
281       break;
282     case FT_STRUCT:
283       StructInfo = Initializer.StructInfo;
284       break;
285     }
286     return *this;
287   }
288 
operator =(FieldInitializer && Initializer)289   FieldInitializer &operator=(FieldInitializer &&Initializer) {
290     if (FT != Initializer.FT) {
291       switch (FT) {
292       case FT_INTEGRAL:
293         IntInfo.~IntFieldInfo();
294         break;
295       case FT_REAL:
296         RealInfo.~RealFieldInfo();
297         break;
298       case FT_STRUCT:
299         StructInfo.~StructFieldInfo();
300         break;
301       }
302     }
303     FT = Initializer.FT;
304     switch (FT) {
305     case FT_INTEGRAL:
306       IntInfo = Initializer.IntInfo;
307       break;
308     case FT_REAL:
309       RealInfo = Initializer.RealInfo;
310       break;
311     case FT_STRUCT:
312       StructInfo = Initializer.StructInfo;
313       break;
314     }
315     return *this;
316   }
317 };
318 
319 struct StructInitializer {
320   std::vector<FieldInitializer> FieldInitializers;
321 };
322 
323 struct FieldInfo {
324   // Offset of the field within the containing STRUCT.
325   size_t Offset = 0;
326 
327   // Total size of the field (= LengthOf * Type).
328   unsigned SizeOf = 0;
329 
330   // Number of elements in the field (1 if scalar, >1 if an array).
331   unsigned LengthOf = 0;
332 
333   // Size of a single entry in this field, in bytes ("type" in MASM standards).
334   unsigned Type = 0;
335 
336   FieldInitializer Contents;
337 
FieldInfo__anond7a80e8d0111::FieldInfo338   FieldInfo(FieldType FT) : Contents(FT) {}
339 };
340 
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)341 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
342                                 unsigned FieldAlignmentSize) {
343   if (!FieldName.empty())
344     FieldsByName[FieldName.lower()] = Fields.size();
345   Fields.emplace_back(FT);
346   FieldInfo &Field = Fields.back();
347   if (IsUnion) {
348     Field.Offset = 0;
349   } else {
350     Size = llvm::alignTo(Size, std::min(Alignment, FieldAlignmentSize));
351     Field.Offset = Size;
352   }
353   AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
354   return Field;
355 }
356 
357 /// The concrete assembly parser instance.
358 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
359 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
360 class MasmParser : public MCAsmParser {
361 private:
362   AsmLexer Lexer;
363   MCContext &Ctx;
364   MCStreamer &Out;
365   const MCAsmInfo &MAI;
366   SourceMgr &SrcMgr;
367   SourceMgr::DiagHandlerTy SavedDiagHandler;
368   void *SavedDiagContext;
369   std::unique_ptr<MCAsmParserExtension> PlatformParser;
370 
371   /// This is the current buffer index we're lexing from as managed by the
372   /// SourceMgr object.
373   unsigned CurBuffer;
374   std::vector<bool> EndStatementAtEOFStack;
375 
376   AsmCond TheCondState;
377   std::vector<AsmCond> TheCondStack;
378 
379   /// maps directive names to handler methods in parser
380   /// extensions. Extensions register themselves in this map by calling
381   /// addDirectiveHandler.
382   StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
383 
384   /// maps assembly-time variable names to variables.
385   struct Variable {
386     StringRef Name;
387     bool Redefinable = true;
388     bool IsText = false;
389     int64_t NumericValue = 0;
390     std::string TextValue;
391   };
392   StringMap<Variable> Variables;
393 
394   /// Stack of active struct definitions.
395   SmallVector<StructInfo, 1> StructInProgress;
396 
397   /// Maps struct tags to struct definitions.
398   StringMap<StructInfo> Structs;
399 
400   /// Maps data location names to types.
401   StringMap<AsmTypeInfo> KnownType;
402 
403   /// Stack of active macro instantiations.
404   std::vector<MacroInstantiation*> ActiveMacros;
405 
406   /// List of bodies of anonymous macros.
407   std::deque<MCAsmMacro> MacroLikeBodies;
408 
409   /// Keeps track of how many .macro's have been instantiated.
410   unsigned NumOfMacroInstantiations;
411 
412   /// The values from the last parsed cpp hash file line comment if any.
413   struct CppHashInfoTy {
414     StringRef Filename;
415     int64_t LineNumber;
416     SMLoc Loc;
417     unsigned Buf;
CppHashInfoTy__anond7a80e8d0111::MasmParser::CppHashInfoTy418     CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {}
419   };
420   CppHashInfoTy CppHashInfo;
421 
422   /// The filename from the first cpp hash file line comment, if any.
423   StringRef FirstCppHashFilename;
424 
425   /// List of forward directional labels for diagnosis at the end.
426   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
427 
428   /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
429   /// Defaults to 1U, meaning Intel.
430   unsigned AssemblerDialect = 1U;
431 
432   /// is Darwin compatibility enabled?
433   bool IsDarwin = false;
434 
435   /// Are we parsing ms-style inline assembly?
436   bool ParsingMSInlineAsm = false;
437 
438   /// Did we already inform the user about inconsistent MD5 usage?
439   bool ReportedInconsistentMD5 = false;
440 
441   // Current <...> expression depth.
442   unsigned AngleBracketDepth = 0U;
443 
444   // Number of locals defined.
445   uint16_t LocalCounter = 0;
446 
447 public:
448   MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
449              const MCAsmInfo &MAI, unsigned CB);
450   MasmParser(const MasmParser &) = delete;
451   MasmParser &operator=(const MasmParser &) = delete;
452   ~MasmParser() override;
453 
454   bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
455 
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)456   void addDirectiveHandler(StringRef Directive,
457                            ExtensionDirectiveHandler Handler) override {
458     ExtensionDirectiveMap[Directive] = Handler;
459     if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
460       DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
461     }
462   }
463 
addAliasForDirective(StringRef Directive,StringRef Alias)464   void addAliasForDirective(StringRef Directive, StringRef Alias) override {
465     DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
466   }
467 
468   /// @name MCAsmParser Interface
469   /// {
470 
getSourceManager()471   SourceMgr &getSourceManager() override { return SrcMgr; }
getLexer()472   MCAsmLexer &getLexer() override { return Lexer; }
getContext()473   MCContext &getContext() override { return Ctx; }
getStreamer()474   MCStreamer &getStreamer() override { return Out; }
475 
getCVContext()476   CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
477 
getAssemblerDialect()478   unsigned getAssemblerDialect() override {
479     if (AssemblerDialect == ~0U)
480       return MAI.getAssemblerDialect();
481     else
482       return AssemblerDialect;
483   }
setAssemblerDialect(unsigned i)484   void setAssemblerDialect(unsigned i) override {
485     AssemblerDialect = i;
486   }
487 
488   void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
489   bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
490   bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
491 
492   const AsmToken &Lex() override;
493 
setParsingMSInlineAsm(bool V)494   void setParsingMSInlineAsm(bool V) override {
495     ParsingMSInlineAsm = V;
496     // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
497     // hex integer literals.
498     Lexer.setLexMasmIntegers(V);
499   }
isParsingMSInlineAsm()500   bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
501 
isParsingMasm() const502   bool isParsingMasm() const override { return true; }
503 
504   bool defineMacro(StringRef Name, StringRef Value) override;
505 
506   bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
507   bool lookUpField(StringRef Base, StringRef Member,
508                    AsmFieldInfo &Info) const override;
509 
510   bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
511 
512   bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
513                         unsigned &NumOutputs, unsigned &NumInputs,
514                         SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
515                         SmallVectorImpl<std::string> &Constraints,
516                         SmallVectorImpl<std::string> &Clobbers,
517                         const MCInstrInfo *MII, const MCInstPrinter *IP,
518                         MCAsmParserSemaCallback &SI) override;
519 
520   bool parseExpression(const MCExpr *&Res);
521   bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
522   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
523                         AsmTypeInfo *TypeInfo) override;
524   bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
525   bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
526                              SMLoc &EndLoc) override;
527   bool parseAbsoluteExpression(int64_t &Res) override;
528 
529   /// Parse a floating point expression using the float \p Semantics
530   /// and set \p Res to the value.
531   bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
532 
533   /// Parse an identifier or string (as a quoted identifier)
534   /// and set \p Res to the identifier contents.
535   bool parseIdentifier(StringRef &Res) override;
536   void eatToEndOfStatement() override;
537 
538   bool checkForValidSection() override;
539 
540   /// }
541 
542 private:
543   bool parseStatement(ParseStatementInfo &Info,
544                       MCAsmParserSemaCallback *SI);
545   bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
546   bool parseCppHashLineFilenameComment(SMLoc L);
547 
548   bool expandMacro(raw_svector_ostream &OS, StringRef Body,
549                    ArrayRef<MCAsmMacroParameter> Parameters,
550                    ArrayRef<MCAsmMacroArgument> A,
551                    const std::vector<std::string> &Locals, SMLoc L);
552 
553   /// Are we inside a macro instantiation?
isInsideMacroInstantiation()554   bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
555 
556   /// Handle entry to macro instantiation.
557   ///
558   /// \param M The macro.
559   /// \param NameLoc Instantiation location.
560   bool handleMacroEntry(
561       const MCAsmMacro *M, SMLoc NameLoc,
562       AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
563 
564   /// Handle invocation of macro function.
565   ///
566   /// \param M The macro.
567   /// \param NameLoc Invocation location.
568   bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
569 
570   /// Handle exit from macro instantiation.
571   void handleMacroExit();
572 
573   /// Extract AsmTokens for a macro argument.
574   bool
575   parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
576                      AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
577 
578   /// Parse all macro arguments for a given macro.
579   bool
580   parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
581                       AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
582 
583   void printMacroInstantiations();
584 
585   bool expandStatement(SMLoc Loc);
586 
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=None) const587   void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
588                     SMRange Range = None) const {
589     ArrayRef<SMRange> Ranges(Range);
590     SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
591   }
592   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
593 
594   bool lookUpField(const StructInfo &Structure, StringRef Member,
595                    AsmFieldInfo &Info) const;
596 
597   /// Should we emit DWARF describing this assembler source?  (Returns false if
598   /// the source has .file directives, which means we don't want to generate
599   /// info describing the assembler source itself.)
600   bool enabledGenDwarfForAssembly();
601 
602   /// Enter the specified file. This returns true on failure.
603   bool enterIncludeFile(const std::string &Filename);
604 
605   /// Reset the current lexer position to that given by \p Loc. The
606   /// current token is not set; clients should ensure Lex() is called
607   /// subsequently.
608   ///
609   /// \param InBuffer If not 0, should be the known buffer id that contains the
610   /// location.
611   void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
612                  bool EndStatementAtEOF = true);
613 
614   /// Parse up to a token of kind \p EndTok and return the contents from the
615   /// current token up to (but not including) this token; the current token on
616   /// exit will be either this kind or EOF. Reads through instantiated macro
617   /// functions and text macros.
618   SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
619   std::string parseStringTo(AsmToken::TokenKind EndTok);
620 
621   /// Parse up to the end of statement and return the contents from the current
622   /// token until the end of the statement; the current token on exit will be
623   /// either the EndOfStatement or EOF.
624   StringRef parseStringToEndOfStatement() override;
625 
626   bool parseTextItem(std::string &Data);
627 
628   unsigned getBinOpPrecedence(AsmToken::TokenKind K,
629                               MCBinaryExpr::Opcode &Kind);
630 
631   bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
632   bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
633   bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
634 
635   bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
636 
637   bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
638   bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
639 
640   // Generic (target and platform independent) directive parsing.
641   enum DirectiveKind {
642     DK_NO_DIRECTIVE, // Placeholder
643     DK_HANDLER_DIRECTIVE,
644     DK_ASSIGN,
645     DK_EQU,
646     DK_TEXTEQU,
647     DK_ASCII,
648     DK_ASCIZ,
649     DK_STRING,
650     DK_BYTE,
651     DK_SBYTE,
652     DK_WORD,
653     DK_SWORD,
654     DK_DWORD,
655     DK_SDWORD,
656     DK_FWORD,
657     DK_QWORD,
658     DK_SQWORD,
659     DK_DB,
660     DK_DD,
661     DK_DF,
662     DK_DQ,
663     DK_DW,
664     DK_REAL4,
665     DK_REAL8,
666     DK_REAL10,
667     DK_ALIGN,
668     DK_ORG,
669     DK_ENDR,
670     DK_EXTERN,
671     DK_PUBLIC,
672     DK_COMM,
673     DK_COMMENT,
674     DK_INCLUDE,
675     DK_REPEAT,
676     DK_WHILE,
677     DK_FOR,
678     DK_FORC,
679     DK_IF,
680     DK_IFE,
681     DK_IFB,
682     DK_IFNB,
683     DK_IFDEF,
684     DK_IFNDEF,
685     DK_IFDIF,
686     DK_IFDIFI,
687     DK_IFIDN,
688     DK_IFIDNI,
689     DK_ELSEIF,
690     DK_ELSEIFE,
691     DK_ELSEIFB,
692     DK_ELSEIFNB,
693     DK_ELSEIFDEF,
694     DK_ELSEIFNDEF,
695     DK_ELSEIFDIF,
696     DK_ELSEIFDIFI,
697     DK_ELSEIFIDN,
698     DK_ELSEIFIDNI,
699     DK_ELSE,
700     DK_ENDIF,
701     DK_FILE,
702     DK_LINE,
703     DK_LOC,
704     DK_STABS,
705     DK_CV_FILE,
706     DK_CV_FUNC_ID,
707     DK_CV_INLINE_SITE_ID,
708     DK_CV_LOC,
709     DK_CV_LINETABLE,
710     DK_CV_INLINE_LINETABLE,
711     DK_CV_DEF_RANGE,
712     DK_CV_STRINGTABLE,
713     DK_CV_STRING,
714     DK_CV_FILECHECKSUMS,
715     DK_CV_FILECHECKSUM_OFFSET,
716     DK_CV_FPO_DATA,
717     DK_CFI_SECTIONS,
718     DK_CFI_STARTPROC,
719     DK_CFI_ENDPROC,
720     DK_CFI_DEF_CFA,
721     DK_CFI_DEF_CFA_OFFSET,
722     DK_CFI_ADJUST_CFA_OFFSET,
723     DK_CFI_DEF_CFA_REGISTER,
724     DK_CFI_OFFSET,
725     DK_CFI_REL_OFFSET,
726     DK_CFI_PERSONALITY,
727     DK_CFI_LSDA,
728     DK_CFI_REMEMBER_STATE,
729     DK_CFI_RESTORE_STATE,
730     DK_CFI_SAME_VALUE,
731     DK_CFI_RESTORE,
732     DK_CFI_ESCAPE,
733     DK_CFI_RETURN_COLUMN,
734     DK_CFI_SIGNAL_FRAME,
735     DK_CFI_UNDEFINED,
736     DK_CFI_REGISTER,
737     DK_CFI_WINDOW_SAVE,
738     DK_CFI_B_KEY_FRAME,
739     DK_MACRO,
740     DK_EXITM,
741     DK_ENDM,
742     DK_PURGE,
743     DK_ERR,
744     DK_ERRB,
745     DK_ERRNB,
746     DK_ERRDEF,
747     DK_ERRNDEF,
748     DK_ERRDIF,
749     DK_ERRDIFI,
750     DK_ERRIDN,
751     DK_ERRIDNI,
752     DK_ERRE,
753     DK_ERRNZ,
754     DK_ECHO,
755     DK_STRUCT,
756     DK_UNION,
757     DK_ENDS,
758     DK_END,
759     DK_PUSHFRAME,
760     DK_PUSHREG,
761     DK_SAVEREG,
762     DK_SAVEXMM128,
763     DK_SETFRAME,
764     DK_RADIX,
765   };
766 
767   /// Maps directive name --> DirectiveKind enum, for directives parsed by this
768   /// class.
769   StringMap<DirectiveKind> DirectiveKindMap;
770 
771   bool isMacroLikeDirective();
772 
773   // Codeview def_range type parsing.
774   enum CVDefRangeType {
775     CVDR_DEFRANGE = 0, // Placeholder
776     CVDR_DEFRANGE_REGISTER,
777     CVDR_DEFRANGE_FRAMEPOINTER_REL,
778     CVDR_DEFRANGE_SUBFIELD_REGISTER,
779     CVDR_DEFRANGE_REGISTER_REL
780   };
781 
782   /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
783   /// def_range types parsed by this class.
784   StringMap<CVDefRangeType> CVDefRangeTypeMap;
785 
786   // ".ascii", ".asciz", ".string"
787   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
788 
789   // "byte", "word", ...
790   bool emitIntValue(const MCExpr *Value, unsigned Size);
791   bool parseScalarInitializer(unsigned Size,
792                               SmallVectorImpl<const MCExpr *> &Values,
793                               unsigned StringPadLength = 0);
794   bool parseScalarInstList(
795       unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
796       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
797   bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
798   bool addIntegralField(StringRef Name, unsigned Size);
799   bool parseDirectiveValue(StringRef IDVal, unsigned Size);
800   bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
801                                 StringRef Name, SMLoc NameLoc);
802 
803   // "real4", "real8", "real10"
804   bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
805   bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
806   bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
807                                size_t Size);
808   bool parseRealInstList(
809       const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
810       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
811   bool parseDirectiveNamedRealValue(StringRef TypeName,
812                                     const fltSemantics &Semantics,
813                                     unsigned Size, StringRef Name,
814                                     SMLoc NameLoc);
815 
816   bool parseOptionalAngleBracketOpen();
817   bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
818 
819   bool parseFieldInitializer(const FieldInfo &Field,
820                              FieldInitializer &Initializer);
821   bool parseFieldInitializer(const FieldInfo &Field,
822                              const IntFieldInfo &Contents,
823                              FieldInitializer &Initializer);
824   bool parseFieldInitializer(const FieldInfo &Field,
825                              const RealFieldInfo &Contents,
826                              FieldInitializer &Initializer);
827   bool parseFieldInitializer(const FieldInfo &Field,
828                              const StructFieldInfo &Contents,
829                              FieldInitializer &Initializer);
830 
831   bool parseStructInitializer(const StructInfo &Structure,
832                               StructInitializer &Initializer);
833   bool parseStructInstList(
834       const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
835       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
836 
837   bool emitFieldValue(const FieldInfo &Field);
838   bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
839   bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
840   bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
841 
842   bool emitFieldInitializer(const FieldInfo &Field,
843                             const FieldInitializer &Initializer);
844   bool emitFieldInitializer(const FieldInfo &Field,
845                             const IntFieldInfo &Contents,
846                             const IntFieldInfo &Initializer);
847   bool emitFieldInitializer(const FieldInfo &Field,
848                             const RealFieldInfo &Contents,
849                             const RealFieldInfo &Initializer);
850   bool emitFieldInitializer(const FieldInfo &Field,
851                             const StructFieldInfo &Contents,
852                             const StructFieldInfo &Initializer);
853 
854   bool emitStructInitializer(const StructInfo &Structure,
855                              const StructInitializer &Initializer);
856 
857   // User-defined types (structs, unions):
858   bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
859   bool addStructField(StringRef Name, const StructInfo &Structure);
860   bool parseDirectiveStructValue(const StructInfo &Structure,
861                                  StringRef Directive, SMLoc DirLoc);
862   bool parseDirectiveNamedStructValue(const StructInfo &Structure,
863                                       StringRef Directive, SMLoc DirLoc,
864                                       StringRef Name);
865 
866   // "=", "equ", "textequ"
867   bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
868                             DirectiveKind DirKind);
869 
870   bool parseDirectiveOrg(); // ".org"
871   bool parseDirectiveAlign();  // "align"
872 
873   // ".file", ".line", ".loc", ".stabs"
874   bool parseDirectiveFile(SMLoc DirectiveLoc);
875   bool parseDirectiveLine();
876   bool parseDirectiveLoc();
877   bool parseDirectiveStabs();
878 
879   // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
880   // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
881   bool parseDirectiveCVFile();
882   bool parseDirectiveCVFuncId();
883   bool parseDirectiveCVInlineSiteId();
884   bool parseDirectiveCVLoc();
885   bool parseDirectiveCVLinetable();
886   bool parseDirectiveCVInlineLinetable();
887   bool parseDirectiveCVDefRange();
888   bool parseDirectiveCVString();
889   bool parseDirectiveCVStringTable();
890   bool parseDirectiveCVFileChecksums();
891   bool parseDirectiveCVFileChecksumOffset();
892   bool parseDirectiveCVFPOData();
893 
894   // .cfi directives
895   bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
896   bool parseDirectiveCFIWindowSave();
897   bool parseDirectiveCFISections();
898   bool parseDirectiveCFIStartProc();
899   bool parseDirectiveCFIEndProc();
900   bool parseDirectiveCFIDefCfaOffset();
901   bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
902   bool parseDirectiveCFIAdjustCfaOffset();
903   bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
904   bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
905   bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
906   bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
907   bool parseDirectiveCFIRememberState();
908   bool parseDirectiveCFIRestoreState();
909   bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
910   bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
911   bool parseDirectiveCFIEscape();
912   bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
913   bool parseDirectiveCFISignalFrame();
914   bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
915 
916   // macro directives
917   bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
918   bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
919                                std::string &Value);
920   bool parseDirectiveEndMacro(StringRef Directive);
921   bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
922 
923   bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
924                             StringRef Name, SMLoc NameLoc);
925   bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
926   bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
927   bool parseDirectiveNestedEnds();
928 
929   /// Parse a directive like ".globl" which accepts a single symbol (which
930   /// should be a label or an external).
931   bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
932 
933   bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
934 
935   bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
936 
937   bool parseDirectiveInclude(); // "include"
938 
939   // "if" or "ife"
940   bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
941   // "ifb" or "ifnb", depending on ExpectBlank.
942   bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
943   // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
944   // CaseInsensitive.
945   bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
946                            bool CaseInsensitive);
947   // "ifdef" or "ifndef", depending on expect_defined
948   bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
949   // "elseif" or "elseife"
950   bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
951   // "elseifb" or "elseifnb", depending on ExpectBlank.
952   bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
953   // ".elseifdef" or ".elseifndef", depending on expect_defined
954   bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
955   // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
956   // ExpectEqual and CaseInsensitive.
957   bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
958                                bool CaseInsensitive);
959   bool parseDirectiveElse(SMLoc DirectiveLoc);   // "else"
960   bool parseDirectiveEndIf(SMLoc DirectiveLoc);  // "endif"
961   bool parseEscapedString(std::string &Data) override;
962   bool parseAngleBracketString(std::string &Data) override;
963 
964   // Macro-like directives
965   MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
966   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
967                                 raw_svector_ostream &OS);
968   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
969                                 SMLoc ExitLoc, raw_svector_ostream &OS);
970   bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
971   bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
972   bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
973   bool parseDirectiveWhile(SMLoc DirectiveLoc);
974 
975   // "_emit" or "__emit"
976   bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
977                             size_t Len);
978 
979   // "align"
980   bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
981 
982   // "end"
983   bool parseDirectiveEnd(SMLoc DirectiveLoc);
984 
985   // ".err"
986   bool parseDirectiveError(SMLoc DirectiveLoc);
987   // ".errb" or ".errnb", depending on ExpectBlank.
988   bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // ".errdef" or ".errndef", depending on ExpectDefined.
990   bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
991   // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
992   // and CaseInsensitive.
993   bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
994                                 bool CaseInsensitive);
995   // ".erre" or ".errnz", depending on ExpectZero.
996   bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
997 
998   // ".radix"
999   bool parseDirectiveRadix(SMLoc DirectiveLoc);
1000 
1001   // "echo"
1002   bool parseDirectiveEcho();
1003 
1004   void initializeDirectiveKindMap();
1005   void initializeCVDefRangeTypeMap();
1006 };
1007 
1008 } // end anonymous namespace
1009 
1010 namespace llvm {
1011 
1012 extern MCAsmParserExtension *createCOFFMasmParser();
1013 
1014 } // end namespace llvm
1015 
1016 enum { DEFAULT_ADDRSPACE = 0 };
1017 
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,unsigned CB=0)1018 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1019                        const MCAsmInfo &MAI, unsigned CB = 0)
1020     : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1021       CurBuffer(CB ? CB : SM.getMainFileID()) {
1022   HadError = false;
1023   // Save the old handler.
1024   SavedDiagHandler = SrcMgr.getDiagHandler();
1025   SavedDiagContext = SrcMgr.getDiagContext();
1026   // Set our own handler which calls the saved handler.
1027   SrcMgr.setDiagHandler(DiagHandler, this);
1028   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1029   EndStatementAtEOFStack.push_back(true);
1030 
1031   // Initialize the platform / file format parser.
1032   switch (Ctx.getObjectFileInfo()->getObjectFileType()) {
1033   case MCObjectFileInfo::IsCOFF:
1034     PlatformParser.reset(createCOFFMasmParser());
1035     break;
1036   default:
1037     report_fatal_error("llvm-ml currently supports only COFF output.");
1038     break;
1039   }
1040 
1041   initializeDirectiveKindMap();
1042   PlatformParser->Initialize(*this);
1043   initializeCVDefRangeTypeMap();
1044 
1045   NumOfMacroInstantiations = 0;
1046 }
1047 
~MasmParser()1048 MasmParser::~MasmParser() {
1049   assert((HadError || ActiveMacros.empty()) &&
1050          "Unexpected active macro instantiation!");
1051 
1052   // Restore the saved diagnostics handler and context for use during
1053   // finalization.
1054   SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1055 }
1056 
printMacroInstantiations()1057 void MasmParser::printMacroInstantiations() {
1058   // Print the active macro instantiation stack.
1059   for (std::vector<MacroInstantiation *>::const_reverse_iterator
1060            it = ActiveMacros.rbegin(),
1061            ie = ActiveMacros.rend();
1062        it != ie; ++it)
1063     printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1064                  "while in macro instantiation");
1065 }
1066 
Note(SMLoc L,const Twine & Msg,SMRange Range)1067 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1068   printPendingErrors();
1069   printMessage(L, SourceMgr::DK_Note, Msg, Range);
1070   printMacroInstantiations();
1071 }
1072 
Warning(SMLoc L,const Twine & Msg,SMRange Range)1073 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1074   if (getTargetParser().getTargetOptions().MCNoWarn)
1075     return false;
1076   if (getTargetParser().getTargetOptions().MCFatalWarnings)
1077     return Error(L, Msg, Range);
1078   printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1079   printMacroInstantiations();
1080   return false;
1081 }
1082 
printError(SMLoc L,const Twine & Msg,SMRange Range)1083 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1084   HadError = true;
1085   printMessage(L, SourceMgr::DK_Error, Msg, Range);
1086   printMacroInstantiations();
1087   return true;
1088 }
1089 
enterIncludeFile(const std::string & Filename)1090 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1091   std::string IncludedFile;
1092   unsigned NewBuf =
1093       SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1094   if (!NewBuf)
1095     return true;
1096 
1097   CurBuffer = NewBuf;
1098   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1099   EndStatementAtEOFStack.push_back(true);
1100   return false;
1101 }
1102 
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1103 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1104                            bool EndStatementAtEOF) {
1105   CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1106   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1107                   Loc.getPointer(), EndStatementAtEOF);
1108 }
1109 
Lex()1110 const AsmToken &MasmParser::Lex() {
1111   if (Lexer.getTok().is(AsmToken::Error))
1112     Error(Lexer.getErrLoc(), Lexer.getErr());
1113 
1114   // if it's a end of statement with a comment in it
1115   if (getTok().is(AsmToken::EndOfStatement)) {
1116     // if this is a line comment output it.
1117     if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1118         getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1119       Out.addExplicitComment(Twine(getTok().getString()));
1120   }
1121 
1122   const AsmToken *tok = &Lexer.Lex();
1123 
1124   while (tok->is(AsmToken::Identifier)) {
1125     auto it = Variables.find(tok->getIdentifier().lower());
1126     const llvm::MCAsmMacro *M =
1127         getContext().lookupMacro(tok->getIdentifier().lower());
1128     if (it != Variables.end() && it->second.IsText) {
1129       // This is a textmacro; expand it in place.
1130       std::unique_ptr<MemoryBuffer> Instantiation =
1131           MemoryBuffer::getMemBufferCopy(it->second.TextValue,
1132                                          "<instantiation>");
1133 
1134       // Jump to the macro instantiation and prime the lexer.
1135       CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation),
1136                                             getTok().getEndLoc());
1137       Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1138                       /*EndStatementAtEOF=*/false);
1139       EndStatementAtEOFStack.push_back(false);
1140       tok = &Lexer.Lex();
1141     } else if (M && M->IsFunction && Lexer.peekTok().is(AsmToken::LParen)) {
1142       // This is a macro function invocation; expand it in place.
1143       const AsmToken MacroTok = *tok;
1144       tok = &Lexer.Lex();
1145       if (handleMacroInvocation(M, MacroTok.getLoc())) {
1146         Lexer.UnLex(AsmToken(AsmToken::Error, MacroTok.getIdentifier()));
1147         tok = &Lexer.Lex();
1148       }
1149       continue;
1150     } else {
1151       break;
1152     }
1153   }
1154 
1155   // Parse comments here to be deferred until end of next statement.
1156   while (tok->is(AsmToken::Comment)) {
1157     if (MAI.preserveAsmComments())
1158       Out.addExplicitComment(Twine(tok->getString()));
1159     tok = &Lexer.Lex();
1160   }
1161 
1162   // Recognize and bypass line continuations.
1163   while (tok->is(AsmToken::BackSlash) &&
1164          Lexer.peekTok().is(AsmToken::EndOfStatement)) {
1165     // Eat both the backslash and the end of statement.
1166     Lexer.Lex();
1167     tok = &Lexer.Lex();
1168   }
1169 
1170   if (tok->is(AsmToken::Eof)) {
1171     // If this is the end of an included file, pop the parent file off the
1172     // include stack.
1173     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1174     if (ParentIncludeLoc != SMLoc()) {
1175       EndStatementAtEOFStack.pop_back();
1176       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1177       return Lex();
1178     }
1179     EndStatementAtEOFStack.pop_back();
1180     assert(EndStatementAtEOFStack.empty());
1181   }
1182 
1183   return *tok;
1184 }
1185 
enabledGenDwarfForAssembly()1186 bool MasmParser::enabledGenDwarfForAssembly() {
1187   // Check whether the user specified -g.
1188   if (!getContext().getGenDwarfForAssembly())
1189     return false;
1190   // If we haven't encountered any .file directives (which would imply that
1191   // the assembler source was produced with debug info already) then emit one
1192   // describing the assembler source file itself.
1193   if (getContext().getGenDwarfFileNumber() == 0) {
1194     // Use the first #line directive for this, if any. It's preprocessed, so
1195     // there is no checksum, and of course no source directive.
1196     if (!FirstCppHashFilename.empty())
1197       getContext().setMCLineTableRootFile(/*CUID=*/0,
1198                                           getContext().getCompilationDir(),
1199                                           FirstCppHashFilename,
1200                                           /*Cksum=*/None, /*Source=*/None);
1201     const MCDwarfFile &RootFile =
1202         getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1203     getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1204         /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1205         RootFile.Checksum, RootFile.Source));
1206   }
1207   return true;
1208 }
1209 
Run(bool NoInitialTextSection,bool NoFinalize)1210 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1211   // Create the initial section, if requested.
1212   if (!NoInitialTextSection)
1213     Out.InitSections(false);
1214 
1215   // Prime the lexer.
1216   Lex();
1217 
1218   HadError = false;
1219   AsmCond StartingCondState = TheCondState;
1220   SmallVector<AsmRewrite, 4> AsmStrRewrites;
1221 
1222   // If we are generating dwarf for assembly source files save the initial text
1223   // section.  (Don't use enabledGenDwarfForAssembly() here, as we aren't
1224   // emitting any actual debug info yet and haven't had a chance to parse any
1225   // embedded .file directives.)
1226   if (getContext().getGenDwarfForAssembly()) {
1227     MCSection *Sec = getStreamer().getCurrentSectionOnly();
1228     if (!Sec->getBeginSymbol()) {
1229       MCSymbol *SectionStartSym = getContext().createTempSymbol();
1230       getStreamer().emitLabel(SectionStartSym);
1231       Sec->setBeginSymbol(SectionStartSym);
1232     }
1233     bool InsertResult = getContext().addGenDwarfSection(Sec);
1234     assert(InsertResult && ".text section should not have debug info yet");
1235     (void)InsertResult;
1236   }
1237 
1238   // While we have input, parse each statement.
1239   while (Lexer.isNot(AsmToken::Eof) ||
1240          SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1241     // Skip through the EOF at the end of an inclusion.
1242     if (Lexer.is(AsmToken::Eof))
1243       Lex();
1244 
1245     ParseStatementInfo Info(&AsmStrRewrites);
1246     bool Parsed = parseStatement(Info, nullptr);
1247 
1248     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1249     // for printing ErrMsg via Lex() only if no (presumably better) parser error
1250     // exists.
1251     if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1252       Lex();
1253     }
1254 
1255     // parseStatement returned true so may need to emit an error.
1256     printPendingErrors();
1257 
1258     // Skipping to the next line if needed.
1259     if (Parsed && !getLexer().isAtStartOfStatement())
1260       eatToEndOfStatement();
1261   }
1262 
1263   getTargetParser().onEndOfFile();
1264   printPendingErrors();
1265 
1266   // All errors should have been emitted.
1267   assert(!hasPendingError() && "unexpected error from parseStatement");
1268 
1269   getTargetParser().flushPendingInstructions(getStreamer());
1270 
1271   if (TheCondState.TheCond != StartingCondState.TheCond ||
1272       TheCondState.Ignore != StartingCondState.Ignore)
1273     printError(getTok().getLoc(), "unmatched .ifs or .elses");
1274   // Check to see there are no empty DwarfFile slots.
1275   const auto &LineTables = getContext().getMCDwarfLineTables();
1276   if (!LineTables.empty()) {
1277     unsigned Index = 0;
1278     for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1279       if (File.Name.empty() && Index != 0)
1280         printError(getTok().getLoc(), "unassigned file number: " +
1281                                           Twine(Index) +
1282                                           " for .file directives");
1283       ++Index;
1284     }
1285   }
1286 
1287   // Check to see that all assembler local symbols were actually defined.
1288   // Targets that don't do subsections via symbols may not want this, though,
1289   // so conservatively exclude them. Only do this if we're finalizing, though,
1290   // as otherwise we won't necessarilly have seen everything yet.
1291   if (!NoFinalize) {
1292     if (MAI.hasSubsectionsViaSymbols()) {
1293       for (const auto &TableEntry : getContext().getSymbols()) {
1294         MCSymbol *Sym = TableEntry.getValue();
1295         // Variable symbols may not be marked as defined, so check those
1296         // explicitly. If we know it's a variable, we have a definition for
1297         // the purposes of this check.
1298         if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1299           // FIXME: We would really like to refer back to where the symbol was
1300           // first referenced for a source location. We need to add something
1301           // to track that. Currently, we just point to the end of the file.
1302           printError(getTok().getLoc(), "assembler local symbol '" +
1303                                             Sym->getName() + "' not defined");
1304       }
1305     }
1306 
1307     // Temporary symbols like the ones for directional jumps don't go in the
1308     // symbol table. They also need to be diagnosed in all (final) cases.
1309     for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1310       if (std::get<2>(LocSym)->isUndefined()) {
1311         // Reset the state of any "# line file" directives we've seen to the
1312         // context as it was at the diagnostic site.
1313         CppHashInfo = std::get<1>(LocSym);
1314         printError(std::get<0>(LocSym), "directional label undefined");
1315       }
1316     }
1317   }
1318 
1319   // Finalize the output stream if there are no errors and if the client wants
1320   // us to.
1321   if (!HadError && !NoFinalize)
1322     Out.Finish(Lexer.getLoc());
1323 
1324   return HadError || getContext().hadError();
1325 }
1326 
checkForValidSection()1327 bool MasmParser::checkForValidSection() {
1328   if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1329     Out.InitSections(false);
1330     return Error(getTok().getLoc(),
1331                  "expected section directive before assembly directive");
1332   }
1333   return false;
1334 }
1335 
1336 /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1337 void MasmParser::eatToEndOfStatement() {
1338   while (Lexer.isNot(AsmToken::EndOfStatement)) {
1339     if (Lexer.is(AsmToken::Eof)) {
1340       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1341       if (ParentIncludeLoc == SMLoc()) {
1342         break;
1343       }
1344 
1345       EndStatementAtEOFStack.pop_back();
1346       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1347     }
1348 
1349     Lexer.Lex();
1350   }
1351 
1352   // Eat EOL.
1353   if (Lexer.is(AsmToken::EndOfStatement))
1354     Lexer.Lex();
1355 }
1356 
1357 SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1358 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1359   SmallVector<StringRef, 1> Refs;
1360   const char *Start = getTok().getLoc().getPointer();
1361   while (Lexer.isNot(EndTok)) {
1362     if (Lexer.is(AsmToken::Eof)) {
1363       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1364       if (ParentIncludeLoc == SMLoc()) {
1365         break;
1366       }
1367       Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1368 
1369       EndStatementAtEOFStack.pop_back();
1370       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1371       Lexer.Lex();
1372       Start = getTok().getLoc().getPointer();
1373     } else {
1374       Lexer.Lex();
1375     }
1376   }
1377   Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1378   return Refs;
1379 }
1380 
parseStringTo(AsmToken::TokenKind EndTok)1381 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1382   SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1383   std::string Str;
1384   for (StringRef S : Refs) {
1385     Str.append(S.str());
1386   }
1387   return Str;
1388 }
1389 
parseStringToEndOfStatement()1390 StringRef MasmParser::parseStringToEndOfStatement() {
1391   const char *Start = getTok().getLoc().getPointer();
1392 
1393   while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1394     Lexer.Lex();
1395 
1396   const char *End = getTok().getLoc().getPointer();
1397   return StringRef(Start, End - Start);
1398 }
1399 
1400 /// Parse a paren expression and return it.
1401 /// NOTE: This assumes the leading '(' has already been consumed.
1402 ///
1403 /// parenexpr ::= expr)
1404 ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1405 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1406   if (parseExpression(Res))
1407     return true;
1408   if (Lexer.isNot(AsmToken::RParen))
1409     return TokError("expected ')' in parentheses expression");
1410   EndLoc = Lexer.getTok().getEndLoc();
1411   Lex();
1412   return false;
1413 }
1414 
1415 /// Parse a bracket expression and return it.
1416 /// NOTE: This assumes the leading '[' has already been consumed.
1417 ///
1418 /// bracketexpr ::= expr]
1419 ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1420 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1421   if (parseExpression(Res))
1422     return true;
1423   EndLoc = getTok().getEndLoc();
1424   if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1425     return true;
1426   return false;
1427 }
1428 
1429 /// Parse a primary expression and return it.
1430 ///  primaryexpr ::= (parenexpr
1431 ///  primaryexpr ::= symbol
1432 ///  primaryexpr ::= number
1433 ///  primaryexpr ::= '.'
1434 ///  primaryexpr ::= ~,+,-,'not' primaryexpr
1435 ///  primaryexpr ::= string
1436 ///          (a string is interpreted as a 64-bit number in big-endian base-256)
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc,AsmTypeInfo * TypeInfo)1437 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1438                                   AsmTypeInfo *TypeInfo) {
1439   SMLoc FirstTokenLoc = getLexer().getLoc();
1440   AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1441   switch (FirstTokenKind) {
1442   default:
1443     return TokError("unknown token in expression");
1444   // If we have an error assume that we've already handled it.
1445   case AsmToken::Error:
1446     return true;
1447   case AsmToken::Exclaim:
1448     Lex(); // Eat the operator.
1449     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1450       return true;
1451     Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1452     return false;
1453   case AsmToken::Dollar:
1454   case AsmToken::At:
1455   case AsmToken::Identifier: {
1456     StringRef Identifier;
1457     if (parseIdentifier(Identifier)) {
1458       // We may have failed but $ may be a valid token.
1459       if (getTok().is(AsmToken::Dollar)) {
1460         if (Lexer.getMAI().getDollarIsPC()) {
1461           Lex();
1462           // This is a '$' reference, which references the current PC.  Emit a
1463           // temporary label to the streamer and refer to it.
1464           MCSymbol *Sym = Ctx.createTempSymbol();
1465           Out.emitLabel(Sym);
1466           Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1467                                         getContext());
1468           EndLoc = FirstTokenLoc;
1469           return false;
1470         }
1471         return Error(FirstTokenLoc, "invalid token in expression");
1472       }
1473     }
1474     // Parse named bitwise negation.
1475     if (Identifier.equals_lower("not")) {
1476       if (parsePrimaryExpr(Res, EndLoc, nullptr))
1477         return true;
1478       Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1479       return false;
1480     }
1481     // Parse symbol variant.
1482     std::pair<StringRef, StringRef> Split;
1483     if (!MAI.useParensForSymbolVariant()) {
1484       if (FirstTokenKind == AsmToken::String) {
1485         if (Lexer.is(AsmToken::At)) {
1486           Lex(); // eat @
1487           SMLoc AtLoc = getLexer().getLoc();
1488           StringRef VName;
1489           if (parseIdentifier(VName))
1490             return Error(AtLoc, "expected symbol variant after '@'");
1491 
1492           Split = std::make_pair(Identifier, VName);
1493         }
1494       } else {
1495         Split = Identifier.split('@');
1496       }
1497     } else if (Lexer.is(AsmToken::LParen)) {
1498       Lex(); // eat '('.
1499       StringRef VName;
1500       parseIdentifier(VName);
1501       // eat ')'.
1502       if (parseToken(AsmToken::RParen,
1503                      "unexpected token in variant, expected ')'"))
1504         return true;
1505       Split = std::make_pair(Identifier, VName);
1506     }
1507 
1508     EndLoc = SMLoc::getFromPointer(Identifier.end());
1509 
1510     // This is a symbol reference.
1511     StringRef SymbolName = Identifier;
1512     if (SymbolName.empty())
1513       return Error(getLexer().getLoc(), "expected a symbol reference");
1514 
1515     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1516 
1517     // Look up the symbol variant if used.
1518     if (!Split.second.empty()) {
1519       Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1520       if (Variant != MCSymbolRefExpr::VK_Invalid) {
1521         SymbolName = Split.first;
1522       } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1523         Variant = MCSymbolRefExpr::VK_None;
1524       } else {
1525         return Error(SMLoc::getFromPointer(Split.second.begin()),
1526                      "invalid variant '" + Split.second + "'");
1527       }
1528     }
1529 
1530     // Find the field offset if used.
1531     AsmFieldInfo Info;
1532     Split = SymbolName.split('.');
1533     if (Split.second.empty()) {
1534     } else {
1535       SymbolName = Split.first;
1536       if (lookUpField(SymbolName, Split.second, Info)) {
1537         std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1538         StringRef Base = BaseMember.first, Member = BaseMember.second;
1539         lookUpField(Base, Member, Info);
1540       } else if (Structs.count(SymbolName.lower())) {
1541         // This is actually a reference to a field offset.
1542         Res = MCConstantExpr::create(Info.Offset, getContext());
1543         return false;
1544       }
1545     }
1546 
1547     MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1548     if (!Sym)
1549       Sym = getContext().getOrCreateSymbol(SymbolName);
1550 
1551     // If this is an absolute variable reference, substitute it now to preserve
1552     // semantics in the face of reassignment.
1553     if (Sym->isVariable()) {
1554       auto V = Sym->getVariableValue(/*SetUsed*/ false);
1555       bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1556       if (auto TV = dyn_cast<MCTargetExpr>(V))
1557         DoInline = TV->inlineAssignedExpr();
1558       if (DoInline) {
1559         if (Variant)
1560           return Error(EndLoc, "unexpected modifier on variable reference");
1561         Res = Sym->getVariableValue(/*SetUsed*/ false);
1562         return false;
1563       }
1564     }
1565 
1566     // Otherwise create a symbol ref.
1567     const MCExpr *SymRef =
1568         MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1569     if (Info.Offset) {
1570       Res = MCBinaryExpr::create(
1571           MCBinaryExpr::Add, SymRef,
1572           MCConstantExpr::create(Info.Offset, getContext()), getContext());
1573     } else {
1574       Res = SymRef;
1575     }
1576     if (TypeInfo) {
1577       if (Info.Type.Name.empty()) {
1578         auto TypeIt = KnownType.find(Identifier.lower());
1579         if (TypeIt != KnownType.end()) {
1580           Info.Type = TypeIt->second;
1581         }
1582       }
1583 
1584       *TypeInfo = Info.Type;
1585     }
1586     return false;
1587   }
1588   case AsmToken::BigNum:
1589     return TokError("literal value out of range for directive");
1590   case AsmToken::Integer: {
1591     SMLoc Loc = getTok().getLoc();
1592     int64_t IntVal = getTok().getIntVal();
1593     Res = MCConstantExpr::create(IntVal, getContext());
1594     EndLoc = Lexer.getTok().getEndLoc();
1595     Lex(); // Eat token.
1596     // Look for 'b' or 'f' following an Integer as a directional label.
1597     if (Lexer.getKind() == AsmToken::Identifier) {
1598       StringRef IDVal = getTok().getString();
1599       // Look up the symbol variant if used.
1600       std::pair<StringRef, StringRef> Split = IDVal.split('@');
1601       MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1602       if (Split.first.size() != IDVal.size()) {
1603         Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1604         if (Variant == MCSymbolRefExpr::VK_Invalid)
1605           return TokError("invalid variant '" + Split.second + "'");
1606         IDVal = Split.first;
1607       }
1608       if (IDVal == "f" || IDVal == "b") {
1609         MCSymbol *Sym =
1610             Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
1611         Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
1612         if (IDVal == "b" && Sym->isUndefined())
1613           return Error(Loc, "directional label undefined");
1614         DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym));
1615         EndLoc = Lexer.getTok().getEndLoc();
1616         Lex(); // Eat identifier.
1617       }
1618     }
1619     return false;
1620   }
1621   case AsmToken::String: {
1622     // MASM strings (used as constants) are interpreted as big-endian base-256.
1623     SMLoc ValueLoc = getTok().getLoc();
1624     std::string Value;
1625     if (parseEscapedString(Value))
1626       return true;
1627     if (Value.size() > 8)
1628       return Error(ValueLoc, "literal value out of range");
1629     uint64_t IntValue = 0;
1630     for (const unsigned char CharVal : Value)
1631       IntValue = (IntValue << 8) | CharVal;
1632     Res = MCConstantExpr::create(IntValue, getContext());
1633     return false;
1634   }
1635   case AsmToken::Real: {
1636     APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1637     uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1638     Res = MCConstantExpr::create(IntVal, getContext());
1639     EndLoc = Lexer.getTok().getEndLoc();
1640     Lex(); // Eat token.
1641     return false;
1642   }
1643   case AsmToken::Dot: {
1644     // This is a '.' reference, which references the current PC.  Emit a
1645     // temporary label to the streamer and refer to it.
1646     MCSymbol *Sym = Ctx.createTempSymbol();
1647     Out.emitLabel(Sym);
1648     Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1649     EndLoc = Lexer.getTok().getEndLoc();
1650     Lex(); // Eat identifier.
1651     return false;
1652   }
1653   case AsmToken::LParen:
1654     Lex(); // Eat the '('.
1655     return parseParenExpr(Res, EndLoc);
1656   case AsmToken::LBrac:
1657     if (!PlatformParser->HasBracketExpressions())
1658       return TokError("brackets expression not supported on this target");
1659     Lex(); // Eat the '['.
1660     return parseBracketExpr(Res, EndLoc);
1661   case AsmToken::Minus:
1662     Lex(); // Eat the operator.
1663     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1664       return true;
1665     Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1666     return false;
1667   case AsmToken::Plus:
1668     Lex(); // Eat the operator.
1669     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1670       return true;
1671     Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1672     return false;
1673   case AsmToken::Tilde:
1674     Lex(); // Eat the operator.
1675     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1676       return true;
1677     Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1678     return false;
1679   // MIPS unary expression operators. The lexer won't generate these tokens if
1680   // MCAsmInfo::HasMipsExpressions is false for the target.
1681   case AsmToken::PercentCall16:
1682   case AsmToken::PercentCall_Hi:
1683   case AsmToken::PercentCall_Lo:
1684   case AsmToken::PercentDtprel_Hi:
1685   case AsmToken::PercentDtprel_Lo:
1686   case AsmToken::PercentGot:
1687   case AsmToken::PercentGot_Disp:
1688   case AsmToken::PercentGot_Hi:
1689   case AsmToken::PercentGot_Lo:
1690   case AsmToken::PercentGot_Ofst:
1691   case AsmToken::PercentGot_Page:
1692   case AsmToken::PercentGottprel:
1693   case AsmToken::PercentGp_Rel:
1694   case AsmToken::PercentHi:
1695   case AsmToken::PercentHigher:
1696   case AsmToken::PercentHighest:
1697   case AsmToken::PercentLo:
1698   case AsmToken::PercentNeg:
1699   case AsmToken::PercentPcrel_Hi:
1700   case AsmToken::PercentPcrel_Lo:
1701   case AsmToken::PercentTlsgd:
1702   case AsmToken::PercentTlsldm:
1703   case AsmToken::PercentTprel_Hi:
1704   case AsmToken::PercentTprel_Lo:
1705     Lex(); // Eat the operator.
1706     if (Lexer.isNot(AsmToken::LParen))
1707       return TokError("expected '(' after operator");
1708     Lex(); // Eat the operator.
1709     if (parseExpression(Res, EndLoc))
1710       return true;
1711     if (Lexer.isNot(AsmToken::RParen))
1712       return TokError("expected ')'");
1713     Lex(); // Eat the operator.
1714     Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1715     return !Res;
1716   }
1717 }
1718 
parseExpression(const MCExpr * & Res)1719 bool MasmParser::parseExpression(const MCExpr *&Res) {
1720   SMLoc EndLoc;
1721   return parseExpression(Res, EndLoc);
1722 }
1723 
1724 /// This function checks if the next token is <string> type or arithmetic.
1725 /// string that begin with character '<' must end with character '>'.
1726 /// otherwise it is arithmetics.
1727 /// If the function returns a 'true' value,
1728 /// the End argument will be filled with the last location pointed to the '>'
1729 /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1730 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1731   assert((StrLoc.getPointer() != nullptr) &&
1732          "Argument to the function cannot be a NULL value");
1733   const char *CharPtr = StrLoc.getPointer();
1734   while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1735          (*CharPtr != '\0')) {
1736     if (*CharPtr == '!')
1737       CharPtr++;
1738     CharPtr++;
1739   }
1740   if (*CharPtr == '>') {
1741     EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1742     return true;
1743   }
1744   return false;
1745 }
1746 
1747 /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1748 static std::string angleBracketString(StringRef BracketContents) {
1749   std::string Res;
1750   for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1751     if (BracketContents[Pos] == '!')
1752       Pos++;
1753     Res += BracketContents[Pos];
1754   }
1755   return Res;
1756 }
1757 
1758 /// Parse an expression and return it.
1759 ///
1760 ///  expr ::= expr &&,|| expr               -> lowest.
1761 ///  expr ::= expr |,^,&,! expr
1762 ///  expr ::= expr ==,!=,<>,<,<=,>,>= expr
1763 ///  expr ::= expr <<,>> expr
1764 ///  expr ::= expr +,- expr
1765 ///  expr ::= expr *,/,% expr               -> highest.
1766 ///  expr ::= primaryexpr
1767 ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1768 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1769   // Parse the expression.
1770   Res = nullptr;
1771   if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1772       parseBinOpRHS(1, Res, EndLoc))
1773     return true;
1774 
1775   // Try to constant fold it up front, if possible. Do not exploit
1776   // assembler here.
1777   int64_t Value;
1778   if (Res->evaluateAsAbsolute(Value))
1779     Res = MCConstantExpr::create(Value, getContext());
1780 
1781   return false;
1782 }
1783 
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1784 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1785   Res = nullptr;
1786   return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1787 }
1788 
parseParenExprOfDepth(unsigned ParenDepth,const MCExpr * & Res,SMLoc & EndLoc)1789 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1790                                        SMLoc &EndLoc) {
1791   if (parseParenExpr(Res, EndLoc))
1792     return true;
1793 
1794   for (; ParenDepth > 0; --ParenDepth) {
1795     if (parseBinOpRHS(1, Res, EndLoc))
1796       return true;
1797 
1798     // We don't Lex() the last RParen.
1799     // This is the same behavior as parseParenExpression().
1800     if (ParenDepth - 1 > 0) {
1801       EndLoc = getTok().getEndLoc();
1802       if (parseToken(AsmToken::RParen,
1803                      "expected ')' in parentheses expression"))
1804         return true;
1805     }
1806   }
1807   return false;
1808 }
1809 
parseAbsoluteExpression(int64_t & Res)1810 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1811   const MCExpr *Expr;
1812 
1813   SMLoc StartLoc = Lexer.getLoc();
1814   if (parseExpression(Expr))
1815     return true;
1816 
1817   if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1818     return Error(StartLoc, "expected absolute expression");
1819 
1820   return false;
1821 }
1822 
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1823 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1824                                       MCBinaryExpr::Opcode &Kind,
1825                                       bool ShouldUseLogicalShr,
1826                                       bool EndExpressionAtGreater) {
1827   switch (K) {
1828   default:
1829     return 0; // not a binop.
1830 
1831   // Lowest Precedence: &&, ||
1832   case AsmToken::AmpAmp:
1833     Kind = MCBinaryExpr::LAnd;
1834     return 2;
1835   case AsmToken::PipePipe:
1836     Kind = MCBinaryExpr::LOr;
1837     return 1;
1838 
1839   // Low Precedence: ==, !=, <>, <, <=, >, >=
1840   case AsmToken::EqualEqual:
1841     Kind = MCBinaryExpr::EQ;
1842     return 3;
1843   case AsmToken::ExclaimEqual:
1844   case AsmToken::LessGreater:
1845     Kind = MCBinaryExpr::NE;
1846     return 3;
1847   case AsmToken::Less:
1848     Kind = MCBinaryExpr::LT;
1849     return 3;
1850   case AsmToken::LessEqual:
1851     Kind = MCBinaryExpr::LTE;
1852     return 3;
1853   case AsmToken::Greater:
1854     if (EndExpressionAtGreater)
1855       return 0;
1856     Kind = MCBinaryExpr::GT;
1857     return 3;
1858   case AsmToken::GreaterEqual:
1859     Kind = MCBinaryExpr::GTE;
1860     return 3;
1861 
1862   // Low Intermediate Precedence: +, -
1863   case AsmToken::Plus:
1864     Kind = MCBinaryExpr::Add;
1865     return 4;
1866   case AsmToken::Minus:
1867     Kind = MCBinaryExpr::Sub;
1868     return 4;
1869 
1870   // High Intermediate Precedence: |, &, ^
1871   case AsmToken::Pipe:
1872     Kind = MCBinaryExpr::Or;
1873     return 5;
1874   case AsmToken::Caret:
1875     Kind = MCBinaryExpr::Xor;
1876     return 5;
1877   case AsmToken::Amp:
1878     Kind = MCBinaryExpr::And;
1879     return 5;
1880 
1881   // Highest Precedence: *, /, %, <<, >>
1882   case AsmToken::Star:
1883     Kind = MCBinaryExpr::Mul;
1884     return 6;
1885   case AsmToken::Slash:
1886     Kind = MCBinaryExpr::Div;
1887     return 6;
1888   case AsmToken::Percent:
1889     Kind = MCBinaryExpr::Mod;
1890     return 6;
1891   case AsmToken::LessLess:
1892     Kind = MCBinaryExpr::Shl;
1893     return 6;
1894   case AsmToken::GreaterGreater:
1895     if (EndExpressionAtGreater)
1896       return 0;
1897     Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
1898     return 6;
1899   }
1900 }
1901 
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)1902 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
1903                                         MCBinaryExpr::Opcode &Kind) {
1904   bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
1905   return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
1906                                AngleBracketDepth > 0);
1907 }
1908 
1909 /// Parse all binary operators with precedence >= 'Precedence'.
1910 /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)1911 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
1912                                SMLoc &EndLoc) {
1913   SMLoc StartLoc = Lexer.getLoc();
1914   while (true) {
1915     AsmToken::TokenKind TokKind = Lexer.getKind();
1916     if (Lexer.getKind() == AsmToken::Identifier) {
1917       TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
1918                     .CaseLower("and", AsmToken::Amp)
1919                     .CaseLower("not", AsmToken::Exclaim)
1920                     .CaseLower("or", AsmToken::Pipe)
1921                     .CaseLower("eq", AsmToken::EqualEqual)
1922                     .CaseLower("ne", AsmToken::ExclaimEqual)
1923                     .CaseLower("lt", AsmToken::Less)
1924                     .CaseLower("le", AsmToken::LessEqual)
1925                     .CaseLower("gt", AsmToken::Greater)
1926                     .CaseLower("ge", AsmToken::GreaterEqual)
1927                     .Default(TokKind);
1928     }
1929     MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
1930     unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
1931 
1932     // If the next token is lower precedence than we are allowed to eat, return
1933     // successfully with what we ate already.
1934     if (TokPrec < Precedence)
1935       return false;
1936 
1937     Lex();
1938 
1939     // Eat the next primary expression.
1940     const MCExpr *RHS;
1941     if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
1942       return true;
1943 
1944     // If BinOp binds less tightly with RHS than the operator after RHS, let
1945     // the pending operator take RHS as its LHS.
1946     MCBinaryExpr::Opcode Dummy;
1947     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
1948     if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
1949       return true;
1950 
1951     // Merge LHS and RHS according to operator.
1952     Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
1953   }
1954 }
1955 
1956 /// ParseStatement:
1957 ///   ::= % statement
1958 ///   ::= EndOfStatement
1959 ///   ::= Label* Directive ...Operands... EndOfStatement
1960 ///   ::= Label* Identifier OperandList* EndOfStatement
parseStatement(ParseStatementInfo & Info,MCAsmParserSemaCallback * SI)1961 bool MasmParser::parseStatement(ParseStatementInfo &Info,
1962                                 MCAsmParserSemaCallback *SI) {
1963   assert(!hasPendingError() && "parseStatement started with pending error");
1964   // Eat initial spaces and comments.
1965   while (Lexer.is(AsmToken::Space))
1966     Lex();
1967   if (Lexer.is(AsmToken::EndOfStatement)) {
1968     // If this is a line comment we can drop it safely.
1969     if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
1970         getTok().getString().front() == '\n')
1971       Out.AddBlankLine();
1972     Lex();
1973     return false;
1974   }
1975 
1976   // If preceded by an expansion operator, first expand all text macros and
1977   // macro functions.
1978   if (getTok().is(AsmToken::Percent)) {
1979     SMLoc ExpansionLoc = getTok().getLoc();
1980     if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
1981       return true;
1982   }
1983 
1984   // Statements always start with an identifier, unless we're dealing with a
1985   // processor directive (.386, .686, etc.) that lexes as a real.
1986   AsmToken ID = getTok();
1987   SMLoc IDLoc = ID.getLoc();
1988   StringRef IDVal;
1989   int64_t LocalLabelVal = -1;
1990   if (Lexer.is(AsmToken::HashDirective))
1991     return parseCppHashLineFilenameComment(IDLoc);
1992   // Allow an integer followed by a ':' as a directional local label.
1993   if (Lexer.is(AsmToken::Integer)) {
1994     LocalLabelVal = getTok().getIntVal();
1995     if (LocalLabelVal < 0) {
1996       if (!TheCondState.Ignore) {
1997         Lex(); // always eat a token
1998         return Error(IDLoc, "unexpected token at start of statement");
1999       }
2000       IDVal = "";
2001     } else {
2002       IDVal = getTok().getString();
2003       Lex(); // Consume the integer token to be used as an identifier token.
2004       if (Lexer.getKind() != AsmToken::Colon) {
2005         if (!TheCondState.Ignore) {
2006           Lex(); // always eat a token
2007           return Error(IDLoc, "unexpected token at start of statement");
2008         }
2009       }
2010     }
2011   } else if (Lexer.is(AsmToken::Dot)) {
2012     // Treat '.' as a valid identifier in this context.
2013     Lex();
2014     IDVal = ".";
2015   } else if (Lexer.is(AsmToken::LCurly)) {
2016     // Treat '{' as a valid identifier in this context.
2017     Lex();
2018     IDVal = "{";
2019 
2020   } else if (Lexer.is(AsmToken::RCurly)) {
2021     // Treat '}' as a valid identifier in this context.
2022     Lex();
2023     IDVal = "}";
2024   } else if (Lexer.is(AsmToken::Star) &&
2025              getTargetParser().starIsStartOfStatement()) {
2026     // Accept '*' as a valid start of statement.
2027     Lex();
2028     IDVal = "*";
2029   } else if (Lexer.is(AsmToken::Real)) {
2030     // Treat ".<number>" as a valid identifier in this context.
2031     IDVal = getTok().getString();
2032     Lex(); // always eat a token
2033     if (!IDVal.startswith("."))
2034       return Error(IDLoc, "unexpected token at start of statement");
2035   } else if (Lexer.is(AsmToken::Identifier) &&
2036              getTok().getString().equals_lower("echo")) {
2037     // Intercept echo early to avoid lexical substitution in its message, and
2038     // delegate all handling to the appropriate function.
2039     return parseDirectiveEcho();
2040   } else if (parseIdentifier(IDVal)) {
2041     if (!TheCondState.Ignore) {
2042       Lex(); // always eat a token
2043       return Error(IDLoc, "unexpected token at start of statement");
2044     }
2045     IDVal = "";
2046   }
2047 
2048   // Handle conditional assembly here before checking for skipping.  We
2049   // have to do this so that .endif isn't skipped in a ".if 0" block for
2050   // example.
2051   StringMap<DirectiveKind>::const_iterator DirKindIt =
2052       DirectiveKindMap.find(IDVal.lower());
2053   DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2054                               ? DK_NO_DIRECTIVE
2055                               : DirKindIt->getValue();
2056   switch (DirKind) {
2057   default:
2058     break;
2059   case DK_IF:
2060   case DK_IFE:
2061     return parseDirectiveIf(IDLoc, DirKind);
2062   case DK_IFB:
2063     return parseDirectiveIfb(IDLoc, true);
2064   case DK_IFNB:
2065     return parseDirectiveIfb(IDLoc, false);
2066   case DK_IFDEF:
2067     return parseDirectiveIfdef(IDLoc, true);
2068   case DK_IFNDEF:
2069     return parseDirectiveIfdef(IDLoc, false);
2070   case DK_IFDIF:
2071     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2072                                /*CaseInsensitive=*/false);
2073   case DK_IFDIFI:
2074     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2075                                /*CaseInsensitive=*/true);
2076   case DK_IFIDN:
2077     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2078                                /*CaseInsensitive=*/false);
2079   case DK_IFIDNI:
2080     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2081                                /*CaseInsensitive=*/true);
2082   case DK_ELSEIF:
2083   case DK_ELSEIFE:
2084     return parseDirectiveElseIf(IDLoc, DirKind);
2085   case DK_ELSEIFB:
2086     return parseDirectiveElseIfb(IDLoc, true);
2087   case DK_ELSEIFNB:
2088     return parseDirectiveElseIfb(IDLoc, false);
2089   case DK_ELSEIFDEF:
2090     return parseDirectiveElseIfdef(IDLoc, true);
2091   case DK_ELSEIFNDEF:
2092     return parseDirectiveElseIfdef(IDLoc, false);
2093   case DK_ELSEIFDIF:
2094     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2095                                    /*CaseInsensitive=*/false);
2096   case DK_ELSEIFDIFI:
2097     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2098                                    /*CaseInsensitive=*/true);
2099   case DK_ELSEIFIDN:
2100     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2101                                    /*CaseInsensitive=*/false);
2102   case DK_ELSEIFIDNI:
2103     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2104                                    /*CaseInsensitive=*/true);
2105   case DK_ELSE:
2106     return parseDirectiveElse(IDLoc);
2107   case DK_ENDIF:
2108     return parseDirectiveEndIf(IDLoc);
2109   }
2110 
2111   // Ignore the statement if in the middle of inactive conditional
2112   // (e.g. ".if 0").
2113   if (TheCondState.Ignore) {
2114     eatToEndOfStatement();
2115     return false;
2116   }
2117 
2118   // FIXME: Recurse on local labels?
2119 
2120   // See what kind of statement we have.
2121   switch (Lexer.getKind()) {
2122   case AsmToken::Colon: {
2123     if (!getTargetParser().isLabel(ID))
2124       break;
2125     if (checkForValidSection())
2126       return true;
2127 
2128     // identifier ':'   -> Label.
2129     Lex();
2130 
2131     // Diagnose attempt to use '.' as a label.
2132     if (IDVal == ".")
2133       return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2134 
2135     // Diagnose attempt to use a variable as a label.
2136     //
2137     // FIXME: Diagnostics. Note the location of the definition as a label.
2138     // FIXME: This doesn't diagnose assignment to a symbol which has been
2139     // implicitly marked as external.
2140     MCSymbol *Sym;
2141     if (LocalLabelVal == -1) {
2142       if (ParsingMSInlineAsm && SI) {
2143         StringRef RewrittenLabel =
2144             SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2145         assert(!RewrittenLabel.empty() &&
2146                "We should have an internal name here.");
2147         Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2148                                        RewrittenLabel);
2149         IDVal = RewrittenLabel;
2150       }
2151       Sym = getContext().getOrCreateSymbol(IDVal);
2152     } else
2153       Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
2154     // End of Labels should be treated as end of line for lexing
2155     // purposes but that information is not available to the Lexer who
2156     // does not understand Labels. This may cause us to see a Hash
2157     // here instead of a preprocessor line comment.
2158     if (getTok().is(AsmToken::Hash)) {
2159       std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2160       Lexer.Lex();
2161       Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2162     }
2163 
2164     // Consume any end of statement token, if present, to avoid spurious
2165     // AddBlankLine calls().
2166     if (getTok().is(AsmToken::EndOfStatement)) {
2167       Lex();
2168     }
2169 
2170     getTargetParser().doBeforeLabelEmit(Sym);
2171 
2172     // Emit the label.
2173     if (!getTargetParser().isParsingMSInlineAsm())
2174       Out.emitLabel(Sym, IDLoc);
2175 
2176     // If we are generating dwarf for assembly source files then gather the
2177     // info to make a dwarf label entry for this label if needed.
2178     if (enabledGenDwarfForAssembly())
2179       MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2180                                  IDLoc);
2181 
2182     getTargetParser().onLabelParsed(Sym);
2183 
2184     return false;
2185   }
2186 
2187   default: // Normal instruction or directive.
2188     break;
2189   }
2190 
2191   // If macros are enabled, check to see if this is a macro instantiation.
2192   if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2193     return handleMacroEntry(M, IDLoc);
2194   }
2195 
2196   // Otherwise, we have a normal instruction or directive.
2197 
2198   if (DirKind != DK_NO_DIRECTIVE) {
2199     // There are several entities interested in parsing directives:
2200     //
2201     // 1. Asm parser extensions. For example, platform-specific parsers
2202     //    (like the ELF parser) register themselves as extensions.
2203     // 2. The target-specific assembly parser. Some directives are target
2204     //    specific or may potentially behave differently on certain targets.
2205     // 3. The generic directive parser implemented by this class. These are
2206     //    all the directives that behave in a target and platform independent
2207     //    manner, or at least have a default behavior that's shared between
2208     //    all targets and platforms.
2209 
2210     getTargetParser().flushPendingInstructions(getStreamer());
2211 
2212     // Special-case handling of structure-end directives at higher priority,
2213     // since ENDS is overloaded as a segment-end directive.
2214     if (IDVal.equals_lower("ends") && StructInProgress.size() > 1 &&
2215         getTok().is(AsmToken::EndOfStatement)) {
2216       return parseDirectiveNestedEnds();
2217     }
2218 
2219     // First, check the extension directive map to see if any extension has
2220     // registered itself to parse this directive.
2221     std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2222         ExtensionDirectiveMap.lookup(IDVal.lower());
2223     if (Handler.first)
2224       return (*Handler.second)(Handler.first, IDVal, IDLoc);
2225 
2226     // Next, let the target-specific assembly parser try.
2227     SMLoc StartTokLoc = getTok().getLoc();
2228     bool TPDirectiveReturn =
2229         ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2230 
2231     if (hasPendingError())
2232       return true;
2233     // Currently the return value should be true if we are
2234     // uninterested but as this is at odds with the standard parsing
2235     // convention (return true = error) we have instances of a parsed
2236     // directive that fails returning true as an error. Catch these
2237     // cases as best as possible errors here.
2238     if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2239       return true;
2240     // Return if we did some parsing or believe we succeeded.
2241     if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2242       return false;
2243 
2244     // Finally, if no one else is interested in this directive, it must be
2245     // generic and familiar to this class.
2246     switch (DirKind) {
2247     default:
2248       break;
2249     case DK_ASCII:
2250       return parseDirectiveAscii(IDVal, false);
2251     case DK_ASCIZ:
2252     case DK_STRING:
2253       return parseDirectiveAscii(IDVal, true);
2254     case DK_BYTE:
2255     case DK_SBYTE:
2256     case DK_DB:
2257       return parseDirectiveValue(IDVal, 1);
2258     case DK_WORD:
2259     case DK_SWORD:
2260     case DK_DW:
2261       return parseDirectiveValue(IDVal, 2);
2262     case DK_DWORD:
2263     case DK_SDWORD:
2264     case DK_DD:
2265       return parseDirectiveValue(IDVal, 4);
2266     case DK_FWORD:
2267     case DK_DF:
2268       return parseDirectiveValue(IDVal, 6);
2269     case DK_QWORD:
2270     case DK_SQWORD:
2271     case DK_DQ:
2272       return parseDirectiveValue(IDVal, 8);
2273     case DK_REAL4:
2274       return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2275     case DK_REAL8:
2276       return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2277     case DK_REAL10:
2278       return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2279     case DK_STRUCT:
2280     case DK_UNION:
2281       return parseDirectiveNestedStruct(IDVal, DirKind);
2282     case DK_ENDS:
2283       return parseDirectiveNestedEnds();
2284     case DK_ALIGN:
2285       return parseDirectiveAlign();
2286     case DK_ORG:
2287       return parseDirectiveOrg();
2288     case DK_EXTERN:
2289       eatToEndOfStatement(); // .extern is the default, ignore it.
2290       return false;
2291     case DK_PUBLIC:
2292       return parseDirectiveSymbolAttribute(MCSA_Global);
2293     case DK_COMM:
2294       return parseDirectiveComm(/*IsLocal=*/false);
2295     case DK_COMMENT:
2296       return parseDirectiveComment(IDLoc);
2297     case DK_INCLUDE:
2298       return parseDirectiveInclude();
2299     case DK_REPEAT:
2300       return parseDirectiveRepeat(IDLoc, IDVal);
2301     case DK_WHILE:
2302       return parseDirectiveWhile(IDLoc);
2303     case DK_FOR:
2304       return parseDirectiveFor(IDLoc, IDVal);
2305     case DK_FORC:
2306       return parseDirectiveForc(IDLoc, IDVal);
2307     case DK_FILE:
2308       return parseDirectiveFile(IDLoc);
2309     case DK_LINE:
2310       return parseDirectiveLine();
2311     case DK_LOC:
2312       return parseDirectiveLoc();
2313     case DK_STABS:
2314       return parseDirectiveStabs();
2315     case DK_CV_FILE:
2316       return parseDirectiveCVFile();
2317     case DK_CV_FUNC_ID:
2318       return parseDirectiveCVFuncId();
2319     case DK_CV_INLINE_SITE_ID:
2320       return parseDirectiveCVInlineSiteId();
2321     case DK_CV_LOC:
2322       return parseDirectiveCVLoc();
2323     case DK_CV_LINETABLE:
2324       return parseDirectiveCVLinetable();
2325     case DK_CV_INLINE_LINETABLE:
2326       return parseDirectiveCVInlineLinetable();
2327     case DK_CV_DEF_RANGE:
2328       return parseDirectiveCVDefRange();
2329     case DK_CV_STRING:
2330       return parseDirectiveCVString();
2331     case DK_CV_STRINGTABLE:
2332       return parseDirectiveCVStringTable();
2333     case DK_CV_FILECHECKSUMS:
2334       return parseDirectiveCVFileChecksums();
2335     case DK_CV_FILECHECKSUM_OFFSET:
2336       return parseDirectiveCVFileChecksumOffset();
2337     case DK_CV_FPO_DATA:
2338       return parseDirectiveCVFPOData();
2339     case DK_CFI_SECTIONS:
2340       return parseDirectiveCFISections();
2341     case DK_CFI_STARTPROC:
2342       return parseDirectiveCFIStartProc();
2343     case DK_CFI_ENDPROC:
2344       return parseDirectiveCFIEndProc();
2345     case DK_CFI_DEF_CFA:
2346       return parseDirectiveCFIDefCfa(IDLoc);
2347     case DK_CFI_DEF_CFA_OFFSET:
2348       return parseDirectiveCFIDefCfaOffset();
2349     case DK_CFI_ADJUST_CFA_OFFSET:
2350       return parseDirectiveCFIAdjustCfaOffset();
2351     case DK_CFI_DEF_CFA_REGISTER:
2352       return parseDirectiveCFIDefCfaRegister(IDLoc);
2353     case DK_CFI_OFFSET:
2354       return parseDirectiveCFIOffset(IDLoc);
2355     case DK_CFI_REL_OFFSET:
2356       return parseDirectiveCFIRelOffset(IDLoc);
2357     case DK_CFI_PERSONALITY:
2358       return parseDirectiveCFIPersonalityOrLsda(true);
2359     case DK_CFI_LSDA:
2360       return parseDirectiveCFIPersonalityOrLsda(false);
2361     case DK_CFI_REMEMBER_STATE:
2362       return parseDirectiveCFIRememberState();
2363     case DK_CFI_RESTORE_STATE:
2364       return parseDirectiveCFIRestoreState();
2365     case DK_CFI_SAME_VALUE:
2366       return parseDirectiveCFISameValue(IDLoc);
2367     case DK_CFI_RESTORE:
2368       return parseDirectiveCFIRestore(IDLoc);
2369     case DK_CFI_ESCAPE:
2370       return parseDirectiveCFIEscape();
2371     case DK_CFI_RETURN_COLUMN:
2372       return parseDirectiveCFIReturnColumn(IDLoc);
2373     case DK_CFI_SIGNAL_FRAME:
2374       return parseDirectiveCFISignalFrame();
2375     case DK_CFI_UNDEFINED:
2376       return parseDirectiveCFIUndefined(IDLoc);
2377     case DK_CFI_REGISTER:
2378       return parseDirectiveCFIRegister(IDLoc);
2379     case DK_CFI_WINDOW_SAVE:
2380       return parseDirectiveCFIWindowSave();
2381     case DK_EXITM:
2382       Info.ExitValue = "";
2383       return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2384     case DK_ENDM:
2385       Info.ExitValue = "";
2386       return parseDirectiveEndMacro(IDVal);
2387     case DK_PURGE:
2388       return parseDirectivePurgeMacro(IDLoc);
2389     case DK_END:
2390       return parseDirectiveEnd(IDLoc);
2391     case DK_ERR:
2392       return parseDirectiveError(IDLoc);
2393     case DK_ERRB:
2394       return parseDirectiveErrorIfb(IDLoc, true);
2395     case DK_ERRNB:
2396       return parseDirectiveErrorIfb(IDLoc, false);
2397     case DK_ERRDEF:
2398       return parseDirectiveErrorIfdef(IDLoc, true);
2399     case DK_ERRNDEF:
2400       return parseDirectiveErrorIfdef(IDLoc, false);
2401     case DK_ERRDIF:
2402       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2403                                       /*CaseInsensitive=*/false);
2404     case DK_ERRDIFI:
2405       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2406                                       /*CaseInsensitive=*/true);
2407     case DK_ERRIDN:
2408       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2409                                       /*CaseInsensitive=*/false);
2410     case DK_ERRIDNI:
2411       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2412                                       /*CaseInsensitive=*/true);
2413     case DK_ERRE:
2414       return parseDirectiveErrorIfe(IDLoc, true);
2415     case DK_ERRNZ:
2416       return parseDirectiveErrorIfe(IDLoc, false);
2417     case DK_RADIX:
2418       return parseDirectiveRadix(IDLoc);
2419     }
2420 
2421     return Error(IDLoc, "unknown directive");
2422   }
2423 
2424   // We also check if this is allocating memory with user-defined type.
2425   auto IDIt = Structs.find(IDVal.lower());
2426   if (IDIt != Structs.end())
2427     return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2428                                      IDLoc);
2429 
2430   // Non-conditional Microsoft directives sometimes follow their first argument.
2431   const AsmToken nextTok = getTok();
2432   const StringRef nextVal = nextTok.getString();
2433   const SMLoc nextLoc = nextTok.getLoc();
2434 
2435   // There are several entities interested in parsing infix directives:
2436   //
2437   // 1. Asm parser extensions. For example, platform-specific parsers
2438   //    (like the ELF parser) register themselves as extensions.
2439   // 2. The generic directive parser implemented by this class. These are
2440   //    all the directives that behave in a target and platform independent
2441   //    manner, or at least have a default behavior that's shared between
2442   //    all targets and platforms.
2443 
2444   getTargetParser().flushPendingInstructions(getStreamer());
2445 
2446   // Special-case handling of structure-end directives at higher priority, since
2447   // ENDS is overloaded as a segment-end directive.
2448   if (nextVal.equals_lower("ends") && StructInProgress.size() == 1) {
2449     Lex();
2450     return parseDirectiveEnds(IDVal, IDLoc);
2451   }
2452 
2453   // First, check the extension directive map to see if any extension has
2454   // registered itself to parse this directive.
2455   std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2456       ExtensionDirectiveMap.lookup(nextVal.lower());
2457   if (Handler.first) {
2458     Lex();
2459     Lexer.UnLex(ID);
2460     return (*Handler.second)(Handler.first, nextVal, nextLoc);
2461   }
2462 
2463   // If no one else is interested in this directive, it must be
2464   // generic and familiar to this class.
2465   DirKindIt = DirectiveKindMap.find(nextVal.lower());
2466   DirKind = (DirKindIt == DirectiveKindMap.end())
2467                 ? DK_NO_DIRECTIVE
2468                 : DirKindIt->getValue();
2469   switch (DirKind) {
2470   default:
2471     break;
2472   case DK_ASSIGN:
2473   case DK_EQU:
2474   case DK_TEXTEQU:
2475     Lex();
2476     return parseDirectiveEquate(nextVal, IDVal, DirKind);
2477   case DK_BYTE:
2478   case DK_SBYTE:
2479   case DK_DB:
2480     Lex();
2481     return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2482   case DK_WORD:
2483   case DK_SWORD:
2484   case DK_DW:
2485     Lex();
2486     return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2487   case DK_DWORD:
2488   case DK_SDWORD:
2489   case DK_DD:
2490     Lex();
2491     return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2492   case DK_FWORD:
2493   case DK_DF:
2494     Lex();
2495     return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2496   case DK_QWORD:
2497   case DK_SQWORD:
2498   case DK_DQ:
2499     Lex();
2500     return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2501   case DK_REAL4:
2502     Lex();
2503     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2504                                         IDVal, IDLoc);
2505   case DK_REAL8:
2506     Lex();
2507     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2508                                         IDVal, IDLoc);
2509   case DK_REAL10:
2510     Lex();
2511     return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2512                                         10, IDVal, IDLoc);
2513   case DK_STRUCT:
2514   case DK_UNION:
2515     Lex();
2516     return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2517   case DK_ENDS:
2518     Lex();
2519     return parseDirectiveEnds(IDVal, IDLoc);
2520   case DK_MACRO:
2521     Lex();
2522     return parseDirectiveMacro(IDVal, IDLoc);
2523   }
2524 
2525   // Finally, we check if this is allocating a variable with user-defined type.
2526   auto NextIt = Structs.find(nextVal.lower());
2527   if (NextIt != Structs.end()) {
2528     Lex();
2529     return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2530                                           nextVal, nextLoc, IDVal);
2531   }
2532 
2533   // __asm _emit or __asm __emit
2534   if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2535                              IDVal == "_EMIT" || IDVal == "__EMIT"))
2536     return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2537 
2538   // __asm align
2539   if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2540     return parseDirectiveMSAlign(IDLoc, Info);
2541 
2542   if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2543     Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2544   if (checkForValidSection())
2545     return true;
2546 
2547   // Canonicalize the opcode to lower case.
2548   std::string OpcodeStr = IDVal.lower();
2549   ParseInstructionInfo IInfo(Info.AsmRewrites);
2550   bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2551                                                           Info.ParsedOperands);
2552   Info.ParseError = ParseHadError;
2553 
2554   // Dump the parsed representation, if requested.
2555   if (getShowParsedOperands()) {
2556     SmallString<256> Str;
2557     raw_svector_ostream OS(Str);
2558     OS << "parsed instruction: [";
2559     for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2560       if (i != 0)
2561         OS << ", ";
2562       Info.ParsedOperands[i]->print(OS);
2563     }
2564     OS << "]";
2565 
2566     printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2567   }
2568 
2569   // Fail even if ParseInstruction erroneously returns false.
2570   if (hasPendingError() || ParseHadError)
2571     return true;
2572 
2573   // If we are generating dwarf for the current section then generate a .loc
2574   // directive for the instruction.
2575   if (!ParseHadError && enabledGenDwarfForAssembly() &&
2576       getContext().getGenDwarfSectionSyms().count(
2577           getStreamer().getCurrentSectionOnly())) {
2578     unsigned Line;
2579     if (ActiveMacros.empty())
2580       Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2581     else
2582       Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2583                                    ActiveMacros.front()->ExitBuffer);
2584 
2585     // If we previously parsed a cpp hash file line comment then make sure the
2586     // current Dwarf File is for the CppHashFilename if not then emit the
2587     // Dwarf File table for it and adjust the line number for the .loc.
2588     if (!CppHashInfo.Filename.empty()) {
2589       unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2590           0, StringRef(), CppHashInfo.Filename);
2591       getContext().setGenDwarfFileNumber(FileNumber);
2592 
2593       unsigned CppHashLocLineNo =
2594         SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2595       Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2596     }
2597 
2598     getStreamer().emitDwarfLocDirective(
2599         getContext().getGenDwarfFileNumber(), Line, 0,
2600         DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2601         StringRef());
2602   }
2603 
2604   // If parsing succeeded, match the instruction.
2605   if (!ParseHadError) {
2606     uint64_t ErrorInfo;
2607     if (getTargetParser().MatchAndEmitInstruction(
2608             IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2609             getTargetParser().isParsingMSInlineAsm()))
2610       return true;
2611   }
2612   return false;
2613 }
2614 
2615 // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2616 bool MasmParser::parseCurlyBlockScope(
2617     SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2618   // Identify curly brace marking block start/end.
2619   if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2620     return false;
2621 
2622   SMLoc StartLoc = Lexer.getLoc();
2623   Lex(); // Eat the brace.
2624   if (Lexer.is(AsmToken::EndOfStatement))
2625     Lex(); // Eat EndOfStatement following the brace.
2626 
2627   // Erase the block start/end brace from the output asm string.
2628   AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2629                                                   StartLoc.getPointer());
2630   return true;
2631 }
2632 
2633 /// parseCppHashLineFilenameComment as this:
2634 ///   ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2635 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2636   Lex(); // Eat the hash token.
2637   // Lexer only ever emits HashDirective if it fully formed if it's
2638   // done the checking already so this is an internal error.
2639   assert(getTok().is(AsmToken::Integer) &&
2640          "Lexing Cpp line comment: Expected Integer");
2641   int64_t LineNumber = getTok().getIntVal();
2642   Lex();
2643   assert(getTok().is(AsmToken::String) &&
2644          "Lexing Cpp line comment: Expected String");
2645   StringRef Filename = getTok().getString();
2646   Lex();
2647 
2648   // Get rid of the enclosing quotes.
2649   Filename = Filename.substr(1, Filename.size() - 2);
2650 
2651   // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2652   // and possibly DWARF file info.
2653   CppHashInfo.Loc = L;
2654   CppHashInfo.Filename = Filename;
2655   CppHashInfo.LineNumber = LineNumber;
2656   CppHashInfo.Buf = CurBuffer;
2657   if (FirstCppHashFilename.empty())
2658     FirstCppHashFilename = Filename;
2659   return false;
2660 }
2661 
2662 /// will use the last parsed cpp hash line filename comment
2663 /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2664 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2665   const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2666   raw_ostream &OS = errs();
2667 
2668   const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2669   SMLoc DiagLoc = Diag.getLoc();
2670   unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2671   unsigned CppHashBuf =
2672       Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2673 
2674   // Like SourceMgr::printMessage() we need to print the include stack if any
2675   // before printing the message.
2676   unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2677   if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2678       DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2679     SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2680     DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2681   }
2682 
2683   // If we have not parsed a cpp hash line filename comment or the source
2684   // manager changed or buffer changed (like in a nested include) then just
2685   // print the normal diagnostic using its Filename and LineNo.
2686   if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2687       DiagBuf != CppHashBuf) {
2688     if (Parser->SavedDiagHandler)
2689       Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2690     else
2691       Diag.print(nullptr, OS);
2692     return;
2693   }
2694 
2695   // Use the CppHashFilename and calculate a line number based on the
2696   // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2697   // for the diagnostic.
2698   const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2699 
2700   int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2701   int CppHashLocLineNo =
2702       Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2703   int LineNo =
2704       Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2705 
2706   SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2707                        Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2708                        Diag.getLineContents(), Diag.getRanges());
2709 
2710   if (Parser->SavedDiagHandler)
2711     Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2712   else
2713     NewDiag.print(nullptr, OS);
2714 }
2715 
2716 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2717 // not accept '.'.
isMacroParameterChar(char C)2718 static bool isMacroParameterChar(char C) {
2719   return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2720 }
2721 
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2722 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2723                              ArrayRef<MCAsmMacroParameter> Parameters,
2724                              ArrayRef<MCAsmMacroArgument> A,
2725                              const std::vector<std::string> &Locals, SMLoc L) {
2726   unsigned NParameters = Parameters.size();
2727   if (NParameters != A.size())
2728     return Error(L, "Wrong number of arguments");
2729   StringMap<std::string> LocalSymbols;
2730   std::string Name;
2731   Name.reserve(6);
2732   for (StringRef Local : Locals) {
2733     raw_string_ostream LocalName(Name);
2734     LocalName << "??"
2735               << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2736     LocalSymbols.insert({Local, LocalName.str()});
2737     Name.clear();
2738   }
2739 
2740   Optional<char> CurrentQuote;
2741   while (!Body.empty()) {
2742     // Scan for the next substitution.
2743     std::size_t End = Body.size(), Pos = 0;
2744     std::size_t IdentifierPos = End;
2745     for (; Pos != End; ++Pos) {
2746       // Find the next possible macro parameter, including preceding a '&'
2747       // inside quotes.
2748       if (Body[Pos] == '&')
2749         break;
2750       if (isMacroParameterChar(Body[Pos])) {
2751         if (!CurrentQuote.hasValue())
2752           break;
2753         if (IdentifierPos == End)
2754           IdentifierPos = Pos;
2755       } else {
2756         IdentifierPos = End;
2757       }
2758 
2759       // Track quotation status
2760       if (!CurrentQuote.hasValue()) {
2761         if (Body[Pos] == '\'' || Body[Pos] == '"')
2762           CurrentQuote = Body[Pos];
2763       } else if (Body[Pos] == CurrentQuote) {
2764         if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2765           // Escaped quote, and quotes aren't identifier chars; skip
2766           ++Pos;
2767           continue;
2768         } else {
2769           CurrentQuote.reset();
2770         }
2771       }
2772     }
2773     if (IdentifierPos != End) {
2774       // We've recognized an identifier before an apostrophe inside quotes;
2775       // check once to see if we can expand it.
2776       Pos = IdentifierPos;
2777       IdentifierPos = End;
2778     }
2779 
2780     // Add the prefix.
2781     OS << Body.slice(0, Pos);
2782 
2783     // Check if we reached the end.
2784     if (Pos == End)
2785       break;
2786 
2787     unsigned I = Pos;
2788     bool InitialAmpersand = (Body[I] == '&');
2789     if (InitialAmpersand) {
2790       ++I;
2791       ++Pos;
2792     }
2793     while (I < End && isMacroParameterChar(Body[I]))
2794       ++I;
2795 
2796     const char *Begin = Body.data() + Pos;
2797     StringRef Argument(Begin, I - Pos);
2798     unsigned Index = 0;
2799 
2800     for (; Index < NParameters; ++Index)
2801       if (Parameters[Index].Name == Argument)
2802         break;
2803 
2804     if (Index == NParameters) {
2805       if (InitialAmpersand)
2806         OS << '&';
2807       auto it = LocalSymbols.find(Argument.lower());
2808       if (it != LocalSymbols.end())
2809         OS << it->second;
2810       else
2811         OS << Argument;
2812       Pos = I;
2813     } else {
2814       for (const AsmToken &Token : A[Index]) {
2815         // In MASM, you can write '%expr'.
2816         // The prefix '%' evaluates the expression 'expr'
2817         // and uses the result as a string (e.g. replace %(1+2) with the
2818         // string "3").
2819         // Here, we identify the integer token which is the result of the
2820         // absolute expression evaluation and replace it with its string
2821         // representation.
2822         if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2823           // Emit an integer value to the buffer.
2824           OS << Token.getIntVal();
2825         else
2826           OS << Token.getString();
2827       }
2828 
2829       Pos += Argument.size();
2830       if (Pos < End && Body[Pos] == '&') {
2831         ++Pos;
2832       }
2833     }
2834     // Update the scan point.
2835     Body = Body.substr(Pos);
2836   }
2837 
2838   return false;
2839 }
2840 
isOperator(AsmToken::TokenKind kind)2841 static bool isOperator(AsmToken::TokenKind kind) {
2842   switch (kind) {
2843   default:
2844     return false;
2845   case AsmToken::Plus:
2846   case AsmToken::Minus:
2847   case AsmToken::Tilde:
2848   case AsmToken::Slash:
2849   case AsmToken::Star:
2850   case AsmToken::Dot:
2851   case AsmToken::Equal:
2852   case AsmToken::EqualEqual:
2853   case AsmToken::Pipe:
2854   case AsmToken::PipePipe:
2855   case AsmToken::Caret:
2856   case AsmToken::Amp:
2857   case AsmToken::AmpAmp:
2858   case AsmToken::Exclaim:
2859   case AsmToken::ExclaimEqual:
2860   case AsmToken::Less:
2861   case AsmToken::LessEqual:
2862   case AsmToken::LessLess:
2863   case AsmToken::LessGreater:
2864   case AsmToken::Greater:
2865   case AsmToken::GreaterEqual:
2866   case AsmToken::GreaterGreater:
2867     return true;
2868   }
2869 }
2870 
2871 namespace {
2872 
2873 class AsmLexerSkipSpaceRAII {
2874 public:
AsmLexerSkipSpaceRAII(AsmLexer & Lexer,bool SkipSpace)2875   AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2876     Lexer.setSkipSpace(SkipSpace);
2877   }
2878 
~AsmLexerSkipSpaceRAII()2879   ~AsmLexerSkipSpaceRAII() {
2880     Lexer.setSkipSpace(true);
2881   }
2882 
2883 private:
2884   AsmLexer &Lexer;
2885 };
2886 
2887 } // end anonymous namespace
2888 
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)2889 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
2890                                     MCAsmMacroArgument &MA,
2891                                     AsmToken::TokenKind EndTok) {
2892   if (MP && MP->Vararg) {
2893     if (Lexer.isNot(EndTok)) {
2894       SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
2895       for (StringRef S : Str) {
2896         MA.emplace_back(AsmToken::String, S);
2897       }
2898     }
2899     return false;
2900   }
2901 
2902   SMLoc StrLoc = Lexer.getLoc(), EndLoc;
2903   if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
2904     const char *StrChar = StrLoc.getPointer() + 1;
2905     const char *EndChar = EndLoc.getPointer() - 1;
2906     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
2907     /// Eat from '<' to '>'.
2908     Lex();
2909     MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
2910     return false;
2911   }
2912 
2913   unsigned ParenLevel = 0;
2914 
2915   // Darwin doesn't use spaces to delmit arguments.
2916   AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
2917 
2918   bool SpaceEaten;
2919 
2920   while (true) {
2921     SpaceEaten = false;
2922     if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
2923       return TokError("unexpected token");
2924 
2925     if (ParenLevel == 0) {
2926       if (Lexer.is(AsmToken::Comma))
2927         break;
2928 
2929       if (Lexer.is(AsmToken::Space)) {
2930         SpaceEaten = true;
2931         Lex(); // Eat spaces.
2932       }
2933 
2934       // Spaces can delimit parameters, but could also be part an expression.
2935       // If the token after a space is an operator, add the token and the next
2936       // one into this argument
2937       if (!IsDarwin) {
2938         if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
2939           MA.push_back(getTok());
2940           Lex();
2941 
2942           // Whitespace after an operator can be ignored.
2943           if (Lexer.is(AsmToken::Space))
2944             Lex();
2945 
2946           continue;
2947         }
2948       }
2949       if (SpaceEaten)
2950         break;
2951     }
2952 
2953     // handleMacroEntry relies on not advancing the lexer here
2954     // to be able to fill in the remaining default parameter values
2955     if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
2956       break;
2957 
2958     // Adjust the current parentheses level.
2959     if (Lexer.is(AsmToken::LParen))
2960       ++ParenLevel;
2961     else if (Lexer.is(AsmToken::RParen) && ParenLevel)
2962       --ParenLevel;
2963 
2964     // Append the token to the current argument list.
2965     MA.push_back(getTok());
2966     Lex();
2967   }
2968 
2969   if (ParenLevel != 0)
2970     return TokError("unbalanced parentheses in argument");
2971 
2972   if (MA.empty() && MP) {
2973     if (MP->Required) {
2974       return TokError("missing value for required parameter '" + MP->Name +
2975                       "'");
2976     } else {
2977       MA = MP->Value;
2978     }
2979   }
2980   return false;
2981 }
2982 
2983 // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)2984 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
2985                                      MCAsmMacroArguments &A,
2986                                      AsmToken::TokenKind EndTok) {
2987   const unsigned NParameters = M ? M->Parameters.size() : 0;
2988   bool NamedParametersFound = false;
2989   SmallVector<SMLoc, 4> FALocs;
2990 
2991   A.resize(NParameters);
2992   FALocs.resize(NParameters);
2993 
2994   // Parse two kinds of macro invocations:
2995   // - macros defined without any parameters accept an arbitrary number of them
2996   // - macros defined with parameters accept at most that many of them
2997   for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
2998        ++Parameter) {
2999     SMLoc IDLoc = Lexer.getLoc();
3000     MCAsmMacroParameter FA;
3001 
3002     if (Lexer.is(AsmToken::Identifier) && Lexer.peekTok().is(AsmToken::Equal)) {
3003       if (parseIdentifier(FA.Name))
3004         return Error(IDLoc, "invalid argument identifier for formal argument");
3005 
3006       if (Lexer.isNot(AsmToken::Equal))
3007         return TokError("expected '=' after formal parameter identifier");
3008 
3009       Lex();
3010 
3011       NamedParametersFound = true;
3012     }
3013 
3014     if (NamedParametersFound && FA.Name.empty())
3015       return Error(IDLoc, "cannot mix positional and keyword arguments");
3016 
3017     unsigned PI = Parameter;
3018     if (!FA.Name.empty()) {
3019       assert(M && "expected macro to be defined");
3020       unsigned FAI = 0;
3021       for (FAI = 0; FAI < NParameters; ++FAI)
3022         if (M->Parameters[FAI].Name == FA.Name)
3023           break;
3024 
3025       if (FAI >= NParameters) {
3026         return Error(IDLoc, "parameter named '" + FA.Name +
3027                                 "' does not exist for macro '" + M->Name + "'");
3028       }
3029       PI = FAI;
3030     }
3031     const MCAsmMacroParameter *MP = nullptr;
3032     if (M && PI < NParameters)
3033       MP = &M->Parameters[PI];
3034 
3035     SMLoc StrLoc = Lexer.getLoc();
3036     SMLoc EndLoc;
3037     if (Lexer.is(AsmToken::Percent)) {
3038       const MCExpr *AbsoluteExp;
3039       int64_t Value;
3040       /// Eat '%'.
3041       Lex();
3042       if (parseExpression(AbsoluteExp, EndLoc))
3043         return false;
3044       if (!AbsoluteExp->evaluateAsAbsolute(Value,
3045                                            getStreamer().getAssemblerPtr()))
3046         return Error(StrLoc, "expected absolute expression");
3047       const char *StrChar = StrLoc.getPointer();
3048       const char *EndChar = EndLoc.getPointer();
3049       AsmToken newToken(AsmToken::Integer,
3050                         StringRef(StrChar, EndChar - StrChar), Value);
3051       FA.Value.push_back(newToken);
3052     } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3053       if (M)
3054         return addErrorSuffix(" in '" + M->Name + "' macro");
3055       else
3056         return true;
3057     }
3058 
3059     if (!FA.Value.empty()) {
3060       if (A.size() <= PI)
3061         A.resize(PI + 1);
3062       A[PI] = FA.Value;
3063 
3064       if (FALocs.size() <= PI)
3065         FALocs.resize(PI + 1);
3066 
3067       FALocs[PI] = Lexer.getLoc();
3068     }
3069 
3070     // At the end of the statement, fill in remaining arguments that have
3071     // default values. If there aren't any, then the next argument is
3072     // required but missing
3073     if (Lexer.is(EndTok)) {
3074       bool Failure = false;
3075       for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3076         if (A[FAI].empty()) {
3077           if (M->Parameters[FAI].Required) {
3078             Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3079                   "missing value for required parameter "
3080                   "'" +
3081                       M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3082             Failure = true;
3083           }
3084 
3085           if (!M->Parameters[FAI].Value.empty())
3086             A[FAI] = M->Parameters[FAI].Value;
3087         }
3088       }
3089       return Failure;
3090     }
3091 
3092     if (Lexer.is(AsmToken::Comma))
3093       Lex();
3094   }
3095 
3096   return TokError("too many positional arguments");
3097 }
3098 
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)3099 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3100                                   AsmToken::TokenKind ArgumentEndTok) {
3101   // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3102   // eliminate this, although we should protect against infinite loops.
3103   unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3104   if (ActiveMacros.size() == MaxNestingDepth) {
3105     std::ostringstream MaxNestingDepthError;
3106     MaxNestingDepthError << "macros cannot be nested more than "
3107                          << MaxNestingDepth << " levels deep."
3108                          << " Use -asm-macro-max-nesting-depth to increase "
3109                             "this limit.";
3110     return TokError(MaxNestingDepthError.str());
3111   }
3112 
3113   MCAsmMacroArguments A;
3114   if (parseMacroArguments(M, A, ArgumentEndTok))
3115     return true;
3116 
3117   // Macro instantiation is lexical, unfortunately. We construct a new buffer
3118   // to hold the macro body with substitutions.
3119   SmallString<256> Buf;
3120   StringRef Body = M->Body;
3121   raw_svector_ostream OS(Buf);
3122 
3123   if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3124     return true;
3125 
3126   // We include the endm in the buffer as our cue to exit the macro
3127   // instantiation.
3128   OS << "endm\n";
3129 
3130   std::unique_ptr<MemoryBuffer> Instantiation =
3131       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3132 
3133   // Create the macro instantiation object and add to the current macro
3134   // instantiation stack.
3135   MacroInstantiation *MI = new MacroInstantiation{
3136       NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3137   ActiveMacros.push_back(MI);
3138 
3139   ++NumOfMacroInstantiations;
3140 
3141   // Jump to the macro instantiation and prime the lexer.
3142   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3143   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3144   EndStatementAtEOFStack.push_back(true);
3145   Lex();
3146 
3147   return false;
3148 }
3149 
handleMacroExit()3150 void MasmParser::handleMacroExit() {
3151   // Jump to the token we should return to, and consume it.
3152   EndStatementAtEOFStack.pop_back();
3153   jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3154             EndStatementAtEOFStack.back());
3155   Lex();
3156 
3157   // Pop the instantiation entry.
3158   delete ActiveMacros.back();
3159   ActiveMacros.pop_back();
3160 }
3161 
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)3162 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3163   if (!M->IsFunction)
3164     return Error(NameLoc, "cannot invoke macro procedure as function");
3165 
3166   if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3167                                        "' requires arguments in parentheses") ||
3168       handleMacroEntry(M, NameLoc, AsmToken::RParen))
3169     return true;
3170 
3171   // Parse all statements in the macro, retrieving the exit value when it ends.
3172   std::string ExitValue;
3173   SmallVector<AsmRewrite, 4> AsmStrRewrites;
3174   while (Lexer.isNot(AsmToken::Eof)) {
3175     ParseStatementInfo Info(&AsmStrRewrites);
3176     bool Parsed = parseStatement(Info, nullptr);
3177 
3178     if (!Parsed && Info.ExitValue.hasValue()) {
3179       ExitValue = std::move(*Info.ExitValue);
3180       break;
3181     }
3182 
3183     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3184     // for printing ErrMsg via Lex() only if no (presumably better) parser error
3185     // exists.
3186     if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3187       Lex();
3188     }
3189 
3190     // parseStatement returned true so may need to emit an error.
3191     printPendingErrors();
3192 
3193     // Skipping to the next line if needed.
3194     if (Parsed && !getLexer().isAtStartOfStatement())
3195       eatToEndOfStatement();
3196   }
3197 
3198   // Consume the right-parenthesis on the other side of the arguments.
3199   if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name +
3200                                        "' requires arguments in parentheses"))
3201     return true;
3202 
3203   // Exit values may require lexing, unfortunately. We construct a new buffer to
3204   // hold the exit value.
3205   std::unique_ptr<MemoryBuffer> MacroValue =
3206       MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3207 
3208   // Jump from this location to the instantiated exit value, and prime the
3209   // lexer.
3210   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3211   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3212                   /*EndStatementAtEOF=*/false);
3213   EndStatementAtEOFStack.push_back(false);
3214   Lex();
3215 
3216   return false;
3217 }
3218 
3219 /// parseIdentifier:
3220 ///   ::= identifier
3221 ///   ::= string
parseIdentifier(StringRef & Res)3222 bool MasmParser::parseIdentifier(StringRef &Res) {
3223   // The assembler has relaxed rules for accepting identifiers, in particular we
3224   // allow things like '.globl $foo' and '.def @feat.00', which would normally
3225   // be separate tokens. At this level, we have already lexed so we cannot
3226   // (currently) handle this as a context dependent token, instead we detect
3227   // adjacent tokens and return the combined identifier.
3228   if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3229     SMLoc PrefixLoc = getLexer().getLoc();
3230 
3231     // Consume the prefix character, and check for a following identifier.
3232 
3233     AsmToken Buf[1];
3234     Lexer.peekTokens(Buf, false);
3235 
3236     if (Buf[0].isNot(AsmToken::Identifier))
3237       return true;
3238 
3239     // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3240     if (PrefixLoc.getPointer() + 1 != Buf[0].getLoc().getPointer())
3241       return true;
3242 
3243     // eat $ or @
3244     Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3245     // Construct the joined identifier and consume the token.
3246     Res =
3247         StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3248     Lex(); // Parser Lex to maintain invariants.
3249     return false;
3250   }
3251 
3252   if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3253     return true;
3254 
3255   Res = getTok().getIdentifier();
3256 
3257   Lex(); // Consume the identifier token.
3258 
3259   return false;
3260 }
3261 
3262 /// parseDirectiveEquate:
3263 ///  ::= name "=" expression
3264 ///    | name "equ" expression    (not redefinable)
3265 ///    | name "equ" text-list
3266 ///    | name "textequ" text-list
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind)3267 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3268                                       DirectiveKind DirKind) {
3269   Variable &Var = Variables[Name];
3270   if (Var.Name.empty()) {
3271     Var.Name = Name;
3272   } else if (!Var.Redefinable) {
3273     return TokError("invalid variable redefinition");
3274   }
3275   Var.Redefinable = (DirKind != DK_EQU);
3276 
3277   if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3278     // "equ" and "textequ" both allow text expressions.
3279     std::string Value;
3280     if (!parseTextItem(Value)) {
3281       Var.IsText = true;
3282       Var.TextValue = Value;
3283 
3284       // Accept a text-list, not just one text-item.
3285       auto parseItem = [&]() -> bool {
3286         if (parseTextItem(Value))
3287           return TokError("expected text item");
3288         Var.TextValue += Value;
3289         return false;
3290       };
3291       if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3292         return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3293 
3294       return false;
3295     }
3296   }
3297   if (DirKind == DK_TEXTEQU)
3298     return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3299 
3300   // Parse as expression assignment.
3301   const MCExpr *Expr;
3302   SMLoc EndLoc, StartLoc = Lexer.getLoc();
3303   if (parseExpression(Expr, EndLoc))
3304     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3305   MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3306   Sym->setRedefinable(Var.Redefinable);
3307   Sym->setVariableValue(Expr);
3308   Sym->setExternal(false);
3309 
3310   if (Expr->evaluateAsAbsolute(Var.NumericValue,
3311                                getStreamer().getAssemblerPtr()))
3312     return false;
3313 
3314   // Not an absolute expression; define as a text replacement.
3315   Var.IsText = true;
3316   Var.TextValue = StringRef(StartLoc.getPointer(),
3317                             EndLoc.getPointer() - StartLoc.getPointer()).str();
3318   return false;
3319 }
3320 
parseEscapedString(std::string & Data)3321 bool MasmParser::parseEscapedString(std::string &Data) {
3322   if (check(getTok().isNot(AsmToken::String), "expected string"))
3323     return true;
3324 
3325   Data = "";
3326   char Quote = getTok().getString().front();
3327   StringRef Str = getTok().getStringContents();
3328   Data.reserve(Str.size());
3329   for (size_t i = 0, e = Str.size(); i != e; ++i) {
3330     Data.push_back(Str[i]);
3331     if (Str[i] == Quote) {
3332       // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3333       // If we're escaping the string's trailing delimiter, we're definitely
3334       // missing a quotation mark.
3335       if (i + 1 == Str.size())
3336         return Error(getTok().getLoc(), "missing quotation mark in string");
3337       if (Str[i + 1] == Quote)
3338         ++i;
3339     }
3340   }
3341 
3342   Lex();
3343   return false;
3344 }
3345 
parseAngleBracketString(std::string & Data)3346 bool MasmParser::parseAngleBracketString(std::string &Data) {
3347   SMLoc EndLoc, StartLoc = getTok().getLoc();
3348   if (isAngleBracketString(StartLoc, EndLoc)) {
3349     const char *StartChar = StartLoc.getPointer() + 1;
3350     const char *EndChar = EndLoc.getPointer() - 1;
3351     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3352     // Eat from '<' to '>'.
3353     Lex();
3354 
3355     Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3356     return false;
3357   }
3358   return true;
3359 }
3360 
3361 /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3362 bool MasmParser::parseTextItem(std::string &Data) {
3363   switch (getTok().getKind()) {
3364   default:
3365     return true;
3366   case AsmToken::Percent: {
3367     int64_t Res;
3368     if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3369       return true;
3370     Data = std::to_string(Res);
3371     return false;
3372   }
3373   case AsmToken::Less:
3374   case AsmToken::LessEqual:
3375   case AsmToken::LessLess:
3376   case AsmToken::LessGreater:
3377     return parseAngleBracketString(Data);
3378   case AsmToken::Identifier: {
3379     StringRef ID;
3380     if (parseIdentifier(ID))
3381       return true;
3382     Data = ID.str();
3383 
3384     auto it = Variables.find(ID);
3385     if (it == Variables.end())
3386       return true;
3387 
3388     while (it != Variables.end()) {
3389       const Variable &Var = it->second;
3390       if (!Var.IsText)
3391         return true;
3392       Data = Var.TextValue;
3393       it = Variables.find(Data);
3394     }
3395     return false;
3396   }
3397   }
3398   llvm_unreachable("unhandled token kind");
3399 }
3400 
3401 /// parseDirectiveAscii:
3402 ///   ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3403 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3404   auto parseOp = [&]() -> bool {
3405     std::string Data;
3406     if (checkForValidSection() || parseEscapedString(Data))
3407       return true;
3408     getStreamer().emitBytes(Data);
3409     if (ZeroTerminated)
3410       getStreamer().emitBytes(StringRef("\0", 1));
3411     return false;
3412   };
3413 
3414   if (parseMany(parseOp))
3415     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3416   return false;
3417 }
3418 
emitIntValue(const MCExpr * Value,unsigned Size)3419 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3420   // Special case constant expressions to match code generator.
3421   if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3422     assert(Size <= 8 && "Invalid size");
3423     int64_t IntValue = MCE->getValue();
3424     if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3425       return Error(MCE->getLoc(), "out of range literal value");
3426     getStreamer().emitIntValue(IntValue, Size);
3427   } else {
3428     const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3429     if (MSE && MSE->getSymbol().getName() == "?") {
3430       // ? initializer; treat as 0.
3431       getStreamer().emitIntValue(0, Size);
3432     } else {
3433       getStreamer().emitValue(Value, Size, Value->getLoc());
3434     }
3435   }
3436   return false;
3437 }
3438 
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3439 bool MasmParser::parseScalarInitializer(unsigned Size,
3440                                         SmallVectorImpl<const MCExpr *> &Values,
3441                                         unsigned StringPadLength) {
3442   if (Size == 1 && getTok().is(AsmToken::String)) {
3443     std::string Value;
3444     if (parseEscapedString(Value))
3445       return true;
3446     // Treat each character as an initializer.
3447     for (const unsigned char CharVal : Value)
3448       Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3449 
3450     // Pad the string with spaces to the specified length.
3451     for (size_t i = Value.size(); i < StringPadLength; ++i)
3452       Values.push_back(MCConstantExpr::create(' ', getContext()));
3453   } else {
3454     const MCExpr *Value;
3455     if (parseExpression(Value))
3456       return true;
3457     if (getTok().is(AsmToken::Identifier) &&
3458         getTok().getString().equals_lower("dup")) {
3459       Lex(); // Eat 'dup'.
3460       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3461       if (!MCE)
3462         return Error(Value->getLoc(),
3463                      "cannot repeat value a non-constant number of times");
3464       const int64_t Repetitions = MCE->getValue();
3465       if (Repetitions < 0)
3466         return Error(Value->getLoc(),
3467                      "cannot repeat value a negative number of times");
3468 
3469       SmallVector<const MCExpr *, 1> DuplicatedValues;
3470       if (parseToken(AsmToken::LParen,
3471                      "parentheses required for 'dup' contents") ||
3472           parseScalarInstList(Size, DuplicatedValues) ||
3473           parseToken(AsmToken::RParen, "unmatched parentheses"))
3474         return true;
3475 
3476       for (int i = 0; i < Repetitions; ++i)
3477         Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3478     } else {
3479       Values.push_back(Value);
3480     }
3481   }
3482   return false;
3483 }
3484 
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3485 bool MasmParser::parseScalarInstList(unsigned Size,
3486                                      SmallVectorImpl<const MCExpr *> &Values,
3487                                      const AsmToken::TokenKind EndToken) {
3488   while (getTok().isNot(EndToken) &&
3489          (EndToken != AsmToken::Greater ||
3490           getTok().isNot(AsmToken::GreaterGreater))) {
3491     parseScalarInitializer(Size, Values);
3492 
3493     // If we see a comma, continue, and allow line continuation.
3494     if (!parseOptionalToken(AsmToken::Comma))
3495       break;
3496     parseOptionalToken(AsmToken::EndOfStatement);
3497   }
3498   return false;
3499 }
3500 
emitIntegralValues(unsigned Size,unsigned * Count)3501 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3502   SmallVector<const MCExpr *, 1> Values;
3503   if (checkForValidSection() || parseScalarInstList(Size, Values))
3504     return true;
3505 
3506   for (auto Value : Values) {
3507     emitIntValue(Value, Size);
3508   }
3509   if (Count)
3510     *Count = Values.size();
3511   return false;
3512 }
3513 
3514 // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3515 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3516   StructInfo &Struct = StructInProgress.back();
3517   FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3518   IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3519 
3520   Field.Type = Size;
3521 
3522   if (parseScalarInstList(Size, IntInfo.Values))
3523     return true;
3524 
3525   Field.SizeOf = Field.Type * IntInfo.Values.size();
3526   Field.LengthOf = IntInfo.Values.size();
3527   if (Struct.IsUnion)
3528     Struct.Size = std::max(Struct.Size, Field.SizeOf);
3529   else
3530     Struct.Size += Field.SizeOf;
3531   return false;
3532 }
3533 
3534 /// parseDirectiveValue
3535 ///  ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3536 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3537   if (StructInProgress.empty()) {
3538     // Initialize data value.
3539     if (emitIntegralValues(Size))
3540       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3541   } else if (addIntegralField("", Size)) {
3542     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3543   }
3544 
3545   return false;
3546 }
3547 
3548 /// parseDirectiveNamedValue
3549 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3550 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3551                                           StringRef Name, SMLoc NameLoc) {
3552   if (StructInProgress.empty()) {
3553     // Initialize named data value.
3554     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3555     getStreamer().emitLabel(Sym);
3556     unsigned Count;
3557     if (emitIntegralValues(Size, &Count))
3558       return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3559 
3560     AsmTypeInfo Type;
3561     Type.Name = TypeName;
3562     Type.Size = Size * Count;
3563     Type.ElementSize = Size;
3564     Type.Length = Count;
3565     KnownType[Name.lower()] = Type;
3566   } else if (addIntegralField(Name, Size)) {
3567     return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3568   }
3569 
3570   return false;
3571 }
3572 
parseHexOcta(MasmParser & Asm,uint64_t & hi,uint64_t & lo)3573 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3574   if (Asm.getTok().isNot(AsmToken::Integer) &&
3575       Asm.getTok().isNot(AsmToken::BigNum))
3576     return Asm.TokError("unknown token in expression");
3577   SMLoc ExprLoc = Asm.getTok().getLoc();
3578   APInt IntValue = Asm.getTok().getAPIntVal();
3579   Asm.Lex();
3580   if (!IntValue.isIntN(128))
3581     return Asm.Error(ExprLoc, "out of range literal value");
3582   if (!IntValue.isIntN(64)) {
3583     hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3584     lo = IntValue.getLoBits(64).getZExtValue();
3585   } else {
3586     hi = 0;
3587     lo = IntValue.getZExtValue();
3588   }
3589   return false;
3590 }
3591 
parseRealValue(const fltSemantics & Semantics,APInt & Res)3592 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3593   // We don't truly support arithmetic on floating point expressions, so we
3594   // have to manually parse unary prefixes.
3595   bool IsNeg = false;
3596   SMLoc SignLoc;
3597   if (getLexer().is(AsmToken::Minus)) {
3598     SignLoc = getLexer().getLoc();
3599     Lexer.Lex();
3600     IsNeg = true;
3601   } else if (getLexer().is(AsmToken::Plus)) {
3602     SignLoc = getLexer().getLoc();
3603     Lexer.Lex();
3604   }
3605 
3606   if (Lexer.is(AsmToken::Error))
3607     return TokError(Lexer.getErr());
3608   if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3609       Lexer.isNot(AsmToken::Identifier))
3610     return TokError("unexpected token in directive");
3611 
3612   // Convert to an APFloat.
3613   APFloat Value(Semantics);
3614   StringRef IDVal = getTok().getString();
3615   if (getLexer().is(AsmToken::Identifier)) {
3616     if (IDVal.equals_lower("infinity") || IDVal.equals_lower("inf"))
3617       Value = APFloat::getInf(Semantics);
3618     else if (IDVal.equals_lower("nan"))
3619       Value = APFloat::getNaN(Semantics, false, ~0);
3620     else if (IDVal.equals_lower("?"))
3621       Value = APFloat::getZero(Semantics);
3622     else
3623       return TokError("invalid floating point literal");
3624   } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3625     // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3626     // To match ML64.exe, ignore the initial sign.
3627     unsigned SizeInBits = Value.getSizeInBits(Semantics);
3628     if (SizeInBits != (IDVal.size() << 2))
3629       return TokError("invalid floating point literal");
3630 
3631     // Consume the numeric token.
3632     Lex();
3633 
3634     Res = APInt(SizeInBits, IDVal, 16);
3635     if (SignLoc.isValid())
3636       return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3637     return false;
3638   } else if (errorToBool(
3639                  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3640                      .takeError())) {
3641     return TokError("invalid floating point literal");
3642   }
3643   if (IsNeg)
3644     Value.changeSign();
3645 
3646   // Consume the numeric token.
3647   Lex();
3648 
3649   Res = Value.bitcastToAPInt();
3650 
3651   return false;
3652 }
3653 
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3654 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3655                                    SmallVectorImpl<APInt> &ValuesAsInt,
3656                                    const AsmToken::TokenKind EndToken) {
3657   while (getTok().isNot(EndToken) ||
3658          (EndToken == AsmToken::Greater &&
3659           getTok().isNot(AsmToken::GreaterGreater))) {
3660     const AsmToken NextTok = Lexer.peekTok();
3661     if (NextTok.is(AsmToken::Identifier) &&
3662         NextTok.getString().equals_lower("dup")) {
3663       const MCExpr *Value;
3664       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3665         return true;
3666       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3667       if (!MCE)
3668         return Error(Value->getLoc(),
3669                      "cannot repeat value a non-constant number of times");
3670       const int64_t Repetitions = MCE->getValue();
3671       if (Repetitions < 0)
3672         return Error(Value->getLoc(),
3673                      "cannot repeat value a negative number of times");
3674 
3675       SmallVector<APInt, 1> DuplicatedValues;
3676       if (parseToken(AsmToken::LParen,
3677                      "parentheses required for 'dup' contents") ||
3678           parseRealInstList(Semantics, DuplicatedValues) ||
3679           parseToken(AsmToken::RParen, "unmatched parentheses"))
3680         return true;
3681 
3682       for (int i = 0; i < Repetitions; ++i)
3683         ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3684     } else {
3685       APInt AsInt;
3686       if (parseRealValue(Semantics, AsInt))
3687         return true;
3688       ValuesAsInt.push_back(AsInt);
3689     }
3690 
3691     // Continue if we see a comma. (Also, allow line continuation.)
3692     if (!parseOptionalToken(AsmToken::Comma))
3693       break;
3694     parseOptionalToken(AsmToken::EndOfStatement);
3695   }
3696 
3697   return false;
3698 }
3699 
3700 // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3701 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3702                                 unsigned *Count) {
3703   if (checkForValidSection())
3704     return true;
3705 
3706   SmallVector<APInt, 1> ValuesAsInt;
3707   if (parseRealInstList(Semantics, ValuesAsInt))
3708     return true;
3709 
3710   for (const APInt &AsInt : ValuesAsInt) {
3711     getStreamer().emitIntValue(AsInt);
3712   }
3713   if (Count)
3714     *Count = ValuesAsInt.size();
3715   return false;
3716 }
3717 
3718 // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3719 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3720                               size_t Size) {
3721   StructInfo &Struct = StructInProgress.back();
3722   FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3723   RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3724 
3725   Field.SizeOf = 0;
3726 
3727   if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3728     return true;
3729 
3730   Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3731   Field.LengthOf = RealInfo.AsIntValues.size();
3732   Field.SizeOf = Field.Type * Field.LengthOf;
3733   if (Struct.IsUnion)
3734     Struct.Size = std::max(Struct.Size, Field.SizeOf);
3735   else
3736     Struct.Size += Field.SizeOf;
3737   return false;
3738 }
3739 
3740 /// parseDirectiveRealValue
3741 ///  ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3742 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3743                                          const fltSemantics &Semantics,
3744                                          size_t Size) {
3745   if (StructInProgress.empty()) {
3746     // Initialize data value.
3747     if (emitRealValues(Semantics))
3748       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3749   } else if (addRealField("", Semantics, Size)) {
3750     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3751   }
3752   return false;
3753 }
3754 
3755 /// parseDirectiveNamedRealValue
3756 ///  ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3757 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3758                                               const fltSemantics &Semantics,
3759                                               unsigned Size, StringRef Name,
3760                                               SMLoc NameLoc) {
3761   if (StructInProgress.empty()) {
3762     // Initialize named data value.
3763     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3764     getStreamer().emitLabel(Sym);
3765     unsigned Count;
3766     if (emitRealValues(Semantics, &Count))
3767       return addErrorSuffix(" in '" + TypeName + "' directive");
3768 
3769     AsmTypeInfo Type;
3770     Type.Name = TypeName;
3771     Type.Size = Size * Count;
3772     Type.ElementSize = Size;
3773     Type.Length = Count;
3774     KnownType[Name.lower()] = Type;
3775   } else if (addRealField(Name, Semantics, Size)) {
3776     return addErrorSuffix(" in '" + TypeName + "' directive");
3777   }
3778   return false;
3779 }
3780 
parseOptionalAngleBracketOpen()3781 bool MasmParser::parseOptionalAngleBracketOpen() {
3782   const AsmToken Tok = getTok();
3783   if (parseOptionalToken(AsmToken::LessLess)) {
3784     AngleBracketDepth++;
3785     Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
3786     return true;
3787   } else if (parseOptionalToken(AsmToken::LessGreater)) {
3788     AngleBracketDepth++;
3789     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3790     return true;
3791   } else if (parseOptionalToken(AsmToken::Less)) {
3792     AngleBracketDepth++;
3793     return true;
3794   }
3795 
3796   return false;
3797 }
3798 
parseAngleBracketClose(const Twine & Msg)3799 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
3800   const AsmToken Tok = getTok();
3801   if (parseOptionalToken(AsmToken::GreaterGreater)) {
3802     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3803   } else if (parseToken(AsmToken::Greater, Msg)) {
3804     return true;
3805   }
3806   AngleBracketDepth--;
3807   return false;
3808 }
3809 
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)3810 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3811                                        const IntFieldInfo &Contents,
3812                                        FieldInitializer &Initializer) {
3813   SMLoc Loc = getTok().getLoc();
3814 
3815   SmallVector<const MCExpr *, 1> Values;
3816   if (parseOptionalToken(AsmToken::LCurly)) {
3817     if (Field.LengthOf == 1 && Field.Type > 1)
3818       return Error(Loc, "Cannot initialize scalar field with array value");
3819     if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
3820         parseToken(AsmToken::RCurly))
3821       return true;
3822   } else if (parseOptionalAngleBracketOpen()) {
3823     if (Field.LengthOf == 1 && Field.Type > 1)
3824       return Error(Loc, "Cannot initialize scalar field with array value");
3825     if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
3826         parseAngleBracketClose())
3827       return true;
3828   } else if (Field.LengthOf > 1 && Field.Type > 1) {
3829     return Error(Loc, "Cannot initialize array field with scalar value");
3830   } else if (parseScalarInitializer(Field.Type, Values,
3831                                     /*StringPadLength=*/Field.LengthOf)) {
3832     return true;
3833   }
3834 
3835   if (Values.size() > Field.LengthOf) {
3836     return Error(Loc, "Initializer too long for field; expected at most " +
3837                           std::to_string(Field.LengthOf) + " elements, got " +
3838                           std::to_string(Values.size()));
3839   }
3840   // Default-initialize all remaining values.
3841   Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
3842 
3843   Initializer = FieldInitializer(std::move(Values));
3844   return false;
3845 }
3846 
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)3847 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3848                                        const RealFieldInfo &Contents,
3849                                        FieldInitializer &Initializer) {
3850   const fltSemantics *Semantics;
3851   switch (Field.Type) {
3852   case 4:
3853     Semantics = &APFloat::IEEEsingle();
3854     break;
3855   case 8:
3856     Semantics = &APFloat::IEEEdouble();
3857     break;
3858   case 10:
3859     Semantics = &APFloat::x87DoubleExtended();
3860     break;
3861   default:
3862     llvm_unreachable("unknown real field type");
3863   }
3864 
3865   SMLoc Loc = getTok().getLoc();
3866 
3867   SmallVector<APInt, 1> AsIntValues;
3868   if (parseOptionalToken(AsmToken::LCurly)) {
3869     if (Field.LengthOf == 1)
3870       return Error(Loc, "Cannot initialize scalar field with array value");
3871     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
3872         parseToken(AsmToken::RCurly))
3873       return true;
3874   } else if (parseOptionalAngleBracketOpen()) {
3875     if (Field.LengthOf == 1)
3876       return Error(Loc, "Cannot initialize scalar field with array value");
3877     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
3878         parseAngleBracketClose())
3879       return true;
3880   } else if (Field.LengthOf > 1) {
3881     return Error(Loc, "Cannot initialize array field with scalar value");
3882   } else {
3883     AsIntValues.emplace_back();
3884     if (parseRealValue(*Semantics, AsIntValues.back()))
3885       return true;
3886   }
3887 
3888   if (AsIntValues.size() > Field.LengthOf) {
3889     return Error(Loc, "Initializer too long for field; expected at most " +
3890                           std::to_string(Field.LengthOf) + " elements, got " +
3891                           std::to_string(AsIntValues.size()));
3892   }
3893   // Default-initialize all remaining values.
3894   AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
3895                      Contents.AsIntValues.end());
3896 
3897   Initializer = FieldInitializer(std::move(AsIntValues));
3898   return false;
3899 }
3900 
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)3901 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3902                                        const StructFieldInfo &Contents,
3903                                        FieldInitializer &Initializer) {
3904   SMLoc Loc = getTok().getLoc();
3905 
3906   std::vector<StructInitializer> Initializers;
3907   if (Field.LengthOf > 1) {
3908     if (parseOptionalToken(AsmToken::LCurly)) {
3909       if (parseStructInstList(Contents.Structure, Initializers,
3910                               AsmToken::RCurly) ||
3911           parseToken(AsmToken::RCurly))
3912         return true;
3913     } else if (parseOptionalAngleBracketOpen()) {
3914       if (parseStructInstList(Contents.Structure, Initializers,
3915                               AsmToken::Greater) ||
3916           parseAngleBracketClose())
3917         return true;
3918     } else {
3919       return Error(Loc, "Cannot initialize array field with scalar value");
3920     }
3921   } else {
3922     Initializers.emplace_back();
3923     if (parseStructInitializer(Contents.Structure, Initializers.back()))
3924       return true;
3925   }
3926 
3927   if (Initializers.size() > Field.LengthOf) {
3928     return Error(Loc, "Initializer too long for field; expected at most " +
3929                           std::to_string(Field.LengthOf) + " elements, got " +
3930                           std::to_string(Initializers.size()));
3931   }
3932   // Default-initialize all remaining values.
3933   Initializers.insert(Initializers.end(),
3934                       Contents.Initializers.begin() + Initializers.size(),
3935                       Contents.Initializers.end());
3936 
3937   Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
3938   return false;
3939 }
3940 
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)3941 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3942                                        FieldInitializer &Initializer) {
3943   switch (Field.Contents.FT) {
3944   case FT_INTEGRAL:
3945     return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
3946   case FT_REAL:
3947     return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
3948   case FT_STRUCT:
3949     return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
3950   }
3951   llvm_unreachable("Unhandled FieldType enum");
3952 }
3953 
parseStructInitializer(const StructInfo & Structure,StructInitializer & Initializer)3954 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
3955                                         StructInitializer &Initializer) {
3956   const AsmToken FirstToken = getTok();
3957 
3958   Optional<AsmToken::TokenKind> EndToken;
3959   if (parseOptionalToken(AsmToken::LCurly)) {
3960     EndToken = AsmToken::RCurly;
3961   } else if (parseOptionalAngleBracketOpen()) {
3962     EndToken = AsmToken::Greater;
3963     AngleBracketDepth++;
3964   } else if (FirstToken.is(AsmToken::Identifier) &&
3965              FirstToken.getString() == "?") {
3966     // ? initializer; leave EndToken uninitialized to treat as empty.
3967     if (parseToken(AsmToken::Identifier))
3968       return true;
3969   } else {
3970     return Error(FirstToken.getLoc(), "Expected struct initializer");
3971   }
3972 
3973   auto &FieldInitializers = Initializer.FieldInitializers;
3974   size_t FieldIndex = 0;
3975   if (EndToken.hasValue()) {
3976     // Initialize all fields with given initializers.
3977     while (getTok().isNot(EndToken.getValue()) &&
3978            FieldIndex < Structure.Fields.size()) {
3979       const FieldInfo &Field = Structure.Fields[FieldIndex++];
3980       if (parseOptionalToken(AsmToken::Comma)) {
3981         // Empty initializer; use the default and continue. (Also, allow line
3982         // continuation.)
3983         FieldInitializers.push_back(Field.Contents);
3984         parseOptionalToken(AsmToken::EndOfStatement);
3985         continue;
3986       }
3987       FieldInitializers.emplace_back(Field.Contents.FT);
3988       if (parseFieldInitializer(Field, FieldInitializers.back()))
3989         return true;
3990 
3991       // Continue if we see a comma. (Also, allow line continuation.)
3992       SMLoc CommaLoc = getTok().getLoc();
3993       if (!parseOptionalToken(AsmToken::Comma))
3994         break;
3995       if (FieldIndex == Structure.Fields.size())
3996         return Error(CommaLoc, "'" + Structure.Name +
3997                                    "' initializer initializes too many fields");
3998       parseOptionalToken(AsmToken::EndOfStatement);
3999     }
4000   }
4001   // Default-initialize all remaining fields.
4002   for (auto It = Structure.Fields.begin() + FieldIndex;
4003        It != Structure.Fields.end(); ++It) {
4004     const FieldInfo &Field = *It;
4005     FieldInitializers.push_back(Field.Contents);
4006   }
4007 
4008   if (EndToken.hasValue()) {
4009     if (EndToken.getValue() == AsmToken::Greater)
4010       return parseAngleBracketClose();
4011 
4012     return parseToken(EndToken.getValue());
4013   }
4014 
4015   return false;
4016 }
4017 
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)4018 bool MasmParser::parseStructInstList(
4019     const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4020     const AsmToken::TokenKind EndToken) {
4021   while (getTok().isNot(EndToken) ||
4022          (EndToken == AsmToken::Greater &&
4023           getTok().isNot(AsmToken::GreaterGreater))) {
4024     const AsmToken NextTok = Lexer.peekTok();
4025     if (NextTok.is(AsmToken::Identifier) &&
4026         NextTok.getString().equals_lower("dup")) {
4027       const MCExpr *Value;
4028       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4029         return true;
4030       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4031       if (!MCE)
4032         return Error(Value->getLoc(),
4033                      "cannot repeat value a non-constant number of times");
4034       const int64_t Repetitions = MCE->getValue();
4035       if (Repetitions < 0)
4036         return Error(Value->getLoc(),
4037                      "cannot repeat value a negative number of times");
4038 
4039       std::vector<StructInitializer> DuplicatedValues;
4040       if (parseToken(AsmToken::LParen,
4041                      "parentheses required for 'dup' contents") ||
4042           parseStructInstList(Structure, DuplicatedValues) ||
4043           parseToken(AsmToken::RParen, "unmatched parentheses"))
4044         return true;
4045 
4046       for (int i = 0; i < Repetitions; ++i)
4047         Initializers.insert(Initializers.end(), DuplicatedValues.begin(),
4048                             DuplicatedValues.end());
4049     } else {
4050       Initializers.emplace_back();
4051       if (parseStructInitializer(Structure, Initializers.back()))
4052         return true;
4053     }
4054 
4055     // Continue if we see a comma. (Also, allow line continuation.)
4056     if (!parseOptionalToken(AsmToken::Comma))
4057       break;
4058     parseOptionalToken(AsmToken::EndOfStatement);
4059   }
4060 
4061   return false;
4062 }
4063 
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)4064 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4065                                 const IntFieldInfo &Contents) {
4066   // Default-initialize all values.
4067   for (const MCExpr *Value : Contents.Values) {
4068     if (emitIntValue(Value, Field.Type))
4069       return true;
4070   }
4071   return false;
4072 }
4073 
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)4074 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4075                                 const RealFieldInfo &Contents) {
4076   for (const APInt &AsInt : Contents.AsIntValues) {
4077     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4078                                AsInt.getBitWidth() / 8);
4079   }
4080   return false;
4081 }
4082 
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)4083 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4084                                 const StructFieldInfo &Contents) {
4085   for (const auto &Initializer : Contents.Initializers) {
4086     size_t Index = 0, Offset = 0;
4087     for (const auto &SubField : Contents.Structure.Fields) {
4088       getStreamer().emitZeros(SubField.Offset - Offset);
4089       Offset = SubField.Offset + SubField.SizeOf;
4090       emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4091     }
4092   }
4093   return false;
4094 }
4095 
emitFieldValue(const FieldInfo & Field)4096 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4097   switch (Field.Contents.FT) {
4098   case FT_INTEGRAL:
4099     return emitFieldValue(Field, Field.Contents.IntInfo);
4100   case FT_REAL:
4101     return emitFieldValue(Field, Field.Contents.RealInfo);
4102   case FT_STRUCT:
4103     return emitFieldValue(Field, Field.Contents.StructInfo);
4104   }
4105   llvm_unreachable("Unhandled FieldType enum");
4106 }
4107 
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)4108 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4109                                       const IntFieldInfo &Contents,
4110                                       const IntFieldInfo &Initializer) {
4111   for (const auto &Value : Initializer.Values) {
4112     if (emitIntValue(Value, Field.Type))
4113       return true;
4114   }
4115   // Default-initialize all remaining values.
4116   for (auto it = Contents.Values.begin() + Initializer.Values.size();
4117        it != Contents.Values.end(); ++it) {
4118     const auto &Value = *it;
4119     if (emitIntValue(Value, Field.Type))
4120       return true;
4121   }
4122   return false;
4123 }
4124 
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)4125 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4126                                       const RealFieldInfo &Contents,
4127                                       const RealFieldInfo &Initializer) {
4128   for (const auto &AsInt : Initializer.AsIntValues) {
4129     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4130                                AsInt.getBitWidth() / 8);
4131   }
4132   // Default-initialize all remaining values.
4133   for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size();
4134        It != Contents.AsIntValues.end(); ++It) {
4135     const auto &AsInt = *It;
4136     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4137                                AsInt.getBitWidth() / 8);
4138   }
4139   return false;
4140 }
4141 
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)4142 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4143                                       const StructFieldInfo &Contents,
4144                                       const StructFieldInfo &Initializer) {
4145   for (const auto &Init : Initializer.Initializers) {
4146     emitStructInitializer(Contents.Structure, Init);
4147   }
4148   // Default-initialize all remaining values.
4149   for (auto It =
4150            Contents.Initializers.begin() + Initializer.Initializers.size();
4151        It != Contents.Initializers.end(); ++It) {
4152     const auto &Init = *It;
4153     emitStructInitializer(Contents.Structure, Init);
4154   }
4155   return false;
4156 }
4157 
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)4158 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4159                                       const FieldInitializer &Initializer) {
4160   switch (Field.Contents.FT) {
4161   case FT_INTEGRAL:
4162     return emitFieldInitializer(Field, Field.Contents.IntInfo,
4163                                 Initializer.IntInfo);
4164   case FT_REAL:
4165     return emitFieldInitializer(Field, Field.Contents.RealInfo,
4166                                 Initializer.RealInfo);
4167   case FT_STRUCT:
4168     return emitFieldInitializer(Field, Field.Contents.StructInfo,
4169                                 Initializer.StructInfo);
4170   }
4171   llvm_unreachable("Unhandled FieldType enum");
4172 }
4173 
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)4174 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4175                                        const StructInitializer &Initializer) {
4176   size_t Index = 0, Offset = 0;
4177   for (const auto &Init : Initializer.FieldInitializers) {
4178     const auto &Field = Structure.Fields[Index++];
4179     getStreamer().emitZeros(Field.Offset - Offset);
4180     Offset = Field.Offset + Field.SizeOf;
4181     if (emitFieldInitializer(Field, Init))
4182       return true;
4183   }
4184   // Default-initialize all remaining fields.
4185   for (auto It =
4186            Structure.Fields.begin() + Initializer.FieldInitializers.size();
4187        It != Structure.Fields.end(); ++It) {
4188     const auto &Field = *It;
4189     getStreamer().emitZeros(Field.Offset - Offset);
4190     Offset = Field.Offset + Field.SizeOf;
4191     if (emitFieldValue(Field))
4192       return true;
4193   }
4194   // Add final padding.
4195   if (Offset != Structure.Size)
4196     getStreamer().emitZeros(Structure.Size - Offset);
4197   return false;
4198 }
4199 
4200 // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)4201 bool MasmParser::emitStructValues(const StructInfo &Structure,
4202                                   unsigned *Count) {
4203   std::vector<StructInitializer> Initializers;
4204   if (parseStructInstList(Structure, Initializers))
4205     return true;
4206 
4207   for (const auto &Initializer : Initializers) {
4208     if (emitStructInitializer(Structure, Initializer))
4209       return true;
4210   }
4211 
4212   if (Count)
4213     *Count = Initializers.size();
4214   return false;
4215 }
4216 
4217 // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)4218 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4219   StructInfo &OwningStruct = StructInProgress.back();
4220   FieldInfo &Field =
4221       OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4222   StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4223 
4224   StructInfo.Structure = Structure;
4225   Field.Type = Structure.Size;
4226 
4227   if (parseStructInstList(Structure, StructInfo.Initializers))
4228     return true;
4229 
4230   Field.LengthOf = StructInfo.Initializers.size();
4231   Field.SizeOf = Field.Type * Field.LengthOf;
4232   if (OwningStruct.IsUnion)
4233     OwningStruct.Size = std::max(OwningStruct.Size, Field.SizeOf);
4234   else
4235     OwningStruct.Size += Field.SizeOf;
4236 
4237   return false;
4238 }
4239 
4240 /// parseDirectiveStructValue
4241 ///  ::= struct-id (<struct-initializer> | {struct-initializer})
4242 ///                [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)4243 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4244                                            StringRef Directive, SMLoc DirLoc) {
4245   if (StructInProgress.empty()) {
4246     if (emitStructValues(Structure))
4247       return true;
4248   } else if (addStructField("", Structure)) {
4249     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4250   }
4251 
4252   return false;
4253 }
4254 
4255 /// parseDirectiveNamedValue
4256 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4257 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4258                                                 StringRef Directive,
4259                                                 SMLoc DirLoc, StringRef Name) {
4260   if (StructInProgress.empty()) {
4261     // Initialize named data value.
4262     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4263     getStreamer().emitLabel(Sym);
4264     unsigned Count;
4265     if (emitStructValues(Structure, &Count))
4266       return true;
4267     AsmTypeInfo Type;
4268     Type.Name = Structure.Name;
4269     Type.Size = Structure.Size * Count;
4270     Type.ElementSize = Structure.Size;
4271     Type.Length = Count;
4272     KnownType[Name.lower()] = Type;
4273   } else if (addStructField(Name, Structure)) {
4274     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4275   }
4276 
4277   return false;
4278 }
4279 
4280 /// parseDirectiveStruct
4281 ///  ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4282 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4283 ///      <name> ENDS
4284 ////// dataDir = data declaration
4285 ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4286 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4287                                       DirectiveKind DirKind, StringRef Name,
4288                                       SMLoc NameLoc) {
4289   // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4290   // anyway, so all field accesses must be qualified.
4291   AsmToken NextTok = getTok();
4292   int64_t AlignmentValue = 1;
4293   if (NextTok.isNot(AsmToken::Comma) &&
4294       NextTok.isNot(AsmToken::EndOfStatement) &&
4295       parseAbsoluteExpression(AlignmentValue)) {
4296     return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4297                           "' directive");
4298   }
4299   if (!isPowerOf2_64(AlignmentValue)) {
4300     return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4301                                        std::to_string(AlignmentValue));
4302   }
4303 
4304   StringRef Qualifier;
4305   SMLoc QualifierLoc;
4306   if (parseOptionalToken(AsmToken::Comma)) {
4307     QualifierLoc = getTok().getLoc();
4308     if (parseIdentifier(Qualifier))
4309       return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4310     if (!Qualifier.equals_lower("nonunique"))
4311       return Error(QualifierLoc, "Unrecognized qualifier for '" +
4312                                      Twine(Directive) +
4313                                      "' directive; expected none or NONUNIQUE");
4314   }
4315 
4316   if (parseToken(AsmToken::EndOfStatement))
4317     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4318 
4319   StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4320   return false;
4321 }
4322 
4323 /// parseDirectiveNestedStruct
4324 ///  ::= (STRUC | STRUCT | UNION) [name]
4325 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4326 ///      ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4327 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4328                                             DirectiveKind DirKind) {
4329   if (StructInProgress.empty())
4330     return TokError("missing name in top-level '" + Twine(Directive) +
4331                     "' directive");
4332 
4333   StringRef Name;
4334   if (getTok().is(AsmToken::Identifier)) {
4335     Name = getTok().getIdentifier();
4336     parseToken(AsmToken::Identifier);
4337   }
4338   if (parseToken(AsmToken::EndOfStatement))
4339     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4340 
4341   // Reserve space to ensure Alignment doesn't get invalidated when
4342   // StructInProgress grows.
4343   StructInProgress.reserve(StructInProgress.size() + 1);
4344   StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4345                                 StructInProgress.back().Alignment);
4346   return false;
4347 }
4348 
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4349 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4350   if (StructInProgress.empty())
4351     return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4352   if (StructInProgress.size() > 1)
4353     return Error(NameLoc, "unexpected name in nested ENDS directive");
4354   if (StructInProgress.back().Name.compare_lower(Name))
4355     return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4356                               StructInProgress.back().Name + "'");
4357   StructInfo Structure = StructInProgress.pop_back_val();
4358   // Pad to make the structure's size divisible by the smaller of its alignment
4359   // and the size of its largest field.
4360   Structure.Size = llvm::alignTo(
4361       Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4362   Structs[Name.lower()] = Structure;
4363 
4364   if (parseToken(AsmToken::EndOfStatement))
4365     return addErrorSuffix(" in ENDS directive");
4366 
4367   return false;
4368 }
4369 
parseDirectiveNestedEnds()4370 bool MasmParser::parseDirectiveNestedEnds() {
4371   if (StructInProgress.empty())
4372     return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4373   if (StructInProgress.size() == 1)
4374     return TokError("missing name in top-level ENDS directive");
4375 
4376   if (parseToken(AsmToken::EndOfStatement))
4377     return addErrorSuffix(" in nested ENDS directive");
4378 
4379   StructInfo Structure = StructInProgress.pop_back_val();
4380   // Pad to make the structure's size divisible by its alignment.
4381   Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4382 
4383   StructInfo &ParentStruct = StructInProgress.back();
4384   if (Structure.Name.empty()) {
4385     const size_t OldFields = ParentStruct.Fields.size();
4386     ParentStruct.Fields.insert(
4387         ParentStruct.Fields.end(),
4388         std::make_move_iterator(Structure.Fields.begin()),
4389         std::make_move_iterator(Structure.Fields.end()));
4390     for (const auto &FieldByName : Structure.FieldsByName) {
4391       ParentStruct.FieldsByName[FieldByName.getKey()] =
4392           FieldByName.getValue() + OldFields;
4393     }
4394     if (!ParentStruct.IsUnion) {
4395       for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
4396            FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
4397         FieldIter->Offset += ParentStruct.Size;
4398       }
4399     }
4400 
4401     if (ParentStruct.IsUnion)
4402       ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4403     else
4404       ParentStruct.Size += Structure.Size;
4405   } else {
4406     FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4407                                              Structure.AlignmentSize);
4408     StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4409     Field.Type = Structure.Size;
4410     Field.LengthOf = 1;
4411     Field.SizeOf = Structure.Size;
4412 
4413     if (ParentStruct.IsUnion)
4414       ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf);
4415     else
4416       ParentStruct.Size += Field.SizeOf;
4417 
4418     StructInfo.Structure = Structure;
4419     StructInfo.Initializers.emplace_back();
4420     auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4421     for (const auto &SubField : Structure.Fields) {
4422       FieldInitializers.push_back(SubField.Contents);
4423     }
4424   }
4425 
4426   return false;
4427 }
4428 
4429 /// parseDirectiveOrg
4430 ///  ::= .org expression [ , expression ]
parseDirectiveOrg()4431 bool MasmParser::parseDirectiveOrg() {
4432   const MCExpr *Offset;
4433   SMLoc OffsetLoc = Lexer.getLoc();
4434   if (checkForValidSection() || parseExpression(Offset))
4435     return true;
4436 
4437   // Parse optional fill expression.
4438   int64_t FillExpr = 0;
4439   if (parseOptionalToken(AsmToken::Comma))
4440     if (parseAbsoluteExpression(FillExpr))
4441       return addErrorSuffix(" in '.org' directive");
4442   if (parseToken(AsmToken::EndOfStatement))
4443     return addErrorSuffix(" in '.org' directive");
4444 
4445   getStreamer().emitValueToOffset(Offset, FillExpr, OffsetLoc);
4446   return false;
4447 }
4448 
4449 /// parseDirectiveAlign
4450 ///  ::= align expression
parseDirectiveAlign()4451 bool MasmParser::parseDirectiveAlign() {
4452   SMLoc AlignmentLoc = getLexer().getLoc();
4453   int64_t Alignment;
4454 
4455   if (checkForValidSection())
4456     return addErrorSuffix(" in align directive");
4457   // Ignore empty 'align' directives.
4458   if (getTok().is(AsmToken::EndOfStatement)) {
4459     Warning(AlignmentLoc, "align directive with no operand is ignored");
4460     return parseToken(AsmToken::EndOfStatement);
4461   }
4462   if (parseAbsoluteExpression(Alignment) ||
4463       parseToken(AsmToken::EndOfStatement))
4464     return addErrorSuffix(" in align directive");
4465 
4466   // Always emit an alignment here even if we thrown an error.
4467   bool ReturnVal = false;
4468 
4469   // Reject alignments that aren't either a power of two or zero, for gas
4470   // compatibility. Alignment of zero is silently rounded up to one.
4471   if (Alignment == 0)
4472     Alignment = 1;
4473   if (!isPowerOf2_64(Alignment))
4474     ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
4475 
4476   // Check whether we should use optimal code alignment for this align
4477   // directive.
4478   const MCSection *Section = getStreamer().getCurrentSectionOnly();
4479   assert(Section && "must have section to emit alignment");
4480   if (Section->UseCodeAlign()) {
4481     getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0);
4482   } else {
4483     // FIXME: Target specific behavior about how the "extra" bytes are filled.
4484     getStreamer().emitValueToAlignment(Alignment, /*Value=*/0, /*ValueSize=*/1,
4485                                        /*MaxBytesToEmit=*/0);
4486   }
4487 
4488   return ReturnVal;
4489 }
4490 
4491 /// parseDirectiveFile
4492 /// ::= .file filename
4493 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
parseDirectiveFile(SMLoc DirectiveLoc)4494 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4495   // FIXME: I'm not sure what this is.
4496   int64_t FileNumber = -1;
4497   if (getLexer().is(AsmToken::Integer)) {
4498     FileNumber = getTok().getIntVal();
4499     Lex();
4500 
4501     if (FileNumber < 0)
4502       return TokError("negative file number");
4503   }
4504 
4505   std::string Path;
4506 
4507   // Usually the directory and filename together, otherwise just the directory.
4508   // Allow the strings to have escaped octal character sequence.
4509   if (check(getTok().isNot(AsmToken::String),
4510             "unexpected token in '.file' directive") ||
4511       parseEscapedString(Path))
4512     return true;
4513 
4514   StringRef Directory;
4515   StringRef Filename;
4516   std::string FilenameData;
4517   if (getLexer().is(AsmToken::String)) {
4518     if (check(FileNumber == -1,
4519               "explicit path specified, but no file number") ||
4520         parseEscapedString(FilenameData))
4521       return true;
4522     Filename = FilenameData;
4523     Directory = Path;
4524   } else {
4525     Filename = Path;
4526   }
4527 
4528   uint64_t MD5Hi, MD5Lo;
4529   bool HasMD5 = false;
4530 
4531   Optional<StringRef> Source;
4532   bool HasSource = false;
4533   std::string SourceString;
4534 
4535   while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4536     StringRef Keyword;
4537     if (check(getTok().isNot(AsmToken::Identifier),
4538               "unexpected token in '.file' directive") ||
4539         parseIdentifier(Keyword))
4540       return true;
4541     if (Keyword == "md5") {
4542       HasMD5 = true;
4543       if (check(FileNumber == -1,
4544                 "MD5 checksum specified, but no file number") ||
4545           parseHexOcta(*this, MD5Hi, MD5Lo))
4546         return true;
4547     } else if (Keyword == "source") {
4548       HasSource = true;
4549       if (check(FileNumber == -1,
4550                 "source specified, but no file number") ||
4551           check(getTok().isNot(AsmToken::String),
4552                 "unexpected token in '.file' directive") ||
4553           parseEscapedString(SourceString))
4554         return true;
4555     } else {
4556       return TokError("unexpected token in '.file' directive");
4557     }
4558   }
4559 
4560   if (FileNumber == -1) {
4561     // Ignore the directive if there is no number and the target doesn't support
4562     // numberless .file directives. This allows some portability of assembler
4563     // between different object file formats.
4564     if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4565       getStreamer().emitFileDirective(Filename);
4566   } else {
4567     // In case there is a -g option as well as debug info from directive .file,
4568     // we turn off the -g option, directly use the existing debug info instead.
4569     // Throw away any implicit file table for the assembler source.
4570     if (Ctx.getGenDwarfForAssembly()) {
4571       Ctx.getMCDwarfLineTable(0).resetFileTable();
4572       Ctx.setGenDwarfForAssembly(false);
4573     }
4574 
4575     Optional<MD5::MD5Result> CKMem;
4576     if (HasMD5) {
4577       MD5::MD5Result Sum;
4578       for (unsigned i = 0; i != 8; ++i) {
4579         Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4580         Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4581       }
4582       CKMem = Sum;
4583     }
4584     if (HasSource) {
4585       char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4586       memcpy(SourceBuf, SourceString.data(), SourceString.size());
4587       Source = StringRef(SourceBuf, SourceString.size());
4588     }
4589     if (FileNumber == 0) {
4590       if (Ctx.getDwarfVersion() < 5)
4591         return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4592       getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4593     } else {
4594       Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4595           FileNumber, Directory, Filename, CKMem, Source);
4596       if (!FileNumOrErr)
4597         return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4598     }
4599     // Alert the user if there are some .file directives with MD5 and some not.
4600     // But only do that once.
4601     if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4602       ReportedInconsistentMD5 = true;
4603       return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4604     }
4605   }
4606 
4607   return false;
4608 }
4609 
4610 /// parseDirectiveLine
4611 /// ::= .line [number]
parseDirectiveLine()4612 bool MasmParser::parseDirectiveLine() {
4613   int64_t LineNumber;
4614   if (getLexer().is(AsmToken::Integer)) {
4615     if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4616       return true;
4617     (void)LineNumber;
4618     // FIXME: Do something with the .line.
4619   }
4620   if (parseToken(AsmToken::EndOfStatement,
4621                  "unexpected token in '.line' directive"))
4622     return true;
4623 
4624   return false;
4625 }
4626 
4627 /// parseDirectiveLoc
4628 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4629 ///                                [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4630 /// The first number is a file number, must have been previously assigned with
4631 /// a .file directive, the second number is the line number and optionally the
4632 /// third number is a column position (zero if not specified).  The remaining
4633 /// optional items are .loc sub-directives.
parseDirectiveLoc()4634 bool MasmParser::parseDirectiveLoc() {
4635   int64_t FileNumber = 0, LineNumber = 0;
4636   SMLoc Loc = getTok().getLoc();
4637   if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4638       check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4639             "file number less than one in '.loc' directive") ||
4640       check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4641             "unassigned file number in '.loc' directive"))
4642     return true;
4643 
4644   // optional
4645   if (getLexer().is(AsmToken::Integer)) {
4646     LineNumber = getTok().getIntVal();
4647     if (LineNumber < 0)
4648       return TokError("line number less than zero in '.loc' directive");
4649     Lex();
4650   }
4651 
4652   int64_t ColumnPos = 0;
4653   if (getLexer().is(AsmToken::Integer)) {
4654     ColumnPos = getTok().getIntVal();
4655     if (ColumnPos < 0)
4656       return TokError("column position less than zero in '.loc' directive");
4657     Lex();
4658   }
4659 
4660   auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4661   unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4662   unsigned Isa = 0;
4663   int64_t Discriminator = 0;
4664 
4665   auto parseLocOp = [&]() -> bool {
4666     StringRef Name;
4667     SMLoc Loc = getTok().getLoc();
4668     if (parseIdentifier(Name))
4669       return TokError("unexpected token in '.loc' directive");
4670 
4671     if (Name == "basic_block")
4672       Flags |= DWARF2_FLAG_BASIC_BLOCK;
4673     else if (Name == "prologue_end")
4674       Flags |= DWARF2_FLAG_PROLOGUE_END;
4675     else if (Name == "epilogue_begin")
4676       Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4677     else if (Name == "is_stmt") {
4678       Loc = getTok().getLoc();
4679       const MCExpr *Value;
4680       if (parseExpression(Value))
4681         return true;
4682       // The expression must be the constant 0 or 1.
4683       if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4684         int Value = MCE->getValue();
4685         if (Value == 0)
4686           Flags &= ~DWARF2_FLAG_IS_STMT;
4687         else if (Value == 1)
4688           Flags |= DWARF2_FLAG_IS_STMT;
4689         else
4690           return Error(Loc, "is_stmt value not 0 or 1");
4691       } else {
4692         return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4693       }
4694     } else if (Name == "isa") {
4695       Loc = getTok().getLoc();
4696       const MCExpr *Value;
4697       if (parseExpression(Value))
4698         return true;
4699       // The expression must be a constant greater or equal to 0.
4700       if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4701         int Value = MCE->getValue();
4702         if (Value < 0)
4703           return Error(Loc, "isa number less than zero");
4704         Isa = Value;
4705       } else {
4706         return Error(Loc, "isa number not a constant value");
4707       }
4708     } else if (Name == "discriminator") {
4709       if (parseAbsoluteExpression(Discriminator))
4710         return true;
4711     } else {
4712       return Error(Loc, "unknown sub-directive in '.loc' directive");
4713     }
4714     return false;
4715   };
4716 
4717   if (parseMany(parseLocOp, false /*hasComma*/))
4718     return true;
4719 
4720   getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4721                                       Isa, Discriminator, StringRef());
4722 
4723   return false;
4724 }
4725 
4726 /// parseDirectiveStabs
4727 /// ::= .stabs string, number, number, number
parseDirectiveStabs()4728 bool MasmParser::parseDirectiveStabs() {
4729   return TokError("unsupported directive '.stabs'");
4730 }
4731 
4732 /// parseDirectiveCVFile
4733 /// ::= .cv_file number filename [checksum] [checksumkind]
parseDirectiveCVFile()4734 bool MasmParser::parseDirectiveCVFile() {
4735   SMLoc FileNumberLoc = getTok().getLoc();
4736   int64_t FileNumber;
4737   std::string Filename;
4738   std::string Checksum;
4739   int64_t ChecksumKind = 0;
4740 
4741   if (parseIntToken(FileNumber,
4742                     "expected file number in '.cv_file' directive") ||
4743       check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
4744       check(getTok().isNot(AsmToken::String),
4745             "unexpected token in '.cv_file' directive") ||
4746       parseEscapedString(Filename))
4747     return true;
4748   if (!parseOptionalToken(AsmToken::EndOfStatement)) {
4749     if (check(getTok().isNot(AsmToken::String),
4750               "unexpected token in '.cv_file' directive") ||
4751         parseEscapedString(Checksum) ||
4752         parseIntToken(ChecksumKind,
4753                       "expected checksum kind in '.cv_file' directive") ||
4754         parseToken(AsmToken::EndOfStatement,
4755                    "unexpected token in '.cv_file' directive"))
4756       return true;
4757   }
4758 
4759   Checksum = fromHex(Checksum);
4760   void *CKMem = Ctx.allocate(Checksum.size(), 1);
4761   memcpy(CKMem, Checksum.data(), Checksum.size());
4762   ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
4763                                     Checksum.size());
4764 
4765   if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
4766                                          static_cast<uint8_t>(ChecksumKind)))
4767     return Error(FileNumberLoc, "file number already allocated");
4768 
4769   return false;
4770 }
4771 
parseCVFunctionId(int64_t & FunctionId,StringRef DirectiveName)4772 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
4773                                    StringRef DirectiveName) {
4774   SMLoc Loc;
4775   return parseTokenLoc(Loc) ||
4776          parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
4777                                        "' directive") ||
4778          check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
4779                "expected function id within range [0, UINT_MAX)");
4780 }
4781 
parseCVFileId(int64_t & FileNumber,StringRef DirectiveName)4782 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
4783   SMLoc Loc;
4784   return parseTokenLoc(Loc) ||
4785          parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
4786                                        "' directive") ||
4787          check(FileNumber < 1, Loc, "file number less than one in '" +
4788                                         DirectiveName + "' directive") ||
4789          check(!getCVContext().isValidFileNumber(FileNumber), Loc,
4790                "unassigned file number in '" + DirectiveName + "' directive");
4791 }
4792 
4793 /// parseDirectiveCVFuncId
4794 /// ::= .cv_func_id FunctionId
4795 ///
4796 /// Introduces a function ID that can be used with .cv_loc.
parseDirectiveCVFuncId()4797 bool MasmParser::parseDirectiveCVFuncId() {
4798   SMLoc FunctionIdLoc = getTok().getLoc();
4799   int64_t FunctionId;
4800 
4801   if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
4802       parseToken(AsmToken::EndOfStatement,
4803                  "unexpected token in '.cv_func_id' directive"))
4804     return true;
4805 
4806   if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
4807     return Error(FunctionIdLoc, "function id already allocated");
4808 
4809   return false;
4810 }
4811 
4812 /// parseDirectiveCVInlineSiteId
4813 /// ::= .cv_inline_site_id FunctionId
4814 ///         "within" IAFunc
4815 ///         "inlined_at" IAFile IALine [IACol]
4816 ///
4817 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
4818 /// at" source location information for use in the line table of the caller,
4819 /// whether the caller is a real function or another inlined call site.
parseDirectiveCVInlineSiteId()4820 bool MasmParser::parseDirectiveCVInlineSiteId() {
4821   SMLoc FunctionIdLoc = getTok().getLoc();
4822   int64_t FunctionId;
4823   int64_t IAFunc;
4824   int64_t IAFile;
4825   int64_t IALine;
4826   int64_t IACol = 0;
4827 
4828   // FunctionId
4829   if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
4830     return true;
4831 
4832   // "within"
4833   if (check((getLexer().isNot(AsmToken::Identifier) ||
4834              getTok().getIdentifier() != "within"),
4835             "expected 'within' identifier in '.cv_inline_site_id' directive"))
4836     return true;
4837   Lex();
4838 
4839   // IAFunc
4840   if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
4841     return true;
4842 
4843   // "inlined_at"
4844   if (check((getLexer().isNot(AsmToken::Identifier) ||
4845              getTok().getIdentifier() != "inlined_at"),
4846             "expected 'inlined_at' identifier in '.cv_inline_site_id' "
4847             "directive") )
4848     return true;
4849   Lex();
4850 
4851   // IAFile IALine
4852   if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
4853       parseIntToken(IALine, "expected line number after 'inlined_at'"))
4854     return true;
4855 
4856   // [IACol]
4857   if (getLexer().is(AsmToken::Integer)) {
4858     IACol = getTok().getIntVal();
4859     Lex();
4860   }
4861 
4862   if (parseToken(AsmToken::EndOfStatement,
4863                  "unexpected token in '.cv_inline_site_id' directive"))
4864     return true;
4865 
4866   if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
4867                                                  IALine, IACol, FunctionIdLoc))
4868     return Error(FunctionIdLoc, "function id already allocated");
4869 
4870   return false;
4871 }
4872 
4873 /// parseDirectiveCVLoc
4874 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
4875 ///                                [is_stmt VALUE]
4876 /// The first number is a file number, must have been previously assigned with
4877 /// a .file directive, the second number is the line number and optionally the
4878 /// third number is a column position (zero if not specified).  The remaining
4879 /// optional items are .loc sub-directives.
parseDirectiveCVLoc()4880 bool MasmParser::parseDirectiveCVLoc() {
4881   SMLoc DirectiveLoc = getTok().getLoc();
4882   int64_t FunctionId, FileNumber;
4883   if (parseCVFunctionId(FunctionId, ".cv_loc") ||
4884       parseCVFileId(FileNumber, ".cv_loc"))
4885     return true;
4886 
4887   int64_t LineNumber = 0;
4888   if (getLexer().is(AsmToken::Integer)) {
4889     LineNumber = getTok().getIntVal();
4890     if (LineNumber < 0)
4891       return TokError("line number less than zero in '.cv_loc' directive");
4892     Lex();
4893   }
4894 
4895   int64_t ColumnPos = 0;
4896   if (getLexer().is(AsmToken::Integer)) {
4897     ColumnPos = getTok().getIntVal();
4898     if (ColumnPos < 0)
4899       return TokError("column position less than zero in '.cv_loc' directive");
4900     Lex();
4901   }
4902 
4903   bool PrologueEnd = false;
4904   uint64_t IsStmt = 0;
4905 
4906   auto parseOp = [&]() -> bool {
4907     StringRef Name;
4908     SMLoc Loc = getTok().getLoc();
4909     if (parseIdentifier(Name))
4910       return TokError("unexpected token in '.cv_loc' directive");
4911     if (Name == "prologue_end")
4912       PrologueEnd = true;
4913     else if (Name == "is_stmt") {
4914       Loc = getTok().getLoc();
4915       const MCExpr *Value;
4916       if (parseExpression(Value))
4917         return true;
4918       // The expression must be the constant 0 or 1.
4919       IsStmt = ~0ULL;
4920       if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
4921         IsStmt = MCE->getValue();
4922 
4923       if (IsStmt > 1)
4924         return Error(Loc, "is_stmt value not 0 or 1");
4925     } else {
4926       return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
4927     }
4928     return false;
4929   };
4930 
4931   if (parseMany(parseOp, false /*hasComma*/))
4932     return true;
4933 
4934   getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
4935                                    ColumnPos, PrologueEnd, IsStmt, StringRef(),
4936                                    DirectiveLoc);
4937   return false;
4938 }
4939 
4940 /// parseDirectiveCVLinetable
4941 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
parseDirectiveCVLinetable()4942 bool MasmParser::parseDirectiveCVLinetable() {
4943   int64_t FunctionId;
4944   StringRef FnStartName, FnEndName;
4945   SMLoc Loc = getTok().getLoc();
4946   if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
4947       parseToken(AsmToken::Comma,
4948                  "unexpected token in '.cv_linetable' directive") ||
4949       parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
4950                                   "expected identifier in directive") ||
4951       parseToken(AsmToken::Comma,
4952                  "unexpected token in '.cv_linetable' directive") ||
4953       parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
4954                                   "expected identifier in directive"))
4955     return true;
4956 
4957   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
4958   MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
4959 
4960   getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
4961   return false;
4962 }
4963 
4964 /// parseDirectiveCVInlineLinetable
4965 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
parseDirectiveCVInlineLinetable()4966 bool MasmParser::parseDirectiveCVInlineLinetable() {
4967   int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
4968   StringRef FnStartName, FnEndName;
4969   SMLoc Loc = getTok().getLoc();
4970   if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
4971       parseTokenLoc(Loc) ||
4972       parseIntToken(
4973           SourceFileId,
4974           "expected SourceField in '.cv_inline_linetable' directive") ||
4975       check(SourceFileId <= 0, Loc,
4976             "File id less than zero in '.cv_inline_linetable' directive") ||
4977       parseTokenLoc(Loc) ||
4978       parseIntToken(
4979           SourceLineNum,
4980           "expected SourceLineNum in '.cv_inline_linetable' directive") ||
4981       check(SourceLineNum < 0, Loc,
4982             "Line number less than zero in '.cv_inline_linetable' directive") ||
4983       parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
4984                                   "expected identifier in directive") ||
4985       parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
4986                                   "expected identifier in directive"))
4987     return true;
4988 
4989   if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
4990     return true;
4991 
4992   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
4993   MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
4994   getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
4995                                                SourceLineNum, FnStartSym,
4996                                                FnEndSym);
4997   return false;
4998 }
4999 
initializeCVDefRangeTypeMap()5000 void MasmParser::initializeCVDefRangeTypeMap() {
5001   CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5002   CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5003   CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5004   CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5005 }
5006 
5007 /// parseDirectiveCVDefRange
5008 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
parseDirectiveCVDefRange()5009 bool MasmParser::parseDirectiveCVDefRange() {
5010   SMLoc Loc;
5011   std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5012   while (getLexer().is(AsmToken::Identifier)) {
5013     Loc = getLexer().getLoc();
5014     StringRef GapStartName;
5015     if (parseIdentifier(GapStartName))
5016       return Error(Loc, "expected identifier in directive");
5017     MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5018 
5019     Loc = getLexer().getLoc();
5020     StringRef GapEndName;
5021     if (parseIdentifier(GapEndName))
5022       return Error(Loc, "expected identifier in directive");
5023     MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5024 
5025     Ranges.push_back({GapStartSym, GapEndSym});
5026   }
5027 
5028   StringRef CVDefRangeTypeStr;
5029   if (parseToken(
5030           AsmToken::Comma,
5031           "expected comma before def_range type in .cv_def_range directive") ||
5032       parseIdentifier(CVDefRangeTypeStr))
5033     return Error(Loc, "expected def_range type in directive");
5034 
5035   StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5036       CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5037   CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5038                                 ? CVDR_DEFRANGE
5039                                 : CVTypeIt->getValue();
5040   switch (CVDRType) {
5041   case CVDR_DEFRANGE_REGISTER: {
5042     int64_t DRRegister;
5043     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5044                                     ".cv_def_range directive") ||
5045         parseAbsoluteExpression(DRRegister))
5046       return Error(Loc, "expected register number");
5047 
5048     codeview::DefRangeRegisterHeader DRHdr;
5049     DRHdr.Register = DRRegister;
5050     DRHdr.MayHaveNoName = 0;
5051     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5052     break;
5053   }
5054   case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5055     int64_t DROffset;
5056     if (parseToken(AsmToken::Comma,
5057                    "expected comma before offset in .cv_def_range directive") ||
5058         parseAbsoluteExpression(DROffset))
5059       return Error(Loc, "expected offset value");
5060 
5061     codeview::DefRangeFramePointerRelHeader DRHdr;
5062     DRHdr.Offset = DROffset;
5063     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5064     break;
5065   }
5066   case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5067     int64_t DRRegister;
5068     int64_t DROffsetInParent;
5069     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5070                                     ".cv_def_range directive") ||
5071         parseAbsoluteExpression(DRRegister))
5072       return Error(Loc, "expected register number");
5073     if (parseToken(AsmToken::Comma,
5074                    "expected comma before offset in .cv_def_range directive") ||
5075         parseAbsoluteExpression(DROffsetInParent))
5076       return Error(Loc, "expected offset value");
5077 
5078     codeview::DefRangeSubfieldRegisterHeader DRHdr;
5079     DRHdr.Register = DRRegister;
5080     DRHdr.MayHaveNoName = 0;
5081     DRHdr.OffsetInParent = DROffsetInParent;
5082     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5083     break;
5084   }
5085   case CVDR_DEFRANGE_REGISTER_REL: {
5086     int64_t DRRegister;
5087     int64_t DRFlags;
5088     int64_t DRBasePointerOffset;
5089     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5090                                     ".cv_def_range directive") ||
5091         parseAbsoluteExpression(DRRegister))
5092       return Error(Loc, "expected register value");
5093     if (parseToken(
5094             AsmToken::Comma,
5095             "expected comma before flag value in .cv_def_range directive") ||
5096         parseAbsoluteExpression(DRFlags))
5097       return Error(Loc, "expected flag value");
5098     if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5099                                     "in .cv_def_range directive") ||
5100         parseAbsoluteExpression(DRBasePointerOffset))
5101       return Error(Loc, "expected base pointer offset value");
5102 
5103     codeview::DefRangeRegisterRelHeader DRHdr;
5104     DRHdr.Register = DRRegister;
5105     DRHdr.Flags = DRFlags;
5106     DRHdr.BasePointerOffset = DRBasePointerOffset;
5107     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5108     break;
5109   }
5110   default:
5111     return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5112   }
5113   return true;
5114 }
5115 
5116 /// parseDirectiveCVString
5117 /// ::= .cv_stringtable "string"
parseDirectiveCVString()5118 bool MasmParser::parseDirectiveCVString() {
5119   std::string Data;
5120   if (checkForValidSection() || parseEscapedString(Data))
5121     return addErrorSuffix(" in '.cv_string' directive");
5122 
5123   // Put the string in the table and emit the offset.
5124   std::pair<StringRef, unsigned> Insertion =
5125       getCVContext().addToStringTable(Data);
5126   getStreamer().emitIntValue(Insertion.second, 4);
5127   return false;
5128 }
5129 
5130 /// parseDirectiveCVStringTable
5131 /// ::= .cv_stringtable
parseDirectiveCVStringTable()5132 bool MasmParser::parseDirectiveCVStringTable() {
5133   getStreamer().emitCVStringTableDirective();
5134   return false;
5135 }
5136 
5137 /// parseDirectiveCVFileChecksums
5138 /// ::= .cv_filechecksums
parseDirectiveCVFileChecksums()5139 bool MasmParser::parseDirectiveCVFileChecksums() {
5140   getStreamer().emitCVFileChecksumsDirective();
5141   return false;
5142 }
5143 
5144 /// parseDirectiveCVFileChecksumOffset
5145 /// ::= .cv_filechecksumoffset fileno
parseDirectiveCVFileChecksumOffset()5146 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5147   int64_t FileNo;
5148   if (parseIntToken(FileNo, "expected identifier in directive"))
5149     return true;
5150   if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5151     return true;
5152   getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5153   return false;
5154 }
5155 
5156 /// parseDirectiveCVFPOData
5157 /// ::= .cv_fpo_data procsym
parseDirectiveCVFPOData()5158 bool MasmParser::parseDirectiveCVFPOData() {
5159   SMLoc DirLoc = getLexer().getLoc();
5160   StringRef ProcName;
5161   if (parseIdentifier(ProcName))
5162     return TokError("expected symbol name");
5163   if (parseEOL("unexpected tokens"))
5164     return addErrorSuffix(" in '.cv_fpo_data' directive");
5165   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5166   getStreamer().EmitCVFPOData(ProcSym, DirLoc);
5167   return false;
5168 }
5169 
5170 /// parseDirectiveCFISections
5171 /// ::= .cfi_sections section [, section]
parseDirectiveCFISections()5172 bool MasmParser::parseDirectiveCFISections() {
5173   StringRef Name;
5174   bool EH = false;
5175   bool Debug = false;
5176 
5177   if (parseIdentifier(Name))
5178     return TokError("Expected an identifier");
5179 
5180   if (Name == ".eh_frame")
5181     EH = true;
5182   else if (Name == ".debug_frame")
5183     Debug = true;
5184 
5185   if (getLexer().is(AsmToken::Comma)) {
5186     Lex();
5187 
5188     if (parseIdentifier(Name))
5189       return TokError("Expected an identifier");
5190 
5191     if (Name == ".eh_frame")
5192       EH = true;
5193     else if (Name == ".debug_frame")
5194       Debug = true;
5195   }
5196 
5197   getStreamer().emitCFISections(EH, Debug);
5198   return false;
5199 }
5200 
5201 /// parseDirectiveCFIStartProc
5202 /// ::= .cfi_startproc [simple]
parseDirectiveCFIStartProc()5203 bool MasmParser::parseDirectiveCFIStartProc() {
5204   StringRef Simple;
5205   if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5206     if (check(parseIdentifier(Simple) || Simple != "simple",
5207               "unexpected token") ||
5208         parseToken(AsmToken::EndOfStatement))
5209       return addErrorSuffix(" in '.cfi_startproc' directive");
5210   }
5211 
5212   // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5213   // being produced if this directive is emitted as part of preprocessor macro
5214   // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5215   // Tools like llvm-mc on the other hand are not affected by it, and report
5216   // correct context information.
5217   getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5218   return false;
5219 }
5220 
5221 /// parseDirectiveCFIEndProc
5222 /// ::= .cfi_endproc
parseDirectiveCFIEndProc()5223 bool MasmParser::parseDirectiveCFIEndProc() {
5224   getStreamer().emitCFIEndProc();
5225   return false;
5226 }
5227 
5228 /// parse register name or number.
parseRegisterOrRegisterNumber(int64_t & Register,SMLoc DirectiveLoc)5229 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5230                                                SMLoc DirectiveLoc) {
5231   unsigned RegNo;
5232 
5233   if (getLexer().isNot(AsmToken::Integer)) {
5234     if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5235       return true;
5236     Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5237   } else
5238     return parseAbsoluteExpression(Register);
5239 
5240   return false;
5241 }
5242 
5243 /// parseDirectiveCFIDefCfa
5244 /// ::= .cfi_def_cfa register,  offset
parseDirectiveCFIDefCfa(SMLoc DirectiveLoc)5245 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5246   int64_t Register = 0, Offset = 0;
5247   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5248       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5249       parseAbsoluteExpression(Offset))
5250     return true;
5251 
5252   getStreamer().emitCFIDefCfa(Register, Offset);
5253   return false;
5254 }
5255 
5256 /// parseDirectiveCFIDefCfaOffset
5257 /// ::= .cfi_def_cfa_offset offset
parseDirectiveCFIDefCfaOffset()5258 bool MasmParser::parseDirectiveCFIDefCfaOffset() {
5259   int64_t Offset = 0;
5260   if (parseAbsoluteExpression(Offset))
5261     return true;
5262 
5263   getStreamer().emitCFIDefCfaOffset(Offset);
5264   return false;
5265 }
5266 
5267 /// parseDirectiveCFIRegister
5268 /// ::= .cfi_register register, register
parseDirectiveCFIRegister(SMLoc DirectiveLoc)5269 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5270   int64_t Register1 = 0, Register2 = 0;
5271   if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5272       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5273       parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5274     return true;
5275 
5276   getStreamer().emitCFIRegister(Register1, Register2);
5277   return false;
5278 }
5279 
5280 /// parseDirectiveCFIWindowSave
5281 /// ::= .cfi_window_save
parseDirectiveCFIWindowSave()5282 bool MasmParser::parseDirectiveCFIWindowSave() {
5283   getStreamer().emitCFIWindowSave();
5284   return false;
5285 }
5286 
5287 /// parseDirectiveCFIAdjustCfaOffset
5288 /// ::= .cfi_adjust_cfa_offset adjustment
parseDirectiveCFIAdjustCfaOffset()5289 bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
5290   int64_t Adjustment = 0;
5291   if (parseAbsoluteExpression(Adjustment))
5292     return true;
5293 
5294   getStreamer().emitCFIAdjustCfaOffset(Adjustment);
5295   return false;
5296 }
5297 
5298 /// parseDirectiveCFIDefCfaRegister
5299 /// ::= .cfi_def_cfa_register register
parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc)5300 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5301   int64_t Register = 0;
5302   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5303     return true;
5304 
5305   getStreamer().emitCFIDefCfaRegister(Register);
5306   return false;
5307 }
5308 
5309 /// parseDirectiveCFIOffset
5310 /// ::= .cfi_offset register, offset
parseDirectiveCFIOffset(SMLoc DirectiveLoc)5311 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5312   int64_t Register = 0;
5313   int64_t Offset = 0;
5314 
5315   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5316       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5317       parseAbsoluteExpression(Offset))
5318     return true;
5319 
5320   getStreamer().emitCFIOffset(Register, Offset);
5321   return false;
5322 }
5323 
5324 /// parseDirectiveCFIRelOffset
5325 /// ::= .cfi_rel_offset register, offset
parseDirectiveCFIRelOffset(SMLoc DirectiveLoc)5326 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5327   int64_t Register = 0, Offset = 0;
5328 
5329   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5330       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5331       parseAbsoluteExpression(Offset))
5332     return true;
5333 
5334   getStreamer().emitCFIRelOffset(Register, Offset);
5335   return false;
5336 }
5337 
isValidEncoding(int64_t Encoding)5338 static bool isValidEncoding(int64_t Encoding) {
5339   if (Encoding & ~0xff)
5340     return false;
5341 
5342   if (Encoding == dwarf::DW_EH_PE_omit)
5343     return true;
5344 
5345   const unsigned Format = Encoding & 0xf;
5346   if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5347       Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5348       Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5349       Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5350     return false;
5351 
5352   const unsigned Application = Encoding & 0x70;
5353   if (Application != dwarf::DW_EH_PE_absptr &&
5354       Application != dwarf::DW_EH_PE_pcrel)
5355     return false;
5356 
5357   return true;
5358 }
5359 
5360 /// parseDirectiveCFIPersonalityOrLsda
5361 /// IsPersonality true for cfi_personality, false for cfi_lsda
5362 /// ::= .cfi_personality encoding, [symbol_name]
5363 /// ::= .cfi_lsda encoding, [symbol_name]
parseDirectiveCFIPersonalityOrLsda(bool IsPersonality)5364 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5365   int64_t Encoding = 0;
5366   if (parseAbsoluteExpression(Encoding))
5367     return true;
5368   if (Encoding == dwarf::DW_EH_PE_omit)
5369     return false;
5370 
5371   StringRef Name;
5372   if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5373       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5374       check(parseIdentifier(Name), "expected identifier in directive"))
5375     return true;
5376 
5377   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5378 
5379   if (IsPersonality)
5380     getStreamer().emitCFIPersonality(Sym, Encoding);
5381   else
5382     getStreamer().emitCFILsda(Sym, Encoding);
5383   return false;
5384 }
5385 
5386 /// parseDirectiveCFIRememberState
5387 /// ::= .cfi_remember_state
parseDirectiveCFIRememberState()5388 bool MasmParser::parseDirectiveCFIRememberState() {
5389   getStreamer().emitCFIRememberState();
5390   return false;
5391 }
5392 
5393 /// parseDirectiveCFIRestoreState
5394 /// ::= .cfi_remember_state
parseDirectiveCFIRestoreState()5395 bool MasmParser::parseDirectiveCFIRestoreState() {
5396   getStreamer().emitCFIRestoreState();
5397   return false;
5398 }
5399 
5400 /// parseDirectiveCFISameValue
5401 /// ::= .cfi_same_value register
parseDirectiveCFISameValue(SMLoc DirectiveLoc)5402 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5403   int64_t Register = 0;
5404 
5405   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5406     return true;
5407 
5408   getStreamer().emitCFISameValue(Register);
5409   return false;
5410 }
5411 
5412 /// parseDirectiveCFIRestore
5413 /// ::= .cfi_restore register
parseDirectiveCFIRestore(SMLoc DirectiveLoc)5414 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5415   int64_t Register = 0;
5416   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5417     return true;
5418 
5419   getStreamer().emitCFIRestore(Register);
5420   return false;
5421 }
5422 
5423 /// parseDirectiveCFIEscape
5424 /// ::= .cfi_escape expression[,...]
parseDirectiveCFIEscape()5425 bool MasmParser::parseDirectiveCFIEscape() {
5426   std::string Values;
5427   int64_t CurrValue;
5428   if (parseAbsoluteExpression(CurrValue))
5429     return true;
5430 
5431   Values.push_back((uint8_t)CurrValue);
5432 
5433   while (getLexer().is(AsmToken::Comma)) {
5434     Lex();
5435 
5436     if (parseAbsoluteExpression(CurrValue))
5437       return true;
5438 
5439     Values.push_back((uint8_t)CurrValue);
5440   }
5441 
5442   getStreamer().emitCFIEscape(Values);
5443   return false;
5444 }
5445 
5446 /// parseDirectiveCFIReturnColumn
5447 /// ::= .cfi_return_column register
parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc)5448 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5449   int64_t Register = 0;
5450   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5451     return true;
5452   getStreamer().emitCFIReturnColumn(Register);
5453   return false;
5454 }
5455 
5456 /// parseDirectiveCFISignalFrame
5457 /// ::= .cfi_signal_frame
parseDirectiveCFISignalFrame()5458 bool MasmParser::parseDirectiveCFISignalFrame() {
5459   if (parseToken(AsmToken::EndOfStatement,
5460                  "unexpected token in '.cfi_signal_frame'"))
5461     return true;
5462 
5463   getStreamer().emitCFISignalFrame();
5464   return false;
5465 }
5466 
5467 /// parseDirectiveCFIUndefined
5468 /// ::= .cfi_undefined register
parseDirectiveCFIUndefined(SMLoc DirectiveLoc)5469 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5470   int64_t Register = 0;
5471 
5472   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5473     return true;
5474 
5475   getStreamer().emitCFIUndefined(Register);
5476   return false;
5477 }
5478 
5479 /// parseDirectiveMacro
5480 /// ::= name macro [parameters]
5481 ///     ["LOCAL" identifiers]
5482 ///   parameters ::= parameter [, parameter]*
5483 ///   parameter ::= name ":" qualifier
5484 ///   qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)5485 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5486   MCAsmMacroParameters Parameters;
5487   while (getLexer().isNot(AsmToken::EndOfStatement)) {
5488     if (!Parameters.empty() && Parameters.back().Vararg)
5489       return Error(Lexer.getLoc(),
5490                    "Vararg parameter '" + Parameters.back().Name +
5491                        "' should be last in the list of parameters");
5492 
5493     MCAsmMacroParameter Parameter;
5494     if (parseIdentifier(Parameter.Name))
5495       return TokError("expected identifier in 'macro' directive");
5496 
5497     // Emit an error if two (or more) named parameters share the same name.
5498     for (const MCAsmMacroParameter& CurrParam : Parameters)
5499       if (CurrParam.Name.equals_lower(Parameter.Name))
5500         return TokError("macro '" + Name + "' has multiple parameters"
5501                         " named '" + Parameter.Name + "'");
5502 
5503     if (Lexer.is(AsmToken::Colon)) {
5504       Lex();  // consume ':'
5505 
5506       if (parseOptionalToken(AsmToken::Equal)) {
5507         // Default value
5508         SMLoc ParamLoc;
5509 
5510         ParamLoc = Lexer.getLoc();
5511         if (parseMacroArgument(nullptr, Parameter.Value))
5512           return true;
5513       } else {
5514         SMLoc QualLoc;
5515         StringRef Qualifier;
5516 
5517         QualLoc = Lexer.getLoc();
5518         if (parseIdentifier(Qualifier))
5519           return Error(QualLoc, "missing parameter qualifier for "
5520                                 "'" +
5521                                     Parameter.Name + "' in macro '" + Name +
5522                                     "'");
5523 
5524         if (Qualifier.equals_lower("req"))
5525           Parameter.Required = true;
5526         else if (Qualifier.equals_lower("vararg"))
5527           Parameter.Vararg = true;
5528         else
5529           return Error(QualLoc,
5530                        Qualifier + " is not a valid parameter qualifier for '" +
5531                            Parameter.Name + "' in macro '" + Name + "'");
5532       }
5533     }
5534 
5535     Parameters.push_back(std::move(Parameter));
5536 
5537     if (getLexer().is(AsmToken::Comma))
5538       Lex();
5539   }
5540 
5541   // Eat just the end of statement.
5542   Lexer.Lex();
5543 
5544   std::vector<std::string> Locals;
5545   if (getTok().is(AsmToken::Identifier) &&
5546       getTok().getIdentifier().equals_lower("local")) {
5547     Lex(); // Eat the LOCAL directive.
5548 
5549     StringRef ID;
5550     while (true) {
5551       if (parseIdentifier(ID))
5552         return true;
5553       Locals.push_back(ID.lower());
5554 
5555       // If we see a comma, continue (and allow line continuation).
5556       if (!parseOptionalToken(AsmToken::Comma))
5557         break;
5558       parseOptionalToken(AsmToken::EndOfStatement);
5559     }
5560   }
5561 
5562   // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5563   AsmToken EndToken, StartToken = getTok();
5564   unsigned MacroDepth = 0;
5565   bool IsMacroFunction = false;
5566   // Lex the macro definition.
5567   while (true) {
5568     // Ignore Lexing errors in macros.
5569     while (Lexer.is(AsmToken::Error)) {
5570       Lexer.Lex();
5571     }
5572 
5573     // Check whether we have reached the end of the file.
5574     if (getLexer().is(AsmToken::Eof))
5575       return Error(NameLoc, "no matching 'endm' in definition");
5576 
5577     // Otherwise, check whether we have reached the 'endm'... and determine if
5578     // this is a macro function.
5579     if (getLexer().is(AsmToken::Identifier)) {
5580       if (getTok().getIdentifier().equals_lower("endm")) {
5581         if (MacroDepth == 0) { // Outermost macro.
5582           EndToken = getTok();
5583           Lexer.Lex();
5584           if (getLexer().isNot(AsmToken::EndOfStatement))
5585             return TokError("unexpected token in '" + EndToken.getIdentifier() +
5586                             "' directive");
5587           break;
5588         } else {
5589           // Otherwise we just found the end of an inner macro.
5590           --MacroDepth;
5591         }
5592       } else if (getTok().getIdentifier().equals_lower("exitm")) {
5593         if (MacroDepth == 0 &&
5594             getLexer().peekTok().isNot(AsmToken::EndOfStatement)) {
5595           IsMacroFunction = true;
5596         }
5597       } else if (isMacroLikeDirective()) {
5598         // We allow nested macros. Those aren't instantiated until the
5599         // outermost macro is expanded so just ignore them for now.
5600         ++MacroDepth;
5601       }
5602     }
5603 
5604     // Otherwise, scan til the end of the statement.
5605     eatToEndOfStatement();
5606   }
5607 
5608   if (getContext().lookupMacro(Name.lower())) {
5609     return Error(NameLoc, "macro '" + Name + "' is already defined");
5610   }
5611 
5612   const char *BodyStart = StartToken.getLoc().getPointer();
5613   const char *BodyEnd = EndToken.getLoc().getPointer();
5614   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5615   MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5616                    IsMacroFunction);
5617   DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5618                   Macro.dump());
5619   getContext().defineMacro(Name, std::move(Macro));
5620   return false;
5621 }
5622 
5623 /// parseDirectiveExitMacro
5624 /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)5625 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5626                                          StringRef Directive,
5627                                          std::string &Value) {
5628   SMLoc EndLoc = getTok().getLoc();
5629   if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5630     return Error(EndLoc,
5631                  "unable to parse text item in '" + Directive + "' directive");
5632   eatToEndOfStatement();
5633 
5634   if (!isInsideMacroInstantiation())
5635     return TokError("unexpected '" + Directive + "' in file, "
5636                                                  "no current macro definition");
5637 
5638   // Exit all conditionals that are active in the current macro.
5639   while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5640     TheCondState = TheCondStack.back();
5641     TheCondStack.pop_back();
5642   }
5643 
5644   handleMacroExit();
5645   return false;
5646 }
5647 
5648 /// parseDirectiveEndMacro
5649 /// ::= endm
parseDirectiveEndMacro(StringRef Directive)5650 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5651   if (getLexer().isNot(AsmToken::EndOfStatement))
5652     return TokError("unexpected token in '" + Directive + "' directive");
5653 
5654   // If we are inside a macro instantiation, terminate the current
5655   // instantiation.
5656   if (isInsideMacroInstantiation()) {
5657     handleMacroExit();
5658     return false;
5659   }
5660 
5661   // Otherwise, this .endmacro is a stray entry in the file; well formed
5662   // .endmacro directives are handled during the macro definition parsing.
5663   return TokError("unexpected '" + Directive + "' in file, "
5664                                                "no current macro definition");
5665 }
5666 
5667 /// parseDirectivePurgeMacro
5668 /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)5669 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5670   StringRef Name;
5671   while (true) {
5672     SMLoc NameLoc;
5673     if (parseTokenLoc(NameLoc) ||
5674         check(parseIdentifier(Name), NameLoc,
5675               "expected identifier in 'purge' directive"))
5676       return true;
5677 
5678     DEBUG_WITH_TYPE("asm-macros", dbgs()
5679                                       << "Un-defining macro: " << Name << "\n");
5680     if (!getContext().lookupMacro(Name.lower()))
5681       return Error(NameLoc, "macro '" + Name + "' is not defined");
5682     getContext().undefineMacro(Name.lower());
5683 
5684     if (!parseOptionalToken(AsmToken::Comma))
5685       break;
5686     parseOptionalToken(AsmToken::EndOfStatement);
5687   }
5688 
5689   return false;
5690 }
5691 
5692 /// parseDirectiveSymbolAttribute
5693 ///  ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)5694 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5695   auto parseOp = [&]() -> bool {
5696     StringRef Name;
5697     SMLoc Loc = getTok().getLoc();
5698     if (parseIdentifier(Name))
5699       return Error(Loc, "expected identifier");
5700     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5701 
5702     // Assembler local symbols don't make any sense here. Complain loudly.
5703     if (Sym->isTemporary())
5704       return Error(Loc, "non-local symbol required");
5705 
5706     if (!getStreamer().emitSymbolAttribute(Sym, Attr))
5707       return Error(Loc, "unable to emit symbol attribute");
5708     return false;
5709   };
5710 
5711   if (parseMany(parseOp))
5712     return addErrorSuffix(" in directive");
5713   return false;
5714 }
5715 
5716 /// parseDirectiveComm
5717 ///  ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)5718 bool MasmParser::parseDirectiveComm(bool IsLocal) {
5719   if (checkForValidSection())
5720     return true;
5721 
5722   SMLoc IDLoc = getLexer().getLoc();
5723   StringRef Name;
5724   if (parseIdentifier(Name))
5725     return TokError("expected identifier in directive");
5726 
5727   // Handle the identifier as the key symbol.
5728   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5729 
5730   if (getLexer().isNot(AsmToken::Comma))
5731     return TokError("unexpected token in directive");
5732   Lex();
5733 
5734   int64_t Size;
5735   SMLoc SizeLoc = getLexer().getLoc();
5736   if (parseAbsoluteExpression(Size))
5737     return true;
5738 
5739   int64_t Pow2Alignment = 0;
5740   SMLoc Pow2AlignmentLoc;
5741   if (getLexer().is(AsmToken::Comma)) {
5742     Lex();
5743     Pow2AlignmentLoc = getLexer().getLoc();
5744     if (parseAbsoluteExpression(Pow2Alignment))
5745       return true;
5746 
5747     LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
5748     if (IsLocal && LCOMM == LCOMM::NoAlignment)
5749       return Error(Pow2AlignmentLoc, "alignment not supported on this target");
5750 
5751     // If this target takes alignments in bytes (not log) validate and convert.
5752     if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
5753         (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
5754       if (!isPowerOf2_64(Pow2Alignment))
5755         return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
5756       Pow2Alignment = Log2_64(Pow2Alignment);
5757     }
5758   }
5759 
5760   if (parseToken(AsmToken::EndOfStatement,
5761                  "unexpected token in '.comm' or '.lcomm' directive"))
5762     return true;
5763 
5764   // NOTE: a size of zero for a .comm should create a undefined symbol
5765   // but a size of .lcomm creates a bss symbol of size zero.
5766   if (Size < 0)
5767     return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
5768                           "be less than zero");
5769 
5770   // NOTE: The alignment in the directive is a power of 2 value, the assembler
5771   // may internally end up wanting an alignment in bytes.
5772   // FIXME: Diagnose overflow.
5773   if (Pow2Alignment < 0)
5774     return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
5775                                    "alignment, can't be less than zero");
5776 
5777   Sym->redefineIfPossible();
5778   if (!Sym->isUndefined())
5779     return Error(IDLoc, "invalid symbol redefinition");
5780 
5781   // Create the Symbol as a common or local common with Size and Pow2Alignment.
5782   if (IsLocal) {
5783     getStreamer().emitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
5784     return false;
5785   }
5786 
5787   getStreamer().emitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
5788   return false;
5789 }
5790 
5791 /// parseDirectiveComment
5792 ///  ::= comment delimiter [[text]]
5793 ///              [[text]]
5794 ///              [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)5795 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
5796   std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
5797   size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
5798   StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
5799   if (Delimiter.empty())
5800     return Error(DirectiveLoc, "no delimiter in 'comment' directive");
5801   do {
5802     if (getTok().is(AsmToken::Eof))
5803       return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
5804     Lex();  // eat end of statement
5805   } while (
5806       !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
5807   return parseToken(AsmToken::EndOfStatement,
5808                     "unexpected token in 'comment' directive");
5809 }
5810 
5811 /// parseDirectiveInclude
5812 ///  ::= include <filename>
5813 ///    | include filename
parseDirectiveInclude()5814 bool MasmParser::parseDirectiveInclude() {
5815   // Allow the strings to have escaped octal character sequence.
5816   std::string Filename;
5817   SMLoc IncludeLoc = getTok().getLoc();
5818 
5819   if (!parseAngleBracketString(Filename))
5820     Filename = parseStringTo(AsmToken::EndOfStatement);
5821   if (check(!Filename.empty(), "missing filename in 'include' directive") ||
5822       check(getTok().isNot(AsmToken::EndOfStatement),
5823             "unexpected token in 'include' directive") ||
5824       // Attempt to switch the lexer to the included file before consuming the
5825       // end of statement to avoid losing it when we switch.
5826       check(enterIncludeFile(Filename), IncludeLoc,
5827             "Could not find include file '" + Filename + "'"))
5828     return true;
5829 
5830   return false;
5831 }
5832 
5833 /// parseDirectiveIf
5834 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)5835 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
5836   TheCondStack.push_back(TheCondState);
5837   TheCondState.TheCond = AsmCond::IfCond;
5838   if (TheCondState.Ignore) {
5839     eatToEndOfStatement();
5840   } else {
5841     int64_t ExprValue;
5842     if (parseAbsoluteExpression(ExprValue) ||
5843         parseToken(AsmToken::EndOfStatement,
5844                    "unexpected token in '.if' directive"))
5845       return true;
5846 
5847     switch (DirKind) {
5848     default:
5849       llvm_unreachable("unsupported directive");
5850     case DK_IF:
5851       break;
5852     case DK_IFE:
5853       ExprValue = ExprValue == 0;
5854       break;
5855     }
5856 
5857     TheCondState.CondMet = ExprValue;
5858     TheCondState.Ignore = !TheCondState.CondMet;
5859   }
5860 
5861   return false;
5862 }
5863 
5864 /// parseDirectiveIfb
5865 /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)5866 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
5867   TheCondStack.push_back(TheCondState);
5868   TheCondState.TheCond = AsmCond::IfCond;
5869 
5870   if (TheCondState.Ignore) {
5871     eatToEndOfStatement();
5872   } else {
5873     std::string Str;
5874     if (parseTextItem(Str))
5875       return TokError("expected text item parameter for 'ifb' directive");
5876 
5877     if (parseToken(AsmToken::EndOfStatement,
5878                    "unexpected token in 'ifb' directive"))
5879       return true;
5880 
5881     TheCondState.CondMet = ExpectBlank == Str.empty();
5882     TheCondState.Ignore = !TheCondState.CondMet;
5883   }
5884 
5885   return false;
5886 }
5887 
5888 /// parseDirectiveIfidn
5889 ///   ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)5890 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
5891                                      bool CaseInsensitive) {
5892   std::string String1, String2;
5893 
5894   if (parseTextItem(String1)) {
5895     if (ExpectEqual)
5896       return TokError("expected text item parameter for 'ifidn' directive");
5897     return TokError("expected text item parameter for 'ifdif' directive");
5898   }
5899 
5900   if (Lexer.isNot(AsmToken::Comma)) {
5901     if (ExpectEqual)
5902       return TokError(
5903           "expected comma after first string for 'ifidn' directive");
5904     return TokError("expected comma after first string for 'ifdif' directive");
5905   }
5906   Lex();
5907 
5908   if (parseTextItem(String2)) {
5909     if (ExpectEqual)
5910       return TokError("expected text item parameter for 'ifidn' directive");
5911     return TokError("expected text item parameter for 'ifdif' directive");
5912   }
5913 
5914   TheCondStack.push_back(TheCondState);
5915   TheCondState.TheCond = AsmCond::IfCond;
5916   if (CaseInsensitive)
5917     TheCondState.CondMet =
5918         ExpectEqual == (StringRef(String1).equals_lower(String2));
5919   else
5920     TheCondState.CondMet = ExpectEqual == (String1 == String2);
5921   TheCondState.Ignore = !TheCondState.CondMet;
5922 
5923   return false;
5924 }
5925 
5926 /// parseDirectiveIfdef
5927 /// ::= ifdef symbol
5928 ///   | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)5929 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
5930   TheCondStack.push_back(TheCondState);
5931   TheCondState.TheCond = AsmCond::IfCond;
5932 
5933   if (TheCondState.Ignore) {
5934     eatToEndOfStatement();
5935   } else {
5936     bool is_defined = false;
5937     unsigned RegNo;
5938     SMLoc StartLoc, EndLoc;
5939     is_defined = (getTargetParser().tryParseRegister(
5940                       RegNo, StartLoc, EndLoc) == MatchOperand_Success);
5941     if (!is_defined) {
5942       StringRef Name;
5943       if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
5944           parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'"))
5945         return true;
5946 
5947       if (Variables.find(Name) != Variables.end()) {
5948         is_defined = true;
5949       } else {
5950         MCSymbol *Sym = getContext().lookupSymbol(Name);
5951         is_defined = (Sym && !Sym->isUndefined(false));
5952       }
5953     }
5954 
5955     TheCondState.CondMet = (is_defined == expect_defined);
5956     TheCondState.Ignore = !TheCondState.CondMet;
5957   }
5958 
5959   return false;
5960 }
5961 
5962 /// parseDirectiveElseIf
5963 /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)5964 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
5965                                       DirectiveKind DirKind) {
5966   if (TheCondState.TheCond != AsmCond::IfCond &&
5967       TheCondState.TheCond != AsmCond::ElseIfCond)
5968     return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
5969                                " .if or  an .elseif");
5970   TheCondState.TheCond = AsmCond::ElseIfCond;
5971 
5972   bool LastIgnoreState = false;
5973   if (!TheCondStack.empty())
5974     LastIgnoreState = TheCondStack.back().Ignore;
5975   if (LastIgnoreState || TheCondState.CondMet) {
5976     TheCondState.Ignore = true;
5977     eatToEndOfStatement();
5978   } else {
5979     int64_t ExprValue;
5980     if (parseAbsoluteExpression(ExprValue))
5981       return true;
5982 
5983     if (parseToken(AsmToken::EndOfStatement,
5984                    "unexpected token in '.elseif' directive"))
5985       return true;
5986 
5987     switch (DirKind) {
5988     default:
5989       llvm_unreachable("unsupported directive");
5990     case DK_ELSEIF:
5991       break;
5992     case DK_ELSEIFE:
5993       ExprValue = ExprValue == 0;
5994       break;
5995     }
5996 
5997     TheCondState.CondMet = ExprValue;
5998     TheCondState.Ignore = !TheCondState.CondMet;
5999   }
6000 
6001   return false;
6002 }
6003 
6004 /// parseDirectiveElseIfb
6005 /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)6006 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6007   if (TheCondState.TheCond != AsmCond::IfCond &&
6008       TheCondState.TheCond != AsmCond::ElseIfCond)
6009     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6010                                " if or an elseif");
6011   TheCondState.TheCond = AsmCond::ElseIfCond;
6012 
6013   bool LastIgnoreState = false;
6014   if (!TheCondStack.empty())
6015     LastIgnoreState = TheCondStack.back().Ignore;
6016   if (LastIgnoreState || TheCondState.CondMet) {
6017     TheCondState.Ignore = true;
6018     eatToEndOfStatement();
6019   } else {
6020     std::string Str;
6021     if (parseTextItem(Str)) {
6022       if (ExpectBlank)
6023         return TokError("expected text item parameter for 'elseifb' directive");
6024       return TokError("expected text item parameter for 'elseifnb' directive");
6025     }
6026 
6027     if (parseToken(AsmToken::EndOfStatement,
6028                    "unexpected token in 'elseifb' directive"))
6029       return true;
6030 
6031     TheCondState.CondMet = ExpectBlank == Str.empty();
6032     TheCondState.Ignore = !TheCondState.CondMet;
6033   }
6034 
6035   return false;
6036 }
6037 
6038 /// parseDirectiveElseIfdef
6039 /// ::= elseifdef symbol
6040 ///   | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)6041 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6042                                          bool expect_defined) {
6043   if (TheCondState.TheCond != AsmCond::IfCond &&
6044       TheCondState.TheCond != AsmCond::ElseIfCond)
6045     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6046                                " if or an elseif");
6047   TheCondState.TheCond = AsmCond::ElseIfCond;
6048 
6049   bool LastIgnoreState = false;
6050   if (!TheCondStack.empty())
6051     LastIgnoreState = TheCondStack.back().Ignore;
6052   if (LastIgnoreState || TheCondState.CondMet) {
6053     TheCondState.Ignore = true;
6054     eatToEndOfStatement();
6055   } else {
6056     bool is_defined = false;
6057     unsigned RegNo;
6058     SMLoc StartLoc, EndLoc;
6059     is_defined = (getTargetParser().tryParseRegister(RegNo, StartLoc, EndLoc) ==
6060                   MatchOperand_Success);
6061     if (!is_defined) {
6062       StringRef Name;
6063       if (check(parseIdentifier(Name),
6064                 "expected identifier after 'elseifdef'") ||
6065           parseToken(AsmToken::EndOfStatement,
6066                      "unexpected token in 'elseifdef'"))
6067         return true;
6068 
6069       if (Variables.find(Name) != Variables.end()) {
6070         is_defined = true;
6071       } else {
6072         MCSymbol *Sym = getContext().lookupSymbol(Name);
6073         is_defined = (Sym && !Sym->isUndefined(false));
6074       }
6075     }
6076 
6077     TheCondState.CondMet = (is_defined == expect_defined);
6078     TheCondState.Ignore = !TheCondState.CondMet;
6079   }
6080 
6081   return false;
6082 }
6083 
6084 /// parseDirectiveElseIfidn
6085 /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6086 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6087                                          bool CaseInsensitive) {
6088   if (TheCondState.TheCond != AsmCond::IfCond &&
6089       TheCondState.TheCond != AsmCond::ElseIfCond)
6090     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6091                                " if or an elseif");
6092   TheCondState.TheCond = AsmCond::ElseIfCond;
6093 
6094   bool LastIgnoreState = false;
6095   if (!TheCondStack.empty())
6096     LastIgnoreState = TheCondStack.back().Ignore;
6097   if (LastIgnoreState || TheCondState.CondMet) {
6098     TheCondState.Ignore = true;
6099     eatToEndOfStatement();
6100   } else {
6101     std::string String1, String2;
6102 
6103     if (parseTextItem(String1)) {
6104       if (ExpectEqual)
6105         return TokError(
6106             "expected text item parameter for 'elseifidn' directive");
6107       return TokError("expected text item parameter for 'elseifdif' directive");
6108     }
6109 
6110     if (Lexer.isNot(AsmToken::Comma)) {
6111       if (ExpectEqual)
6112         return TokError(
6113             "expected comma after first string for 'elseifidn' directive");
6114       return TokError(
6115           "expected comma after first string for 'elseifdif' directive");
6116     }
6117     Lex();
6118 
6119     if (parseTextItem(String2)) {
6120       if (ExpectEqual)
6121         return TokError(
6122             "expected text item parameter for 'elseifidn' directive");
6123       return TokError("expected text item parameter for 'elseifdif' directive");
6124     }
6125 
6126     if (CaseInsensitive)
6127       TheCondState.CondMet =
6128           ExpectEqual == (StringRef(String1).equals_lower(String2));
6129     else
6130       TheCondState.CondMet = ExpectEqual == (String1 == String2);
6131     TheCondState.Ignore = !TheCondState.CondMet;
6132   }
6133 
6134   return false;
6135 }
6136 
6137 /// parseDirectiveElse
6138 /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)6139 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6140   if (parseToken(AsmToken::EndOfStatement,
6141                  "unexpected token in 'else' directive"))
6142     return true;
6143 
6144   if (TheCondState.TheCond != AsmCond::IfCond &&
6145       TheCondState.TheCond != AsmCond::ElseIfCond)
6146     return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6147                                " or an elseif");
6148   TheCondState.TheCond = AsmCond::ElseCond;
6149   bool LastIgnoreState = false;
6150   if (!TheCondStack.empty())
6151     LastIgnoreState = TheCondStack.back().Ignore;
6152   if (LastIgnoreState || TheCondState.CondMet)
6153     TheCondState.Ignore = true;
6154   else
6155     TheCondState.Ignore = false;
6156 
6157   return false;
6158 }
6159 
6160 /// parseDirectiveEnd
6161 /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)6162 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6163   if (parseToken(AsmToken::EndOfStatement,
6164                  "unexpected token in 'end' directive"))
6165     return true;
6166 
6167   while (Lexer.isNot(AsmToken::Eof))
6168     Lexer.Lex();
6169 
6170   return false;
6171 }
6172 
6173 /// parseDirectiveError
6174 ///   ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)6175 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6176   if (!TheCondStack.empty()) {
6177     if (TheCondStack.back().Ignore) {
6178       eatToEndOfStatement();
6179       return false;
6180     }
6181   }
6182 
6183   std::string Message = ".err directive invoked in source file";
6184   if (Lexer.isNot(AsmToken::EndOfStatement))
6185     Message = parseStringTo(AsmToken::EndOfStatement);
6186   Lex();
6187 
6188   return Error(DirectiveLoc, Message);
6189 }
6190 
6191 /// parseDirectiveErrorIfb
6192 ///   ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)6193 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6194   if (!TheCondStack.empty()) {
6195     if (TheCondStack.back().Ignore) {
6196       eatToEndOfStatement();
6197       return false;
6198     }
6199   }
6200 
6201   std::string Text;
6202   if (parseTextItem(Text))
6203     return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6204 
6205   std::string Message = ".errb directive invoked in source file";
6206   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6207     if (parseToken(AsmToken::Comma))
6208       return addErrorSuffix(" in '.errb' directive");
6209     Message = parseStringTo(AsmToken::EndOfStatement);
6210   }
6211   Lex();
6212 
6213   if (Text.empty() == ExpectBlank)
6214     return Error(DirectiveLoc, Message);
6215   return false;
6216 }
6217 
6218 /// parseDirectiveErrorIfdef
6219 ///   ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)6220 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6221                                           bool ExpectDefined) {
6222   if (!TheCondStack.empty()) {
6223     if (TheCondStack.back().Ignore) {
6224       eatToEndOfStatement();
6225       return false;
6226     }
6227   }
6228 
6229   bool IsDefined = false;
6230   unsigned RegNo;
6231   SMLoc StartLoc, EndLoc;
6232   IsDefined = (getTargetParser().tryParseRegister(RegNo, StartLoc, EndLoc) ==
6233                MatchOperand_Success);
6234   if (!IsDefined) {
6235     StringRef Name;
6236     if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6237       return true;
6238 
6239     if (Variables.find(Name) != Variables.end()) {
6240       IsDefined = true;
6241     } else {
6242       MCSymbol *Sym = getContext().lookupSymbol(Name);
6243       IsDefined = (Sym && !Sym->isUndefined(false));
6244     }
6245   }
6246 
6247   std::string Message = ".errdef directive invoked in source file";
6248   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6249     if (parseToken(AsmToken::Comma))
6250       return addErrorSuffix(" in '.errdef' directive");
6251     Message = parseStringTo(AsmToken::EndOfStatement);
6252   }
6253   Lex();
6254 
6255   if (IsDefined == ExpectDefined)
6256     return Error(DirectiveLoc, Message);
6257   return false;
6258 }
6259 
6260 /// parseDirectiveErrorIfidn
6261 ///   ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6262 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6263                                           bool CaseInsensitive) {
6264   if (!TheCondStack.empty()) {
6265     if (TheCondStack.back().Ignore) {
6266       eatToEndOfStatement();
6267       return false;
6268     }
6269   }
6270 
6271   std::string String1, String2;
6272 
6273   if (parseTextItem(String1)) {
6274     if (ExpectEqual)
6275       return TokError("expected string parameter for '.erridn' directive");
6276     return TokError("expected string parameter for '.errdif' directive");
6277   }
6278 
6279   if (Lexer.isNot(AsmToken::Comma)) {
6280     if (ExpectEqual)
6281       return TokError(
6282           "expected comma after first string for '.erridn' directive");
6283     return TokError(
6284         "expected comma after first string for '.errdif' directive");
6285   }
6286   Lex();
6287 
6288   if (parseTextItem(String2)) {
6289     if (ExpectEqual)
6290       return TokError("expected string parameter for '.erridn' directive");
6291     return TokError("expected string parameter for '.errdif' directive");
6292   }
6293 
6294   std::string Message;
6295   if (ExpectEqual)
6296     Message = ".erridn directive invoked in source file";
6297   else
6298     Message = ".errdif directive invoked in source file";
6299   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6300     if (parseToken(AsmToken::Comma))
6301       return addErrorSuffix(" in '.erridn' directive");
6302     Message = parseStringTo(AsmToken::EndOfStatement);
6303   }
6304   Lex();
6305 
6306   if (CaseInsensitive)
6307     TheCondState.CondMet =
6308         ExpectEqual == (StringRef(String1).equals_lower(String2));
6309   else
6310     TheCondState.CondMet = ExpectEqual == (String1 == String2);
6311   TheCondState.Ignore = !TheCondState.CondMet;
6312 
6313   if ((CaseInsensitive &&
6314        ExpectEqual == StringRef(String1).equals_lower(String2)) ||
6315       (ExpectEqual == (String1 == String2)))
6316     return Error(DirectiveLoc, Message);
6317   return false;
6318 }
6319 
6320 /// parseDirectiveErrorIfe
6321 ///   ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)6322 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6323   if (!TheCondStack.empty()) {
6324     if (TheCondStack.back().Ignore) {
6325       eatToEndOfStatement();
6326       return false;
6327     }
6328   }
6329 
6330   int64_t ExprValue;
6331   if (parseAbsoluteExpression(ExprValue))
6332     return addErrorSuffix(" in '.erre' directive");
6333 
6334   std::string Message = ".erre directive invoked in source file";
6335   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6336     if (parseToken(AsmToken::Comma))
6337       return addErrorSuffix(" in '.erre' directive");
6338     Message = parseStringTo(AsmToken::EndOfStatement);
6339   }
6340   Lex();
6341 
6342   if ((ExprValue == 0) == ExpectZero)
6343     return Error(DirectiveLoc, Message);
6344   return false;
6345 }
6346 
6347 /// parseDirectiveEndIf
6348 /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)6349 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6350   if (parseToken(AsmToken::EndOfStatement,
6351                  "unexpected token in '.endif' directive"))
6352     return true;
6353 
6354   if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6355     return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6356                                "an .if or .else");
6357   if (!TheCondStack.empty()) {
6358     TheCondState = TheCondStack.back();
6359     TheCondStack.pop_back();
6360   }
6361 
6362   return false;
6363 }
6364 
initializeDirectiveKindMap()6365 void MasmParser::initializeDirectiveKindMap() {
6366   DirectiveKindMap["="] = DK_ASSIGN;
6367   DirectiveKindMap["equ"] = DK_EQU;
6368   DirectiveKindMap["textequ"] = DK_TEXTEQU;
6369   // DirectiveKindMap[".ascii"] = DK_ASCII;
6370   // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6371   // DirectiveKindMap[".string"] = DK_STRING;
6372   DirectiveKindMap["byte"] = DK_BYTE;
6373   DirectiveKindMap["sbyte"] = DK_SBYTE;
6374   DirectiveKindMap["word"] = DK_WORD;
6375   DirectiveKindMap["sword"] = DK_SWORD;
6376   DirectiveKindMap["dword"] = DK_DWORD;
6377   DirectiveKindMap["sdword"] = DK_SDWORD;
6378   DirectiveKindMap["fword"] = DK_FWORD;
6379   DirectiveKindMap["qword"] = DK_QWORD;
6380   DirectiveKindMap["sqword"] = DK_SQWORD;
6381   DirectiveKindMap["real4"] = DK_REAL4;
6382   DirectiveKindMap["real8"] = DK_REAL8;
6383   DirectiveKindMap["real10"] = DK_REAL10;
6384   DirectiveKindMap["align"] = DK_ALIGN;
6385   // DirectiveKindMap[".org"] = DK_ORG;
6386   DirectiveKindMap["extern"] = DK_EXTERN;
6387   DirectiveKindMap["public"] = DK_PUBLIC;
6388   // DirectiveKindMap[".comm"] = DK_COMM;
6389   DirectiveKindMap["comment"] = DK_COMMENT;
6390   DirectiveKindMap["include"] = DK_INCLUDE;
6391   DirectiveKindMap["repeat"] = DK_REPEAT;
6392   DirectiveKindMap["rept"] = DK_REPEAT;
6393   DirectiveKindMap["while"] = DK_WHILE;
6394   DirectiveKindMap["for"] = DK_FOR;
6395   DirectiveKindMap["irp"] = DK_FOR;
6396   DirectiveKindMap["forc"] = DK_FORC;
6397   DirectiveKindMap["irpc"] = DK_FORC;
6398   DirectiveKindMap["if"] = DK_IF;
6399   DirectiveKindMap["ife"] = DK_IFE;
6400   DirectiveKindMap["ifb"] = DK_IFB;
6401   DirectiveKindMap["ifnb"] = DK_IFNB;
6402   DirectiveKindMap["ifdef"] = DK_IFDEF;
6403   DirectiveKindMap["ifndef"] = DK_IFNDEF;
6404   DirectiveKindMap["ifdif"] = DK_IFDIF;
6405   DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6406   DirectiveKindMap["ifidn"] = DK_IFIDN;
6407   DirectiveKindMap["ifidni"] = DK_IFIDNI;
6408   DirectiveKindMap["elseif"] = DK_ELSEIF;
6409   DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6410   DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6411   DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6412   DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6413   DirectiveKindMap["else"] = DK_ELSE;
6414   DirectiveKindMap["end"] = DK_END;
6415   DirectiveKindMap["endif"] = DK_ENDIF;
6416   // DirectiveKindMap[".file"] = DK_FILE;
6417   // DirectiveKindMap[".line"] = DK_LINE;
6418   // DirectiveKindMap[".loc"] = DK_LOC;
6419   // DirectiveKindMap[".stabs"] = DK_STABS;
6420   // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6421   // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6422   // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6423   // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6424   // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6425   // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6426   // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6427   // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6428   // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6429   // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6430   // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6431   // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6432   // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6433   // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6434   // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6435   // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6436   // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6437   // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6438   // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6439   // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6440   // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6441   // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6442   // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6443   // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6444   // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6445   // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6446   // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6447   // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6448   // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6449   // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6450   // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6451   // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6452   // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6453   // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6454   DirectiveKindMap["macro"] = DK_MACRO;
6455   DirectiveKindMap["exitm"] = DK_EXITM;
6456   DirectiveKindMap["endm"] = DK_ENDM;
6457   DirectiveKindMap["purge"] = DK_PURGE;
6458   DirectiveKindMap[".err"] = DK_ERR;
6459   DirectiveKindMap[".errb"] = DK_ERRB;
6460   DirectiveKindMap[".errnb"] = DK_ERRNB;
6461   DirectiveKindMap[".errdef"] = DK_ERRDEF;
6462   DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6463   DirectiveKindMap[".errdif"] = DK_ERRDIF;
6464   DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6465   DirectiveKindMap[".erridn"] = DK_ERRIDN;
6466   DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6467   DirectiveKindMap[".erre"] = DK_ERRE;
6468   DirectiveKindMap[".errnz"] = DK_ERRNZ;
6469   DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6470   DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6471   DirectiveKindMap[".savereg"] = DK_SAVEREG;
6472   DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6473   DirectiveKindMap[".setframe"] = DK_SETFRAME;
6474   DirectiveKindMap[".radix"] = DK_RADIX;
6475   DirectiveKindMap["db"] = DK_DB;
6476   DirectiveKindMap["dd"] = DK_DD;
6477   DirectiveKindMap["df"] = DK_DF;
6478   DirectiveKindMap["dq"] = DK_DQ;
6479   DirectiveKindMap["dw"] = DK_DW;
6480   DirectiveKindMap["echo"] = DK_ECHO;
6481   DirectiveKindMap["struc"] = DK_STRUCT;
6482   DirectiveKindMap["struct"] = DK_STRUCT;
6483   DirectiveKindMap["union"] = DK_UNION;
6484   DirectiveKindMap["ends"] = DK_ENDS;
6485 }
6486 
isMacroLikeDirective()6487 bool MasmParser::isMacroLikeDirective() {
6488   if (getLexer().is(AsmToken::Identifier)) {
6489     bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6490                            .CasesLower("repeat", "rept", true)
6491                            .CaseLower("while", true)
6492                            .CasesLower("for", "irp", true)
6493                            .CasesLower("forc", "irpc", true)
6494                            .Default(false);
6495     if (IsMacroLike)
6496       return true;
6497   }
6498   if (getLexer().peekTok().is(AsmToken::Identifier) &&
6499       getLexer().peekTok().getIdentifier().equals_lower("macro"))
6500     return true;
6501 
6502   return false;
6503 }
6504 
parseMacroLikeBody(SMLoc DirectiveLoc)6505 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6506   AsmToken EndToken, StartToken = getTok();
6507 
6508   unsigned NestLevel = 0;
6509   while (true) {
6510     // Check whether we have reached the end of the file.
6511     if (getLexer().is(AsmToken::Eof)) {
6512       printError(DirectiveLoc, "no matching 'endm' in definition");
6513       return nullptr;
6514     }
6515 
6516     if (isMacroLikeDirective())
6517       ++NestLevel;
6518 
6519     // Otherwise, check whether we have reached the endm.
6520     if (Lexer.is(AsmToken::Identifier) &&
6521         getTok().getIdentifier().equals_lower("endm")) {
6522       if (NestLevel == 0) {
6523         EndToken = getTok();
6524         Lex();
6525         if (Lexer.isNot(AsmToken::EndOfStatement)) {
6526           printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6527           return nullptr;
6528         }
6529         break;
6530       }
6531       --NestLevel;
6532     }
6533 
6534     // Otherwise, scan till the end of the statement.
6535     eatToEndOfStatement();
6536   }
6537 
6538   const char *BodyStart = StartToken.getLoc().getPointer();
6539   const char *BodyEnd = EndToken.getLoc().getPointer();
6540   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6541 
6542   // We Are Anonymous.
6543   MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6544   return &MacroLikeBodies.back();
6545 }
6546 
expandStatement(SMLoc Loc)6547 bool MasmParser::expandStatement(SMLoc Loc) {
6548   std::string Body = parseStringTo(AsmToken::EndOfStatement);
6549   SMLoc EndLoc = getTok().getLoc();
6550 
6551   MCAsmMacroParameters Parameters;
6552   MCAsmMacroArguments Arguments;
6553   for (const auto &V : Variables) {
6554     const Variable &Var = V.getValue();
6555     if (Var.IsText) {
6556       Parameters.emplace_back();
6557       Arguments.emplace_back();
6558       MCAsmMacroParameter &P = Parameters.back();
6559       MCAsmMacroArgument &A = Arguments.back();
6560       P.Name = Var.Name;
6561       P.Required = true;
6562       A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6563     }
6564   }
6565   MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6566   MCAsmMacro M = MacroLikeBodies.back();
6567 
6568   // Expand the statement in a new buffer.
6569   SmallString<80> Buf;
6570   raw_svector_ostream OS(Buf);
6571   if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6572     return true;
6573   std::unique_ptr<MemoryBuffer> Expansion =
6574       MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6575 
6576   // Jump to the expanded statement and prime the lexer.
6577   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6578   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6579   EndStatementAtEOFStack.push_back(false);
6580   Lex();
6581   return false;
6582 }
6583 
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)6584 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6585                                           raw_svector_ostream &OS) {
6586   instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6587 }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)6588 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6589                                           SMLoc ExitLoc,
6590                                           raw_svector_ostream &OS) {
6591   OS << "endm\n";
6592 
6593   std::unique_ptr<MemoryBuffer> Instantiation =
6594       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6595 
6596   // Create the macro instantiation object and add to the current macro
6597   // instantiation stack.
6598   MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6599                                                   ExitLoc, TheCondStack.size()};
6600   ActiveMacros.push_back(MI);
6601 
6602   // Jump to the macro instantiation and prime the lexer.
6603   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6604   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6605   EndStatementAtEOFStack.push_back(true);
6606   Lex();
6607 }
6608 
6609 /// parseDirectiveRepeat
6610 ///   ::= ("repeat" | "rept") count
6611 ///       body
6612 ///     endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)6613 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6614   const MCExpr *CountExpr;
6615   SMLoc CountLoc = getTok().getLoc();
6616   if (parseExpression(CountExpr))
6617     return true;
6618 
6619   int64_t Count;
6620   if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6621     return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6622   }
6623 
6624   if (check(Count < 0, CountLoc, "Count is negative") ||
6625       parseToken(AsmToken::EndOfStatement,
6626                  "unexpected token in '" + Dir + "' directive"))
6627     return true;
6628 
6629   // Lex the repeat definition.
6630   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6631   if (!M)
6632     return true;
6633 
6634   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6635   // to hold the macro body with substitutions.
6636   SmallString<256> Buf;
6637   raw_svector_ostream OS(Buf);
6638   while (Count--) {
6639     if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
6640       return true;
6641   }
6642   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6643 
6644   return false;
6645 }
6646 
6647 /// parseDirectiveWhile
6648 /// ::= "while" expression
6649 ///       body
6650 ///     endm
parseDirectiveWhile(SMLoc DirectiveLoc)6651 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6652   const MCExpr *CondExpr;
6653   SMLoc CondLoc = getTok().getLoc();
6654   if (parseExpression(CondExpr))
6655     return true;
6656 
6657   // Lex the repeat definition.
6658   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6659   if (!M)
6660     return true;
6661 
6662   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6663   // to hold the macro body with substitutions.
6664   SmallString<256> Buf;
6665   raw_svector_ostream OS(Buf);
6666   int64_t Condition;
6667   if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6668     return Error(CondLoc, "expected absolute expression in 'while' directive");
6669   if (Condition) {
6670     // Instantiate the macro, then resume at this directive to recheck the
6671     // condition.
6672     if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
6673       return true;
6674     instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6675   }
6676 
6677   return false;
6678 }
6679 
6680 /// parseDirectiveFor
6681 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
6682 ///       body
6683 ///     endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)6684 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
6685   MCAsmMacroParameter Parameter;
6686   MCAsmMacroArguments A;
6687   if (check(parseIdentifier(Parameter.Name),
6688             "expected identifier in '" + Dir + "' directive"))
6689     return true;
6690 
6691   // Parse optional qualifier (default value, or "req")
6692   if (parseOptionalToken(AsmToken::Colon)) {
6693     if (parseOptionalToken(AsmToken::Equal)) {
6694       // Default value
6695       SMLoc ParamLoc;
6696 
6697       ParamLoc = Lexer.getLoc();
6698       if (parseMacroArgument(nullptr, Parameter.Value))
6699         return true;
6700     } else {
6701       SMLoc QualLoc;
6702       StringRef Qualifier;
6703 
6704       QualLoc = Lexer.getLoc();
6705       if (parseIdentifier(Qualifier))
6706         return Error(QualLoc, "missing parameter qualifier for "
6707                               "'" +
6708                                   Parameter.Name + "' in '" + Dir +
6709                                   "' directive");
6710 
6711       if (Qualifier.equals_lower("req"))
6712         Parameter.Required = true;
6713       else
6714         return Error(QualLoc,
6715                      Qualifier + " is not a valid parameter qualifier for '" +
6716                          Parameter.Name + "' in '" + Dir + "' directive");
6717     }
6718   }
6719 
6720   if (parseToken(AsmToken::Comma,
6721                  "expected comma in '" + Dir + "' directive") ||
6722       parseToken(AsmToken::Less,
6723                  "values in '" + Dir +
6724                      "' directive must be enclosed in angle brackets"))
6725     return true;
6726 
6727   while (true) {
6728     A.emplace_back();
6729     if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
6730       return addErrorSuffix(" in arguments for '" + Dir + "' directive");
6731 
6732     // If we see a comma, continue, and allow line continuation.
6733     if (!parseOptionalToken(AsmToken::Comma))
6734       break;
6735     parseOptionalToken(AsmToken::EndOfStatement);
6736   }
6737 
6738   if (parseToken(AsmToken::Greater,
6739                  "values in '" + Dir +
6740                      "' directive must be enclosed in angle brackets") ||
6741       parseToken(AsmToken::EndOfStatement, "expected End of Statement"))
6742     return true;
6743 
6744   // Lex the for definition.
6745   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6746   if (!M)
6747     return true;
6748 
6749   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6750   // to hold the macro body with substitutions.
6751   SmallString<256> Buf;
6752   raw_svector_ostream OS(Buf);
6753 
6754   for (const MCAsmMacroArgument &Arg : A) {
6755     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
6756       return true;
6757   }
6758 
6759   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6760 
6761   return false;
6762 }
6763 
6764 /// parseDirectiveForc
6765 /// ::= ("forc" | "irpc") symbol, <string>
6766 ///       body
6767 ///     endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)6768 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
6769   MCAsmMacroParameter Parameter;
6770 
6771   std::string Argument;
6772   if (check(parseIdentifier(Parameter.Name),
6773             "expected identifier in '" + Directive + "' directive") ||
6774       parseToken(AsmToken::Comma,
6775                  "expected comma in '" + Directive + "' directive"))
6776     return true;
6777   if (parseAngleBracketString(Argument)) {
6778     // Match ml64.exe; treat all characters to end of statement as a string,
6779     // ignoring comment markers, then discard anything following a space (using
6780     // the C locale).
6781     Argument = parseStringTo(AsmToken::EndOfStatement);
6782     if (getTok().is(AsmToken::EndOfStatement))
6783       Argument += getTok().getString();
6784     size_t End = 0;
6785     for (; End < Argument.size(); ++End) {
6786       if (isSpace(Argument[End]))
6787         break;
6788     }
6789     Argument.resize(End);
6790   }
6791   if (parseToken(AsmToken::EndOfStatement, "expected end of statement"))
6792     return true;
6793 
6794   // Lex the irpc definition.
6795   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6796   if (!M)
6797     return true;
6798 
6799   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6800   // to hold the macro body with substitutions.
6801   SmallString<256> Buf;
6802   raw_svector_ostream OS(Buf);
6803 
6804   StringRef Values(Argument);
6805   for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
6806     MCAsmMacroArgument Arg;
6807     Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
6808 
6809     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
6810       return true;
6811   }
6812 
6813   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6814 
6815   return false;
6816 }
6817 
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)6818 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
6819                                       size_t Len) {
6820   const MCExpr *Value;
6821   SMLoc ExprLoc = getLexer().getLoc();
6822   if (parseExpression(Value))
6823     return true;
6824   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
6825   if (!MCE)
6826     return Error(ExprLoc, "unexpected expression in _emit");
6827   uint64_t IntValue = MCE->getValue();
6828   if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
6829     return Error(ExprLoc, "literal value out of range for directive");
6830 
6831   Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
6832   return false;
6833 }
6834 
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)6835 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
6836   const MCExpr *Value;
6837   SMLoc ExprLoc = getLexer().getLoc();
6838   if (parseExpression(Value))
6839     return true;
6840   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
6841   if (!MCE)
6842     return Error(ExprLoc, "unexpected expression in align");
6843   uint64_t IntValue = MCE->getValue();
6844   if (!isPowerOf2_64(IntValue))
6845     return Error(ExprLoc, "literal value not a power of two greater then zero");
6846 
6847   Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
6848   return false;
6849 }
6850 
parseDirectiveRadix(SMLoc DirectiveLoc)6851 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
6852   const SMLoc Loc = getLexer().getLoc();
6853   std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
6854   StringRef RadixString = StringRef(RadixStringRaw).trim();
6855   unsigned Radix;
6856   if (RadixString.getAsInteger(10, Radix)) {
6857     return Error(Loc,
6858                  "radix must be a decimal number in the range 2 to 16; was " +
6859                      RadixString);
6860   }
6861   if (Radix < 2 || Radix > 16)
6862     return Error(Loc, "radix must be in the range 2 to 16; was " +
6863                           std::to_string(Radix));
6864   getLexer().setMasmDefaultRadix(Radix);
6865   return false;
6866 }
6867 
6868 /// parseDirectiveEcho
6869 ///   ::= "echo" message
parseDirectiveEcho()6870 bool MasmParser::parseDirectiveEcho() {
6871   // We're called before the directive is parsed, to avoid triggering lexical
6872   // substitutions in the message. Assert that the next token is the directive,
6873   // then eat it without using the Parser's Lex method.
6874   assert(getTok().is(AsmToken::Identifier) &&
6875          getTok().getString().equals_lower("echo"));
6876   Lexer.Lex();
6877 
6878   std::string Message = parseStringTo(AsmToken::EndOfStatement);
6879   llvm::outs() << Message;
6880   if (!StringRef(Message).endswith("\n"))
6881     llvm::outs() << '\n';
6882   return false;
6883 }
6884 
6885 // We are comparing pointers, but the pointers are relative to a single string.
6886 // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)6887 static int rewritesSort(const AsmRewrite *AsmRewriteA,
6888                         const AsmRewrite *AsmRewriteB) {
6889   if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
6890     return -1;
6891   if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
6892     return 1;
6893 
6894   // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
6895   // rewrite to the same location.  Make sure the SizeDirective rewrite is
6896   // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
6897   // ensures the sort algorithm is stable.
6898   if (AsmRewritePrecedence[AsmRewriteA->Kind] >
6899       AsmRewritePrecedence[AsmRewriteB->Kind])
6900     return -1;
6901 
6902   if (AsmRewritePrecedence[AsmRewriteA->Kind] <
6903       AsmRewritePrecedence[AsmRewriteB->Kind])
6904     return 1;
6905   llvm_unreachable("Unstable rewrite sort.");
6906 }
6907 
defineMacro(StringRef Name,StringRef Value)6908 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
6909   Variable &Var = Variables[Name.lower()];
6910   if (Var.Name.empty()) {
6911     Var.Name = Name;
6912   } else if (!Var.Redefinable) {
6913     return TokError("invalid variable redefinition");
6914   }
6915   Var.Redefinable = true;
6916   Var.IsText = true;
6917   Var.TextValue = Value.str();
6918   return false;
6919 }
6920 
lookUpField(StringRef Name,AsmFieldInfo & Info) const6921 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
6922   const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
6923   const StringRef Base = BaseMember.first, Member = BaseMember.second;
6924   return lookUpField(Base, Member, Info);
6925 }
6926 
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const6927 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
6928                              AsmFieldInfo &Info) const {
6929   if (Base.empty())
6930     return true;
6931 
6932   AsmFieldInfo BaseInfo;
6933   if (Base.contains('.') && !lookUpField(Base, BaseInfo))
6934     Base = BaseInfo.Type.Name;
6935 
6936   auto StructIt = Structs.find(Base.lower());
6937   auto TypeIt = KnownType.find(Base.lower());
6938   if (TypeIt != KnownType.end()) {
6939     StructIt = Structs.find(TypeIt->second.Name.lower());
6940   }
6941   if (StructIt != Structs.end())
6942     return lookUpField(StructIt->second, Member, Info);
6943 
6944   return true;
6945 }
6946 
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const6947 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
6948                              AsmFieldInfo &Info) const {
6949   if (Member.empty()) {
6950     Info.Type.Name = Structure.Name;
6951     Info.Type.Size = Structure.Size;
6952     Info.Type.ElementSize = Structure.Size;
6953     Info.Type.Length = 1;
6954     return false;
6955   }
6956 
6957   std::pair<StringRef, StringRef> Split = Member.split('.');
6958   const StringRef FieldName = Split.first, FieldMember = Split.second;
6959 
6960   auto StructIt = Structs.find(FieldName.lower());
6961   if (StructIt != Structs.end())
6962     return lookUpField(StructIt->second, FieldMember, Info);
6963 
6964   auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
6965   if (FieldIt == Structure.FieldsByName.end())
6966     return true;
6967 
6968   const FieldInfo &Field = Structure.Fields[FieldIt->second];
6969   if (FieldMember.empty()) {
6970     Info.Offset += Field.Offset;
6971     Info.Type.Size = Field.SizeOf;
6972     Info.Type.ElementSize = Field.Type;
6973     Info.Type.Length = Field.LengthOf;
6974     if (Field.Contents.FT == FT_STRUCT)
6975       Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
6976     else
6977       Info.Type.Name = "";
6978     return false;
6979   }
6980 
6981   if (Field.Contents.FT != FT_STRUCT)
6982     return true;
6983   const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
6984 
6985   if (lookUpField(StructInfo.Structure, FieldMember, Info))
6986     return true;
6987 
6988   Info.Offset += Field.Offset;
6989   return false;
6990 }
6991 
lookUpType(StringRef Name,AsmTypeInfo & Info) const6992 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
6993   unsigned Size = StringSwitch<unsigned>(Name)
6994                       .CasesLower("byte", "db", "sbyte", 1)
6995                       .CasesLower("word", "dw", "sword", 2)
6996                       .CasesLower("dword", "dd", "sdword", 4)
6997                       .CasesLower("fword", "df", 6)
6998                       .CasesLower("qword", "dq", "sqword", 8)
6999                       .CaseLower("real4", 4)
7000                       .CaseLower("real8", 8)
7001                       .CaseLower("real10", 10)
7002                       .Default(0);
7003   if (Size) {
7004     Info.Name = Name;
7005     Info.ElementSize = Size;
7006     Info.Length = 1;
7007     Info.Size = Size;
7008     return false;
7009   }
7010 
7011   auto StructIt = Structs.find(Name.lower());
7012   if (StructIt != Structs.end()) {
7013     const StructInfo &Structure = StructIt->second;
7014     Info.Name = Name;
7015     Info.ElementSize = Structure.Size;
7016     Info.Length = 1;
7017     Info.Size = Structure.Size;
7018     return false;
7019   }
7020 
7021   return true;
7022 }
7023 
parseMSInlineAsm(void * AsmLoc,std::string & AsmString,unsigned & NumOutputs,unsigned & NumInputs,SmallVectorImpl<std::pair<void *,bool>> & OpDecls,SmallVectorImpl<std::string> & Constraints,SmallVectorImpl<std::string> & Clobbers,const MCInstrInfo * MII,const MCInstPrinter * IP,MCAsmParserSemaCallback & SI)7024 bool MasmParser::parseMSInlineAsm(
7025     void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
7026     unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7027     SmallVectorImpl<std::string> &Constraints,
7028     SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7029     const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7030   SmallVector<void *, 4> InputDecls;
7031   SmallVector<void *, 4> OutputDecls;
7032   SmallVector<bool, 4> InputDeclsAddressOf;
7033   SmallVector<bool, 4> OutputDeclsAddressOf;
7034   SmallVector<std::string, 4> InputConstraints;
7035   SmallVector<std::string, 4> OutputConstraints;
7036   SmallVector<unsigned, 4> ClobberRegs;
7037 
7038   SmallVector<AsmRewrite, 4> AsmStrRewrites;
7039 
7040   // Prime the lexer.
7041   Lex();
7042 
7043   // While we have input, parse each statement.
7044   unsigned InputIdx = 0;
7045   unsigned OutputIdx = 0;
7046   while (getLexer().isNot(AsmToken::Eof)) {
7047     // Parse curly braces marking block start/end.
7048     if (parseCurlyBlockScope(AsmStrRewrites))
7049       continue;
7050 
7051     ParseStatementInfo Info(&AsmStrRewrites);
7052     bool StatementErr = parseStatement(Info, &SI);
7053 
7054     if (StatementErr || Info.ParseError) {
7055       // Emit pending errors if any exist.
7056       printPendingErrors();
7057       return true;
7058     }
7059 
7060     // No pending error should exist here.
7061     assert(!hasPendingError() && "unexpected error from parseStatement");
7062 
7063     if (Info.Opcode == ~0U)
7064       continue;
7065 
7066     const MCInstrDesc &Desc = MII->get(Info.Opcode);
7067 
7068     // Build the list of clobbers, outputs and inputs.
7069     for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7070       MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7071 
7072       // Register operand.
7073       if (Operand.isReg() && !Operand.needAddressOf() &&
7074           !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
7075         unsigned NumDefs = Desc.getNumDefs();
7076         // Clobber.
7077         if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7078           ClobberRegs.push_back(Operand.getReg());
7079         continue;
7080       }
7081 
7082       // Expr/Input or Output.
7083       StringRef SymName = Operand.getSymName();
7084       if (SymName.empty())
7085         continue;
7086 
7087       void *OpDecl = Operand.getOpDecl();
7088       if (!OpDecl)
7089         continue;
7090 
7091       StringRef Constraint = Operand.getConstraint();
7092       if (Operand.isImm()) {
7093         // Offset as immediate.
7094         if (Operand.isOffsetOfLocal())
7095           Constraint = "r";
7096         else
7097           Constraint = "i";
7098       }
7099 
7100       bool isOutput = (i == 1) && Desc.mayStore();
7101       SMLoc Start = SMLoc::getFromPointer(SymName.data());
7102       if (isOutput) {
7103         ++InputIdx;
7104         OutputDecls.push_back(OpDecl);
7105         OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7106         OutputConstraints.push_back(("=" + Constraint).str());
7107         AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7108       } else {
7109         InputDecls.push_back(OpDecl);
7110         InputDeclsAddressOf.push_back(Operand.needAddressOf());
7111         InputConstraints.push_back(Constraint.str());
7112         if (Desc.OpInfo[i - 1].isBranchTarget())
7113           AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7114         else
7115           AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7116       }
7117     }
7118 
7119     // Consider implicit defs to be clobbers.  Think of cpuid and push.
7120     ArrayRef<MCPhysReg> ImpDefs(Desc.getImplicitDefs(),
7121                                 Desc.getNumImplicitDefs());
7122     ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end());
7123   }
7124 
7125   // Set the number of Outputs and Inputs.
7126   NumOutputs = OutputDecls.size();
7127   NumInputs = InputDecls.size();
7128 
7129   // Set the unique clobbers.
7130   array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7131   ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
7132                     ClobberRegs.end());
7133   Clobbers.assign(ClobberRegs.size(), std::string());
7134   for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7135     raw_string_ostream OS(Clobbers[I]);
7136     IP->printRegName(OS, ClobberRegs[I]);
7137   }
7138 
7139   // Merge the various outputs and inputs.  Output are expected first.
7140   if (NumOutputs || NumInputs) {
7141     unsigned NumExprs = NumOutputs + NumInputs;
7142     OpDecls.resize(NumExprs);
7143     Constraints.resize(NumExprs);
7144     for (unsigned i = 0; i < NumOutputs; ++i) {
7145       OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7146       Constraints[i] = OutputConstraints[i];
7147     }
7148     for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7149       OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7150       Constraints[j] = InputConstraints[i];
7151     }
7152   }
7153 
7154   // Build the IR assembly string.
7155   std::string AsmStringIR;
7156   raw_string_ostream OS(AsmStringIR);
7157   StringRef ASMString =
7158       SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
7159   const char *AsmStart = ASMString.begin();
7160   const char *AsmEnd = ASMString.end();
7161   array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7162   for (auto it = AsmStrRewrites.begin(); it != AsmStrRewrites.end(); ++it) {
7163     const AsmRewrite &AR = *it;
7164     // Check if this has already been covered by another rewrite...
7165     if (AR.Done)
7166       continue;
7167     AsmRewriteKind Kind = AR.Kind;
7168 
7169     const char *Loc = AR.Loc.getPointer();
7170     assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7171 
7172     // Emit everything up to the immediate/expression.
7173     if (unsigned Len = Loc - AsmStart)
7174       OS << StringRef(AsmStart, Len);
7175 
7176     // Skip the original expression.
7177     if (Kind == AOK_Skip) {
7178       AsmStart = Loc + AR.Len;
7179       continue;
7180     }
7181 
7182     unsigned AdditionalSkip = 0;
7183     // Rewrite expressions in $N notation.
7184     switch (Kind) {
7185     default:
7186       break;
7187     case AOK_IntelExpr:
7188       assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7189       if (AR.IntelExp.NeedBracs)
7190         OS << "[";
7191       if (AR.IntelExp.hasBaseReg())
7192         OS << AR.IntelExp.BaseReg;
7193       if (AR.IntelExp.hasIndexReg())
7194         OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7195            << AR.IntelExp.IndexReg;
7196       if (AR.IntelExp.Scale > 1)
7197         OS << " * $$" << AR.IntelExp.Scale;
7198       if (AR.IntelExp.hasOffset()) {
7199         if (AR.IntelExp.hasRegs())
7200           OS << " + ";
7201         // Fuse this rewrite with a rewrite of the offset name, if present.
7202         StringRef OffsetName = AR.IntelExp.OffsetName;
7203         SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7204         size_t OffsetLen = OffsetName.size();
7205         auto rewrite_it = std::find_if(
7206             it, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7207               return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7208                      (FusingAR.Kind == AOK_Input ||
7209                       FusingAR.Kind == AOK_CallInput);
7210             });
7211         if (rewrite_it == AsmStrRewrites.end()) {
7212           OS << "offset " << OffsetName;
7213         } else if (rewrite_it->Kind == AOK_CallInput) {
7214           OS << "${" << InputIdx++ << ":P}";
7215           rewrite_it->Done = true;
7216         } else {
7217           OS << '$' << InputIdx++;
7218           rewrite_it->Done = true;
7219         }
7220       }
7221       if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7222         OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7223       if (AR.IntelExp.NeedBracs)
7224         OS << "]";
7225       break;
7226     case AOK_Label:
7227       OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7228       break;
7229     case AOK_Input:
7230       OS << '$' << InputIdx++;
7231       break;
7232     case AOK_CallInput:
7233       OS << "${" << InputIdx++ << ":P}";
7234       break;
7235     case AOK_Output:
7236       OS << '$' << OutputIdx++;
7237       break;
7238     case AOK_SizeDirective:
7239       switch (AR.Val) {
7240       default: break;
7241       case 8:  OS << "byte ptr "; break;
7242       case 16: OS << "word ptr "; break;
7243       case 32: OS << "dword ptr "; break;
7244       case 64: OS << "qword ptr "; break;
7245       case 80: OS << "xword ptr "; break;
7246       case 128: OS << "xmmword ptr "; break;
7247       case 256: OS << "ymmword ptr "; break;
7248       }
7249       break;
7250     case AOK_Emit:
7251       OS << ".byte";
7252       break;
7253     case AOK_Align: {
7254       // MS alignment directives are measured in bytes. If the native assembler
7255       // measures alignment in bytes, we can pass it straight through.
7256       OS << ".align";
7257       if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7258         break;
7259 
7260       // Alignment is in log2 form, so print that instead and skip the original
7261       // immediate.
7262       unsigned Val = AR.Val;
7263       OS << ' ' << Val;
7264       assert(Val < 10 && "Expected alignment less then 2^10.");
7265       AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7266       break;
7267     }
7268     case AOK_EVEN:
7269       OS << ".even";
7270       break;
7271     case AOK_EndOfStatement:
7272       OS << "\n\t";
7273       break;
7274     }
7275 
7276     // Skip the original expression.
7277     AsmStart = Loc + AR.Len + AdditionalSkip;
7278   }
7279 
7280   // Emit the remainder of the asm string.
7281   if (AsmStart != AsmEnd)
7282     OS << StringRef(AsmStart, AsmEnd - AsmStart);
7283 
7284   AsmString = OS.str();
7285   return false;
7286 }
7287 
7288 /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,unsigned CB)7289 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7290                                       MCStreamer &Out, const MCAsmInfo &MAI,
7291                                       unsigned CB) {
7292   return new MasmParser(SM, C, Out, MAI, CB);
7293 }
7294