• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class implements the lexer for assembly files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/MC/MCParser/AsmLexer.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 #include "llvm/Support/SMLoc.h"
18 #include <cctype>
19 #include <cerrno>
20 #include <cstdio>
21 #include <cstdlib>
22 using namespace llvm;
23 
AsmLexer(const MCAsmInfo & MAI)24 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
25   CurPtr = nullptr;
26   IsAtStartOfLine = true;
27   IsAtStartOfStatement = true;
28   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
29 }
30 
~AsmLexer()31 AsmLexer::~AsmLexer() {
32 }
33 
setBuffer(StringRef Buf,const char * ptr)34 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
35   CurBuf = Buf;
36 
37   if (ptr)
38     CurPtr = ptr;
39   else
40     CurPtr = CurBuf.begin();
41 
42   TokStart = nullptr;
43 }
44 
45 /// ReturnError - Set the error to the specified string at the specified
46 /// location.  This is defined to always return AsmToken::Error.
ReturnError(const char * Loc,const std::string & Msg)47 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
48   SetError(SMLoc::getFromPointer(Loc), Msg);
49 
50   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
51 }
52 
getNextChar()53 int AsmLexer::getNextChar() {
54   if (CurPtr == CurBuf.end())
55     return EOF;
56   return (unsigned char)*CurPtr++;
57 }
58 
59 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
60 ///
61 /// The leading integral digit sequence and dot should have already been
62 /// consumed, some or all of the fractional digit sequence *can* have been
63 /// consumed.
LexFloatLiteral()64 AsmToken AsmLexer::LexFloatLiteral() {
65   // Skip the fractional digit sequence.
66   while (isdigit(*CurPtr))
67     ++CurPtr;
68 
69   // Check for exponent; we intentionally accept a slighlty wider set of
70   // literals here and rely on the upstream client to reject invalid ones (e.g.,
71   // "1e+").
72   if (*CurPtr == 'e' || *CurPtr == 'E') {
73     ++CurPtr;
74     if (*CurPtr == '-' || *CurPtr == '+')
75       ++CurPtr;
76     while (isdigit(*CurPtr))
77       ++CurPtr;
78   }
79 
80   return AsmToken(AsmToken::Real,
81                   StringRef(TokStart, CurPtr - TokStart));
82 }
83 
84 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
85 /// while making sure there are enough actual digits around for the constant to
86 /// be valid.
87 ///
88 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
89 /// before we get here.
LexHexFloatLiteral(bool NoIntDigits)90 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
91   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
92          "unexpected parse state in floating hex");
93   bool NoFracDigits = true;
94 
95   // Skip the fractional part if there is one
96   if (*CurPtr == '.') {
97     ++CurPtr;
98 
99     const char *FracStart = CurPtr;
100     while (isxdigit(*CurPtr))
101       ++CurPtr;
102 
103     NoFracDigits = CurPtr == FracStart;
104   }
105 
106   if (NoIntDigits && NoFracDigits)
107     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
108                                  "expected at least one significand digit");
109 
110   // Make sure we do have some kind of proper exponent part
111   if (*CurPtr != 'p' && *CurPtr != 'P')
112     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
113                                  "expected exponent part 'p'");
114   ++CurPtr;
115 
116   if (*CurPtr == '+' || *CurPtr == '-')
117     ++CurPtr;
118 
119   // N.b. exponent digits are *not* hex
120   const char *ExpStart = CurPtr;
121   while (isdigit(*CurPtr))
122     ++CurPtr;
123 
124   if (CurPtr == ExpStart)
125     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
126                                  "expected at least one exponent digit");
127 
128   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
129 }
130 
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
///
/// IsIdentifierChar - Return true if \p c may appear after the first character
/// of an identifier: an alphanumeric character, '_', '$', '.', '?', or '@'
/// when \p AllowAt is set.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum has undefined behavior for negative arguments other than EOF, so
  // widen the (possibly signed) char through unsigned char first.
  if (isalnum(static_cast<unsigned char>(c)))
    return true;

  switch (c) {
  case '_':
  case '$':
  case '.':
  case '?':
    return true;
  case '@':
    return AllowAt;
  default:
    return false;
  }
}
LexIdentifier()136 AsmToken AsmLexer::LexIdentifier() {
137   // Check for floating point literals.
138   if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
139     // Disambiguate a .1243foo identifier from a floating literal.
140     while (isdigit(*CurPtr))
141       ++CurPtr;
142     if (*CurPtr == 'e' || *CurPtr == 'E' ||
143         !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
144       return LexFloatLiteral();
145   }
146 
147   while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
148     ++CurPtr;
149 
150   // Handle . as a special case.
151   if (CurPtr == TokStart+1 && TokStart[0] == '.')
152     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
153 
154   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
155 }
156 
157 /// LexSlash: Slash: /
158 ///           C-Style Comment: /* ... */
LexSlash()159 AsmToken AsmLexer::LexSlash() {
160   switch (*CurPtr) {
161   case '*':
162     IsAtStartOfStatement = false;
163     break; // C style comment.
164   case '/':
165     ++CurPtr;
166     return LexLineComment();
167   default:
168     IsAtStartOfStatement = false;
169     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
170   }
171 
172   // C Style comment.
173   ++CurPtr;  // skip the star.
174   while (CurPtr != CurBuf.end()) {
175     switch (*CurPtr++) {
176     case '*':
177       // End of the comment?
178       if (*CurPtr != '/')
179         break;
180       ++CurPtr;   // End the */.
181       return AsmToken(AsmToken::Comment,
182                       StringRef(TokStart, CurPtr - TokStart));
183     }
184   }
185   return ReturnError(TokStart, "unterminated comment");
186 }
187 
188 /// LexLineComment: Comment: #[^\n]*
189 ///                        : //[^\n]*
LexLineComment()190 AsmToken AsmLexer::LexLineComment() {
191   // Mark This as an end of statement with a body of the
192   // comment. While it would be nicer to leave this two tokens,
193   // backwards compatability with TargetParsers makes keeping this in this form
194   // better.
195   int CurChar = getNextChar();
196   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
197     CurChar = getNextChar();
198 
199   IsAtStartOfLine = true;
200   // Whis is a whole line comment. leave newline
201   if (IsAtStartOfStatement)
202     return AsmToken(AsmToken::EndOfStatement,
203                     StringRef(TokStart, CurPtr - TokStart));
204   IsAtStartOfStatement = true;
205 
206   return AsmToken(AsmToken::EndOfStatement,
207                   StringRef(TokStart, CurPtr - 1 - TokStart));
208 }
209 
/// SkipIgnoredIntegerSuffix - Advance \p CurPtr past an optional 'U' followed
/// by up to two 'L' characters, i.e. the ULL, UL, U, L and LL suffixes.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U')
    ++CurPtr;

  // At most two consecutive 'L's are skipped.
  for (int Skipped = 0; Skipped < 2 && *CurPtr == 'L'; ++Skipped)
    ++CurPtr;
}
219 
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// On return CurPtr points at the [hH] suffix when one was found.  Otherwise it
// points at the first hex-only digit (a-f/A-F) if there was one, or at the
// first character that is not a digit at all.
//
// Returns 16 when the digit run is followed by [hH], DefaultRadix otherwise.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // The <cctype> classifiers have undefined behavior for negative arguments
    // other than EOF, so widen the char through unsigned char first.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // Remember where the decimal prefix ends in case this is not hex.
    if (!FirstHex)
      FirstHex = LookAhead;
  }

  const bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = (isHex || !FirstHex) ? LookAhead : FirstHex;
  return isHex ? 16 : DefaultRadix;
}
242 
intToken(StringRef Ref,APInt & Value)243 static AsmToken intToken(StringRef Ref, APInt &Value)
244 {
245   if (Value.isIntN(64))
246     return AsmToken(AsmToken::Integer, Ref, Value);
247   return AsmToken(AsmToken::BigNum, Ref, Value);
248 }
249 
250 /// LexDigit: First character is [0-9].
251 ///   Local Label: [0-9][:]
252 ///   Forward/Backward Label: [0-9][fb]
253 ///   Binary integer: 0b[01]+
254 ///   Octal integer: 0[0-7]+
255 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
256 ///   Decimal integer: [1-9][0-9]*
LexDigit()257 AsmToken AsmLexer::LexDigit() {
258   // Decimal integer: [1-9][0-9]*
259   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
260     unsigned Radix = doLookAhead(CurPtr, 10);
261     bool isHex = Radix == 16;
262     // Check for floating point literals.
263     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
264       ++CurPtr;
265       return LexFloatLiteral();
266     }
267 
268     StringRef Result(TokStart, CurPtr - TokStart);
269 
270     APInt Value(128, 0, true);
271     if (Result.getAsInteger(Radix, Value))
272       return ReturnError(TokStart, !isHex ? "invalid decimal number" :
273                            "invalid hexdecimal number");
274 
275     // Consume the [bB][hH].
276     if (Radix == 2 || Radix == 16)
277       ++CurPtr;
278 
279     // The darwin/x86 (and x86-64) assembler accepts and ignores type
280     // suffices on integer literals.
281     SkipIgnoredIntegerSuffix(CurPtr);
282 
283     return intToken(Result, Value);
284   }
285 
286   if ((*CurPtr == 'b') || (*CurPtr == 'B')) {
287     ++CurPtr;
288     // See if we actually have "0b" as part of something like "jmp 0b\n"
289     if (!isdigit(CurPtr[0])) {
290       --CurPtr;
291       StringRef Result(TokStart, CurPtr - TokStart);
292       return AsmToken(AsmToken::Integer, Result, 0);
293     }
294     const char *NumStart = CurPtr;
295     while (CurPtr[0] == '0' || CurPtr[0] == '1')
296       ++CurPtr;
297 
298     // Requires at least one binary digit.
299     if (CurPtr == NumStart)
300       return ReturnError(TokStart, "invalid binary number");
301 
302     StringRef Result(TokStart, CurPtr - TokStart);
303 
304     APInt Value(128, 0, true);
305     if (Result.substr(2).getAsInteger(2, Value))
306       return ReturnError(TokStart, "invalid binary number");
307 
308     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
309     // suffixes on integer literals.
310     SkipIgnoredIntegerSuffix(CurPtr);
311 
312     return intToken(Result, Value);
313   }
314 
315   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
316     ++CurPtr;
317     const char *NumStart = CurPtr;
318     while (isxdigit(CurPtr[0]))
319       ++CurPtr;
320 
321     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
322     // diagnosed by LexHexFloatLiteral).
323     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
324       return LexHexFloatLiteral(NumStart == CurPtr);
325 
326     // Otherwise requires at least one hex digit.
327     if (CurPtr == NumStart)
328       return ReturnError(CurPtr-2, "invalid hexadecimal number");
329 
330     APInt Result(128, 0);
331     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
332       return ReturnError(TokStart, "invalid hexadecimal number");
333 
334     // Consume the optional [hH].
335     if (*CurPtr == 'h' || *CurPtr == 'H')
336       ++CurPtr;
337 
338     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
339     // suffixes on integer literals.
340     SkipIgnoredIntegerSuffix(CurPtr);
341 
342     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
343   }
344 
345   // Either octal or hexadecimal.
346   APInt Value(128, 0, true);
347   unsigned Radix = doLookAhead(CurPtr, 8);
348   bool isHex = Radix == 16;
349   StringRef Result(TokStart, CurPtr - TokStart);
350   if (Result.getAsInteger(Radix, Value))
351     return ReturnError(TokStart, !isHex ? "invalid octal number" :
352                        "invalid hexdecimal number");
353 
354   // Consume the [hH].
355   if (Radix == 16)
356     ++CurPtr;
357 
358   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
359   // suffixes on integer literals.
360   SkipIgnoredIntegerSuffix(CurPtr);
361 
362   return intToken(Result, Value);
363 }
364 
365 /// LexSingleQuote: Integer: 'b'
LexSingleQuote()366 AsmToken AsmLexer::LexSingleQuote() {
367   int CurChar = getNextChar();
368 
369   if (CurChar == '\\')
370     CurChar = getNextChar();
371 
372   if (CurChar == EOF)
373     return ReturnError(TokStart, "unterminated single quote");
374 
375   CurChar = getNextChar();
376 
377   if (CurChar != '\'')
378     return ReturnError(TokStart, "single quote way too long");
379 
380   // The idea here being that 'c' is basically just an integral
381   // constant.
382   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
383   long long Value;
384 
385   if (Res.startswith("\'\\")) {
386     char theChar = Res[2];
387     switch (theChar) {
388       default: Value = theChar; break;
389       case '\'': Value = '\''; break;
390       case 't': Value = '\t'; break;
391       case 'n': Value = '\n'; break;
392       case 'b': Value = '\b'; break;
393     }
394   } else
395     Value = TokStart[1];
396 
397   return AsmToken(AsmToken::Integer, Res, Value);
398 }
399 
400 
401 /// LexQuote: String: "..."
LexQuote()402 AsmToken AsmLexer::LexQuote() {
403   int CurChar = getNextChar();
404   // TODO: does gas allow multiline string constants?
405   while (CurChar != '"') {
406     if (CurChar == '\\') {
407       // Allow \", etc.
408       CurChar = getNextChar();
409     }
410 
411     if (CurChar == EOF)
412       return ReturnError(TokStart, "unterminated string constant");
413 
414     CurChar = getNextChar();
415   }
416 
417   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
418 }
419 
LexUntilEndOfStatement()420 StringRef AsmLexer::LexUntilEndOfStatement() {
421   TokStart = CurPtr;
422 
423   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
424          !isAtStatementSeparator(CurPtr) && // End of statement marker.
425          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
426     ++CurPtr;
427   }
428   return StringRef(TokStart, CurPtr-TokStart);
429 }
430 
LexUntilEndOfLine()431 StringRef AsmLexer::LexUntilEndOfLine() {
432   TokStart = CurPtr;
433 
434   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
435     ++CurPtr;
436   }
437   return StringRef(TokStart, CurPtr-TokStart);
438 }
439 
peekTokens(MutableArrayRef<AsmToken> Buf,bool ShouldSkipSpace)440 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
441                             bool ShouldSkipSpace) {
442   const char *SavedTokStart = TokStart;
443   const char *SavedCurPtr = CurPtr;
444   bool SavedAtStartOfLine = IsAtStartOfLine;
445   bool SavedAtStartOfStatement = IsAtStartOfStatement;
446   bool SavedSkipSpace = SkipSpace;
447 
448   std::string SavedErr = getErr();
449   SMLoc SavedErrLoc = getErrLoc();
450 
451   SkipSpace = ShouldSkipSpace;
452 
453   size_t ReadCount;
454   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
455     AsmToken Token = LexToken();
456 
457     Buf[ReadCount] = Token;
458 
459     if (Token.is(AsmToken::Eof))
460       break;
461   }
462 
463   SetError(SavedErrLoc, SavedErr);
464 
465   SkipSpace = SavedSkipSpace;
466   IsAtStartOfLine = SavedAtStartOfLine;
467   IsAtStartOfStatement = SavedAtStartOfStatement;
468   CurPtr = SavedCurPtr;
469   TokStart = SavedTokStart;
470 
471   return ReadCount;
472 }
473 
isAtStartOfComment(const char * Ptr)474 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
475   const char *CommentString = MAI.getCommentString();
476 
477   if (CommentString[1] == '\0')
478     return CommentString[0] == Ptr[0];
479 
480   // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin
481   if (CommentString[1] == '#')
482     return CommentString[0] == Ptr[0];
483 
484   return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
485 }
486 
isAtStatementSeparator(const char * Ptr)487 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
488   return strncmp(Ptr, MAI.getSeparatorString(),
489                  strlen(MAI.getSeparatorString())) == 0;
490 }
491 
LexToken()492 AsmToken AsmLexer::LexToken() {
493   TokStart = CurPtr;
494   // This always consumes at least one character.
495   int CurChar = getNextChar();
496 
497   if (CurChar == '#' && IsAtStartOfStatement) {
498     // If this starts with a '#', this may be a cpp
499     // hash directive and otherwise a line comment.
500     AsmToken TokenBuf[2];
501     MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
502     size_t num = peekTokens(Buf, true);
503     // There cannot be a space preceeding this
504     if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
505         TokenBuf[1].is(AsmToken::String)) {
506       CurPtr = TokStart; // reset curPtr;
507       StringRef s = LexUntilEndOfLine();
508       UnLex(TokenBuf[1]);
509       UnLex(TokenBuf[0]);
510       return AsmToken(AsmToken::HashDirective, s);
511     }
512     return LexLineComment();
513   }
514 
515   if (isAtStartOfComment(TokStart))
516     return LexLineComment();
517 
518   if (isAtStatementSeparator(TokStart)) {
519     CurPtr += strlen(MAI.getSeparatorString()) - 1;
520     IsAtStartOfLine = true;
521     IsAtStartOfStatement = true;
522     return AsmToken(AsmToken::EndOfStatement,
523                     StringRef(TokStart, strlen(MAI.getSeparatorString())));
524   }
525 
526   // If we're missing a newline at EOF, make sure we still get an
527   // EndOfStatement token before the Eof token.
528   if (CurChar == EOF && !IsAtStartOfStatement) {
529     IsAtStartOfLine = true;
530     IsAtStartOfStatement = true;
531     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
532   }
533   IsAtStartOfLine = false;
534   bool OldIsAtStartOfStatement = IsAtStartOfStatement;
535   IsAtStartOfStatement = false;
536   switch (CurChar) {
537   default:
538     // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
539     if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
540       return LexIdentifier();
541 
542     // Unknown character, emit an error.
543     return ReturnError(TokStart, "invalid character in input");
544   case EOF:
545     IsAtStartOfLine = true;
546     IsAtStartOfStatement = true;
547     return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
548   case 0:
549   case ' ':
550   case '\t':
551     IsAtStartOfStatement = OldIsAtStartOfStatement;
552     while (*CurPtr == ' ' || *CurPtr == '\t')
553       CurPtr++;
554     if (SkipSpace)
555       return LexToken(); // Ignore whitespace.
556     else
557       return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
558   case '\n':
559   case '\r':
560     IsAtStartOfLine = true;
561     IsAtStartOfStatement = true;
562     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
563   case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
564   case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
565   case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
566   case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
567   case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
568   case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
569   case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
570   case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
571   case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
572   case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
573   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
574   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
575   case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
576   case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
577   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
578   case '=':
579     if (*CurPtr == '=') {
580       ++CurPtr;
581       return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
582     }
583     return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
584   case '|':
585     if (*CurPtr == '|') {
586       ++CurPtr;
587       return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
588     }
589     return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
590   case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
591   case '&':
592     if (*CurPtr == '&') {
593       ++CurPtr;
594       return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
595     }
596     return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
597   case '!':
598     if (*CurPtr == '=') {
599       ++CurPtr;
600       return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
601     }
602     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
603   case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
604   case '/':
605     IsAtStartOfStatement = OldIsAtStartOfStatement;
606     return LexSlash();
607   case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
608   case '\'': return LexSingleQuote();
609   case '"': return LexQuote();
610   case '0': case '1': case '2': case '3': case '4':
611   case '5': case '6': case '7': case '8': case '9':
612     return LexDigit();
613   case '<':
614     switch (*CurPtr) {
615     case '<':
616       ++CurPtr;
617       return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
618     case '=':
619       ++CurPtr;
620       return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
621     case '>':
622       ++CurPtr;
623       return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
624     default:
625       return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
626     }
627   case '>':
628     switch (*CurPtr) {
629     case '>':
630       ++CurPtr;
631       return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
632     case '=':
633       ++CurPtr;
634       return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
635     default:
636       return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
637     }
638 
639   // TODO: Quoted identifiers (objc methods etc)
640   // local labels: [0-9][:]
641   // Forward/backward labels: [0-9][fb]
642   // Integers, fp constants, character constants.
643   }
644 }
645