• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
~FormatTokenSource()28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
~ScopedDeclarationState()45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
~ScopedMacroState()70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
getNextToken()77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
getPosition()87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
setPosition(unsigned Position)89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
eof()95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
getFakeEOF()97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
~ScopedLineState()134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
~CompoundStatementIndenter()162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
getNextToken()176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
getPosition()181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
setPosition(unsigned P)186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
reset()191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
201                                          const AdditionalKeywords &Keywords,
202                                          ArrayRef<FormatToken *> Tokens,
203                                          UnwrappedLineConsumer &Callback)
204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
206       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
207 
reset()208 void UnwrappedLineParser::reset() {
209   PPBranchLevel = -1;
210   Line.reset(new UnwrappedLine);
211   CommentsBeforeNextToken.clear();
212   FormatTok = nullptr;
213   MustBreakBeforeNextToken = false;
214   PreprocessorDirectives.clear();
215   CurrentLines = &Lines;
216   DeclarationScopeStack.clear();
217   PPStack.clear();
218 }
219 
parse()220 void UnwrappedLineParser::parse() {
221   IndexedTokenSource TokenSource(AllTokens);
222   do {
223     DEBUG(llvm::dbgs() << "----\n");
224     reset();
225     Tokens = &TokenSource;
226     TokenSource.reset();
227 
228     readToken();
229     parseFile();
230     // Create line with eof token.
231     pushToken(FormatTok);
232     addUnwrappedLine();
233 
234     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
235                                                   E = Lines.end();
236          I != E; ++I) {
237       Callback.consumeUnwrappedLine(*I);
238     }
239     Callback.finishRun();
240     Lines.clear();
241     while (!PPLevelBranchIndex.empty() &&
242            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
243       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
244       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
245     }
246     if (!PPLevelBranchIndex.empty()) {
247       ++PPLevelBranchIndex.back();
248       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
249       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
250     }
251   } while (!PPLevelBranchIndex.empty());
252 }
253 
parseFile()254 void UnwrappedLineParser::parseFile() {
255   // The top-level context in a file always has declarations, except for pre-
256   // processor directives and JavaScript files.
257   bool MustBeDeclaration =
258       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
259   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
260                                           MustBeDeclaration);
261   parseLevel(/*HasOpeningBrace=*/false);
262   // Make sure to format the remaining tokens.
263   flushComments(true);
264   addUnwrappedLine();
265 }
266 
parseLevel(bool HasOpeningBrace)267 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
268   bool SwitchLabelEncountered = false;
269   do {
270     tok::TokenKind kind = FormatTok->Tok.getKind();
271     if (FormatTok->Type == TT_MacroBlockBegin) {
272       kind = tok::l_brace;
273     } else if (FormatTok->Type == TT_MacroBlockEnd) {
274       kind = tok::r_brace;
275     }
276 
277     switch (kind) {
278     case tok::comment:
279       nextToken();
280       addUnwrappedLine();
281       break;
282     case tok::l_brace:
283       // FIXME: Add parameter whether this can happen - if this happens, we must
284       // be in a non-declaration context.
285       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
286         continue;
287       parseBlock(/*MustBeDeclaration=*/false);
288       addUnwrappedLine();
289       break;
290     case tok::r_brace:
291       if (HasOpeningBrace)
292         return;
293       nextToken();
294       addUnwrappedLine();
295       break;
296     case tok::kw_default:
297     case tok::kw_case:
298       if (!SwitchLabelEncountered &&
299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300         ++Line->Level;
301       SwitchLabelEncountered = true;
302       parseStructuralElement();
303       break;
304     default:
305       parseStructuralElement();
306       break;
307     }
308   } while (!eof());
309 }
310 
calculateBraceTypes(bool ExpectClassBody)311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312   // We'll parse forward through the tokens until we hit
313   // a closing brace or eof - note that getNextToken() will
314   // parse macros, so this will magically work inside macro
315   // definitions, too.
316   unsigned StoredPosition = Tokens->getPosition();
317   FormatToken *Tok = FormatTok;
318   const FormatToken *PrevTok = getPreviousToken();
319   // Keep a stack of positions of lbrace tokens. We will
320   // update information about whether an lbrace starts a
321   // braced init list or a different block during the loop.
322   SmallVector<FormatToken *, 8> LBraceStack;
323   assert(Tok->Tok.is(tok::l_brace));
324   do {
325     // Get next non-comment token.
326     FormatToken *NextTok;
327     unsigned ReadTokens = 0;
328     do {
329       NextTok = Tokens->getNextToken();
330       ++ReadTokens;
331     } while (NextTok->is(tok::comment));
332 
333     switch (Tok->Tok.getKind()) {
334     case tok::l_brace:
335       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
336           PrevTok->is(tok::colon))
337         // In TypeScript's TypeMemberLists, there can be semicolons between the
338         // individual members.
339         Tok->BlockKind = BK_BracedInit;
340       else
341         Tok->BlockKind = BK_Unknown;
342       LBraceStack.push_back(Tok);
343       break;
344     case tok::r_brace:
345       if (LBraceStack.empty())
346         break;
347       if (LBraceStack.back()->BlockKind == BK_Unknown) {
348         bool ProbablyBracedList = false;
349         if (Style.Language == FormatStyle::LK_Proto) {
350           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
351         } else {
352           // Using OriginalColumn to distinguish between ObjC methods and
353           // binary operators is a bit hacky.
354           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
355                                   NextTok->OriginalColumn == 0;
356 
357           // If there is a comma, semicolon or right paren after the closing
358           // brace, we assume this is a braced initializer list.  Note that
359           // regardless how we mark inner braces here, we will overwrite the
360           // BlockKind later if we parse a braced list (where all blocks
361           // inside are by default braced lists), or when we explicitly detect
362           // blocks (for example while parsing lambdas).
363           //
364           // We exclude + and - as they can be ObjC visibility modifiers.
365           ProbablyBracedList =
366               (Style.Language == FormatStyle::LK_JavaScript &&
367                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in)) ||
368               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
369                                tok::r_paren, tok::r_square, tok::l_brace,
370                                tok::l_square, tok::l_paren, tok::ellipsis) ||
371               (NextTok->is(tok::semi) &&
372                (!ExpectClassBody || LBraceStack.size() != 1)) ||
373               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
374         }
375         if (ProbablyBracedList) {
376           Tok->BlockKind = BK_BracedInit;
377           LBraceStack.back()->BlockKind = BK_BracedInit;
378         } else {
379           Tok->BlockKind = BK_Block;
380           LBraceStack.back()->BlockKind = BK_Block;
381         }
382       }
383       LBraceStack.pop_back();
384       break;
385     case tok::at:
386     case tok::semi:
387     case tok::kw_if:
388     case tok::kw_while:
389     case tok::kw_for:
390     case tok::kw_switch:
391     case tok::kw_try:
392     case tok::kw___try:
393       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
394         LBraceStack.back()->BlockKind = BK_Block;
395       break;
396     default:
397       break;
398     }
399     PrevTok = Tok;
400     Tok = NextTok;
401   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
402 
403   // Assume other blocks for all unclosed opening braces.
404   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
405     if (LBraceStack[i]->BlockKind == BK_Unknown)
406       LBraceStack[i]->BlockKind = BK_Block;
407   }
408 
409   FormatTok = Tokens->setPosition(StoredPosition);
410 }
411 
parseBlock(bool MustBeDeclaration,bool AddLevel,bool MunchSemi)412 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
413                                      bool MunchSemi) {
414   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
415          "'{' or macro block token expected");
416   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
417   FormatTok->BlockKind = BK_Block;
418 
419   unsigned InitialLevel = Line->Level;
420   nextToken();
421 
422   if (MacroBlock && FormatTok->is(tok::l_paren))
423     parseParens();
424 
425   addUnwrappedLine();
426 
427   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
428                                           MustBeDeclaration);
429   if (AddLevel)
430     ++Line->Level;
431   parseLevel(/*HasOpeningBrace=*/true);
432 
433   if (eof())
434     return;
435 
436   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
437                  : !FormatTok->is(tok::r_brace)) {
438     Line->Level = InitialLevel;
439     FormatTok->BlockKind = BK_Block;
440     return;
441   }
442 
443   nextToken(); // Munch the closing brace.
444 
445   if (MacroBlock && FormatTok->is(tok::l_paren))
446     parseParens();
447 
448   if (MunchSemi && FormatTok->Tok.is(tok::semi))
449     nextToken();
450   Line->Level = InitialLevel;
451 }
452 
isGoogScope(const UnwrappedLine & Line)453 static bool isGoogScope(const UnwrappedLine &Line) {
454   // FIXME: Closure-library specific stuff should not be hard-coded but be
455   // configurable.
456   if (Line.Tokens.size() < 4)
457     return false;
458   auto I = Line.Tokens.begin();
459   if (I->Tok->TokenText != "goog")
460     return false;
461   ++I;
462   if (I->Tok->isNot(tok::period))
463     return false;
464   ++I;
465   if (I->Tok->TokenText != "scope")
466     return false;
467   ++I;
468   return I->Tok->is(tok::l_paren);
469 }
470 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)471 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
472                                    const FormatToken &InitialToken) {
473   if (InitialToken.is(tok::kw_namespace))
474     return Style.BraceWrapping.AfterNamespace;
475   if (InitialToken.is(tok::kw_class))
476     return Style.BraceWrapping.AfterClass;
477   if (InitialToken.is(tok::kw_union))
478     return Style.BraceWrapping.AfterUnion;
479   if (InitialToken.is(tok::kw_struct))
480     return Style.BraceWrapping.AfterStruct;
481   return false;
482 }
483 
parseChildBlock()484 void UnwrappedLineParser::parseChildBlock() {
485   FormatTok->BlockKind = BK_Block;
486   nextToken();
487   {
488     bool GoogScope =
489         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
490     ScopedLineState LineState(*this);
491     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
492                                             /*MustBeDeclaration=*/false);
493     Line->Level += GoogScope ? 0 : 1;
494     parseLevel(/*HasOpeningBrace=*/true);
495     flushComments(isOnNewLine(*FormatTok));
496     Line->Level -= GoogScope ? 0 : 1;
497   }
498   nextToken();
499 }
500 
parsePPDirective()501 void UnwrappedLineParser::parsePPDirective() {
502   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
503   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
504   nextToken();
505 
506   if (!FormatTok->Tok.getIdentifierInfo()) {
507     parsePPUnknown();
508     return;
509   }
510 
511   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
512   case tok::pp_define:
513     parsePPDefine();
514     return;
515   case tok::pp_if:
516     parsePPIf(/*IfDef=*/false);
517     break;
518   case tok::pp_ifdef:
519   case tok::pp_ifndef:
520     parsePPIf(/*IfDef=*/true);
521     break;
522   case tok::pp_else:
523     parsePPElse();
524     break;
525   case tok::pp_elif:
526     parsePPElIf();
527     break;
528   case tok::pp_endif:
529     parsePPEndIf();
530     break;
531   default:
532     parsePPUnknown();
533     break;
534   }
535 }
536 
conditionalCompilationCondition(bool Unreachable)537 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
538   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
539     PPStack.push_back(PP_Unreachable);
540   else
541     PPStack.push_back(PP_Conditional);
542 }
543 
conditionalCompilationStart(bool Unreachable)544 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
545   ++PPBranchLevel;
546   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
547   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
548     PPLevelBranchIndex.push_back(0);
549     PPLevelBranchCount.push_back(0);
550   }
551   PPChainBranchIndex.push(0);
552   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
553   conditionalCompilationCondition(Unreachable || Skip);
554 }
555 
conditionalCompilationAlternative()556 void UnwrappedLineParser::conditionalCompilationAlternative() {
557   if (!PPStack.empty())
558     PPStack.pop_back();
559   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
560   if (!PPChainBranchIndex.empty())
561     ++PPChainBranchIndex.top();
562   conditionalCompilationCondition(
563       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
564       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
565 }
566 
conditionalCompilationEnd()567 void UnwrappedLineParser::conditionalCompilationEnd() {
568   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
569   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
570     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
571       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
572     }
573   }
574   // Guard against #endif's without #if.
575   if (PPBranchLevel > 0)
576     --PPBranchLevel;
577   if (!PPChainBranchIndex.empty())
578     PPChainBranchIndex.pop();
579   if (!PPStack.empty())
580     PPStack.pop_back();
581 }
582 
parsePPIf(bool IfDef)583 void UnwrappedLineParser::parsePPIf(bool IfDef) {
584   nextToken();
585   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
586                          FormatTok->Tok.getLiteralData() != nullptr &&
587                          StringRef(FormatTok->Tok.getLiteralData(),
588                                    FormatTok->Tok.getLength()) == "0") ||
589                         FormatTok->Tok.is(tok::kw_false);
590   conditionalCompilationStart(!IfDef && IsLiteralFalse);
591   parsePPUnknown();
592 }
593 
parsePPElse()594 void UnwrappedLineParser::parsePPElse() {
595   conditionalCompilationAlternative();
596   parsePPUnknown();
597 }
598 
parsePPElIf()599 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
600 
parsePPEndIf()601 void UnwrappedLineParser::parsePPEndIf() {
602   conditionalCompilationEnd();
603   parsePPUnknown();
604 }
605 
parsePPDefine()606 void UnwrappedLineParser::parsePPDefine() {
607   nextToken();
608 
609   if (FormatTok->Tok.getKind() != tok::identifier) {
610     parsePPUnknown();
611     return;
612   }
613   nextToken();
614   if (FormatTok->Tok.getKind() == tok::l_paren &&
615       FormatTok->WhitespaceRange.getBegin() ==
616           FormatTok->WhitespaceRange.getEnd()) {
617     parseParens();
618   }
619   addUnwrappedLine();
620   Line->Level = 1;
621 
622   // Errors during a preprocessor directive can only affect the layout of the
623   // preprocessor directive, and thus we ignore them. An alternative approach
624   // would be to use the same approach we use on the file level (no
625   // re-indentation if there was a structural error) within the macro
626   // definition.
627   parseFile();
628 }
629 
parsePPUnknown()630 void UnwrappedLineParser::parsePPUnknown() {
631   do {
632     nextToken();
633   } while (!eof());
634   addUnwrappedLine();
635 }
636 
637 // Here we blacklist certain tokens that are not usually the first token in an
638 // unwrapped line. This is used in attempt to distinguish macro calls without
639 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const clang::Token & Tok)640 static bool tokenCanStartNewLine(const clang::Token &Tok) {
641   // Semicolon can be a null-statement, l_square can be a start of a macro or
642   // a C++11 attribute, but this doesn't seem to be common.
643   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
644          Tok.isNot(tok::l_square) &&
645          // Tokens that can only be used as binary operators and a part of
646          // overloaded operator names.
647          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
648          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
649          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
650          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
651          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
652          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
653          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
654          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
655          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
656          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
657          Tok.isNot(tok::lesslessequal) &&
658          // Colon is used in labels, base class lists, initializer lists,
659          // range-based for loops, ternary operator, but should never be the
660          // first token in an unwrapped line.
661          Tok.isNot(tok::colon) &&
662          // 'noexcept' is a trailing annotation.
663          Tok.isNot(tok::kw_noexcept);
664 }
665 
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)666 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
667                           const FormatToken *FormatTok) {
668   // FIXME: This returns true for C/C++ keywords like 'struct'.
669   return FormatTok->is(tok::identifier) &&
670          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
671           !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of, Keywords.kw_as,
672                               Keywords.kw_async, Keywords.kw_await,
673                               Keywords.kw_yield, Keywords.kw_finally,
674                               Keywords.kw_function, Keywords.kw_import,
675                               Keywords.kw_is, Keywords.kw_let, Keywords.kw_var,
676                               Keywords.kw_abstract, Keywords.kw_extends,
677                               Keywords.kw_implements, Keywords.kw_instanceof,
678                               Keywords.kw_interface, Keywords.kw_throws));
679 }
680 
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)681 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
682                                  const FormatToken *FormatTok) {
683   return FormatTok->Tok.isLiteral() || mustBeJSIdent(Keywords, FormatTok);
684 }
685 
686 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
687 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)688 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
689                            const FormatToken *FormatTok) {
690   return FormatTok->isOneOf(
691       tok::kw_return, Keywords.kw_yield,
692       // conditionals
693       tok::kw_if, tok::kw_else,
694       // loops
695       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
696       // switch/case
697       tok::kw_switch, tok::kw_case,
698       // exceptions
699       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
700       // declaration
701       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
702       Keywords.kw_async, Keywords.kw_function,
703       // import/export
704       Keywords.kw_import, tok::kw_export);
705 }
706 
707 // readTokenWithJavaScriptASI reads the next token and terminates the current
708 // line if JavaScript Automatic Semicolon Insertion must
709 // happen between the current token and the next token.
710 //
711 // This method is conservative - it cannot cover all edge cases of JavaScript,
712 // but only aims to correctly handle certain well known cases. It *must not*
713 // return true in speculative cases.
readTokenWithJavaScriptASI()714 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
715   FormatToken *Previous = FormatTok;
716   readToken();
717   FormatToken *Next = FormatTok;
718 
719   bool IsOnSameLine =
720       CommentsBeforeNextToken.empty()
721           ? Next->NewlinesBefore == 0
722           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
723   if (IsOnSameLine)
724     return;
725 
726   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
727   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
728     // If the token before the previous one is an '@', the previous token is an
729     // annotation and can precede another identifier/value.
730     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
731     if (PrePrevious->is(tok::at))
732       return;
733   }
734   if (Next->is(tok::exclaim) && PreviousMustBeValue)
735     addUnwrappedLine();
736   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
737   if (NextMustBeValue && (PreviousMustBeValue ||
738                           Previous->isOneOf(tok::r_square, tok::r_paren,
739                                             tok::plusplus, tok::minusminus)))
740     addUnwrappedLine();
741   if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
742     addUnwrappedLine();
743 }
744 
parseStructuralElement()745 void UnwrappedLineParser::parseStructuralElement() {
746   assert(!FormatTok->is(tok::l_brace));
747   if (Style.Language == FormatStyle::LK_TableGen &&
748       FormatTok->is(tok::pp_include)) {
749     nextToken();
750     if (FormatTok->is(tok::string_literal))
751       nextToken();
752     addUnwrappedLine();
753     return;
754   }
755   switch (FormatTok->Tok.getKind()) {
756   case tok::at:
757     nextToken();
758     if (FormatTok->Tok.is(tok::l_brace)) {
759       parseBracedList();
760       break;
761     }
762     switch (FormatTok->Tok.getObjCKeywordID()) {
763     case tok::objc_public:
764     case tok::objc_protected:
765     case tok::objc_package:
766     case tok::objc_private:
767       return parseAccessSpecifier();
768     case tok::objc_interface:
769     case tok::objc_implementation:
770       return parseObjCInterfaceOrImplementation();
771     case tok::objc_protocol:
772       return parseObjCProtocol();
773     case tok::objc_end:
774       return; // Handled by the caller.
775     case tok::objc_optional:
776     case tok::objc_required:
777       nextToken();
778       addUnwrappedLine();
779       return;
780     case tok::objc_autoreleasepool:
781       nextToken();
782       if (FormatTok->Tok.is(tok::l_brace)) {
783         if (Style.BraceWrapping.AfterObjCDeclaration)
784           addUnwrappedLine();
785         parseBlock(/*MustBeDeclaration=*/false);
786       }
787       addUnwrappedLine();
788       return;
789     case tok::objc_try:
790       // This branch isn't strictly necessary (the kw_try case below would
791       // do this too after the tok::at is parsed above).  But be explicit.
792       parseTryCatch();
793       return;
794     default:
795       break;
796     }
797     break;
798   case tok::kw_asm:
799     nextToken();
800     if (FormatTok->is(tok::l_brace)) {
801       FormatTok->Type = TT_InlineASMBrace;
802       nextToken();
803       while (FormatTok && FormatTok->isNot(tok::eof)) {
804         if (FormatTok->is(tok::r_brace)) {
805           FormatTok->Type = TT_InlineASMBrace;
806           nextToken();
807           addUnwrappedLine();
808           break;
809         }
810         FormatTok->Finalized = true;
811         nextToken();
812       }
813     }
814     break;
815   case tok::kw_namespace:
816     parseNamespace();
817     return;
818   case tok::kw_inline:
819     nextToken();
820     if (FormatTok->Tok.is(tok::kw_namespace)) {
821       parseNamespace();
822       return;
823     }
824     break;
825   case tok::kw_public:
826   case tok::kw_protected:
827   case tok::kw_private:
828     if (Style.Language == FormatStyle::LK_Java ||
829         Style.Language == FormatStyle::LK_JavaScript)
830       nextToken();
831     else
832       parseAccessSpecifier();
833     return;
834   case tok::kw_if:
835     parseIfThenElse();
836     return;
837   case tok::kw_for:
838   case tok::kw_while:
839     parseForOrWhileLoop();
840     return;
841   case tok::kw_do:
842     parseDoWhile();
843     return;
844   case tok::kw_switch:
845     parseSwitch();
846     return;
847   case tok::kw_default:
848     nextToken();
849     parseLabel();
850     return;
851   case tok::kw_case:
852     parseCaseLabel();
853     return;
854   case tok::kw_try:
855   case tok::kw___try:
856     parseTryCatch();
857     return;
858   case tok::kw_extern:
859     nextToken();
860     if (FormatTok->Tok.is(tok::string_literal)) {
861       nextToken();
862       if (FormatTok->Tok.is(tok::l_brace)) {
863         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
864         addUnwrappedLine();
865         return;
866       }
867     }
868     break;
869   case tok::kw_export:
870     if (Style.Language == FormatStyle::LK_JavaScript) {
871       parseJavaScriptEs6ImportExport();
872       return;
873     }
874     break;
875   case tok::identifier:
876     if (FormatTok->is(TT_ForEachMacro)) {
877       parseForOrWhileLoop();
878       return;
879     }
880     if (FormatTok->is(TT_MacroBlockBegin)) {
881       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
882                  /*MunchSemi=*/false);
883       return;
884     }
885     if (FormatTok->is(Keywords.kw_import)) {
886       if (Style.Language == FormatStyle::LK_JavaScript) {
887         parseJavaScriptEs6ImportExport();
888         return;
889       }
890       if (Style.Language == FormatStyle::LK_Proto) {
891         nextToken();
892         if (FormatTok->is(tok::kw_public))
893           nextToken();
894         if (!FormatTok->is(tok::string_literal))
895           return;
896         nextToken();
897         if (FormatTok->is(tok::semi))
898           nextToken();
899         addUnwrappedLine();
900         return;
901       }
902     }
903     if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
904                            Keywords.kw_slots, Keywords.kw_qslots)) {
905       nextToken();
906       if (FormatTok->is(tok::colon)) {
907         nextToken();
908         addUnwrappedLine();
909       }
910       return;
911     }
912     // In all other cases, parse the declaration.
913     break;
914   default:
915     break;
916   }
917   do {
918     const FormatToken *Previous = getPreviousToken();
919     switch (FormatTok->Tok.getKind()) {
920     case tok::at:
921       nextToken();
922       if (FormatTok->Tok.is(tok::l_brace))
923         parseBracedList();
924       break;
925     case tok::kw_enum:
926       // Ignore if this is part of "template <enum ...".
927       if (Previous && Previous->is(tok::less)) {
928         nextToken();
929         break;
930       }
931 
932       // parseEnum falls through and does not yet add an unwrapped line as an
933       // enum definition can start a structural element.
934       if (!parseEnum())
935         break;
936       // This only applies for C++.
937       if (Style.Language != FormatStyle::LK_Cpp) {
938         addUnwrappedLine();
939         return;
940       }
941       break;
942     case tok::kw_typedef:
943       nextToken();
944       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
945                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
946         parseEnum();
947       break;
948     case tok::kw_struct:
949     case tok::kw_union:
950     case tok::kw_class:
951       // parseRecord falls through and does not yet add an unwrapped line as a
952       // record declaration or definition can start a structural element.
953       parseRecord();
954       // This does not apply for Java and JavaScript.
955       if (Style.Language == FormatStyle::LK_Java ||
956           Style.Language == FormatStyle::LK_JavaScript) {
957         if (FormatTok->is(tok::semi))
958           nextToken();
959         addUnwrappedLine();
960         return;
961       }
962       break;
963     case tok::period:
964       nextToken();
965       // In Java, classes have an implicit static member "class".
966       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
967           FormatTok->is(tok::kw_class))
968         nextToken();
969       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
970           FormatTok->Tok.getIdentifierInfo())
971         // JavaScript only has pseudo keywords, all keywords are allowed to
972         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
973         nextToken();
974       break;
975     case tok::semi:
976       nextToken();
977       addUnwrappedLine();
978       return;
979     case tok::r_brace:
980       addUnwrappedLine();
981       return;
982     case tok::l_paren:
983       parseParens();
984       break;
985     case tok::kw_operator:
986       nextToken();
987       if (FormatTok->isBinaryOperator())
988         nextToken();
989       break;
990     case tok::caret:
991       nextToken();
992       if (FormatTok->Tok.isAnyIdentifier() ||
993           FormatTok->isSimpleTypeSpecifier())
994         nextToken();
995       if (FormatTok->is(tok::l_paren))
996         parseParens();
997       if (FormatTok->is(tok::l_brace))
998         parseChildBlock();
999       break;
1000     case tok::l_brace:
1001       if (!tryToParseBracedList()) {
1002         // A block outside of parentheses must be the last part of a
1003         // structural element.
1004         // FIXME: Figure out cases where this is not true, and add projections
1005         // for them (the one we know is missing are lambdas).
1006         if (Style.BraceWrapping.AfterFunction)
1007           addUnwrappedLine();
1008         FormatTok->Type = TT_FunctionLBrace;
1009         parseBlock(/*MustBeDeclaration=*/false);
1010         addUnwrappedLine();
1011         return;
1012       }
1013       // Otherwise this was a braced init list, and the structural
1014       // element continues.
1015       break;
1016     case tok::kw_try:
1017       // We arrive here when parsing function-try blocks.
1018       parseTryCatch();
1019       return;
1020     case tok::identifier: {
1021       if (FormatTok->is(TT_MacroBlockEnd)) {
1022         addUnwrappedLine();
1023         return;
1024       }
1025 
1026       // Parse function literal unless 'function' is the first token in a line
1027       // in which case this should be treated as a free-standing function.
1028       if (Style.Language == FormatStyle::LK_JavaScript &&
1029           (FormatTok->is(Keywords.kw_function) ||
1030            FormatTok->startsSequence(Keywords.kw_async,
1031                                      Keywords.kw_function)) &&
1032           Line->Tokens.size() > 0) {
1033         tryToParseJSFunction();
1034         break;
1035       }
1036       if ((Style.Language == FormatStyle::LK_JavaScript ||
1037            Style.Language == FormatStyle::LK_Java) &&
1038           FormatTok->is(Keywords.kw_interface)) {
1039         if (Style.Language == FormatStyle::LK_JavaScript) {
1040           // In JavaScript/TypeScript, "interface" can be used as a standalone
1041           // identifier, e.g. in `var interface = 1;`. If "interface" is
1042           // followed by another identifier, it is very like to be an actual
1043           // interface declaration.
1044           unsigned StoredPosition = Tokens->getPosition();
1045           FormatToken *Next = Tokens->getNextToken();
1046           FormatTok = Tokens->setPosition(StoredPosition);
1047           if (Next && !mustBeJSIdent(Keywords, Next)) {
1048             nextToken();
1049             break;
1050           }
1051         }
1052         parseRecord();
1053         addUnwrappedLine();
1054         return;
1055       }
1056 
1057       // See if the following token should start a new unwrapped line.
1058       StringRef Text = FormatTok->TokenText;
1059       nextToken();
1060       if (Line->Tokens.size() == 1 &&
1061           // JS doesn't have macros, and within classes colons indicate fields,
1062           // not labels.
1063           Style.Language != FormatStyle::LK_JavaScript) {
1064         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1065           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1066           parseLabel();
1067           return;
1068         }
1069         // Recognize function-like macro usages without trailing semicolon as
1070         // well as free-standing macros like Q_OBJECT.
1071         bool FunctionLike = FormatTok->is(tok::l_paren);
1072         if (FunctionLike)
1073           parseParens();
1074 
1075         bool FollowedByNewline =
1076             CommentsBeforeNextToken.empty()
1077                 ? FormatTok->NewlinesBefore > 0
1078                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1079 
1080         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1081             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1082           addUnwrappedLine();
1083           return;
1084         }
1085       }
1086       break;
1087     }
1088     case tok::equal:
1089       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1090       // TT_JsFatArrow. The always start an expression or a child block if
1091       // followed by a curly.
1092       if (FormatTok->is(TT_JsFatArrow)) {
1093         nextToken();
1094         if (FormatTok->is(tok::l_brace))
1095           parseChildBlock();
1096         break;
1097       }
1098 
1099       nextToken();
1100       if (FormatTok->Tok.is(tok::l_brace)) {
1101         parseBracedList();
1102       }
1103       break;
1104     case tok::l_square:
1105       parseSquare();
1106       break;
1107     case tok::kw_new:
1108       parseNew();
1109       break;
1110     default:
1111       nextToken();
1112       break;
1113     }
1114   } while (!eof());
1115 }
1116 
tryToParseLambda()1117 bool UnwrappedLineParser::tryToParseLambda() {
1118   if (Style.Language != FormatStyle::LK_Cpp) {
1119     nextToken();
1120     return false;
1121   }
1122   const FormatToken* Previous = getPreviousToken();
1123   if (Previous &&
1124       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1125                          tok::kw_delete) ||
1126        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1127     nextToken();
1128     return false;
1129   }
1130   assert(FormatTok->is(tok::l_square));
1131   FormatToken &LSquare = *FormatTok;
1132   if (!tryToParseLambdaIntroducer())
1133     return false;
1134 
1135   while (FormatTok->isNot(tok::l_brace)) {
1136     if (FormatTok->isSimpleTypeSpecifier()) {
1137       nextToken();
1138       continue;
1139     }
1140     switch (FormatTok->Tok.getKind()) {
1141     case tok::l_brace:
1142       break;
1143     case tok::l_paren:
1144       parseParens();
1145       break;
1146     case tok::amp:
1147     case tok::star:
1148     case tok::kw_const:
1149     case tok::comma:
1150     case tok::less:
1151     case tok::greater:
1152     case tok::identifier:
1153     case tok::numeric_constant:
1154     case tok::coloncolon:
1155     case tok::kw_mutable:
1156       nextToken();
1157       break;
1158     case tok::arrow:
1159       FormatTok->Type = TT_LambdaArrow;
1160       nextToken();
1161       break;
1162     default:
1163       return true;
1164     }
1165   }
1166   LSquare.Type = TT_LambdaLSquare;
1167   parseChildBlock();
1168   return true;
1169 }
1170 
tryToParseLambdaIntroducer()1171 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1172   nextToken();
1173   if (FormatTok->is(tok::equal)) {
1174     nextToken();
1175     if (FormatTok->is(tok::r_square)) {
1176       nextToken();
1177       return true;
1178     }
1179     if (FormatTok->isNot(tok::comma))
1180       return false;
1181     nextToken();
1182   } else if (FormatTok->is(tok::amp)) {
1183     nextToken();
1184     if (FormatTok->is(tok::r_square)) {
1185       nextToken();
1186       return true;
1187     }
1188     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1189       return false;
1190     }
1191     if (FormatTok->is(tok::comma))
1192       nextToken();
1193   } else if (FormatTok->is(tok::r_square)) {
1194     nextToken();
1195     return true;
1196   }
1197   do {
1198     if (FormatTok->is(tok::amp))
1199       nextToken();
1200     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1201       return false;
1202     nextToken();
1203     if (FormatTok->is(tok::ellipsis))
1204       nextToken();
1205     if (FormatTok->is(tok::comma)) {
1206       nextToken();
1207     } else if (FormatTok->is(tok::r_square)) {
1208       nextToken();
1209       return true;
1210     } else {
1211       return false;
1212     }
1213   } while (!eof());
1214   return false;
1215 }
1216 
tryToParseJSFunction()1217 void UnwrappedLineParser::tryToParseJSFunction() {
1218   assert(FormatTok->is(Keywords.kw_function) ||
1219          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1220   if (FormatTok->is(Keywords.kw_async))
1221     nextToken();
1222   // Consume "function".
1223   nextToken();
1224 
1225   // Consume * (generator function).
1226   if (FormatTok->is(tok::star))
1227     nextToken();
1228 
1229   // Consume function name.
1230   if (FormatTok->is(tok::identifier))
1231     nextToken();
1232 
1233   if (FormatTok->isNot(tok::l_paren))
1234     return;
1235 
1236   // Parse formal parameter list.
1237   parseParens();
1238 
1239   if (FormatTok->is(tok::colon)) {
1240     // Parse a type definition.
1241     nextToken();
1242 
1243     // Eat the type declaration. For braced inline object types, balance braces,
1244     // otherwise just parse until finding an l_brace for the function body.
1245     if (FormatTok->is(tok::l_brace))
1246       tryToParseBracedList();
1247     else
1248       while (FormatTok->isNot(tok::l_brace) && !eof())
1249         nextToken();
1250   }
1251 
1252   parseChildBlock();
1253 }
1254 
tryToParseBracedList()1255 bool UnwrappedLineParser::tryToParseBracedList() {
1256   if (FormatTok->BlockKind == BK_Unknown)
1257     calculateBraceTypes();
1258   assert(FormatTok->BlockKind != BK_Unknown);
1259   if (FormatTok->BlockKind == BK_Block)
1260     return false;
1261   parseBracedList();
1262   return true;
1263 }
1264 
parseBracedList(bool ContinueOnSemicolons)1265 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1266   bool HasError = false;
1267   nextToken();
1268 
1269   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1270   // replace this by using parseAssigmentExpression() inside.
1271   do {
1272     if (Style.Language == FormatStyle::LK_JavaScript) {
1273       if (FormatTok->is(Keywords.kw_function) ||
1274           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1275         tryToParseJSFunction();
1276         continue;
1277       }
1278       if (FormatTok->is(TT_JsFatArrow)) {
1279         nextToken();
1280         // Fat arrows can be followed by simple expressions or by child blocks
1281         // in curly braces.
1282         if (FormatTok->is(tok::l_brace)) {
1283           parseChildBlock();
1284           continue;
1285         }
1286       }
1287     }
1288     switch (FormatTok->Tok.getKind()) {
1289     case tok::caret:
1290       nextToken();
1291       if (FormatTok->is(tok::l_brace)) {
1292         parseChildBlock();
1293       }
1294       break;
1295     case tok::l_square:
1296       tryToParseLambda();
1297       break;
1298     case tok::l_brace:
1299       // Assume there are no blocks inside a braced init list apart
1300       // from the ones we explicitly parse out (like lambdas).
1301       FormatTok->BlockKind = BK_BracedInit;
1302       parseBracedList();
1303       break;
1304     case tok::l_paren:
1305       parseParens();
1306       // JavaScript can just have free standing methods and getters/setters in
1307       // object literals. Detect them by a "{" following ")".
1308       if (Style.Language == FormatStyle::LK_JavaScript) {
1309         if (FormatTok->is(tok::l_brace))
1310           parseChildBlock();
1311         break;
1312       }
1313       break;
1314     case tok::r_brace:
1315       nextToken();
1316       return !HasError;
1317     case tok::semi:
1318       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1319       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1320       // used for error recovery if we have otherwise determined that this is
1321       // a braced list.
1322       if (Style.Language == FormatStyle::LK_JavaScript) {
1323         nextToken();
1324         break;
1325       }
1326       HasError = true;
1327       if (!ContinueOnSemicolons)
1328         return !HasError;
1329       nextToken();
1330       break;
1331     case tok::comma:
1332       nextToken();
1333       break;
1334     default:
1335       nextToken();
1336       break;
1337     }
1338   } while (!eof());
1339   return false;
1340 }
1341 
parseParens()1342 void UnwrappedLineParser::parseParens() {
1343   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1344   nextToken();
1345   do {
1346     switch (FormatTok->Tok.getKind()) {
1347     case tok::l_paren:
1348       parseParens();
1349       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1350         parseChildBlock();
1351       break;
1352     case tok::r_paren:
1353       nextToken();
1354       return;
1355     case tok::r_brace:
1356       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1357       return;
1358     case tok::l_square:
1359       tryToParseLambda();
1360       break;
1361     case tok::l_brace:
1362       if (!tryToParseBracedList())
1363         parseChildBlock();
1364       break;
1365     case tok::at:
1366       nextToken();
1367       if (FormatTok->Tok.is(tok::l_brace))
1368         parseBracedList();
1369       break;
1370     case tok::identifier:
1371       if (Style.Language == FormatStyle::LK_JavaScript &&
1372           (FormatTok->is(Keywords.kw_function) ||
1373            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1374         tryToParseJSFunction();
1375       else
1376         nextToken();
1377       break;
1378     default:
1379       nextToken();
1380       break;
1381     }
1382   } while (!eof());
1383 }
1384 
parseSquare()1385 void UnwrappedLineParser::parseSquare() {
1386   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1387   if (tryToParseLambda())
1388     return;
1389   do {
1390     switch (FormatTok->Tok.getKind()) {
1391     case tok::l_paren:
1392       parseParens();
1393       break;
1394     case tok::r_square:
1395       nextToken();
1396       return;
1397     case tok::r_brace:
1398       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1399       return;
1400     case tok::l_square:
1401       parseSquare();
1402       break;
1403     case tok::l_brace: {
1404       if (!tryToParseBracedList())
1405         parseChildBlock();
1406       break;
1407     }
1408     case tok::at:
1409       nextToken();
1410       if (FormatTok->Tok.is(tok::l_brace))
1411         parseBracedList();
1412       break;
1413     default:
1414       nextToken();
1415       break;
1416     }
1417   } while (!eof());
1418 }
1419 
parseIfThenElse()1420 void UnwrappedLineParser::parseIfThenElse() {
1421   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1422   nextToken();
1423   if (FormatTok->Tok.is(tok::l_paren))
1424     parseParens();
1425   bool NeedsUnwrappedLine = false;
1426   if (FormatTok->Tok.is(tok::l_brace)) {
1427     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1428     parseBlock(/*MustBeDeclaration=*/false);
1429     if (Style.BraceWrapping.BeforeElse)
1430       addUnwrappedLine();
1431     else
1432       NeedsUnwrappedLine = true;
1433   } else {
1434     addUnwrappedLine();
1435     ++Line->Level;
1436     parseStructuralElement();
1437     --Line->Level;
1438   }
1439   if (FormatTok->Tok.is(tok::kw_else)) {
1440     nextToken();
1441     if (FormatTok->Tok.is(tok::l_brace)) {
1442       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1443       parseBlock(/*MustBeDeclaration=*/false);
1444       addUnwrappedLine();
1445     } else if (FormatTok->Tok.is(tok::kw_if)) {
1446       parseIfThenElse();
1447     } else {
1448       addUnwrappedLine();
1449       ++Line->Level;
1450       parseStructuralElement();
1451       if (FormatTok->is(tok::eof))
1452         addUnwrappedLine();
1453       --Line->Level;
1454     }
1455   } else if (NeedsUnwrappedLine) {
1456     addUnwrappedLine();
1457   }
1458 }
1459 
parseTryCatch()1460 void UnwrappedLineParser::parseTryCatch() {
1461   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1462   nextToken();
1463   bool NeedsUnwrappedLine = false;
1464   if (FormatTok->is(tok::colon)) {
1465     // We are in a function try block, what comes is an initializer list.
1466     nextToken();
1467     while (FormatTok->is(tok::identifier)) {
1468       nextToken();
1469       if (FormatTok->is(tok::l_paren))
1470         parseParens();
1471       if (FormatTok->is(tok::comma))
1472         nextToken();
1473     }
1474   }
1475   // Parse try with resource.
1476   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1477     parseParens();
1478   }
1479   if (FormatTok->is(tok::l_brace)) {
1480     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1481     parseBlock(/*MustBeDeclaration=*/false);
1482     if (Style.BraceWrapping.BeforeCatch) {
1483       addUnwrappedLine();
1484     } else {
1485       NeedsUnwrappedLine = true;
1486     }
1487   } else if (!FormatTok->is(tok::kw_catch)) {
1488     // The C++ standard requires a compound-statement after a try.
1489     // If there's none, we try to assume there's a structuralElement
1490     // and try to continue.
1491     addUnwrappedLine();
1492     ++Line->Level;
1493     parseStructuralElement();
1494     --Line->Level;
1495   }
1496   while (1) {
1497     if (FormatTok->is(tok::at))
1498       nextToken();
1499     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1500                              tok::kw___finally) ||
1501           ((Style.Language == FormatStyle::LK_Java ||
1502             Style.Language == FormatStyle::LK_JavaScript) &&
1503            FormatTok->is(Keywords.kw_finally)) ||
1504           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1505            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1506       break;
1507     nextToken();
1508     while (FormatTok->isNot(tok::l_brace)) {
1509       if (FormatTok->is(tok::l_paren)) {
1510         parseParens();
1511         continue;
1512       }
1513       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1514         return;
1515       nextToken();
1516     }
1517     NeedsUnwrappedLine = false;
1518     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1519     parseBlock(/*MustBeDeclaration=*/false);
1520     if (Style.BraceWrapping.BeforeCatch)
1521       addUnwrappedLine();
1522     else
1523       NeedsUnwrappedLine = true;
1524   }
1525   if (NeedsUnwrappedLine)
1526     addUnwrappedLine();
1527 }
1528 
parseNamespace()1529 void UnwrappedLineParser::parseNamespace() {
1530   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1531 
1532   const FormatToken &InitialToken = *FormatTok;
1533   nextToken();
1534   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1535     nextToken();
1536   if (FormatTok->Tok.is(tok::l_brace)) {
1537     if (ShouldBreakBeforeBrace(Style, InitialToken))
1538       addUnwrappedLine();
1539 
1540     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1541                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1542                      DeclarationScopeStack.size() > 1);
1543     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1544     // Munch the semicolon after a namespace. This is more common than one would
1545     // think. Puttin the semicolon into its own line is very ugly.
1546     if (FormatTok->Tok.is(tok::semi))
1547       nextToken();
1548     addUnwrappedLine();
1549   }
1550   // FIXME: Add error handling.
1551 }
1552 
parseNew()1553 void UnwrappedLineParser::parseNew() {
1554   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1555   nextToken();
1556   if (Style.Language != FormatStyle::LK_Java)
1557     return;
1558 
1559   // In Java, we can parse everything up to the parens, which aren't optional.
1560   do {
1561     // There should not be a ;, { or } before the new's open paren.
1562     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1563       return;
1564 
1565     // Consume the parens.
1566     if (FormatTok->is(tok::l_paren)) {
1567       parseParens();
1568 
1569       // If there is a class body of an anonymous class, consume that as child.
1570       if (FormatTok->is(tok::l_brace))
1571         parseChildBlock();
1572       return;
1573     }
1574     nextToken();
1575   } while (!eof());
1576 }
1577 
parseForOrWhileLoop()1578 void UnwrappedLineParser::parseForOrWhileLoop() {
1579   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1580          "'for', 'while' or foreach macro expected");
1581   nextToken();
1582   if (FormatTok->Tok.is(tok::l_paren))
1583     parseParens();
1584   if (FormatTok->Tok.is(tok::l_brace)) {
1585     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1586     parseBlock(/*MustBeDeclaration=*/false);
1587     addUnwrappedLine();
1588   } else {
1589     addUnwrappedLine();
1590     ++Line->Level;
1591     parseStructuralElement();
1592     --Line->Level;
1593   }
1594 }
1595 
parseDoWhile()1596 void UnwrappedLineParser::parseDoWhile() {
1597   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1598   nextToken();
1599   if (FormatTok->Tok.is(tok::l_brace)) {
1600     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1601     parseBlock(/*MustBeDeclaration=*/false);
1602     if (Style.BraceWrapping.IndentBraces)
1603       addUnwrappedLine();
1604   } else {
1605     addUnwrappedLine();
1606     ++Line->Level;
1607     parseStructuralElement();
1608     --Line->Level;
1609   }
1610 
1611   // FIXME: Add error handling.
1612   if (!FormatTok->Tok.is(tok::kw_while)) {
1613     addUnwrappedLine();
1614     return;
1615   }
1616 
1617   nextToken();
1618   parseStructuralElement();
1619 }
1620 
parseLabel()1621 void UnwrappedLineParser::parseLabel() {
1622   nextToken();
1623   unsigned OldLineLevel = Line->Level;
1624   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1625     --Line->Level;
1626   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1627     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1628     parseBlock(/*MustBeDeclaration=*/false);
1629     if (FormatTok->Tok.is(tok::kw_break)) {
1630       if (Style.BraceWrapping.AfterControlStatement)
1631         addUnwrappedLine();
1632       parseStructuralElement();
1633     }
1634     addUnwrappedLine();
1635   } else {
1636     if (FormatTok->is(tok::semi))
1637       nextToken();
1638     addUnwrappedLine();
1639   }
1640   Line->Level = OldLineLevel;
1641   if (FormatTok->isNot(tok::l_brace)) {
1642     parseStructuralElement();
1643     addUnwrappedLine();
1644   }
1645 }
1646 
parseCaseLabel()1647 void UnwrappedLineParser::parseCaseLabel() {
1648   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1649   // FIXME: fix handling of complex expressions here.
1650   do {
1651     nextToken();
1652   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1653   parseLabel();
1654 }
1655 
parseSwitch()1656 void UnwrappedLineParser::parseSwitch() {
1657   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1658   nextToken();
1659   if (FormatTok->Tok.is(tok::l_paren))
1660     parseParens();
1661   if (FormatTok->Tok.is(tok::l_brace)) {
1662     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1663     parseBlock(/*MustBeDeclaration=*/false);
1664     addUnwrappedLine();
1665   } else {
1666     addUnwrappedLine();
1667     ++Line->Level;
1668     parseStructuralElement();
1669     --Line->Level;
1670   }
1671 }
1672 
parseAccessSpecifier()1673 void UnwrappedLineParser::parseAccessSpecifier() {
1674   nextToken();
1675   // Understand Qt's slots.
1676   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1677     nextToken();
1678   // Otherwise, we don't know what it is, and we'd better keep the next token.
1679   if (FormatTok->Tok.is(tok::colon))
1680     nextToken();
1681   addUnwrappedLine();
1682 }
1683 
parseEnum()1684 bool UnwrappedLineParser::parseEnum() {
1685   // Won't be 'enum' for NS_ENUMs.
1686   if (FormatTok->Tok.is(tok::kw_enum))
1687     nextToken();
1688 
1689   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1690   // declarations. An "enum" keyword followed by a colon would be a syntax
1691   // error and thus assume it is just an identifier.
1692   if (Style.Language == FormatStyle::LK_JavaScript &&
1693       FormatTok->isOneOf(tok::colon, tok::question))
1694     return false;
1695 
1696   // Eat up enum class ...
1697   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1698     nextToken();
1699 
1700   while (FormatTok->Tok.getIdentifierInfo() ||
1701          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1702                             tok::greater, tok::comma, tok::question)) {
1703     nextToken();
1704     // We can have macros or attributes in between 'enum' and the enum name.
1705     if (FormatTok->is(tok::l_paren))
1706       parseParens();
1707     if (FormatTok->is(tok::identifier)) {
1708       nextToken();
1709       // If there are two identifiers in a row, this is likely an elaborate
1710       // return type. In Java, this can be "implements", etc.
1711       if (Style.Language == FormatStyle::LK_Cpp &&
1712           FormatTok->is(tok::identifier))
1713         return false;
1714     }
1715   }
1716 
1717   // Just a declaration or something is wrong.
1718   if (FormatTok->isNot(tok::l_brace))
1719     return true;
1720   FormatTok->BlockKind = BK_Block;
1721 
1722   if (Style.Language == FormatStyle::LK_Java) {
1723     // Java enums are different.
1724     parseJavaEnumBody();
1725     return true;
1726   }
1727   if (Style.Language == FormatStyle::LK_Proto) {
1728     parseBlock(/*MustBeDeclaration=*/true);
1729     return true;
1730   }
1731 
1732   // Parse enum body.
1733   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1734   if (HasError) {
1735     if (FormatTok->is(tok::semi))
1736       nextToken();
1737     addUnwrappedLine();
1738   }
1739   return true;
1740 
1741   // There is no addUnwrappedLine() here so that we fall through to parsing a
1742   // structural element afterwards. Thus, in "enum A {} n, m;",
1743   // "} n, m;" will end up in one unwrapped line.
1744 }
1745 
parseJavaEnumBody()1746 void UnwrappedLineParser::parseJavaEnumBody() {
1747   // Determine whether the enum is simple, i.e. does not have a semicolon or
1748   // constants with class bodies. Simple enums can be formatted like braced
1749   // lists, contracted to a single line, etc.
1750   unsigned StoredPosition = Tokens->getPosition();
1751   bool IsSimple = true;
1752   FormatToken *Tok = Tokens->getNextToken();
1753   while (Tok) {
1754     if (Tok->is(tok::r_brace))
1755       break;
1756     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1757       IsSimple = false;
1758       break;
1759     }
1760     // FIXME: This will also mark enums with braces in the arguments to enum
1761     // constants as "not simple". This is probably fine in practice, though.
1762     Tok = Tokens->getNextToken();
1763   }
1764   FormatTok = Tokens->setPosition(StoredPosition);
1765 
1766   if (IsSimple) {
1767     parseBracedList();
1768     addUnwrappedLine();
1769     return;
1770   }
1771 
1772   // Parse the body of a more complex enum.
1773   // First add a line for everything up to the "{".
1774   nextToken();
1775   addUnwrappedLine();
1776   ++Line->Level;
1777 
1778   // Parse the enum constants.
1779   while (FormatTok) {
1780     if (FormatTok->is(tok::l_brace)) {
1781       // Parse the constant's class body.
1782       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1783                  /*MunchSemi=*/false);
1784     } else if (FormatTok->is(tok::l_paren)) {
1785       parseParens();
1786     } else if (FormatTok->is(tok::comma)) {
1787       nextToken();
1788       addUnwrappedLine();
1789     } else if (FormatTok->is(tok::semi)) {
1790       nextToken();
1791       addUnwrappedLine();
1792       break;
1793     } else if (FormatTok->is(tok::r_brace)) {
1794       addUnwrappedLine();
1795       break;
1796     } else {
1797       nextToken();
1798     }
1799   }
1800 
1801   // Parse the class body after the enum's ";" if any.
1802   parseLevel(/*HasOpeningBrace=*/true);
1803   nextToken();
1804   --Line->Level;
1805   addUnwrappedLine();
1806 }
1807 
parseRecord()1808 void UnwrappedLineParser::parseRecord() {
1809   const FormatToken &InitialToken = *FormatTok;
1810   nextToken();
1811 
1812   // The actual identifier can be a nested name specifier, and in macros
1813   // it is often token-pasted.
1814   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1815                             tok::kw___attribute, tok::kw___declspec,
1816                             tok::kw_alignas) ||
1817          ((Style.Language == FormatStyle::LK_Java ||
1818            Style.Language == FormatStyle::LK_JavaScript) &&
1819           FormatTok->isOneOf(tok::period, tok::comma))) {
1820     bool IsNonMacroIdentifier =
1821         FormatTok->is(tok::identifier) &&
1822         FormatTok->TokenText != FormatTok->TokenText.upper();
1823     nextToken();
1824     // We can have macros or attributes in between 'class' and the class name.
1825     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1826       parseParens();
1827   }
1828 
1829   // Note that parsing away template declarations here leads to incorrectly
1830   // accepting function declarations as record declarations.
1831   // In general, we cannot solve this problem. Consider:
1832   // class A<int> B() {}
1833   // which can be a function definition or a class definition when B() is a
1834   // macro. If we find enough real-world cases where this is a problem, we
1835   // can parse for the 'template' keyword in the beginning of the statement,
1836   // and thus rule out the record production in case there is no template
1837   // (this would still leave us with an ambiguity between template function
1838   // and class declarations).
1839   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1840     while (!eof()) {
1841       if (FormatTok->is(tok::l_brace)) {
1842         calculateBraceTypes(/*ExpectClassBody=*/true);
1843         if (!tryToParseBracedList())
1844           break;
1845       }
1846       if (FormatTok->Tok.is(tok::semi))
1847         return;
1848       nextToken();
1849     }
1850   }
1851   if (FormatTok->Tok.is(tok::l_brace)) {
1852     if (ShouldBreakBeforeBrace(Style, InitialToken))
1853       addUnwrappedLine();
1854 
1855     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1856                /*MunchSemi=*/false);
1857   }
1858   // There is no addUnwrappedLine() here so that we fall through to parsing a
1859   // structural element afterwards. Thus, in "class A {} n, m;",
1860   // "} n, m;" will end up in one unwrapped line.
1861 }
1862 
parseObjCProtocolList()1863 void UnwrappedLineParser::parseObjCProtocolList() {
1864   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1865   do
1866     nextToken();
1867   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1868   nextToken(); // Skip '>'.
1869 }
1870 
parseObjCUntilAtEnd()1871 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1872   do {
1873     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1874       nextToken();
1875       addUnwrappedLine();
1876       break;
1877     }
1878     if (FormatTok->is(tok::l_brace)) {
1879       parseBlock(/*MustBeDeclaration=*/false);
1880       // In ObjC interfaces, nothing should be following the "}".
1881       addUnwrappedLine();
1882     } else if (FormatTok->is(tok::r_brace)) {
1883       // Ignore stray "}". parseStructuralElement doesn't consume them.
1884       nextToken();
1885       addUnwrappedLine();
1886     } else {
1887       parseStructuralElement();
1888     }
1889   } while (!eof());
1890 }
1891 
parseObjCInterfaceOrImplementation()1892 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1893   nextToken();
1894   nextToken(); // interface name
1895 
1896   // @interface can be followed by either a base class, or a category.
1897   if (FormatTok->Tok.is(tok::colon)) {
1898     nextToken();
1899     nextToken(); // base class name
1900   } else if (FormatTok->Tok.is(tok::l_paren))
1901     // Skip category, if present.
1902     parseParens();
1903 
1904   if (FormatTok->Tok.is(tok::less))
1905     parseObjCProtocolList();
1906 
1907   if (FormatTok->Tok.is(tok::l_brace)) {
1908     if (Style.BraceWrapping.AfterObjCDeclaration)
1909       addUnwrappedLine();
1910     parseBlock(/*MustBeDeclaration=*/true);
1911   }
1912 
1913   // With instance variables, this puts '}' on its own line.  Without instance
1914   // variables, this ends the @interface line.
1915   addUnwrappedLine();
1916 
1917   parseObjCUntilAtEnd();
1918 }
1919 
parseObjCProtocol()1920 void UnwrappedLineParser::parseObjCProtocol() {
1921   nextToken();
1922   nextToken(); // protocol name
1923 
1924   if (FormatTok->Tok.is(tok::less))
1925     parseObjCProtocolList();
1926 
1927   // Check for protocol declaration.
1928   if (FormatTok->Tok.is(tok::semi)) {
1929     nextToken();
1930     return addUnwrappedLine();
1931   }
1932 
1933   addUnwrappedLine();
1934   parseObjCUntilAtEnd();
1935 }
1936 
parseJavaScriptEs6ImportExport()1937 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1938   bool IsImport = FormatTok->is(Keywords.kw_import);
1939   assert(IsImport || FormatTok->is(tok::kw_export));
1940   nextToken();
1941 
1942   // Consume the "default" in "export default class/function".
1943   if (FormatTok->is(tok::kw_default))
1944     nextToken();
1945 
1946   // Consume "async function", "function" and "default function", so that these
1947   // get parsed as free-standing JS functions, i.e. do not require a trailing
1948   // semicolon.
1949   if (FormatTok->is(Keywords.kw_async))
1950     nextToken();
1951   if (FormatTok->is(Keywords.kw_function)) {
1952     nextToken();
1953     return;
1954   }
1955 
1956   // For imports, `export *`, `export {...}`, consume the rest of the line up
1957   // to the terminating `;`. For everything else, just return and continue
1958   // parsing the structural element, i.e. the declaration or expression for
1959   // `export default`.
1960   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
1961       !FormatTok->isStringLiteral())
1962     return;
1963 
1964   while (!eof() && FormatTok->isNot(tok::semi)) {
1965     if (FormatTok->is(tok::l_brace)) {
1966       FormatTok->BlockKind = BK_Block;
1967       parseBracedList();
1968     } else {
1969       nextToken();
1970     }
1971   }
1972 }
1973 
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")1974 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1975                                                  StringRef Prefix = "") {
1976   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1977                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1978   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1979                                                     E = Line.Tokens.end();
1980        I != E; ++I) {
1981     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1982   }
1983   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1984                                                     E = Line.Tokens.end();
1985        I != E; ++I) {
1986     const UnwrappedLineNode &Node = *I;
1987     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1988              I = Node.Children.begin(),
1989              E = Node.Children.end();
1990          I != E; ++I) {
1991       printDebugInfo(*I, "\nChild: ");
1992     }
1993   }
1994   llvm::dbgs() << "\n";
1995 }
1996 
addUnwrappedLine()1997 void UnwrappedLineParser::addUnwrappedLine() {
1998   if (Line->Tokens.empty())
1999     return;
2000   DEBUG({
2001     if (CurrentLines == &Lines)
2002       printDebugInfo(*Line);
2003   });
2004   CurrentLines->push_back(std::move(*Line));
2005   Line->Tokens.clear();
2006   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2007     CurrentLines->append(
2008         std::make_move_iterator(PreprocessorDirectives.begin()),
2009         std::make_move_iterator(PreprocessorDirectives.end()));
2010     PreprocessorDirectives.clear();
2011   }
2012 }
2013 
eof() const2014 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2015 
isOnNewLine(const FormatToken & FormatTok)2016 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2017   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2018          FormatTok.NewlinesBefore > 0;
2019 }
2020 
flushComments(bool NewlineBeforeNext)2021 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2022   bool JustComments = Line->Tokens.empty();
2023   for (SmallVectorImpl<FormatToken *>::const_iterator
2024            I = CommentsBeforeNextToken.begin(),
2025            E = CommentsBeforeNextToken.end();
2026        I != E; ++I) {
2027     if (isOnNewLine(**I) && JustComments)
2028       addUnwrappedLine();
2029     pushToken(*I);
2030   }
2031   if (NewlineBeforeNext && JustComments)
2032     addUnwrappedLine();
2033   CommentsBeforeNextToken.clear();
2034 }
2035 
nextToken()2036 void UnwrappedLineParser::nextToken() {
2037   if (eof())
2038     return;
2039   flushComments(isOnNewLine(*FormatTok));
2040   pushToken(FormatTok);
2041   if (Style.Language != FormatStyle::LK_JavaScript)
2042     readToken();
2043   else
2044     readTokenWithJavaScriptASI();
2045 }
2046 
getPreviousToken()2047 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2048   // FIXME: This is a dirty way to access the previous token. Find a better
2049   // solution.
2050   if (!Line || Line->Tokens.empty())
2051     return nullptr;
2052   return Line->Tokens.back().Tok;
2053 }
2054 
readToken()2055 void UnwrappedLineParser::readToken() {
2056   bool CommentsInCurrentLine = true;
2057   do {
2058     FormatTok = Tokens->getNextToken();
2059     assert(FormatTok);
2060     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2061            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2062       // If there is an unfinished unwrapped line, we flush the preprocessor
2063       // directives only after that unwrapped line was finished later.
2064       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2065       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2066       // Comments stored before the preprocessor directive need to be output
2067       // before the preprocessor directive, at the same level as the
2068       // preprocessor directive, as we consider them to apply to the directive.
2069       flushComments(isOnNewLine(*FormatTok));
2070       parsePPDirective();
2071     }
2072     while (FormatTok->Type == TT_ConflictStart ||
2073            FormatTok->Type == TT_ConflictEnd ||
2074            FormatTok->Type == TT_ConflictAlternative) {
2075       if (FormatTok->Type == TT_ConflictStart) {
2076         conditionalCompilationStart(/*Unreachable=*/false);
2077       } else if (FormatTok->Type == TT_ConflictAlternative) {
2078         conditionalCompilationAlternative();
2079       } else if (FormatTok->Type == TT_ConflictEnd) {
2080         conditionalCompilationEnd();
2081       }
2082       FormatTok = Tokens->getNextToken();
2083       FormatTok->MustBreakBefore = true;
2084     }
2085 
2086     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2087         !Line->InPPDirective) {
2088       continue;
2089     }
2090 
2091     if (!FormatTok->Tok.is(tok::comment))
2092       return;
2093     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
2094       CommentsInCurrentLine = false;
2095     }
2096     if (CommentsInCurrentLine) {
2097       pushToken(FormatTok);
2098     } else {
2099       CommentsBeforeNextToken.push_back(FormatTok);
2100     }
2101   } while (!eof());
2102 }
2103 
pushToken(FormatToken * Tok)2104 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2105   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2106   if (MustBreakBeforeNextToken) {
2107     Line->Tokens.back().Tok->MustBreakBefore = true;
2108     MustBreakBeforeNextToken = false;
2109   }
2110 }
2111 
2112 } // end namespace format
2113 } // end namespace clang
2114