• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
~FormatTokenSource()30   virtual ~FormatTokenSource() {}
31   virtual FormatToken *getNextToken() = 0;
32 
33   virtual unsigned getPosition() = 0;
34   virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                          bool MustBeDeclaration)
43       : Line(Line), Stack(Stack) {
44     Line.MustBeDeclaration = MustBeDeclaration;
45     Stack.push_back(MustBeDeclaration);
46   }
~ScopedDeclarationState()47   ~ScopedDeclarationState() {
48     Stack.pop_back();
49     if (!Stack.empty())
50       Line.MustBeDeclaration = Stack.back();
51     else
52       Line.MustBeDeclaration = true;
53   }
54 
55 private:
56   UnwrappedLine &Line;
57   std::vector<bool> &Stack;
58 };
59 
isLineComment(const FormatToken & FormatTok)60 static bool isLineComment(const FormatToken &FormatTok) {
61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)67 static bool continuesLineComment(const FormatToken &FormatTok,
68                                  const FormatToken *Previous,
69                                  const FormatToken *MinColumnToken) {
70   if (!Previous || !MinColumnToken)
71     return false;
72   unsigned MinContinueColumn =
73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75          isLineComment(*Previous) &&
76          FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                    FormatToken *&ResetToken)
83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85         Token(nullptr), PreviousToken(nullptr) {
86     FakeEOF.Tok.startToken();
87     FakeEOF.Tok.setKind(tok::eof);
88     TokenSource = this;
89     Line.Level = 0;
90     Line.InPPDirective = true;
91   }
92 
~ScopedMacroState()93   ~ScopedMacroState() override {
94     TokenSource = PreviousTokenSource;
95     ResetToken = Token;
96     Line.InPPDirective = false;
97     Line.Level = PreviousLineLevel;
98   }
99 
getNextToken()100   FormatToken *getNextToken() override {
101     // The \c UnwrappedLineParser guards against this by never calling
102     // \c getNextToken() after it has encountered the first eof token.
103     assert(!eof());
104     PreviousToken = Token;
105     Token = PreviousTokenSource->getNextToken();
106     if (eof())
107       return &FakeEOF;
108     return Token;
109   }
110 
getPosition()111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
setPosition(unsigned Position)113   FormatToken *setPosition(unsigned Position) override {
114     PreviousToken = nullptr;
115     Token = PreviousTokenSource->setPosition(Position);
116     return Token;
117   }
118 
119 private:
eof()120   bool eof() {
121     return Token && Token->HasUnescapedNewline &&
122            !continuesLineComment(*Token, PreviousToken,
123                                  /*MinColumnToken=*/PreviousToken);
124   }
125 
126   FormatToken FakeEOF;
127   UnwrappedLine &Line;
128   FormatTokenSource *&TokenSource;
129   FormatToken *&ResetToken;
130   unsigned PreviousLineLevel;
131   FormatTokenSource *PreviousTokenSource;
132 
133   FormatToken *Token;
134   FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
139 class ScopedLineState {
140 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)141   ScopedLineState(UnwrappedLineParser &Parser,
142                   bool SwitchToPreprocessorLines = false)
143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144     if (SwitchToPreprocessorLines)
145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
146     else if (!Parser.Line->Tokens.empty())
147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148     PreBlockLine = std::move(Parser.Line);
149     Parser.Line = std::make_unique<UnwrappedLine>();
150     Parser.Line->Level = PreBlockLine->Level;
151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152   }
153 
~ScopedLineState()154   ~ScopedLineState() {
155     if (!Parser.Line->Tokens.empty()) {
156       Parser.addUnwrappedLine();
157     }
158     assert(Parser.Line->Tokens.empty());
159     Parser.Line = std::move(PreBlockLine);
160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161       Parser.MustBreakBeforeNextToken = true;
162     Parser.CurrentLines = OriginalLines;
163   }
164 
165 private:
166   UnwrappedLineParser &Parser;
167 
168   std::unique_ptr<UnwrappedLine> PreBlockLine;
169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
172 class CompoundStatementIndenter {
173 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
175                             const FormatStyle &Style, unsigned &LineLevel)
176       : CompoundStatementIndenter(Parser, LineLevel,
177                                   Style.BraceWrapping.AfterControlStatement,
178                                   Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                             bool WrapBrace, bool IndentBrace)
181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182     if (WrapBrace)
183       Parser->addUnwrappedLine();
184     if (IndentBrace)
185       ++LineLevel;
186   }
~CompoundStatementIndenter()187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190   unsigned &LineLevel;
191   unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199       : Tokens(Tokens), Position(-1) {}
200 
getNextToken()201   FormatToken *getNextToken() override {
202     ++Position;
203     return Tokens[Position];
204   }
205 
getPosition()206   unsigned getPosition() override {
207     assert(Position >= 0);
208     return Position;
209   }
210 
setPosition(unsigned P)211   FormatToken *setPosition(unsigned P) override {
212     Position = P;
213     return Tokens[Position];
214   }
215 
reset()216   void reset() { Position = -1; }
217 
218 private:
219   ArrayRef<FormatToken *> Tokens;
220   int Position;
221 };
222 
223 } // end anonymous namespace
224 
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                          const AdditionalKeywords &Keywords,
227                                          unsigned FirstStartColumn,
228                                          ArrayRef<FormatToken *> Tokens,
229                                          UnwrappedLineConsumer &Callback)
230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                        ? IG_Rejected
236                        : IG_Inited),
237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
reset()239 void UnwrappedLineParser::reset() {
240   PPBranchLevel = -1;
241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                      ? IG_Rejected
243                      : IG_Inited;
244   IncludeGuardToken = nullptr;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
parse()256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     LLVM_DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267 
268     // If we found an include guard then all preprocessor directives (other than
269     // the guard) are over-indented by one.
270     if (IncludeGuard == IG_Found)
271       for (auto &Line : Lines)
272         if (Line.InPPDirective && Line.Level > 0)
273           --Line.Level;
274 
275     // Create line with eof token.
276     pushToken(FormatTok);
277     addUnwrappedLine();
278 
279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                   E = Lines.end();
281          I != E; ++I) {
282       Callback.consumeUnwrappedLine(*I);
283     }
284     Callback.finishRun();
285     Lines.clear();
286     while (!PPLevelBranchIndex.empty() &&
287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290     }
291     if (!PPLevelBranchIndex.empty()) {
292       ++PPLevelBranchIndex.back();
293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295     }
296   } while (!PPLevelBranchIndex.empty());
297 }
298 
parseFile()299 void UnwrappedLineParser::parseFile() {
300   // The top-level context in a file always has declarations, except for pre-
301   // processor directives and JavaScript files.
302   bool MustBeDeclaration =
303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                           MustBeDeclaration);
306   if (Style.Language == FormatStyle::LK_TextProto)
307     parseBracedList();
308   else
309     parseLevel(/*HasOpeningBrace=*/false);
310   // Make sure to format the remaining tokens.
311   //
312   // LK_TextProto is special since its top-level is parsed as the body of a
313   // braced list, which does not necessarily have natural line separators such
314   // as a semicolon. Comments after the last entry that have been determined to
315   // not belong to that line, as in:
316   //   key: value
317   //   // endfile comment
318   // do not have a chance to be put on a line of their own until this point.
319   // Here we add this newline before end-of-file comments.
320   if (Style.Language == FormatStyle::LK_TextProto &&
321       !CommentsBeforeNextToken.empty())
322     addUnwrappedLine();
323   flushComments(true);
324   addUnwrappedLine();
325 }
326 
parseCSharpGenericTypeConstraint()327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::l_brace:
331       return;
332     default:
333       if (FormatTok->is(Keywords.kw_where)) {
334         addUnwrappedLine();
335         nextToken();
336         parseCSharpGenericTypeConstraint();
337         break;
338       }
339       nextToken();
340       break;
341     }
342   } while (!eof());
343 }
344 
parseCSharpAttribute()345 void UnwrappedLineParser::parseCSharpAttribute() {
346   int UnpairedSquareBrackets = 1;
347   do {
348     switch (FormatTok->Tok.getKind()) {
349     case tok::r_square:
350       nextToken();
351       --UnpairedSquareBrackets;
352       if (UnpairedSquareBrackets == 0) {
353         addUnwrappedLine();
354         return;
355       }
356       break;
357     case tok::l_square:
358       ++UnpairedSquareBrackets;
359       nextToken();
360       break;
361     default:
362       nextToken();
363       break;
364     }
365   } while (!eof());
366 }
367 
parseLevel(bool HasOpeningBrace)368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369   bool SwitchLabelEncountered = false;
370   do {
371     tok::TokenKind kind = FormatTok->Tok.getKind();
372     if (FormatTok->getType() == TT_MacroBlockBegin) {
373       kind = tok::l_brace;
374     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375       kind = tok::r_brace;
376     }
377 
378     switch (kind) {
379     case tok::comment:
380       nextToken();
381       addUnwrappedLine();
382       break;
383     case tok::l_brace:
384       // FIXME: Add parameter whether this can happen - if this happens, we must
385       // be in a non-declaration context.
386       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387         continue;
388       parseBlock(/*MustBeDeclaration=*/false);
389       addUnwrappedLine();
390       break;
391     case tok::r_brace:
392       if (HasOpeningBrace)
393         return;
394       nextToken();
395       addUnwrappedLine();
396       break;
397     case tok::kw_default: {
398       unsigned StoredPosition = Tokens->getPosition();
399       FormatToken *Next;
400       do {
401         Next = Tokens->getNextToken();
402       } while (Next && Next->is(tok::comment));
403       FormatTok = Tokens->setPosition(StoredPosition);
404       if (Next && Next->isNot(tok::colon)) {
405         // default not followed by ':' is not a case label; treat it like
406         // an identifier.
407         parseStructuralElement();
408         break;
409       }
410       // Else, if it is 'default:', fall through to the case handling.
411       LLVM_FALLTHROUGH;
412     }
413     case tok::kw_case:
414       if (Style.Language == FormatStyle::LK_JavaScript &&
415           Line->MustBeDeclaration) {
416         // A 'case: string' style field declaration.
417         parseStructuralElement();
418         break;
419       }
420       if (!SwitchLabelEncountered &&
421           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422         ++Line->Level;
423       SwitchLabelEncountered = true;
424       parseStructuralElement();
425       break;
426     case tok::l_square:
427       if (Style.isCSharp()) {
428         nextToken();
429         parseCSharpAttribute();
430         break;
431       }
432       LLVM_FALLTHROUGH;
433     default:
434       parseStructuralElement();
435       break;
436     }
437   } while (!eof());
438 }
439 
calculateBraceTypes(bool ExpectClassBody)440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441   // We'll parse forward through the tokens until we hit
442   // a closing brace or eof - note that getNextToken() will
443   // parse macros, so this will magically work inside macro
444   // definitions, too.
445   unsigned StoredPosition = Tokens->getPosition();
446   FormatToken *Tok = FormatTok;
447   const FormatToken *PrevTok = Tok->Previous;
448   // Keep a stack of positions of lbrace tokens. We will
449   // update information about whether an lbrace starts a
450   // braced init list or a different block during the loop.
451   SmallVector<FormatToken *, 8> LBraceStack;
452   assert(Tok->Tok.is(tok::l_brace));
453   do {
454     // Get next non-comment token.
455     FormatToken *NextTok;
456     unsigned ReadTokens = 0;
457     do {
458       NextTok = Tokens->getNextToken();
459       ++ReadTokens;
460     } while (NextTok->is(tok::comment));
461 
462     switch (Tok->Tok.getKind()) {
463     case tok::l_brace:
464       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465         if (PrevTok->isOneOf(tok::colon, tok::less))
466           // A ':' indicates this code is in a type, or a braced list
467           // following a label in an object literal ({a: {b: 1}}).
468           // A '<' could be an object used in a comparison, but that is nonsense
469           // code (can never return true), so more likely it is a generic type
470           // argument (`X<{a: string; b: number}>`).
471           // The code below could be confused by semicolons between the
472           // individual members in a type member list, which would normally
473           // trigger BK_Block. In both cases, this must be parsed as an inline
474           // braced init.
475           Tok->setBlockKind(BK_BracedInit);
476         else if (PrevTok->is(tok::r_paren))
477           // `) { }` can only occur in function or method declarations in JS.
478           Tok->setBlockKind(BK_Block);
479       } else {
480         Tok->setBlockKind(BK_Unknown);
481       }
482       LBraceStack.push_back(Tok);
483       break;
484     case tok::r_brace:
485       if (LBraceStack.empty())
486         break;
487       if (LBraceStack.back()->is(BK_Unknown)) {
488         bool ProbablyBracedList = false;
489         if (Style.Language == FormatStyle::LK_Proto) {
490           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491         } else {
492           // Using OriginalColumn to distinguish between ObjC methods and
493           // binary operators is a bit hacky.
494           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495                                   NextTok->OriginalColumn == 0;
496 
497           // If there is a comma, semicolon or right paren after the closing
498           // brace, we assume this is a braced initializer list.  Note that
499           // regardless how we mark inner braces here, we will overwrite the
500           // BlockKind later if we parse a braced list (where all blocks
501           // inside are by default braced lists), or when we explicitly detect
502           // blocks (for example while parsing lambdas).
503           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504           // braced list in JS.
505           ProbablyBracedList =
506               (Style.Language == FormatStyle::LK_JavaScript &&
507                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508                                 Keywords.kw_as)) ||
509               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511                                tok::r_paren, tok::r_square, tok::l_brace,
512                                tok::ellipsis) ||
513               (NextTok->is(tok::identifier) &&
514                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515               (NextTok->is(tok::semi) &&
516                (!ExpectClassBody || LBraceStack.size() != 1)) ||
517               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519             // We can have an array subscript after a braced init
520             // list, but C++11 attributes are expected after blocks.
521             NextTok = Tokens->getNextToken();
522             ++ReadTokens;
523             ProbablyBracedList = NextTok->isNot(tok::l_square);
524           }
525         }
526         if (ProbablyBracedList) {
527           Tok->setBlockKind(BK_BracedInit);
528           LBraceStack.back()->setBlockKind(BK_BracedInit);
529         } else {
530           Tok->setBlockKind(BK_Block);
531           LBraceStack.back()->setBlockKind(BK_Block);
532         }
533       }
534       LBraceStack.pop_back();
535       break;
536     case tok::identifier:
537       if (!Tok->is(TT_StatementMacro))
538         break;
539       LLVM_FALLTHROUGH;
540     case tok::at:
541     case tok::semi:
542     case tok::kw_if:
543     case tok::kw_while:
544     case tok::kw_for:
545     case tok::kw_switch:
546     case tok::kw_try:
547     case tok::kw___try:
548       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
549         LBraceStack.back()->setBlockKind(BK_Block);
550       break;
551     default:
552       break;
553     }
554     PrevTok = Tok;
555     Tok = NextTok;
556   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557 
558   // Assume other blocks for all unclosed opening braces.
559   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560     if (LBraceStack[i]->is(BK_Unknown))
561       LBraceStack[i]->setBlockKind(BK_Block);
562   }
563 
564   FormatTok = Tokens->setPosition(StoredPosition);
565 }
566 
/// Mixes the \c std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572 
computePPHash() const573 size_t UnwrappedLineParser::computePPHash() const {
574   size_t h = 0;
575   for (const auto &i : PPStack) {
576     hash_combine(h, size_t(i.Kind));
577     hash_combine(h, i.Line);
578   }
579   return h;
580 }
581 
parseBlock(bool MustBeDeclaration,bool AddLevel,bool MunchSemi)582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
583                                      bool MunchSemi) {
584   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
585          "'{' or macro block token expected");
586   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
587   FormatTok->setBlockKind(BK_Block);
588 
589   size_t PPStartHash = computePPHash();
590 
591   unsigned InitialLevel = Line->Level;
592   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
593 
594   if (MacroBlock && FormatTok->is(tok::l_paren))
595     parseParens();
596 
597   size_t NbPreprocessorDirectives =
598       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
599   addUnwrappedLine();
600   size_t OpeningLineIndex =
601       CurrentLines->empty()
602           ? (UnwrappedLine::kInvalidIndex)
603           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
604 
605   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
606                                           MustBeDeclaration);
607   if (AddLevel)
608     ++Line->Level;
609   parseLevel(/*HasOpeningBrace=*/true);
610 
611   if (eof())
612     return;
613 
614   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
615                  : !FormatTok->is(tok::r_brace)) {
616     Line->Level = InitialLevel;
617     FormatTok->setBlockKind(BK_Block);
618     return;
619   }
620 
621   size_t PPEndHash = computePPHash();
622 
623   // Munch the closing brace.
624   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
625 
626   if (MacroBlock && FormatTok->is(tok::l_paren))
627     parseParens();
628 
629   if (FormatTok->is(tok::arrow)) {
630     // Following the } we can find a trailing return type arrow
631     // as part of an implicit conversion constraint.
632     nextToken();
633     parseStructuralElement();
634   }
635 
636   if (MunchSemi && FormatTok->Tok.is(tok::semi))
637     nextToken();
638 
639   Line->Level = InitialLevel;
640 
641   if (PPStartHash == PPEndHash) {
642     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
643     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
644       // Update the opening line to add the forward reference as well
645       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
646           CurrentLines->size() - 1;
647     }
648   }
649 }
650 
isGoogScope(const UnwrappedLine & Line)651 static bool isGoogScope(const UnwrappedLine &Line) {
652   // FIXME: Closure-library specific stuff should not be hard-coded but be
653   // configurable.
654   if (Line.Tokens.size() < 4)
655     return false;
656   auto I = Line.Tokens.begin();
657   if (I->Tok->TokenText != "goog")
658     return false;
659   ++I;
660   if (I->Tok->isNot(tok::period))
661     return false;
662   ++I;
663   if (I->Tok->TokenText != "scope")
664     return false;
665   ++I;
666   return I->Tok->is(tok::l_paren);
667 }
668 
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)669 static bool isIIFE(const UnwrappedLine &Line,
670                    const AdditionalKeywords &Keywords) {
671   // Look for the start of an immediately invoked anonymous function.
672   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
673   // This is commonly done in JavaScript to create a new, anonymous scope.
674   // Example: (function() { ... })()
675   if (Line.Tokens.size() < 3)
676     return false;
677   auto I = Line.Tokens.begin();
678   if (I->Tok->isNot(tok::l_paren))
679     return false;
680   ++I;
681   if (I->Tok->isNot(Keywords.kw_function))
682     return false;
683   ++I;
684   return I->Tok->is(tok::l_paren);
685 }
686 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)687 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
688                                    const FormatToken &InitialToken) {
689   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
690     return Style.BraceWrapping.AfterNamespace;
691   if (InitialToken.is(tok::kw_class))
692     return Style.BraceWrapping.AfterClass;
693   if (InitialToken.is(tok::kw_union))
694     return Style.BraceWrapping.AfterUnion;
695   if (InitialToken.is(tok::kw_struct))
696     return Style.BraceWrapping.AfterStruct;
697   return false;
698 }
699 
parseChildBlock()700 void UnwrappedLineParser::parseChildBlock() {
701   FormatTok->setBlockKind(BK_Block);
702   nextToken();
703   {
704     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
705                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
706     ScopedLineState LineState(*this);
707     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
708                                             /*MustBeDeclaration=*/false);
709     Line->Level += SkipIndent ? 0 : 1;
710     parseLevel(/*HasOpeningBrace=*/true);
711     flushComments(isOnNewLine(*FormatTok));
712     Line->Level -= SkipIndent ? 0 : 1;
713   }
714   nextToken();
715 }
716 
parsePPDirective()717 void UnwrappedLineParser::parsePPDirective() {
718   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
719   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
720 
721   nextToken();
722 
723   if (!FormatTok->Tok.getIdentifierInfo()) {
724     parsePPUnknown();
725     return;
726   }
727 
728   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
729   case tok::pp_define:
730     parsePPDefine();
731     return;
732   case tok::pp_if:
733     parsePPIf(/*IfDef=*/false);
734     break;
735   case tok::pp_ifdef:
736   case tok::pp_ifndef:
737     parsePPIf(/*IfDef=*/true);
738     break;
739   case tok::pp_else:
740     parsePPElse();
741     break;
742   case tok::pp_elif:
743     parsePPElIf();
744     break;
745   case tok::pp_endif:
746     parsePPEndIf();
747     break;
748   default:
749     parsePPUnknown();
750     break;
751   }
752 }
753 
conditionalCompilationCondition(bool Unreachable)754 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
755   size_t Line = CurrentLines->size();
756   if (CurrentLines == &PreprocessorDirectives)
757     Line += Lines.size();
758 
759   if (Unreachable ||
760       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
761     PPStack.push_back({PP_Unreachable, Line});
762   else
763     PPStack.push_back({PP_Conditional, Line});
764 }
765 
conditionalCompilationStart(bool Unreachable)766 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
767   ++PPBranchLevel;
768   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
769   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
770     PPLevelBranchIndex.push_back(0);
771     PPLevelBranchCount.push_back(0);
772   }
773   PPChainBranchIndex.push(0);
774   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
775   conditionalCompilationCondition(Unreachable || Skip);
776 }
777 
conditionalCompilationAlternative()778 void UnwrappedLineParser::conditionalCompilationAlternative() {
779   if (!PPStack.empty())
780     PPStack.pop_back();
781   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
782   if (!PPChainBranchIndex.empty())
783     ++PPChainBranchIndex.top();
784   conditionalCompilationCondition(
785       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
786       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
787 }
788 
conditionalCompilationEnd()789 void UnwrappedLineParser::conditionalCompilationEnd() {
790   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
791   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
792     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
793       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
794     }
795   }
796   // Guard against #endif's without #if.
797   if (PPBranchLevel > -1)
798     --PPBranchLevel;
799   if (!PPChainBranchIndex.empty())
800     PPChainBranchIndex.pop();
801   if (!PPStack.empty())
802     PPStack.pop_back();
803 }
804 
parsePPIf(bool IfDef)805 void UnwrappedLineParser::parsePPIf(bool IfDef) {
806   bool IfNDef = FormatTok->is(tok::pp_ifndef);
807   nextToken();
808   bool Unreachable = false;
809   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
810     Unreachable = true;
811   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
812     Unreachable = true;
813   conditionalCompilationStart(Unreachable);
814   FormatToken *IfCondition = FormatTok;
815   // If there's a #ifndef on the first line, and the only lines before it are
816   // comments, it could be an include guard.
817   bool MaybeIncludeGuard = IfNDef;
818   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
819     for (auto &Line : Lines) {
820       if (!Line.Tokens.front().Tok->is(tok::comment)) {
821         MaybeIncludeGuard = false;
822         IncludeGuard = IG_Rejected;
823         break;
824       }
825     }
826   --PPBranchLevel;
827   parsePPUnknown();
828   ++PPBranchLevel;
829   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
830     IncludeGuard = IG_IfNdefed;
831     IncludeGuardToken = IfCondition;
832   }
833 }
834 
parsePPElse()835 void UnwrappedLineParser::parsePPElse() {
836   // If a potential include guard has an #else, it's not an include guard.
837   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
838     IncludeGuard = IG_Rejected;
839   conditionalCompilationAlternative();
840   if (PPBranchLevel > -1)
841     --PPBranchLevel;
842   parsePPUnknown();
843   ++PPBranchLevel;
844 }
845 
parsePPElIf()846 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
847 
parsePPEndIf()848 void UnwrappedLineParser::parsePPEndIf() {
849   conditionalCompilationEnd();
850   parsePPUnknown();
851   // If the #endif of a potential include guard is the last thing in the file,
852   // then we found an include guard.
853   unsigned TokenPosition = Tokens->getPosition();
854   FormatToken *PeekNext = AllTokens[TokenPosition];
855   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
856       PeekNext->is(tok::eof) &&
857       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
858     IncludeGuard = IG_Found;
859 }
860 
parsePPDefine()861 void UnwrappedLineParser::parsePPDefine() {
862   nextToken();
863 
864   if (!FormatTok->Tok.getIdentifierInfo()) {
865     IncludeGuard = IG_Rejected;
866     IncludeGuardToken = nullptr;
867     parsePPUnknown();
868     return;
869   }
870 
871   if (IncludeGuard == IG_IfNdefed &&
872       IncludeGuardToken->TokenText == FormatTok->TokenText) {
873     IncludeGuard = IG_Defined;
874     IncludeGuardToken = nullptr;
875     for (auto &Line : Lines) {
876       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
877         IncludeGuard = IG_Rejected;
878         break;
879       }
880     }
881   }
882 
883   nextToken();
884   if (FormatTok->Tok.getKind() == tok::l_paren &&
885       FormatTok->WhitespaceRange.getBegin() ==
886           FormatTok->WhitespaceRange.getEnd()) {
887     parseParens();
888   }
889   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
890     Line->Level += PPBranchLevel + 1;
891   addUnwrappedLine();
892   ++Line->Level;
893 
894   // Errors during a preprocessor directive can only affect the layout of the
895   // preprocessor directive, and thus we ignore them. An alternative approach
896   // would be to use the same approach we use on the file level (no
897   // re-indentation if there was a structural error) within the macro
898   // definition.
899   parseFile();
900 }
901 
parsePPUnknown()902 void UnwrappedLineParser::parsePPUnknown() {
903   do {
904     nextToken();
905   } while (!eof());
906   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
907     Line->Level += PPBranchLevel + 1;
908   addUnwrappedLine();
909 }
910 
911 // Here we exclude certain tokens that are not usually the first token in an
912 // unwrapped line. This is used in attempt to distinguish macro calls without
913 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)914 static bool tokenCanStartNewLine(const FormatToken &Tok) {
915   // Semicolon can be a null-statement, l_square can be a start of a macro or
916   // a C++11 attribute, but this doesn't seem to be common.
917   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
918          Tok.isNot(TT_AttributeSquare) &&
919          // Tokens that can only be used as binary operators and a part of
920          // overloaded operator names.
921          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
922          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
923          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
924          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
925          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
926          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
927          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
928          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
929          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
930          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
931          Tok.isNot(tok::lesslessequal) &&
932          // Colon is used in labels, base class lists, initializer lists,
933          // range-based for loops, ternary operator, but should never be the
934          // first token in an unwrapped line.
935          Tok.isNot(tok::colon) &&
936          // 'noexcept' is a trailing annotation.
937          Tok.isNot(tok::kw_noexcept);
938 }
939 
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)940 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
941                           const FormatToken *FormatTok) {
942   // FIXME: This returns true for C/C++ keywords like 'struct'.
943   return FormatTok->is(tok::identifier) &&
944          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
945           !FormatTok->isOneOf(
946               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
947               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
948               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
949               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
950               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
951               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
952               Keywords.kw_from));
953 }
954 
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)955 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
956                                  const FormatToken *FormatTok) {
957   return FormatTok->Tok.isLiteral() ||
958          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
959          mustBeJSIdent(Keywords, FormatTok);
960 }
961 
962 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
963 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)964 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
965                            const FormatToken *FormatTok) {
966   return FormatTok->isOneOf(
967       tok::kw_return, Keywords.kw_yield,
968       // conditionals
969       tok::kw_if, tok::kw_else,
970       // loops
971       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
972       // switch/case
973       tok::kw_switch, tok::kw_case,
974       // exceptions
975       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
976       // declaration
977       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
978       Keywords.kw_async, Keywords.kw_function,
979       // import/export
980       Keywords.kw_import, tok::kw_export);
981 }
982 
983 // readTokenWithJavaScriptASI reads the next token and terminates the current
984 // line if JavaScript Automatic Semicolon Insertion must
985 // happen between the current token and the next token.
986 //
987 // This method is conservative - it cannot cover all edge cases of JavaScript,
988 // but only aims to correctly handle certain well known cases. It *must not*
989 // return true in speculative cases.
readTokenWithJavaScriptASI()990 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
991   FormatToken *Previous = FormatTok;
992   readToken();
993   FormatToken *Next = FormatTok;
994 
995   bool IsOnSameLine =
996       CommentsBeforeNextToken.empty()
997           ? Next->NewlinesBefore == 0
998           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
999   if (IsOnSameLine)
1000     return;
1001 
1002   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1003   bool PreviousStartsTemplateExpr =
1004       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1005   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1006     // If the line contains an '@' sign, the previous token might be an
1007     // annotation, which can precede another identifier/value.
1008     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1009                               [](UnwrappedLineNode &LineNode) {
1010                                 return LineNode.Tok->is(tok::at);
1011                               }) != Line->Tokens.end();
1012     if (HasAt)
1013       return;
1014   }
1015   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1016     return addUnwrappedLine();
1017   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1018   bool NextEndsTemplateExpr =
1019       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1020   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1021       (PreviousMustBeValue ||
1022        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1023                          tok::minusminus)))
1024     return addUnwrappedLine();
1025   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1026       isJSDeclOrStmt(Keywords, Next))
1027     return addUnwrappedLine();
1028 }
1029 
/// Parses one structural element beginning at the current token, which must
/// not be an opening brace.
///
/// The function works in two phases. The first switch looks at the leading
/// token and hands constructs with a dedicated parser (namespaces, access
/// specifiers, control-flow statements, labels, extern blocks, JavaScript
/// import/export, macros) to that parser. Cases that `break` instead of
/// returning continue into the second phase, a loop that consumes tokens one
/// by one until a case returns or eof() is reached.
parseStructuralElement()1030 void UnwrappedLineParser::parseStructuralElement() {
1031   assert(!FormatTok->is(tok::l_brace));
1032   if (Style.Language == FormatStyle::LK_TableGen &&
1033       FormatTok->is(tok::pp_include)) {
1034     nextToken();
1035     if (FormatTok->is(tok::string_literal))
1036       nextToken();
1037     addUnwrappedLine();
1038     return;
1039   }
  // Phase 1: dispatch on the leading token of the element.
1040   switch (FormatTok->Tok.getKind()) {
1041   case tok::kw_asm:
1042     nextToken();
1043     if (FormatTok->is(tok::l_brace)) {
1044       FormatTok->setType(TT_InlineASMBrace);
1045       nextToken();
      // Every token up to the matching '}' is marked as finalized.
1046       while (FormatTok && FormatTok->isNot(tok::eof)) {
1047         if (FormatTok->is(tok::r_brace)) {
1048           FormatTok->setType(TT_InlineASMBrace);
1049           nextToken();
1050           addUnwrappedLine();
1051           break;
1052         }
1053         FormatTok->Finalized = true;
1054         nextToken();
1055       }
1056     }
1057     break;
1058   case tok::kw_namespace:
1059     parseNamespace();
1060     return;
1061   case tok::kw_public:
1062   case tok::kw_protected:
1063   case tok::kw_private:
1064     if (Style.Language == FormatStyle::LK_Java ||
1065         Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1066       nextToken();
1067     else
1068       parseAccessSpecifier();
1069     return;
1070   case tok::kw_if:
1071     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1072       // field/method declaration.
1073       break;
1074     parseIfThenElse();
1075     return;
1076   case tok::kw_for:
1077   case tok::kw_while:
1078     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1079       // field/method declaration.
1080       break;
1081     parseForOrWhileLoop();
1082     return;
1083   case tok::kw_do:
1084     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1085       // field/method declaration.
1086       break;
1087     parseDoWhile();
1088     return;
1089   case tok::kw_switch:
1090     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1091       // 'switch: string' field declaration.
1092       break;
1093     parseSwitch();
1094     return;
1095   case tok::kw_default:
1096     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1097       // 'default: string' field declaration.
1098       break;
1099     nextToken();
1100     if (FormatTok->is(tok::colon)) {
1101       parseLabel();
1102       return;
1103     }
1104     // e.g. "default void f() {}" in a Java interface.
1105     break;
1106   case tok::kw_case:
1107     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1108       // 'case: string' field declaration.
1109       break;
1110     parseCaseLabel();
1111     return;
1112   case tok::kw_try:
1113   case tok::kw___try:
1114     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1115       // field/method declaration.
1116       break;
1117     parseTryCatch();
1118     return;
1119   case tok::kw_extern:
1120     nextToken();
1121     if (FormatTok->Tok.is(tok::string_literal)) {
1122       nextToken();
1123       if (FormatTok->Tok.is(tok::l_brace)) {
1124         if (!Style.IndentExternBlock) {
1125           if (Style.BraceWrapping.AfterExternBlock) {
1126             addUnwrappedLine();
1127           }
1128           parseBlock(/*MustBeDeclaration=*/true,
1129                      /*AddLevel=*/Style.BraceWrapping.AfterExternBlock);
1130         } else {
1131           parseBlock(/*MustBeDeclaration=*/true,
1132                      /*AddLevel=*/Style.IndentExternBlock ==
1133                          FormatStyle::IEBS_Indent);
1134         }
1135         addUnwrappedLine();
1136         return;
1137       }
1138     }
1139     break;
1140   case tok::kw_export:
1141     if (Style.Language == FormatStyle::LK_JavaScript) {
1142       parseJavaScriptEs6ImportExport();
1143       return;
1144     }
1145     if (!Style.isCpp())
1146       break;
1147     // Handle C++ "(inline|export) namespace".
1148     LLVM_FALLTHROUGH;
1149   case tok::kw_inline:
1150     nextToken();
1151     if (FormatTok->Tok.is(tok::kw_namespace)) {
1152       parseNamespace();
1153       return;
1154     }
1155     break;
1156   case tok::identifier:
1157     if (FormatTok->is(TT_ForEachMacro)) {
1158       parseForOrWhileLoop();
1159       return;
1160     }
1161     if (FormatTok->is(TT_MacroBlockBegin)) {
1162       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1163                  /*MunchSemi=*/false);
1164       return;
1165     }
1166     if (FormatTok->is(Keywords.kw_import)) {
1167       if (Style.Language == FormatStyle::LK_JavaScript) {
1168         parseJavaScriptEs6ImportExport();
1169         return;
1170       }
1171       if (Style.Language == FormatStyle::LK_Proto) {
1172         nextToken();
1173         if (FormatTok->is(tok::kw_public))
1174           nextToken();
1175         if (!FormatTok->is(tok::string_literal))
1176           return;
1177         nextToken();
1178         if (FormatTok->is(tok::semi))
1179           nextToken();
1180         addUnwrappedLine();
1181         return;
1182       }
1183     }
1184     if (Style.isCpp() &&
1185         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1186                            Keywords.kw_slots, Keywords.kw_qslots)) {
1187       nextToken();
1188       if (FormatTok->is(tok::colon)) {
1189         nextToken();
1190         addUnwrappedLine();
1191         return;
1192       }
1193     }
1194     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1195       parseStatementMacro();
1196       return;
1197     }
1198     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1199       parseNamespace();
1200       return;
1201     }
1202     // In all other cases, parse the declaration.
1203     break;
1204   default:
1205     break;
1206   }
  // Phase 2: consume the rest of the element token by token. Each case
  // either returns once the element is complete or breaks to keep scanning.
1207   do {
1208     const FormatToken *Previous = FormatTok->Previous;
1209     switch (FormatTok->Tok.getKind()) {
1210     case tok::at:
1211       nextToken();
1212       if (FormatTok->Tok.is(tok::l_brace)) {
1213         nextToken();
1214         parseBracedList();
1215         break;
1216       } else if (Style.Language == FormatStyle::LK_Java &&
1217                  FormatTok->is(Keywords.kw_interface)) {
1218         nextToken();
1219         break;
1220       }
1221       switch (FormatTok->Tok.getObjCKeywordID()) {
1222       case tok::objc_public:
1223       case tok::objc_protected:
1224       case tok::objc_package:
1225       case tok::objc_private:
1226         return parseAccessSpecifier();
1227       case tok::objc_interface:
1228       case tok::objc_implementation:
1229         return parseObjCInterfaceOrImplementation();
1230       case tok::objc_protocol:
1231         if (parseObjCProtocol())
1232           return;
1233         break;
1234       case tok::objc_end:
1235         return; // Handled by the caller.
1236       case tok::objc_optional:
1237       case tok::objc_required:
1238         nextToken();
1239         addUnwrappedLine();
1240         return;
1241       case tok::objc_autoreleasepool:
1242         nextToken();
1243         if (FormatTok->Tok.is(tok::l_brace)) {
1244           if (Style.BraceWrapping.AfterControlStatement ==
1245               FormatStyle::BWACS_Always)
1246             addUnwrappedLine();
1247           parseBlock(/*MustBeDeclaration=*/false);
1248         }
1249         addUnwrappedLine();
1250         return;
1251       case tok::objc_synchronized:
1252         nextToken();
1253         if (FormatTok->Tok.is(tok::l_paren))
1254           // Skip synchronization object
1255           parseParens();
1256         if (FormatTok->Tok.is(tok::l_brace)) {
1257           if (Style.BraceWrapping.AfterControlStatement ==
1258               FormatStyle::BWACS_Always)
1259             addUnwrappedLine();
1260           parseBlock(/*MustBeDeclaration=*/false);
1261         }
1262         addUnwrappedLine();
1263         return;
1264       case tok::objc_try:
1265         // This branch isn't strictly necessary (the kw_try case below would
1266         // do this too after the tok::at is parsed above).  But be explicit.
1267         parseTryCatch();
1268         return;
1269       default:
1270         break;
1271       }
1272       break;
1273     case tok::kw_concept:
1274       parseConcept();
1275       break;
1276     case tok::kw_requires:
1277       parseRequires();
1278       break;
1279     case tok::kw_enum:
1280       // Ignore if this is part of "template <enum ...".
1281       if (Previous && Previous->is(tok::less)) {
1282         nextToken();
1283         break;
1284       }
1285 
1286       // parseEnum falls through and does not yet add an unwrapped line as an
1287       // enum definition can start a structural element.
1288       if (!parseEnum())
1289         break;
1290       // This only applies for C++.
1291       if (!Style.isCpp()) {
1292         addUnwrappedLine();
1293         return;
1294       }
1295       break;
1296     case tok::kw_typedef:
1297       nextToken();
1298       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1299                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1300                              Keywords.kw_CF_CLOSED_ENUM,
1301                              Keywords.kw_NS_CLOSED_ENUM))
1302         parseEnum();
1303       break;
1304     case tok::kw_struct:
1305     case tok::kw_union:
1306     case tok::kw_class:
1307       // parseRecord falls through and does not yet add an unwrapped line as a
1308       // record declaration or definition can start a structural element.
1309       parseRecord();
1310       // This does not apply for Java, JavaScript and C#.
1311       if (Style.Language == FormatStyle::LK_Java ||
1312           Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
1313         if (FormatTok->is(tok::semi))
1314           nextToken();
1315         addUnwrappedLine();
1316         return;
1317       }
1318       break;
1319     case tok::period:
1320       nextToken();
1321       // In Java, classes have an implicit static member "class".
1322       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1323           FormatTok->is(tok::kw_class))
1324         nextToken();
1325       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1326           FormatTok->Tok.getIdentifierInfo())
1327         // JavaScript only has pseudo keywords, all keywords are allowed to
1328         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1329         nextToken();
1330       break;
1331     case tok::semi:
1332       nextToken();
1333       addUnwrappedLine();
1334       return;
1335     case tok::r_brace:
1336       addUnwrappedLine();
1337       return;
1338     case tok::l_paren:
1339       parseParens();
1340       break;
1341     case tok::kw_operator:
1342       nextToken();
1343       if (FormatTok->isBinaryOperator())
1344         nextToken();
1345       break;
1346     case tok::caret:
1347       nextToken();
1348       if (FormatTok->Tok.isAnyIdentifier() ||
1349           FormatTok->isSimpleTypeSpecifier())
1350         nextToken();
1351       if (FormatTok->is(tok::l_paren))
1352         parseParens();
1353       if (FormatTok->is(tok::l_brace))
1354         parseChildBlock();
1355       break;
1356     case tok::l_brace:
1357       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1358         // A block outside of parentheses must be the last part of a
1359         // structural element.
1360         // FIXME: Figure out cases where this is not true, and add projections
1361         // for them (the one we know is missing are lambdas).
1362         if (Style.BraceWrapping.AfterFunction)
1363           addUnwrappedLine();
1364         FormatTok->setType(TT_FunctionLBrace);
1365         parseBlock(/*MustBeDeclaration=*/false);
1366         addUnwrappedLine();
1367         return;
1368       }
1369       // Otherwise this was a braced init list, and the structural
1370       // element continues.
1371       break;
1372     case tok::kw_try:
1373       if (Style.Language == FormatStyle::LK_JavaScript &&
1374           Line->MustBeDeclaration) {
1375         // field/method declaration.
1376         nextToken();
1377         break;
1378       }
1379       // We arrive here when parsing function-try blocks.
1380       if (Style.BraceWrapping.AfterFunction)
1381         addUnwrappedLine();
1382       parseTryCatch();
1383       return;
1384     case tok::identifier: {
1385       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1386           Line->MustBeDeclaration) {
1387         addUnwrappedLine();
1388         parseCSharpGenericTypeConstraint();
1389         break;
1390       }
1391       if (FormatTok->is(TT_MacroBlockEnd)) {
1392         addUnwrappedLine();
1393         return;
1394       }
1395 
1396       // Function declarations (as opposed to function expressions) are parsed
1397       // on their own unwrapped line by continuing this loop. Function
1398       // expressions (functions that are not on their own line) must not create
1399       // a new unwrapped line, so they are special cased below.
1400       size_t TokenCount = Line->Tokens.size();
1401       if (Style.Language == FormatStyle::LK_JavaScript &&
1402           FormatTok->is(Keywords.kw_function) &&
1403           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1404                                                      Keywords.kw_async)))) {
1405         tryToParseJSFunction();
1406         break;
1407       }
1408       if ((Style.Language == FormatStyle::LK_JavaScript ||
1409            Style.Language == FormatStyle::LK_Java) &&
1410           FormatTok->is(Keywords.kw_interface)) {
1411         if (Style.Language == FormatStyle::LK_JavaScript) {
1412           // In JavaScript/TypeScript, "interface" can be used as a standalone
1413           // identifier, e.g. in `var interface = 1;`. If "interface" is
1414           // followed by another identifier, it is very likely to be an actual
1415           // interface declaration.
          // Peek at the following token, then restore the stream position.
1416           unsigned StoredPosition = Tokens->getPosition();
1417           FormatToken *Next = Tokens->getNextToken();
1418           FormatTok = Tokens->setPosition(StoredPosition);
1419           if (Next && !mustBeJSIdent(Keywords, Next)) {
1420             nextToken();
1421             break;
1422           }
1423         }
1424         parseRecord();
1425         addUnwrappedLine();
1426         return;
1427       }
1428 
1429       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1430         parseStatementMacro();
1431         return;
1432       }
1433 
1434       // See if the following token should start a new unwrapped line.
1435       StringRef Text = FormatTok->TokenText;
1436       nextToken();
1437 
1438       // JS doesn't have macros, and within classes colons indicate fields, not
1439       // labels.
1440       if (Style.Language == FormatStyle::LK_JavaScript)
1441         break;
1442 
1443       TokenCount = Line->Tokens.size();
1444       if (TokenCount == 1 ||
1445           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1446         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1447           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1448           parseLabel(!Style.IndentGotoLabels);
1449           return;
1450         }
1451         // Recognize function-like macro usages without trailing semicolon as
1452         // well as free-standing macros like Q_OBJECT.
1453         bool FunctionLike = FormatTok->is(tok::l_paren);
1454         if (FunctionLike)
1455           parseParens();
1456 
1457         bool FollowedByNewline =
1458             CommentsBeforeNextToken.empty()
1459                 ? FormatTok->NewlinesBefore > 0
1460                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1461 
        // Only all-uppercase names that are at least five characters long or
        // function-like are treated as such macros.
1462         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1463             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1464           addUnwrappedLine();
1465           return;
1466         }
1467       }
1468       break;
1469     }
1470     case tok::equal:
1471       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1472       // TT_JsFatArrow. They always start an expression or a child block if
1473       // followed by a curly.
1474       if (FormatTok->is(TT_JsFatArrow)) {
1475         nextToken();
1476         if (FormatTok->is(tok::l_brace)) {
1477           // C# may break after => if the next character is a newline.
1478           if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1479             // calling `addUnwrappedLine()` here causes odd parsing errors.
1480             FormatTok->MustBreakBefore = true;
1481           }
1482           parseChildBlock();
1483         }
1484         break;
1485       }
1486 
1487       nextToken();
1488       if (FormatTok->Tok.is(tok::l_brace)) {
1489         // Block kind should probably be set to BK_BracedInit for any language.
1490         // C# needs this change to ensure that array initialisers and object
1491         // initialisers are indented the same way.
1492         if (Style.isCSharp())
1493           FormatTok->setBlockKind(BK_BracedInit);
1494         nextToken();
1495         parseBracedList();
1496       } else if (Style.Language == FormatStyle::LK_Proto &&
1497                  FormatTok->Tok.is(tok::less)) {
1498         nextToken();
1499         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1500                         /*ClosingBraceKind=*/tok::greater);
1501       }
1502       break;
1503     case tok::l_square:
1504       parseSquare();
1505       break;
1506     case tok::kw_new:
1507       parseNew();
1508       break;
1509     default:
1510       nextToken();
1511       break;
1512     }
1513   } while (!eof());
1514 }
1515 
tryToParsePropertyAccessor()1516 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1517   assert(FormatTok->is(tok::l_brace));
1518   if (!Style.isCSharp())
1519     return false;
1520   // See if it's a property accessor.
1521   if (FormatTok->Previous->isNot(tok::identifier))
1522     return false;
1523 
1524   // See if we are inside a property accessor.
1525   //
1526   // Record the current tokenPosition so that we can advance and
1527   // reset the current token. `Next` is not set yet so we need
1528   // another way to advance along the token stream.
1529   unsigned int StoredPosition = Tokens->getPosition();
1530   FormatToken *Tok = Tokens->getNextToken();
1531 
1532   // A trivial property accessor is of the form:
1533   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1534   // Track these as they do not require line breaks to be introduced.
1535   bool HasGetOrSet = false;
1536   bool IsTrivialPropertyAccessor = true;
1537   while (!eof()) {
1538     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1539                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1540                      Keywords.kw_set)) {
1541       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1542         HasGetOrSet = true;
1543       Tok = Tokens->getNextToken();
1544       continue;
1545     }
1546     if (Tok->isNot(tok::r_brace))
1547       IsTrivialPropertyAccessor = false;
1548     break;
1549   }
1550 
1551   if (!HasGetOrSet) {
1552     Tokens->setPosition(StoredPosition);
1553     return false;
1554   }
1555 
1556   // Try to parse the property accessor:
1557   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1558   Tokens->setPosition(StoredPosition);
1559   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1560     addUnwrappedLine();
1561   nextToken();
1562   do {
1563     switch (FormatTok->Tok.getKind()) {
1564     case tok::r_brace:
1565       nextToken();
1566       if (FormatTok->is(tok::equal)) {
1567         while (!eof() && FormatTok->isNot(tok::semi))
1568           nextToken();
1569         nextToken();
1570       }
1571       addUnwrappedLine();
1572       return true;
1573     case tok::l_brace:
1574       ++Line->Level;
1575       parseBlock(/*MustBeDeclaration=*/true);
1576       addUnwrappedLine();
1577       --Line->Level;
1578       break;
1579     case tok::equal:
1580       if (FormatTok->is(TT_JsFatArrow)) {
1581         ++Line->Level;
1582         do {
1583           nextToken();
1584         } while (!eof() && FormatTok->isNot(tok::semi));
1585         nextToken();
1586         addUnwrappedLine();
1587         --Line->Level;
1588         break;
1589       }
1590       nextToken();
1591       break;
1592     default:
1593       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1594           !IsTrivialPropertyAccessor) {
1595         // Non-trivial get/set needs to be on its own line.
1596         addUnwrappedLine();
1597       }
1598       nextToken();
1599     }
1600   } while (!eof());
1601 
1602   // Unreachable for well-formed code (paired '{' and '}').
1603   return true;
1604 }
1605 
tryToParseLambda()1606 bool UnwrappedLineParser::tryToParseLambda() {
1607   if (!Style.isCpp()) {
1608     nextToken();
1609     return false;
1610   }
1611   assert(FormatTok->is(tok::l_square));
1612   FormatToken &LSquare = *FormatTok;
1613   if (!tryToParseLambdaIntroducer())
1614     return false;
1615 
1616   bool SeenArrow = false;
1617 
1618   while (FormatTok->isNot(tok::l_brace)) {
1619     if (FormatTok->isSimpleTypeSpecifier()) {
1620       nextToken();
1621       continue;
1622     }
1623     switch (FormatTok->Tok.getKind()) {
1624     case tok::l_brace:
1625       break;
1626     case tok::l_paren:
1627       parseParens();
1628       break;
1629     case tok::amp:
1630     case tok::star:
1631     case tok::kw_const:
1632     case tok::comma:
1633     case tok::less:
1634     case tok::greater:
1635     case tok::identifier:
1636     case tok::numeric_constant:
1637     case tok::coloncolon:
1638     case tok::kw_class:
1639     case tok::kw_mutable:
1640     case tok::kw_noexcept:
1641     case tok::kw_template:
1642     case tok::kw_typename:
1643       nextToken();
1644       break;
1645     // Specialization of a template with an integer parameter can contain
1646     // arithmetic, logical, comparison and ternary operators.
1647     //
1648     // FIXME: This also accepts sequences of operators that are not in the scope
1649     // of a template argument list.
1650     //
1651     // In a C++ lambda a template type can only occur after an arrow. We use
1652     // this as an heuristic to distinguish between Objective-C expressions
1653     // followed by an `a->b` expression, such as:
1654     // ([obj func:arg] + a->b)
1655     // Otherwise the code below would parse as a lambda.
1656     //
1657     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1658     // explicit template lists: []<bool b = true && false>(U &&u){}
1659     case tok::plus:
1660     case tok::minus:
1661     case tok::exclaim:
1662     case tok::tilde:
1663     case tok::slash:
1664     case tok::percent:
1665     case tok::lessless:
1666     case tok::pipe:
1667     case tok::pipepipe:
1668     case tok::ampamp:
1669     case tok::caret:
1670     case tok::equalequal:
1671     case tok::exclaimequal:
1672     case tok::greaterequal:
1673     case tok::lessequal:
1674     case tok::question:
1675     case tok::colon:
1676     case tok::ellipsis:
1677     case tok::kw_true:
1678     case tok::kw_false:
1679       if (SeenArrow) {
1680         nextToken();
1681         break;
1682       }
1683       return true;
1684     case tok::arrow:
1685       // This might or might not actually be a lambda arrow (this could be an
1686       // ObjC method invocation followed by a dereferencing arrow). We might
1687       // reset this back to TT_Unknown in TokenAnnotator.
1688       FormatTok->setType(TT_LambdaArrow);
1689       SeenArrow = true;
1690       nextToken();
1691       break;
1692     default:
1693       return true;
1694     }
1695   }
1696   FormatTok->setType(TT_LambdaLBrace);
1697   LSquare.setType(TT_LambdaLSquare);
1698   parseChildBlock();
1699   return true;
1700 }
1701 
tryToParseLambdaIntroducer()1702 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1703   const FormatToken *Previous = FormatTok->Previous;
1704   if (Previous &&
1705       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1706                          tok::kw_delete, tok::l_square) ||
1707        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1708        Previous->isSimpleTypeSpecifier())) {
1709     nextToken();
1710     return false;
1711   }
1712   nextToken();
1713   if (FormatTok->is(tok::l_square)) {
1714     return false;
1715   }
1716   parseSquare(/*LambdaIntroducer=*/true);
1717   return true;
1718 }
1719 
tryToParseJSFunction()1720 void UnwrappedLineParser::tryToParseJSFunction() {
1721   assert(FormatTok->is(Keywords.kw_function) ||
1722          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1723   if (FormatTok->is(Keywords.kw_async))
1724     nextToken();
1725   // Consume "function".
1726   nextToken();
1727 
1728   // Consume * (generator function). Treat it like C++'s overloaded operators.
1729   if (FormatTok->is(tok::star)) {
1730     FormatTok->setType(TT_OverloadedOperator);
1731     nextToken();
1732   }
1733 
1734   // Consume function name.
1735   if (FormatTok->is(tok::identifier))
1736     nextToken();
1737 
1738   if (FormatTok->isNot(tok::l_paren))
1739     return;
1740 
1741   // Parse formal parameter list.
1742   parseParens();
1743 
1744   if (FormatTok->is(tok::colon)) {
1745     // Parse a type definition.
1746     nextToken();
1747 
1748     // Eat the type declaration. For braced inline object types, balance braces,
1749     // otherwise just parse until finding an l_brace for the function body.
1750     if (FormatTok->is(tok::l_brace))
1751       tryToParseBracedList();
1752     else
1753       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1754         nextToken();
1755   }
1756 
1757   if (FormatTok->is(tok::semi))
1758     return;
1759 
1760   parseChildBlock();
1761 }
1762 
tryToParseBracedList()1763 bool UnwrappedLineParser::tryToParseBracedList() {
1764   if (FormatTok->is(BK_Unknown))
1765     calculateBraceTypes();
1766   assert(FormatTok->isNot(BK_Unknown));
1767   if (FormatTok->is(BK_Block))
1768     return false;
1769   nextToken();
1770   parseBracedList();
1771   return true;
1772 }
1773 
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)1774 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1775                                           bool IsEnum,
1776                                           tok::TokenKind ClosingBraceKind) {
1777   bool HasError = false;
1778 
1779   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1780   // replace this by using parseAssigmentExpression() inside.
1781   do {
1782     if (Style.isCSharp()) {
1783       if (FormatTok->is(TT_JsFatArrow)) {
1784         nextToken();
1785         // Fat arrows can be followed by simple expressions or by child blocks
1786         // in curly braces.
1787         if (FormatTok->is(tok::l_brace)) {
1788           parseChildBlock();
1789           continue;
1790         }
1791       }
1792     }
1793     if (Style.Language == FormatStyle::LK_JavaScript) {
1794       if (FormatTok->is(Keywords.kw_function) ||
1795           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1796         tryToParseJSFunction();
1797         continue;
1798       }
1799       if (FormatTok->is(TT_JsFatArrow)) {
1800         nextToken();
1801         // Fat arrows can be followed by simple expressions or by child blocks
1802         // in curly braces.
1803         if (FormatTok->is(tok::l_brace)) {
1804           parseChildBlock();
1805           continue;
1806         }
1807       }
1808       if (FormatTok->is(tok::l_brace)) {
1809         // Could be a method inside of a braced list `{a() { return 1; }}`.
1810         if (tryToParseBracedList())
1811           continue;
1812         parseChildBlock();
1813       }
1814     }
1815     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1816       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1817         addUnwrappedLine();
1818       nextToken();
1819       return !HasError;
1820     }
1821     switch (FormatTok->Tok.getKind()) {
1822     case tok::caret:
1823       nextToken();
1824       if (FormatTok->is(tok::l_brace)) {
1825         parseChildBlock();
1826       }
1827       break;
1828     case tok::l_square:
1829       if (Style.isCSharp())
1830         parseSquare();
1831       else
1832         tryToParseLambda();
1833       break;
1834     case tok::l_paren:
1835       parseParens();
1836       // JavaScript can just have free standing methods and getters/setters in
1837       // object literals. Detect them by a "{" following ")".
1838       if (Style.Language == FormatStyle::LK_JavaScript) {
1839         if (FormatTok->is(tok::l_brace))
1840           parseChildBlock();
1841         break;
1842       }
1843       break;
1844     case tok::l_brace:
1845       // Assume there are no blocks inside a braced init list apart
1846       // from the ones we explicitly parse out (like lambdas).
1847       FormatTok->setBlockKind(BK_BracedInit);
1848       nextToken();
1849       parseBracedList();
1850       break;
1851     case tok::less:
1852       if (Style.Language == FormatStyle::LK_Proto) {
1853         nextToken();
1854         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1855                         /*ClosingBraceKind=*/tok::greater);
1856       } else {
1857         nextToken();
1858       }
1859       break;
1860     case tok::semi:
1861       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1862       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1863       // used for error recovery if we have otherwise determined that this is
1864       // a braced list.
1865       if (Style.Language == FormatStyle::LK_JavaScript) {
1866         nextToken();
1867         break;
1868       }
1869       HasError = true;
1870       if (!ContinueOnSemicolons)
1871         return !HasError;
1872       nextToken();
1873       break;
1874     case tok::comma:
1875       nextToken();
1876       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1877         addUnwrappedLine();
1878       break;
1879     default:
1880       nextToken();
1881       break;
1882     }
1883   } while (!eof());
1884   return false;
1885 }
1886 
parseParens()1887 void UnwrappedLineParser::parseParens() {
1888   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1889   nextToken();
1890   do {
1891     switch (FormatTok->Tok.getKind()) {
1892     case tok::l_paren:
1893       parseParens();
1894       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1895         parseChildBlock();
1896       break;
1897     case tok::r_paren:
1898       nextToken();
1899       return;
1900     case tok::r_brace:
1901       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1902       return;
1903     case tok::l_square:
1904       tryToParseLambda();
1905       break;
1906     case tok::l_brace:
1907       if (!tryToParseBracedList())
1908         parseChildBlock();
1909       break;
1910     case tok::at:
1911       nextToken();
1912       if (FormatTok->Tok.is(tok::l_brace)) {
1913         nextToken();
1914         parseBracedList();
1915       }
1916       break;
1917     case tok::kw_class:
1918       if (Style.Language == FormatStyle::LK_JavaScript)
1919         parseRecord(/*ParseAsExpr=*/true);
1920       else
1921         nextToken();
1922       break;
1923     case tok::identifier:
1924       if (Style.Language == FormatStyle::LK_JavaScript &&
1925           (FormatTok->is(Keywords.kw_function) ||
1926            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1927         tryToParseJSFunction();
1928       else
1929         nextToken();
1930       break;
1931     default:
1932       nextToken();
1933       break;
1934     }
1935   } while (!eof());
1936 }
1937 
parseSquare(bool LambdaIntroducer)1938 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1939   if (!LambdaIntroducer) {
1940     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1941     if (tryToParseLambda())
1942       return;
1943   }
1944   do {
1945     switch (FormatTok->Tok.getKind()) {
1946     case tok::l_paren:
1947       parseParens();
1948       break;
1949     case tok::r_square:
1950       nextToken();
1951       return;
1952     case tok::r_brace:
1953       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1954       return;
1955     case tok::l_square:
1956       parseSquare();
1957       break;
1958     case tok::l_brace: {
1959       if (!tryToParseBracedList())
1960         parseChildBlock();
1961       break;
1962     }
1963     case tok::at:
1964       nextToken();
1965       if (FormatTok->Tok.is(tok::l_brace)) {
1966         nextToken();
1967         parseBracedList();
1968       }
1969       break;
1970     default:
1971       nextToken();
1972       break;
1973     }
1974   } while (!eof());
1975 }
1976 
parseIfThenElse()1977 void UnwrappedLineParser::parseIfThenElse() {
1978   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1979   nextToken();
1980   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1981     nextToken();
1982   if (FormatTok->Tok.is(tok::l_paren))
1983     parseParens();
1984   // handle [[likely]] / [[unlikely]]
1985   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
1986     parseSquare();
1987   bool NeedsUnwrappedLine = false;
1988   if (FormatTok->Tok.is(tok::l_brace)) {
1989     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1990     parseBlock(/*MustBeDeclaration=*/false);
1991     if (Style.BraceWrapping.BeforeElse)
1992       addUnwrappedLine();
1993     else
1994       NeedsUnwrappedLine = true;
1995   } else {
1996     addUnwrappedLine();
1997     ++Line->Level;
1998     parseStructuralElement();
1999     --Line->Level;
2000   }
2001   if (FormatTok->Tok.is(tok::kw_else)) {
2002     nextToken();
2003     // handle [[likely]] / [[unlikely]]
2004     if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2005       parseSquare();
2006     if (FormatTok->Tok.is(tok::l_brace)) {
2007       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2008       parseBlock(/*MustBeDeclaration=*/false);
2009       addUnwrappedLine();
2010     } else if (FormatTok->Tok.is(tok::kw_if)) {
2011       parseIfThenElse();
2012     } else {
2013       addUnwrappedLine();
2014       ++Line->Level;
2015       parseStructuralElement();
2016       if (FormatTok->is(tok::eof))
2017         addUnwrappedLine();
2018       --Line->Level;
2019     }
2020   } else if (NeedsUnwrappedLine) {
2021     addUnwrappedLine();
2022   }
2023 }
2024 
parseTryCatch()2025 void UnwrappedLineParser::parseTryCatch() {
2026   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2027   nextToken();
2028   bool NeedsUnwrappedLine = false;
2029   if (FormatTok->is(tok::colon)) {
2030     // We are in a function try block, what comes is an initializer list.
2031     nextToken();
2032 
2033     // In case identifiers were removed by clang-tidy, what might follow is
2034     // multiple commas in sequence - before the first identifier.
2035     while (FormatTok->is(tok::comma))
2036       nextToken();
2037 
2038     while (FormatTok->is(tok::identifier)) {
2039       nextToken();
2040       if (FormatTok->is(tok::l_paren))
2041         parseParens();
2042 
2043       // In case identifiers were removed by clang-tidy, what might follow is
2044       // multiple commas in sequence - after the first identifier.
2045       while (FormatTok->is(tok::comma))
2046         nextToken();
2047     }
2048   }
2049   // Parse try with resource.
2050   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2051     parseParens();
2052   }
2053   if (FormatTok->is(tok::l_brace)) {
2054     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2055     parseBlock(/*MustBeDeclaration=*/false);
2056     if (Style.BraceWrapping.BeforeCatch) {
2057       addUnwrappedLine();
2058     } else {
2059       NeedsUnwrappedLine = true;
2060     }
2061   } else if (!FormatTok->is(tok::kw_catch)) {
2062     // The C++ standard requires a compound-statement after a try.
2063     // If there's none, we try to assume there's a structuralElement
2064     // and try to continue.
2065     addUnwrappedLine();
2066     ++Line->Level;
2067     parseStructuralElement();
2068     --Line->Level;
2069   }
2070   while (1) {
2071     if (FormatTok->is(tok::at))
2072       nextToken();
2073     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2074                              tok::kw___finally) ||
2075           ((Style.Language == FormatStyle::LK_Java ||
2076             Style.Language == FormatStyle::LK_JavaScript) &&
2077            FormatTok->is(Keywords.kw_finally)) ||
2078           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2079            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2080       break;
2081     nextToken();
2082     while (FormatTok->isNot(tok::l_brace)) {
2083       if (FormatTok->is(tok::l_paren)) {
2084         parseParens();
2085         continue;
2086       }
2087       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2088         return;
2089       nextToken();
2090     }
2091     NeedsUnwrappedLine = false;
2092     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2093     parseBlock(/*MustBeDeclaration=*/false);
2094     if (Style.BraceWrapping.BeforeCatch)
2095       addUnwrappedLine();
2096     else
2097       NeedsUnwrappedLine = true;
2098   }
2099   if (NeedsUnwrappedLine)
2100     addUnwrappedLine();
2101 }
2102 
parseNamespace()2103 void UnwrappedLineParser::parseNamespace() {
2104   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2105          "'namespace' expected");
2106 
2107   const FormatToken &InitialToken = *FormatTok;
2108   nextToken();
2109   if (InitialToken.is(TT_NamespaceMacro)) {
2110     parseParens();
2111   } else {
2112     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2113                               tok::l_square)) {
2114       if (FormatTok->is(tok::l_square))
2115         parseSquare();
2116       else
2117         nextToken();
2118     }
2119   }
2120   if (FormatTok->Tok.is(tok::l_brace)) {
2121     if (ShouldBreakBeforeBrace(Style, InitialToken))
2122       addUnwrappedLine();
2123 
2124     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
2125                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2126                      DeclarationScopeStack.size() > 1);
2127     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2128     // Munch the semicolon after a namespace. This is more common than one would
2129     // think. Putting the semicolon into its own line is very ugly.
2130     if (FormatTok->Tok.is(tok::semi))
2131       nextToken();
2132     addUnwrappedLine();
2133   }
2134   // FIXME: Add error handling.
2135 }
2136 
parseNew()2137 void UnwrappedLineParser::parseNew() {
2138   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2139   nextToken();
2140 
2141   if (Style.isCSharp()) {
2142     do {
2143       if (FormatTok->is(tok::l_brace))
2144         parseBracedList();
2145 
2146       if (FormatTok->isOneOf(tok::semi, tok::comma))
2147         return;
2148 
2149       nextToken();
2150     } while (!eof());
2151   }
2152 
2153   if (Style.Language != FormatStyle::LK_Java)
2154     return;
2155 
2156   // In Java, we can parse everything up to the parens, which aren't optional.
2157   do {
2158     // There should not be a ;, { or } before the new's open paren.
2159     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2160       return;
2161 
2162     // Consume the parens.
2163     if (FormatTok->is(tok::l_paren)) {
2164       parseParens();
2165 
2166       // If there is a class body of an anonymous class, consume that as child.
2167       if (FormatTok->is(tok::l_brace))
2168         parseChildBlock();
2169       return;
2170     }
2171     nextToken();
2172   } while (!eof());
2173 }
2174 
parseForOrWhileLoop()2175 void UnwrappedLineParser::parseForOrWhileLoop() {
2176   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2177          "'for', 'while' or foreach macro expected");
2178   nextToken();
2179   // JS' for await ( ...
2180   if (Style.Language == FormatStyle::LK_JavaScript &&
2181       FormatTok->is(Keywords.kw_await))
2182     nextToken();
2183   if (FormatTok->Tok.is(tok::l_paren))
2184     parseParens();
2185   if (FormatTok->Tok.is(tok::l_brace)) {
2186     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2187     parseBlock(/*MustBeDeclaration=*/false);
2188     addUnwrappedLine();
2189   } else {
2190     addUnwrappedLine();
2191     ++Line->Level;
2192     parseStructuralElement();
2193     --Line->Level;
2194   }
2195 }
2196 
parseDoWhile()2197 void UnwrappedLineParser::parseDoWhile() {
2198   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2199   nextToken();
2200   if (FormatTok->Tok.is(tok::l_brace)) {
2201     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2202     parseBlock(/*MustBeDeclaration=*/false);
2203     if (Style.BraceWrapping.BeforeWhile)
2204       addUnwrappedLine();
2205   } else {
2206     addUnwrappedLine();
2207     ++Line->Level;
2208     parseStructuralElement();
2209     --Line->Level;
2210   }
2211 
2212   // FIXME: Add error handling.
2213   if (!FormatTok->Tok.is(tok::kw_while)) {
2214     addUnwrappedLine();
2215     return;
2216   }
2217 
2218   nextToken();
2219   parseStructuralElement();
2220 }
2221 
parseLabel(bool LeftAlignLabel)2222 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2223   nextToken();
2224   unsigned OldLineLevel = Line->Level;
2225   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2226     --Line->Level;
2227   if (LeftAlignLabel)
2228     Line->Level = 0;
2229   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2230       FormatTok->Tok.is(tok::l_brace)) {
2231     CompoundStatementIndenter Indenter(this, Line->Level,
2232                                        Style.BraceWrapping.AfterCaseLabel,
2233                                        Style.BraceWrapping.IndentBraces);
2234     parseBlock(/*MustBeDeclaration=*/false);
2235     if (FormatTok->Tok.is(tok::kw_break)) {
2236       if (Style.BraceWrapping.AfterControlStatement ==
2237           FormatStyle::BWACS_Always) {
2238         addUnwrappedLine();
2239         if (!Style.IndentCaseBlocks &&
2240             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2241           Line->Level++;
2242         }
2243       }
2244       parseStructuralElement();
2245     }
2246     addUnwrappedLine();
2247   } else {
2248     if (FormatTok->is(tok::semi))
2249       nextToken();
2250     addUnwrappedLine();
2251   }
2252   Line->Level = OldLineLevel;
2253   if (FormatTok->isNot(tok::l_brace)) {
2254     parseStructuralElement();
2255     addUnwrappedLine();
2256   }
2257 }
2258 
parseCaseLabel()2259 void UnwrappedLineParser::parseCaseLabel() {
2260   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2261   // FIXME: fix handling of complex expressions here.
2262   do {
2263     nextToken();
2264   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2265   parseLabel();
2266 }
2267 
parseSwitch()2268 void UnwrappedLineParser::parseSwitch() {
2269   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2270   nextToken();
2271   if (FormatTok->Tok.is(tok::l_paren))
2272     parseParens();
2273   if (FormatTok->Tok.is(tok::l_brace)) {
2274     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2275     parseBlock(/*MustBeDeclaration=*/false);
2276     addUnwrappedLine();
2277   } else {
2278     addUnwrappedLine();
2279     ++Line->Level;
2280     parseStructuralElement();
2281     --Line->Level;
2282   }
2283 }
2284 
parseAccessSpecifier()2285 void UnwrappedLineParser::parseAccessSpecifier() {
2286   nextToken();
2287   // Understand Qt's slots.
2288   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2289     nextToken();
2290   // Otherwise, we don't know what it is, and we'd better keep the next token.
2291   if (FormatTok->Tok.is(tok::colon))
2292     nextToken();
2293   addUnwrappedLine();
2294 }
2295 
parseConcept()2296 void UnwrappedLineParser::parseConcept() {
2297   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2298   nextToken();
2299   if (!FormatTok->Tok.is(tok::identifier))
2300     return;
2301   nextToken();
2302   if (!FormatTok->Tok.is(tok::equal))
2303     return;
2304   nextToken();
2305   if (FormatTok->Tok.is(tok::kw_requires)) {
2306     nextToken();
2307     parseRequiresExpression(Line->Level);
2308   } else {
2309     parseConstraintExpression(Line->Level);
2310   }
2311 }
2312 
parseRequiresExpression(unsigned int OriginalLevel)2313 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2314   // requires (R range)
2315   if (FormatTok->Tok.is(tok::l_paren)) {
2316     parseParens();
2317     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2318       addUnwrappedLine();
2319       --Line->Level;
2320     }
2321   }
2322 
2323   if (FormatTok->Tok.is(tok::l_brace)) {
2324     if (Style.BraceWrapping.AfterFunction)
2325       addUnwrappedLine();
2326     FormatTok->setType(TT_FunctionLBrace);
2327     parseBlock(/*MustBeDeclaration=*/false);
2328     addUnwrappedLine();
2329   } else {
2330     parseConstraintExpression(OriginalLevel);
2331   }
2332 }
2333 
parseConstraintExpression(unsigned int OriginalLevel)2334 void UnwrappedLineParser::parseConstraintExpression(
2335     unsigned int OriginalLevel) {
2336   // requires Id<T> && Id<T> || Id<T>
2337   while (
2338       FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2339     nextToken();
2340     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2341                               tok::greater, tok::comma, tok::ellipsis)) {
2342       if (FormatTok->Tok.is(tok::less)) {
2343         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2344                         /*ClosingBraceKind=*/tok::greater);
2345         continue;
2346       }
2347       nextToken();
2348     }
2349     if (FormatTok->Tok.is(tok::kw_requires)) {
2350       parseRequiresExpression(OriginalLevel);
2351     }
2352     if (FormatTok->Tok.is(tok::less)) {
2353       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2354                       /*ClosingBraceKind=*/tok::greater);
2355     }
2356 
2357     if (FormatTok->Tok.is(tok::l_paren)) {
2358       parseParens();
2359     }
2360     if (FormatTok->Tok.is(tok::l_brace)) {
2361       if (Style.BraceWrapping.AfterFunction)
2362         addUnwrappedLine();
2363       FormatTok->setType(TT_FunctionLBrace);
2364       parseBlock(/*MustBeDeclaration=*/false);
2365     }
2366     if (FormatTok->Tok.is(tok::semi)) {
2367       // Eat any trailing semi.
2368       nextToken();
2369       addUnwrappedLine();
2370     }
2371     if (FormatTok->Tok.is(tok::colon)) {
2372       return;
2373     }
2374     if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2375       if (FormatTok->Previous &&
2376           !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2377                                         tok::coloncolon)) {
2378         addUnwrappedLine();
2379       }
2380       if (Style.IndentRequires && OriginalLevel != Line->Level) {
2381         --Line->Level;
2382       }
2383       break;
2384     } else {
2385       FormatTok->setType(TT_ConstraintJunctions);
2386     }
2387 
2388     nextToken();
2389   }
2390 }
2391 
parseRequires()2392 void UnwrappedLineParser::parseRequires() {
2393   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2394 
2395   unsigned OriginalLevel = Line->Level;
2396   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2397     addUnwrappedLine();
2398     if (Style.IndentRequires) {
2399       Line->Level++;
2400     }
2401   }
2402   nextToken();
2403 
2404   parseRequiresExpression(OriginalLevel);
2405 }
2406 
parseEnum()2407 bool UnwrappedLineParser::parseEnum() {
2408   // Won't be 'enum' for NS_ENUMs.
2409   if (FormatTok->Tok.is(tok::kw_enum))
2410     nextToken();
2411 
2412   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2413   // declarations. An "enum" keyword followed by a colon would be a syntax
2414   // error and thus assume it is just an identifier.
2415   if (Style.Language == FormatStyle::LK_JavaScript &&
2416       FormatTok->isOneOf(tok::colon, tok::question))
2417     return false;
2418 
2419   // In protobuf, "enum" can be used as a field name.
2420   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2421     return false;
2422 
2423   // Eat up enum class ...
2424   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2425     nextToken();
2426 
2427   while (FormatTok->Tok.getIdentifierInfo() ||
2428          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2429                             tok::greater, tok::comma, tok::question)) {
2430     nextToken();
2431     // We can have macros or attributes in between 'enum' and the enum name.
2432     if (FormatTok->is(tok::l_paren))
2433       parseParens();
2434     if (FormatTok->is(tok::identifier)) {
2435       nextToken();
2436       // If there are two identifiers in a row, this is likely an elaborate
2437       // return type. In Java, this can be "implements", etc.
2438       if (Style.isCpp() && FormatTok->is(tok::identifier))
2439         return false;
2440     }
2441   }
2442 
2443   // Just a declaration or something is wrong.
2444   if (FormatTok->isNot(tok::l_brace))
2445     return true;
2446   FormatTok->setBlockKind(BK_Block);
2447 
2448   if (Style.Language == FormatStyle::LK_Java) {
2449     // Java enums are different.
2450     parseJavaEnumBody();
2451     return true;
2452   }
2453   if (Style.Language == FormatStyle::LK_Proto) {
2454     parseBlock(/*MustBeDeclaration=*/true);
2455     return true;
2456   }
2457 
2458   if (!Style.AllowShortEnumsOnASingleLine)
2459     addUnwrappedLine();
2460   // Parse enum body.
2461   nextToken();
2462   if (!Style.AllowShortEnumsOnASingleLine) {
2463     addUnwrappedLine();
2464     Line->Level += 1;
2465   }
2466   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2467                                    /*IsEnum=*/true);
2468   if (!Style.AllowShortEnumsOnASingleLine)
2469     Line->Level -= 1;
2470   if (HasError) {
2471     if (FormatTok->is(tok::semi))
2472       nextToken();
2473     addUnwrappedLine();
2474   }
2475   return true;
2476 
2477   // There is no addUnwrappedLine() here so that we fall through to parsing a
2478   // structural element afterwards. Thus, in "enum A {} n, m;",
2479   // "} n, m;" will end up in one unwrapped line.
2480 }
2481 
2482 namespace {
2483 // A class used to set and restore the Token position when peeking
2484 // ahead in the token source.
2485 class ScopedTokenPosition {
2486   unsigned StoredPosition;
2487   FormatTokenSource *Tokens;
2488 
2489 public:
ScopedTokenPosition(FormatTokenSource * Tokens)2490   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2491     assert(Tokens && "Tokens expected to not be null");
2492     StoredPosition = Tokens->getPosition();
2493   }
2494 
~ScopedTokenPosition()2495   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2496 };
2497 } // namespace
2498 
2499 // Look to see if we have [[ by looking ahead, if
2500 // its not then rewind to the original position.
tryToParseSimpleAttribute()2501 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2502   ScopedTokenPosition AutoPosition(Tokens);
2503   FormatToken *Tok = Tokens->getNextToken();
2504   // We already read the first [ check for the second.
2505   if (Tok && !Tok->is(tok::l_square)) {
2506     return false;
2507   }
2508   // Double check that the attribute is just something
2509   // fairly simple.
2510   while (Tok) {
2511     if (Tok->is(tok::r_square)) {
2512       break;
2513     }
2514     Tok = Tokens->getNextToken();
2515   }
2516   Tok = Tokens->getNextToken();
2517   if (Tok && !Tok->is(tok::r_square)) {
2518     return false;
2519   }
2520   Tok = Tokens->getNextToken();
2521   if (Tok && Tok->is(tok::semi)) {
2522     return false;
2523   }
2524   return true;
2525 }
2526 
parseJavaEnumBody()2527 void UnwrappedLineParser::parseJavaEnumBody() {
2528   // Determine whether the enum is simple, i.e. does not have a semicolon or
2529   // constants with class bodies. Simple enums can be formatted like braced
2530   // lists, contracted to a single line, etc.
2531   unsigned StoredPosition = Tokens->getPosition();
2532   bool IsSimple = true;
2533   FormatToken *Tok = Tokens->getNextToken();
2534   while (Tok) {
2535     if (Tok->is(tok::r_brace))
2536       break;
2537     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2538       IsSimple = false;
2539       break;
2540     }
2541     // FIXME: This will also mark enums with braces in the arguments to enum
2542     // constants as "not simple". This is probably fine in practice, though.
2543     Tok = Tokens->getNextToken();
2544   }
2545   FormatTok = Tokens->setPosition(StoredPosition);
2546 
2547   if (IsSimple) {
2548     nextToken();
2549     parseBracedList();
2550     addUnwrappedLine();
2551     return;
2552   }
2553 
2554   // Parse the body of a more complex enum.
2555   // First add a line for everything up to the "{".
2556   nextToken();
2557   addUnwrappedLine();
2558   ++Line->Level;
2559 
2560   // Parse the enum constants.
2561   while (FormatTok) {
2562     if (FormatTok->is(tok::l_brace)) {
2563       // Parse the constant's class body.
2564       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2565                  /*MunchSemi=*/false);
2566     } else if (FormatTok->is(tok::l_paren)) {
2567       parseParens();
2568     } else if (FormatTok->is(tok::comma)) {
2569       nextToken();
2570       addUnwrappedLine();
2571     } else if (FormatTok->is(tok::semi)) {
2572       nextToken();
2573       addUnwrappedLine();
2574       break;
2575     } else if (FormatTok->is(tok::r_brace)) {
2576       addUnwrappedLine();
2577       break;
2578     } else {
2579       nextToken();
2580     }
2581   }
2582 
2583   // Parse the class body after the enum's ";" if any.
2584   parseLevel(/*HasOpeningBrace=*/true);
2585   nextToken();
2586   --Line->Level;
2587   addUnwrappedLine();
2588 }
2589 
parseRecord(bool ParseAsExpr)2590 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2591   const FormatToken &InitialToken = *FormatTok;
2592   nextToken();
2593 
2594   // The actual identifier can be a nested name specifier, and in macros
2595   // it is often token-pasted.
2596   // An [[attribute]] can be before the identifier.
2597   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2598                             tok::kw___attribute, tok::kw___declspec,
2599                             tok::kw_alignas, tok::l_square, tok::r_square) ||
2600          ((Style.Language == FormatStyle::LK_Java ||
2601            Style.Language == FormatStyle::LK_JavaScript) &&
2602           FormatTok->isOneOf(tok::period, tok::comma))) {
2603     if (Style.Language == FormatStyle::LK_JavaScript &&
2604         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2605       // JavaScript/TypeScript supports inline object types in
2606       // extends/implements positions:
2607       //     class Foo implements {bar: number} { }
2608       nextToken();
2609       if (FormatTok->is(tok::l_brace)) {
2610         tryToParseBracedList();
2611         continue;
2612       }
2613     }
2614     bool IsNonMacroIdentifier =
2615         FormatTok->is(tok::identifier) &&
2616         FormatTok->TokenText != FormatTok->TokenText.upper();
2617     nextToken();
2618     // We can have macros or attributes in between 'class' and the class name.
2619     if (!IsNonMacroIdentifier) {
2620       if (FormatTok->Tok.is(tok::l_paren)) {
2621         parseParens();
2622       } else if (FormatTok->is(TT_AttributeSquare)) {
2623         parseSquare();
2624         // Consume the closing TT_AttributeSquare.
2625         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2626           nextToken();
2627       }
2628     }
2629   }
2630 
2631   // Note that parsing away template declarations here leads to incorrectly
2632   // accepting function declarations as record declarations.
2633   // In general, we cannot solve this problem. Consider:
2634   // class A<int> B() {}
2635   // which can be a function definition or a class definition when B() is a
2636   // macro. If we find enough real-world cases where this is a problem, we
2637   // can parse for the 'template' keyword in the beginning of the statement,
2638   // and thus rule out the record production in case there is no template
2639   // (this would still leave us with an ambiguity between template function
2640   // and class declarations).
2641   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2642     while (!eof()) {
2643       if (FormatTok->is(tok::l_brace)) {
2644         calculateBraceTypes(/*ExpectClassBody=*/true);
2645         if (!tryToParseBracedList())
2646           break;
2647       }
2648       if (FormatTok->Tok.is(tok::semi))
2649         return;
2650       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2651         addUnwrappedLine();
2652         nextToken();
2653         parseCSharpGenericTypeConstraint();
2654         break;
2655       }
2656       nextToken();
2657     }
2658   }
2659   if (FormatTok->Tok.is(tok::l_brace)) {
2660     if (ParseAsExpr) {
2661       parseChildBlock();
2662     } else {
2663       if (ShouldBreakBeforeBrace(Style, InitialToken))
2664         addUnwrappedLine();
2665 
2666       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2667                  /*MunchSemi=*/false);
2668     }
2669   }
2670   // There is no addUnwrappedLine() here so that we fall through to parsing a
2671   // structural element afterwards. Thus, in "class A {} n, m;",
2672   // "} n, m;" will end up in one unwrapped line.
2673 }
2674 
parseObjCMethod()2675 void UnwrappedLineParser::parseObjCMethod() {
2676   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2677          "'(' or identifier expected.");
2678   do {
2679     if (FormatTok->Tok.is(tok::semi)) {
2680       nextToken();
2681       addUnwrappedLine();
2682       return;
2683     } else if (FormatTok->Tok.is(tok::l_brace)) {
2684       if (Style.BraceWrapping.AfterFunction)
2685         addUnwrappedLine();
2686       parseBlock(/*MustBeDeclaration=*/false);
2687       addUnwrappedLine();
2688       return;
2689     } else {
2690       nextToken();
2691     }
2692   } while (!eof());
2693 }
2694 
parseObjCProtocolList()2695 void UnwrappedLineParser::parseObjCProtocolList() {
2696   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2697   do {
2698     nextToken();
2699     // Early exit in case someone forgot a close angle.
2700     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2701         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2702       return;
2703   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2704   nextToken(); // Skip '>'.
2705 }
2706 
parseObjCUntilAtEnd()2707 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2708   do {
2709     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2710       nextToken();
2711       addUnwrappedLine();
2712       break;
2713     }
2714     if (FormatTok->is(tok::l_brace)) {
2715       parseBlock(/*MustBeDeclaration=*/false);
2716       // In ObjC interfaces, nothing should be following the "}".
2717       addUnwrappedLine();
2718     } else if (FormatTok->is(tok::r_brace)) {
2719       // Ignore stray "}". parseStructuralElement doesn't consume them.
2720       nextToken();
2721       addUnwrappedLine();
2722     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2723       nextToken();
2724       parseObjCMethod();
2725     } else {
2726       parseStructuralElement();
2727     }
2728   } while (!eof());
2729 }
2730 
parseObjCInterfaceOrImplementation()2731 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2732   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2733          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2734   nextToken();
2735   nextToken(); // interface name
2736 
2737   // @interface can be followed by a lightweight generic
2738   // specialization list, then either a base class or a category.
2739   if (FormatTok->Tok.is(tok::less)) {
2740     parseObjCLightweightGenerics();
2741   }
2742   if (FormatTok->Tok.is(tok::colon)) {
2743     nextToken();
2744     nextToken(); // base class name
2745     // The base class can also have lightweight generics applied to it.
2746     if (FormatTok->Tok.is(tok::less)) {
2747       parseObjCLightweightGenerics();
2748     }
2749   } else if (FormatTok->Tok.is(tok::l_paren))
2750     // Skip category, if present.
2751     parseParens();
2752 
2753   if (FormatTok->Tok.is(tok::less))
2754     parseObjCProtocolList();
2755 
2756   if (FormatTok->Tok.is(tok::l_brace)) {
2757     if (Style.BraceWrapping.AfterObjCDeclaration)
2758       addUnwrappedLine();
2759     parseBlock(/*MustBeDeclaration=*/true);
2760   }
2761 
2762   // With instance variables, this puts '}' on its own line.  Without instance
2763   // variables, this ends the @interface line.
2764   addUnwrappedLine();
2765 
2766   parseObjCUntilAtEnd();
2767 }
2768 
parseObjCLightweightGenerics()2769 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2770   assert(FormatTok->Tok.is(tok::less));
2771   // Unlike protocol lists, generic parameterizations support
2772   // nested angles:
2773   //
2774   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2775   //     NSObject <NSCopying, NSSecureCoding>
2776   //
2777   // so we need to count how many open angles we have left.
2778   unsigned NumOpenAngles = 1;
2779   do {
2780     nextToken();
2781     // Early exit in case someone forgot a close angle.
2782     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2783         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2784       break;
2785     if (FormatTok->Tok.is(tok::less))
2786       ++NumOpenAngles;
2787     else if (FormatTok->Tok.is(tok::greater)) {
2788       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2789       --NumOpenAngles;
2790     }
2791   } while (!eof() && NumOpenAngles != 0);
2792   nextToken(); // Skip '>'.
2793 }
2794 
2795 // Returns true for the declaration/definition form of @protocol,
2796 // false for the expression form.
parseObjCProtocol()2797 bool UnwrappedLineParser::parseObjCProtocol() {
2798   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2799   nextToken();
2800 
2801   if (FormatTok->is(tok::l_paren))
2802     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2803     return false;
2804 
2805   // The definition/declaration form,
2806   // @protocol Foo
2807   // - (int)someMethod;
2808   // @end
2809 
2810   nextToken(); // protocol name
2811 
2812   if (FormatTok->Tok.is(tok::less))
2813     parseObjCProtocolList();
2814 
2815   // Check for protocol declaration.
2816   if (FormatTok->Tok.is(tok::semi)) {
2817     nextToken();
2818     addUnwrappedLine();
2819     return true;
2820   }
2821 
2822   addUnwrappedLine();
2823   parseObjCUntilAtEnd();
2824   return true;
2825 }
2826 
parseJavaScriptEs6ImportExport()2827 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2828   bool IsImport = FormatTok->is(Keywords.kw_import);
2829   assert(IsImport || FormatTok->is(tok::kw_export));
2830   nextToken();
2831 
2832   // Consume the "default" in "export default class/function".
2833   if (FormatTok->is(tok::kw_default))
2834     nextToken();
2835 
2836   // Consume "async function", "function" and "default function", so that these
2837   // get parsed as free-standing JS functions, i.e. do not require a trailing
2838   // semicolon.
2839   if (FormatTok->is(Keywords.kw_async))
2840     nextToken();
2841   if (FormatTok->is(Keywords.kw_function)) {
2842     nextToken();
2843     return;
2844   }
2845 
2846   // For imports, `export *`, `export {...}`, consume the rest of the line up
2847   // to the terminating `;`. For everything else, just return and continue
2848   // parsing the structural element, i.e. the declaration or expression for
2849   // `export default`.
2850   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2851       !FormatTok->isStringLiteral())
2852     return;
2853 
2854   while (!eof()) {
2855     if (FormatTok->is(tok::semi))
2856       return;
2857     if (Line->Tokens.empty()) {
2858       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2859       // import statement should terminate.
2860       return;
2861     }
2862     if (FormatTok->is(tok::l_brace)) {
2863       FormatTok->setBlockKind(BK_Block);
2864       nextToken();
2865       parseBracedList();
2866     } else {
2867       nextToken();
2868     }
2869   }
2870 }
2871 
parseStatementMacro()2872 void UnwrappedLineParser::parseStatementMacro() {
2873   nextToken();
2874   if (FormatTok->is(tok::l_paren))
2875     parseParens();
2876   if (FormatTok->is(tok::semi))
2877     nextToken();
2878   addUnwrappedLine();
2879 }
2880 
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")2881 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2882                                                  StringRef Prefix = "") {
2883   llvm::dbgs() << Prefix << "Line(" << Line.Level
2884                << ", FSC=" << Line.FirstStartColumn << ")"
2885                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2886   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2887                                                     E = Line.Tokens.end();
2888        I != E; ++I) {
2889     llvm::dbgs() << I->Tok->Tok.getName() << "["
2890                  << "T=" << (unsigned)I->Tok->getType()
2891                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2892   }
2893   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2894                                                     E = Line.Tokens.end();
2895        I != E; ++I) {
2896     const UnwrappedLineNode &Node = *I;
2897     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2898              I = Node.Children.begin(),
2899              E = Node.Children.end();
2900          I != E; ++I) {
2901       printDebugInfo(*I, "\nChild: ");
2902     }
2903   }
2904   llvm::dbgs() << "\n";
2905 }
2906 
addUnwrappedLine()2907 void UnwrappedLineParser::addUnwrappedLine() {
2908   if (Line->Tokens.empty())
2909     return;
2910   LLVM_DEBUG({
2911     if (CurrentLines == &Lines)
2912       printDebugInfo(*Line);
2913   });
2914   CurrentLines->push_back(std::move(*Line));
2915   Line->Tokens.clear();
2916   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2917   Line->FirstStartColumn = 0;
2918   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2919     CurrentLines->append(
2920         std::make_move_iterator(PreprocessorDirectives.begin()),
2921         std::make_move_iterator(PreprocessorDirectives.end()));
2922     PreprocessorDirectives.clear();
2923   }
2924   // Disconnect the current token from the last token on the previous line.
2925   FormatTok->Previous = nullptr;
2926 }
2927 
eof() const2928 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2929 
isOnNewLine(const FormatToken & FormatTok)2930 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2931   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2932          FormatTok.NewlinesBefore > 0;
2933 }
2934 
2935 // Checks if \p FormatTok is a line comment that continues the line comment
2936 // section on \p Line.
2937 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)2938 continuesLineCommentSection(const FormatToken &FormatTok,
2939                             const UnwrappedLine &Line,
2940                             const llvm::Regex &CommentPragmasRegex) {
2941   if (Line.Tokens.empty())
2942     return false;
2943 
2944   StringRef IndentContent = FormatTok.TokenText;
2945   if (FormatTok.TokenText.startswith("//") ||
2946       FormatTok.TokenText.startswith("/*"))
2947     IndentContent = FormatTok.TokenText.substr(2);
2948   if (CommentPragmasRegex.match(IndentContent))
2949     return false;
2950 
2951   // If Line starts with a line comment, then FormatTok continues the comment
2952   // section if its original column is greater or equal to the original start
2953   // column of the line.
2954   //
2955   // Define the min column token of a line as follows: if a line ends in '{' or
2956   // contains a '{' followed by a line comment, then the min column token is
2957   // that '{'. Otherwise, the min column token of the line is the first token of
2958   // the line.
2959   //
2960   // If Line starts with a token other than a line comment, then FormatTok
2961   // continues the comment section if its original column is greater than the
2962   // original start column of the min column token of the line.
2963   //
2964   // For example, the second line comment continues the first in these cases:
2965   //
2966   // // first line
2967   // // second line
2968   //
2969   // and:
2970   //
2971   // // first line
2972   //  // second line
2973   //
2974   // and:
2975   //
2976   // int i; // first line
2977   //  // second line
2978   //
2979   // and:
2980   //
2981   // do { // first line
2982   //      // second line
2983   //   int i;
2984   // } while (true);
2985   //
2986   // and:
2987   //
2988   // enum {
2989   //   a, // first line
2990   //    // second line
2991   //   b
2992   // };
2993   //
2994   // The second line comment doesn't continue the first in these cases:
2995   //
2996   //   // first line
2997   //  // second line
2998   //
2999   // and:
3000   //
3001   // int i; // first line
3002   // // second line
3003   //
3004   // and:
3005   //
3006   // do { // first line
3007   //   // second line
3008   //   int i;
3009   // } while (true);
3010   //
3011   // and:
3012   //
3013   // enum {
3014   //   a, // first line
3015   //   // second line
3016   // };
3017   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3018 
3019   // Scan for '{//'. If found, use the column of '{' as a min column for line
3020   // comment section continuation.
3021   const FormatToken *PreviousToken = nullptr;
3022   for (const UnwrappedLineNode &Node : Line.Tokens) {
3023     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3024         isLineComment(*Node.Tok)) {
3025       MinColumnToken = PreviousToken;
3026       break;
3027     }
3028     PreviousToken = Node.Tok;
3029 
3030     // Grab the last newline preceding a token in this unwrapped line.
3031     if (Node.Tok->NewlinesBefore > 0) {
3032       MinColumnToken = Node.Tok;
3033     }
3034   }
3035   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3036     MinColumnToken = PreviousToken;
3037   }
3038 
3039   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3040                               MinColumnToken);
3041 }
3042 
flushComments(bool NewlineBeforeNext)3043 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3044   bool JustComments = Line->Tokens.empty();
3045   for (SmallVectorImpl<FormatToken *>::const_iterator
3046            I = CommentsBeforeNextToken.begin(),
3047            E = CommentsBeforeNextToken.end();
3048        I != E; ++I) {
3049     // Line comments that belong to the same line comment section are put on the
3050     // same line since later we might want to reflow content between them.
3051     // Additional fine-grained breaking of line comment sections is controlled
3052     // by the class BreakableLineCommentSection in case it is desirable to keep
3053     // several line comment sections in the same unwrapped line.
3054     //
3055     // FIXME: Consider putting separate line comment sections as children to the
3056     // unwrapped line instead.
3057     (*I)->ContinuesLineCommentSection =
3058         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3059     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3060       addUnwrappedLine();
3061     pushToken(*I);
3062   }
3063   if (NewlineBeforeNext && JustComments)
3064     addUnwrappedLine();
3065   CommentsBeforeNextToken.clear();
3066 }
3067 
nextToken(int LevelDifference)3068 void UnwrappedLineParser::nextToken(int LevelDifference) {
3069   if (eof())
3070     return;
3071   flushComments(isOnNewLine(*FormatTok));
3072   pushToken(FormatTok);
3073   FormatToken *Previous = FormatTok;
3074   if (Style.Language != FormatStyle::LK_JavaScript)
3075     readToken(LevelDifference);
3076   else
3077     readTokenWithJavaScriptASI();
3078   FormatTok->Previous = Previous;
3079 }
3080 
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)3081 void UnwrappedLineParser::distributeComments(
3082     const SmallVectorImpl<FormatToken *> &Comments,
3083     const FormatToken *NextTok) {
3084   // Whether or not a line comment token continues a line is controlled by
3085   // the method continuesLineCommentSection, with the following caveat:
3086   //
3087   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3088   // that each comment line from the trail is aligned with the next token, if
3089   // the next token exists. If a trail exists, the beginning of the maximal
3090   // trail is marked as a start of a new comment section.
3091   //
3092   // For example in this code:
3093   //
3094   // int a; // line about a
3095   //   // line 1 about b
3096   //   // line 2 about b
3097   //   int b;
3098   //
3099   // the two lines about b form a maximal trail, so there are two sections, the
3100   // first one consisting of the single comment "// line about a" and the
3101   // second one consisting of the next two comments.
3102   if (Comments.empty())
3103     return;
3104   bool ShouldPushCommentsInCurrentLine = true;
3105   bool HasTrailAlignedWithNextToken = false;
3106   unsigned StartOfTrailAlignedWithNextToken = 0;
3107   if (NextTok) {
3108     // We are skipping the first element intentionally.
3109     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3110       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3111         HasTrailAlignedWithNextToken = true;
3112         StartOfTrailAlignedWithNextToken = i;
3113       }
3114     }
3115   }
3116   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3117     FormatToken *FormatTok = Comments[i];
3118     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3119       FormatTok->ContinuesLineCommentSection = false;
3120     } else {
3121       FormatTok->ContinuesLineCommentSection =
3122           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3123     }
3124     if (!FormatTok->ContinuesLineCommentSection &&
3125         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3126       ShouldPushCommentsInCurrentLine = false;
3127     }
3128     if (ShouldPushCommentsInCurrentLine) {
3129       pushToken(FormatTok);
3130     } else {
3131       CommentsBeforeNextToken.push_back(FormatTok);
3132     }
3133   }
3134 }
3135 
readToken(int LevelDifference)3136 void UnwrappedLineParser::readToken(int LevelDifference) {
3137   SmallVector<FormatToken *, 1> Comments;
3138   do {
3139     FormatTok = Tokens->getNextToken();
3140     assert(FormatTok);
3141     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3142            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3143       distributeComments(Comments, FormatTok);
3144       Comments.clear();
3145       // If there is an unfinished unwrapped line, we flush the preprocessor
3146       // directives only after that unwrapped line was finished later.
3147       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3148       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3149       assert((LevelDifference >= 0 ||
3150               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3151              "LevelDifference makes Line->Level negative");
3152       Line->Level += LevelDifference;
3153       // Comments stored before the preprocessor directive need to be output
3154       // before the preprocessor directive, at the same level as the
3155       // preprocessor directive, as we consider them to apply to the directive.
3156       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3157           PPBranchLevel > 0)
3158         Line->Level += PPBranchLevel;
3159       flushComments(isOnNewLine(*FormatTok));
3160       parsePPDirective();
3161     }
3162     while (FormatTok->getType() == TT_ConflictStart ||
3163            FormatTok->getType() == TT_ConflictEnd ||
3164            FormatTok->getType() == TT_ConflictAlternative) {
3165       if (FormatTok->getType() == TT_ConflictStart) {
3166         conditionalCompilationStart(/*Unreachable=*/false);
3167       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3168         conditionalCompilationAlternative();
3169       } else if (FormatTok->getType() == TT_ConflictEnd) {
3170         conditionalCompilationEnd();
3171       }
3172       FormatTok = Tokens->getNextToken();
3173       FormatTok->MustBreakBefore = true;
3174     }
3175 
3176     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3177         !Line->InPPDirective) {
3178       continue;
3179     }
3180 
3181     if (!FormatTok->Tok.is(tok::comment)) {
3182       distributeComments(Comments, FormatTok);
3183       Comments.clear();
3184       return;
3185     }
3186 
3187     Comments.push_back(FormatTok);
3188   } while (!eof());
3189 
3190   distributeComments(Comments, nullptr);
3191   Comments.clear();
3192 }
3193 
pushToken(FormatToken * Tok)3194 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3195   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3196   if (MustBreakBeforeNextToken) {
3197     Line->Tokens.back().Tok->MustBreakBefore = true;
3198     MustBreakBeforeNextToken = false;
3199   }
3200 }
3201 
3202 } // end namespace format
3203 } // end namespace clang
3204