1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20
21 #include <algorithm>
22
23 #define DEBUG_TYPE "format-parser"
24
25 namespace clang {
26 namespace format {
27
28 class FormatTokenSource {
29 public:
~FormatTokenSource()30 virtual ~FormatTokenSource() {}
31 virtual FormatToken *getNextToken() = 0;
32
33 virtual unsigned getPosition() = 0;
34 virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36
37 namespace {
38
39 class ScopedDeclarationState {
40 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42 bool MustBeDeclaration)
43 : Line(Line), Stack(Stack) {
44 Line.MustBeDeclaration = MustBeDeclaration;
45 Stack.push_back(MustBeDeclaration);
46 }
~ScopedDeclarationState()47 ~ScopedDeclarationState() {
48 Stack.pop_back();
49 if (!Stack.empty())
50 Line.MustBeDeclaration = Stack.back();
51 else
52 Line.MustBeDeclaration = true;
53 }
54
55 private:
56 UnwrappedLine &Line;
57 std::vector<bool> &Stack;
58 };
59
isLineComment(const FormatToken & FormatTok)60 static bool isLineComment(const FormatToken &FormatTok) {
61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)67 static bool continuesLineComment(const FormatToken &FormatTok,
68 const FormatToken *Previous,
69 const FormatToken *MinColumnToken) {
70 if (!Previous || !MinColumnToken)
71 return false;
72 unsigned MinContinueColumn =
73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75 isLineComment(*Previous) &&
76 FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78
79 class ScopedMacroState : public FormatTokenSource {
80 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82 FormatToken *&ResetToken)
83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85 Token(nullptr), PreviousToken(nullptr) {
86 FakeEOF.Tok.startToken();
87 FakeEOF.Tok.setKind(tok::eof);
88 TokenSource = this;
89 Line.Level = 0;
90 Line.InPPDirective = true;
91 }
92
~ScopedMacroState()93 ~ScopedMacroState() override {
94 TokenSource = PreviousTokenSource;
95 ResetToken = Token;
96 Line.InPPDirective = false;
97 Line.Level = PreviousLineLevel;
98 }
99
getNextToken()100 FormatToken *getNextToken() override {
101 // The \c UnwrappedLineParser guards against this by never calling
102 // \c getNextToken() after it has encountered the first eof token.
103 assert(!eof());
104 PreviousToken = Token;
105 Token = PreviousTokenSource->getNextToken();
106 if (eof())
107 return &FakeEOF;
108 return Token;
109 }
110
getPosition()111 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112
setPosition(unsigned Position)113 FormatToken *setPosition(unsigned Position) override {
114 PreviousToken = nullptr;
115 Token = PreviousTokenSource->setPosition(Position);
116 return Token;
117 }
118
119 private:
eof()120 bool eof() {
121 return Token && Token->HasUnescapedNewline &&
122 !continuesLineComment(*Token, PreviousToken,
123 /*MinColumnToken=*/PreviousToken);
124 }
125
126 FormatToken FakeEOF;
127 UnwrappedLine &Line;
128 FormatTokenSource *&TokenSource;
129 FormatToken *&ResetToken;
130 unsigned PreviousLineLevel;
131 FormatTokenSource *PreviousTokenSource;
132
133 FormatToken *Token;
134 FormatToken *PreviousToken;
135 };
136
137 } // end anonymous namespace
138
139 class ScopedLineState {
140 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)141 ScopedLineState(UnwrappedLineParser &Parser,
142 bool SwitchToPreprocessorLines = false)
143 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144 if (SwitchToPreprocessorLines)
145 Parser.CurrentLines = &Parser.PreprocessorDirectives;
146 else if (!Parser.Line->Tokens.empty())
147 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148 PreBlockLine = std::move(Parser.Line);
149 Parser.Line = std::make_unique<UnwrappedLine>();
150 Parser.Line->Level = PreBlockLine->Level;
151 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152 }
153
~ScopedLineState()154 ~ScopedLineState() {
155 if (!Parser.Line->Tokens.empty()) {
156 Parser.addUnwrappedLine();
157 }
158 assert(Parser.Line->Tokens.empty());
159 Parser.Line = std::move(PreBlockLine);
160 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161 Parser.MustBreakBeforeNextToken = true;
162 Parser.CurrentLines = OriginalLines;
163 }
164
165 private:
166 UnwrappedLineParser &Parser;
167
168 std::unique_ptr<UnwrappedLine> PreBlockLine;
169 SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171
172 class CompoundStatementIndenter {
173 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)174 CompoundStatementIndenter(UnwrappedLineParser *Parser,
175 const FormatStyle &Style, unsigned &LineLevel)
176 : CompoundStatementIndenter(Parser, LineLevel,
177 Style.BraceWrapping.AfterControlStatement,
178 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180 bool WrapBrace, bool IndentBrace)
181 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182 if (WrapBrace)
183 Parser->addUnwrappedLine();
184 if (IndentBrace)
185 ++LineLevel;
186 }
~CompoundStatementIndenter()187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188
189 private:
190 unsigned &LineLevel;
191 unsigned OldLineLevel;
192 };
193
194 namespace {
195
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199 : Tokens(Tokens), Position(-1) {}
200
getNextToken()201 FormatToken *getNextToken() override {
202 ++Position;
203 return Tokens[Position];
204 }
205
getPosition()206 unsigned getPosition() override {
207 assert(Position >= 0);
208 return Position;
209 }
210
setPosition(unsigned P)211 FormatToken *setPosition(unsigned P) override {
212 Position = P;
213 return Tokens[Position];
214 }
215
reset()216 void reset() { Position = -1; }
217
218 private:
219 ArrayRef<FormatToken *> Tokens;
220 int Position;
221 };
222
223 } // end anonymous namespace
224
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226 const AdditionalKeywords &Keywords,
227 unsigned FirstStartColumn,
228 ArrayRef<FormatToken *> Tokens,
229 UnwrappedLineConsumer &Callback)
230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235 ? IG_Rejected
236 : IG_Inited),
237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238
reset()239 void UnwrappedLineParser::reset() {
240 PPBranchLevel = -1;
241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242 ? IG_Rejected
243 : IG_Inited;
244 IncludeGuardToken = nullptr;
245 Line.reset(new UnwrappedLine);
246 CommentsBeforeNextToken.clear();
247 FormatTok = nullptr;
248 MustBreakBeforeNextToken = false;
249 PreprocessorDirectives.clear();
250 CurrentLines = &Lines;
251 DeclarationScopeStack.clear();
252 PPStack.clear();
253 Line->FirstStartColumn = FirstStartColumn;
254 }
255
parse()256 void UnwrappedLineParser::parse() {
257 IndexedTokenSource TokenSource(AllTokens);
258 Line->FirstStartColumn = FirstStartColumn;
259 do {
260 LLVM_DEBUG(llvm::dbgs() << "----\n");
261 reset();
262 Tokens = &TokenSource;
263 TokenSource.reset();
264
265 readToken();
266 parseFile();
267
268 // If we found an include guard then all preprocessor directives (other than
269 // the guard) are over-indented by one.
270 if (IncludeGuard == IG_Found)
271 for (auto &Line : Lines)
272 if (Line.InPPDirective && Line.Level > 0)
273 --Line.Level;
274
275 // Create line with eof token.
276 pushToken(FormatTok);
277 addUnwrappedLine();
278
279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280 E = Lines.end();
281 I != E; ++I) {
282 Callback.consumeUnwrappedLine(*I);
283 }
284 Callback.finishRun();
285 Lines.clear();
286 while (!PPLevelBranchIndex.empty() &&
287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290 }
291 if (!PPLevelBranchIndex.empty()) {
292 ++PPLevelBranchIndex.back();
293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295 }
296 } while (!PPLevelBranchIndex.empty());
297 }
298
parseFile()299 void UnwrappedLineParser::parseFile() {
300 // The top-level context in a file always has declarations, except for pre-
301 // processor directives and JavaScript files.
302 bool MustBeDeclaration =
303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305 MustBeDeclaration);
306 if (Style.Language == FormatStyle::LK_TextProto)
307 parseBracedList();
308 else
309 parseLevel(/*HasOpeningBrace=*/false);
310 // Make sure to format the remaining tokens.
311 //
312 // LK_TextProto is special since its top-level is parsed as the body of a
313 // braced list, which does not necessarily have natural line separators such
314 // as a semicolon. Comments after the last entry that have been determined to
315 // not belong to that line, as in:
316 // key: value
317 // // endfile comment
318 // do not have a chance to be put on a line of their own until this point.
319 // Here we add this newline before end-of-file comments.
320 if (Style.Language == FormatStyle::LK_TextProto &&
321 !CommentsBeforeNextToken.empty())
322 addUnwrappedLine();
323 flushComments(true);
324 addUnwrappedLine();
325 }
326
parseCSharpGenericTypeConstraint()327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328 do {
329 switch (FormatTok->Tok.getKind()) {
330 case tok::l_brace:
331 return;
332 default:
333 if (FormatTok->is(Keywords.kw_where)) {
334 addUnwrappedLine();
335 nextToken();
336 parseCSharpGenericTypeConstraint();
337 break;
338 }
339 nextToken();
340 break;
341 }
342 } while (!eof());
343 }
344
parseCSharpAttribute()345 void UnwrappedLineParser::parseCSharpAttribute() {
346 int UnpairedSquareBrackets = 1;
347 do {
348 switch (FormatTok->Tok.getKind()) {
349 case tok::r_square:
350 nextToken();
351 --UnpairedSquareBrackets;
352 if (UnpairedSquareBrackets == 0) {
353 addUnwrappedLine();
354 return;
355 }
356 break;
357 case tok::l_square:
358 ++UnpairedSquareBrackets;
359 nextToken();
360 break;
361 default:
362 nextToken();
363 break;
364 }
365 } while (!eof());
366 }
367
parseLevel(bool HasOpeningBrace)368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369 bool SwitchLabelEncountered = false;
370 do {
371 tok::TokenKind kind = FormatTok->Tok.getKind();
372 if (FormatTok->getType() == TT_MacroBlockBegin) {
373 kind = tok::l_brace;
374 } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375 kind = tok::r_brace;
376 }
377
378 switch (kind) {
379 case tok::comment:
380 nextToken();
381 addUnwrappedLine();
382 break;
383 case tok::l_brace:
384 // FIXME: Add parameter whether this can happen - if this happens, we must
385 // be in a non-declaration context.
386 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387 continue;
388 parseBlock(/*MustBeDeclaration=*/false);
389 addUnwrappedLine();
390 break;
391 case tok::r_brace:
392 if (HasOpeningBrace)
393 return;
394 nextToken();
395 addUnwrappedLine();
396 break;
397 case tok::kw_default: {
398 unsigned StoredPosition = Tokens->getPosition();
399 FormatToken *Next;
400 do {
401 Next = Tokens->getNextToken();
402 } while (Next && Next->is(tok::comment));
403 FormatTok = Tokens->setPosition(StoredPosition);
404 if (Next && Next->isNot(tok::colon)) {
405 // default not followed by ':' is not a case label; treat it like
406 // an identifier.
407 parseStructuralElement();
408 break;
409 }
410 // Else, if it is 'default:', fall through to the case handling.
411 LLVM_FALLTHROUGH;
412 }
413 case tok::kw_case:
414 if (Style.Language == FormatStyle::LK_JavaScript &&
415 Line->MustBeDeclaration) {
416 // A 'case: string' style field declaration.
417 parseStructuralElement();
418 break;
419 }
420 if (!SwitchLabelEncountered &&
421 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422 ++Line->Level;
423 SwitchLabelEncountered = true;
424 parseStructuralElement();
425 break;
426 case tok::l_square:
427 if (Style.isCSharp()) {
428 nextToken();
429 parseCSharpAttribute();
430 break;
431 }
432 LLVM_FALLTHROUGH;
433 default:
434 parseStructuralElement();
435 break;
436 }
437 } while (!eof());
438 }
439
calculateBraceTypes(bool ExpectClassBody)440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441 // We'll parse forward through the tokens until we hit
442 // a closing brace or eof - note that getNextToken() will
443 // parse macros, so this will magically work inside macro
444 // definitions, too.
445 unsigned StoredPosition = Tokens->getPosition();
446 FormatToken *Tok = FormatTok;
447 const FormatToken *PrevTok = Tok->Previous;
448 // Keep a stack of positions of lbrace tokens. We will
449 // update information about whether an lbrace starts a
450 // braced init list or a different block during the loop.
451 SmallVector<FormatToken *, 8> LBraceStack;
452 assert(Tok->Tok.is(tok::l_brace));
453 do {
454 // Get next non-comment token.
455 FormatToken *NextTok;
456 unsigned ReadTokens = 0;
457 do {
458 NextTok = Tokens->getNextToken();
459 ++ReadTokens;
460 } while (NextTok->is(tok::comment));
461
462 switch (Tok->Tok.getKind()) {
463 case tok::l_brace:
464 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465 if (PrevTok->isOneOf(tok::colon, tok::less))
466 // A ':' indicates this code is in a type, or a braced list
467 // following a label in an object literal ({a: {b: 1}}).
468 // A '<' could be an object used in a comparison, but that is nonsense
469 // code (can never return true), so more likely it is a generic type
470 // argument (`X<{a: string; b: number}>`).
471 // The code below could be confused by semicolons between the
472 // individual members in a type member list, which would normally
473 // trigger BK_Block. In both cases, this must be parsed as an inline
474 // braced init.
475 Tok->setBlockKind(BK_BracedInit);
476 else if (PrevTok->is(tok::r_paren))
477 // `) { }` can only occur in function or method declarations in JS.
478 Tok->setBlockKind(BK_Block);
479 } else {
480 Tok->setBlockKind(BK_Unknown);
481 }
482 LBraceStack.push_back(Tok);
483 break;
484 case tok::r_brace:
485 if (LBraceStack.empty())
486 break;
487 if (LBraceStack.back()->is(BK_Unknown)) {
488 bool ProbablyBracedList = false;
489 if (Style.Language == FormatStyle::LK_Proto) {
490 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491 } else {
492 // Using OriginalColumn to distinguish between ObjC methods and
493 // binary operators is a bit hacky.
494 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495 NextTok->OriginalColumn == 0;
496
497 // If there is a comma, semicolon or right paren after the closing
498 // brace, we assume this is a braced initializer list. Note that
499 // regardless how we mark inner braces here, we will overwrite the
500 // BlockKind later if we parse a braced list (where all blocks
501 // inside are by default braced lists), or when we explicitly detect
502 // blocks (for example while parsing lambdas).
503 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504 // braced list in JS.
505 ProbablyBracedList =
506 (Style.Language == FormatStyle::LK_JavaScript &&
507 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508 Keywords.kw_as)) ||
509 (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511 tok::r_paren, tok::r_square, tok::l_brace,
512 tok::ellipsis) ||
513 (NextTok->is(tok::identifier) &&
514 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515 (NextTok->is(tok::semi) &&
516 (!ExpectClassBody || LBraceStack.size() != 1)) ||
517 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519 // We can have an array subscript after a braced init
520 // list, but C++11 attributes are expected after blocks.
521 NextTok = Tokens->getNextToken();
522 ++ReadTokens;
523 ProbablyBracedList = NextTok->isNot(tok::l_square);
524 }
525 }
526 if (ProbablyBracedList) {
527 Tok->setBlockKind(BK_BracedInit);
528 LBraceStack.back()->setBlockKind(BK_BracedInit);
529 } else {
530 Tok->setBlockKind(BK_Block);
531 LBraceStack.back()->setBlockKind(BK_Block);
532 }
533 }
534 LBraceStack.pop_back();
535 break;
536 case tok::identifier:
537 if (!Tok->is(TT_StatementMacro))
538 break;
539 LLVM_FALLTHROUGH;
540 case tok::at:
541 case tok::semi:
542 case tok::kw_if:
543 case tok::kw_while:
544 case tok::kw_for:
545 case tok::kw_switch:
546 case tok::kw_try:
547 case tok::kw___try:
548 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
549 LBraceStack.back()->setBlockKind(BK_Block);
550 break;
551 default:
552 break;
553 }
554 PrevTok = Tok;
555 Tok = NextTok;
556 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557
558 // Assume other blocks for all unclosed opening braces.
559 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560 if (LBraceStack[i]->is(BK_Unknown))
561 LBraceStack[i]->setBlockKind(BK_Block);
562 }
563
564 FormatTok = Tokens->setPosition(StoredPosition);
565 }
566
// Mixes the std::hash value of \p v into \p seed, updating \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572
computePPHash() const573 size_t UnwrappedLineParser::computePPHash() const {
574 size_t h = 0;
575 for (const auto &i : PPStack) {
576 hash_combine(h, size_t(i.Kind));
577 hash_combine(h, i.Line);
578 }
579 return h;
580 }
581
parseBlock(bool MustBeDeclaration,bool AddLevel,bool MunchSemi)582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
583 bool MunchSemi) {
584 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
585 "'{' or macro block token expected");
586 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
587 FormatTok->setBlockKind(BK_Block);
588
589 size_t PPStartHash = computePPHash();
590
591 unsigned InitialLevel = Line->Level;
592 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
593
594 if (MacroBlock && FormatTok->is(tok::l_paren))
595 parseParens();
596
597 size_t NbPreprocessorDirectives =
598 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
599 addUnwrappedLine();
600 size_t OpeningLineIndex =
601 CurrentLines->empty()
602 ? (UnwrappedLine::kInvalidIndex)
603 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
604
605 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
606 MustBeDeclaration);
607 if (AddLevel)
608 ++Line->Level;
609 parseLevel(/*HasOpeningBrace=*/true);
610
611 if (eof())
612 return;
613
614 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
615 : !FormatTok->is(tok::r_brace)) {
616 Line->Level = InitialLevel;
617 FormatTok->setBlockKind(BK_Block);
618 return;
619 }
620
621 size_t PPEndHash = computePPHash();
622
623 // Munch the closing brace.
624 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
625
626 if (MacroBlock && FormatTok->is(tok::l_paren))
627 parseParens();
628
629 if (FormatTok->is(tok::arrow)) {
630 // Following the } we can find a trailing return type arrow
631 // as part of an implicit conversion constraint.
632 nextToken();
633 parseStructuralElement();
634 }
635
636 if (MunchSemi && FormatTok->Tok.is(tok::semi))
637 nextToken();
638
639 Line->Level = InitialLevel;
640
641 if (PPStartHash == PPEndHash) {
642 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
643 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
644 // Update the opening line to add the forward reference as well
645 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
646 CurrentLines->size() - 1;
647 }
648 }
649 }
650
isGoogScope(const UnwrappedLine & Line)651 static bool isGoogScope(const UnwrappedLine &Line) {
652 // FIXME: Closure-library specific stuff should not be hard-coded but be
653 // configurable.
654 if (Line.Tokens.size() < 4)
655 return false;
656 auto I = Line.Tokens.begin();
657 if (I->Tok->TokenText != "goog")
658 return false;
659 ++I;
660 if (I->Tok->isNot(tok::period))
661 return false;
662 ++I;
663 if (I->Tok->TokenText != "scope")
664 return false;
665 ++I;
666 return I->Tok->is(tok::l_paren);
667 }
668
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)669 static bool isIIFE(const UnwrappedLine &Line,
670 const AdditionalKeywords &Keywords) {
671 // Look for the start of an immediately invoked anonymous function.
672 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
673 // This is commonly done in JavaScript to create a new, anonymous scope.
674 // Example: (function() { ... })()
675 if (Line.Tokens.size() < 3)
676 return false;
677 auto I = Line.Tokens.begin();
678 if (I->Tok->isNot(tok::l_paren))
679 return false;
680 ++I;
681 if (I->Tok->isNot(Keywords.kw_function))
682 return false;
683 ++I;
684 return I->Tok->is(tok::l_paren);
685 }
686
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)687 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
688 const FormatToken &InitialToken) {
689 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
690 return Style.BraceWrapping.AfterNamespace;
691 if (InitialToken.is(tok::kw_class))
692 return Style.BraceWrapping.AfterClass;
693 if (InitialToken.is(tok::kw_union))
694 return Style.BraceWrapping.AfterUnion;
695 if (InitialToken.is(tok::kw_struct))
696 return Style.BraceWrapping.AfterStruct;
697 return false;
698 }
699
parseChildBlock()700 void UnwrappedLineParser::parseChildBlock() {
701 FormatTok->setBlockKind(BK_Block);
702 nextToken();
703 {
704 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
705 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
706 ScopedLineState LineState(*this);
707 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
708 /*MustBeDeclaration=*/false);
709 Line->Level += SkipIndent ? 0 : 1;
710 parseLevel(/*HasOpeningBrace=*/true);
711 flushComments(isOnNewLine(*FormatTok));
712 Line->Level -= SkipIndent ? 0 : 1;
713 }
714 nextToken();
715 }
716
parsePPDirective()717 void UnwrappedLineParser::parsePPDirective() {
718 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
719 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
720
721 nextToken();
722
723 if (!FormatTok->Tok.getIdentifierInfo()) {
724 parsePPUnknown();
725 return;
726 }
727
728 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
729 case tok::pp_define:
730 parsePPDefine();
731 return;
732 case tok::pp_if:
733 parsePPIf(/*IfDef=*/false);
734 break;
735 case tok::pp_ifdef:
736 case tok::pp_ifndef:
737 parsePPIf(/*IfDef=*/true);
738 break;
739 case tok::pp_else:
740 parsePPElse();
741 break;
742 case tok::pp_elif:
743 parsePPElIf();
744 break;
745 case tok::pp_endif:
746 parsePPEndIf();
747 break;
748 default:
749 parsePPUnknown();
750 break;
751 }
752 }
753
conditionalCompilationCondition(bool Unreachable)754 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
755 size_t Line = CurrentLines->size();
756 if (CurrentLines == &PreprocessorDirectives)
757 Line += Lines.size();
758
759 if (Unreachable ||
760 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
761 PPStack.push_back({PP_Unreachable, Line});
762 else
763 PPStack.push_back({PP_Conditional, Line});
764 }
765
conditionalCompilationStart(bool Unreachable)766 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
767 ++PPBranchLevel;
768 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
769 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
770 PPLevelBranchIndex.push_back(0);
771 PPLevelBranchCount.push_back(0);
772 }
773 PPChainBranchIndex.push(0);
774 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
775 conditionalCompilationCondition(Unreachable || Skip);
776 }
777
conditionalCompilationAlternative()778 void UnwrappedLineParser::conditionalCompilationAlternative() {
779 if (!PPStack.empty())
780 PPStack.pop_back();
781 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
782 if (!PPChainBranchIndex.empty())
783 ++PPChainBranchIndex.top();
784 conditionalCompilationCondition(
785 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
786 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
787 }
788
conditionalCompilationEnd()789 void UnwrappedLineParser::conditionalCompilationEnd() {
790 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
791 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
792 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
793 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
794 }
795 }
796 // Guard against #endif's without #if.
797 if (PPBranchLevel > -1)
798 --PPBranchLevel;
799 if (!PPChainBranchIndex.empty())
800 PPChainBranchIndex.pop();
801 if (!PPStack.empty())
802 PPStack.pop_back();
803 }
804
parsePPIf(bool IfDef)805 void UnwrappedLineParser::parsePPIf(bool IfDef) {
806 bool IfNDef = FormatTok->is(tok::pp_ifndef);
807 nextToken();
808 bool Unreachable = false;
809 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
810 Unreachable = true;
811 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
812 Unreachable = true;
813 conditionalCompilationStart(Unreachable);
814 FormatToken *IfCondition = FormatTok;
815 // If there's a #ifndef on the first line, and the only lines before it are
816 // comments, it could be an include guard.
817 bool MaybeIncludeGuard = IfNDef;
818 if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
819 for (auto &Line : Lines) {
820 if (!Line.Tokens.front().Tok->is(tok::comment)) {
821 MaybeIncludeGuard = false;
822 IncludeGuard = IG_Rejected;
823 break;
824 }
825 }
826 --PPBranchLevel;
827 parsePPUnknown();
828 ++PPBranchLevel;
829 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
830 IncludeGuard = IG_IfNdefed;
831 IncludeGuardToken = IfCondition;
832 }
833 }
834
parsePPElse()835 void UnwrappedLineParser::parsePPElse() {
836 // If a potential include guard has an #else, it's not an include guard.
837 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
838 IncludeGuard = IG_Rejected;
839 conditionalCompilationAlternative();
840 if (PPBranchLevel > -1)
841 --PPBranchLevel;
842 parsePPUnknown();
843 ++PPBranchLevel;
844 }
845
parsePPElIf()846 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
847
parsePPEndIf()848 void UnwrappedLineParser::parsePPEndIf() {
849 conditionalCompilationEnd();
850 parsePPUnknown();
851 // If the #endif of a potential include guard is the last thing in the file,
852 // then we found an include guard.
853 unsigned TokenPosition = Tokens->getPosition();
854 FormatToken *PeekNext = AllTokens[TokenPosition];
855 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
856 PeekNext->is(tok::eof) &&
857 Style.IndentPPDirectives != FormatStyle::PPDIS_None)
858 IncludeGuard = IG_Found;
859 }
860
parsePPDefine()861 void UnwrappedLineParser::parsePPDefine() {
862 nextToken();
863
864 if (!FormatTok->Tok.getIdentifierInfo()) {
865 IncludeGuard = IG_Rejected;
866 IncludeGuardToken = nullptr;
867 parsePPUnknown();
868 return;
869 }
870
871 if (IncludeGuard == IG_IfNdefed &&
872 IncludeGuardToken->TokenText == FormatTok->TokenText) {
873 IncludeGuard = IG_Defined;
874 IncludeGuardToken = nullptr;
875 for (auto &Line : Lines) {
876 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
877 IncludeGuard = IG_Rejected;
878 break;
879 }
880 }
881 }
882
883 nextToken();
884 if (FormatTok->Tok.getKind() == tok::l_paren &&
885 FormatTok->WhitespaceRange.getBegin() ==
886 FormatTok->WhitespaceRange.getEnd()) {
887 parseParens();
888 }
889 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
890 Line->Level += PPBranchLevel + 1;
891 addUnwrappedLine();
892 ++Line->Level;
893
894 // Errors during a preprocessor directive can only affect the layout of the
895 // preprocessor directive, and thus we ignore them. An alternative approach
896 // would be to use the same approach we use on the file level (no
897 // re-indentation if there was a structural error) within the macro
898 // definition.
899 parseFile();
900 }
901
parsePPUnknown()902 void UnwrappedLineParser::parsePPUnknown() {
903 do {
904 nextToken();
905 } while (!eof());
906 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
907 Line->Level += PPBranchLevel + 1;
908 addUnwrappedLine();
909 }
910
911 // Here we exclude certain tokens that are not usually the first token in an
912 // unwrapped line. This is used in attempt to distinguish macro calls without
913 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)914 static bool tokenCanStartNewLine(const FormatToken &Tok) {
915 // Semicolon can be a null-statement, l_square can be a start of a macro or
916 // a C++11 attribute, but this doesn't seem to be common.
917 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
918 Tok.isNot(TT_AttributeSquare) &&
919 // Tokens that can only be used as binary operators and a part of
920 // overloaded operator names.
921 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
922 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
923 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
924 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
925 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
926 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
927 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
928 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
929 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
930 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
931 Tok.isNot(tok::lesslessequal) &&
932 // Colon is used in labels, base class lists, initializer lists,
933 // range-based for loops, ternary operator, but should never be the
934 // first token in an unwrapped line.
935 Tok.isNot(tok::colon) &&
936 // 'noexcept' is a trailing annotation.
937 Tok.isNot(tok::kw_noexcept);
938 }
939
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)940 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
941 const FormatToken *FormatTok) {
942 // FIXME: This returns true for C/C++ keywords like 'struct'.
943 return FormatTok->is(tok::identifier) &&
944 (FormatTok->Tok.getIdentifierInfo() == nullptr ||
945 !FormatTok->isOneOf(
946 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
947 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
948 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
949 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
950 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
951 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
952 Keywords.kw_from));
953 }
954
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)955 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
956 const FormatToken *FormatTok) {
957 return FormatTok->Tok.isLiteral() ||
958 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
959 mustBeJSIdent(Keywords, FormatTok);
960 }
961
962 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
963 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)964 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
965 const FormatToken *FormatTok) {
966 return FormatTok->isOneOf(
967 tok::kw_return, Keywords.kw_yield,
968 // conditionals
969 tok::kw_if, tok::kw_else,
970 // loops
971 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
972 // switch/case
973 tok::kw_switch, tok::kw_case,
974 // exceptions
975 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
976 // declaration
977 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
978 Keywords.kw_async, Keywords.kw_function,
979 // import/export
980 Keywords.kw_import, tok::kw_export);
981 }
982
983 // readTokenWithJavaScriptASI reads the next token and terminates the current
984 // line if JavaScript Automatic Semicolon Insertion must
985 // happen between the current token and the next token.
986 //
987 // This method is conservative - it cannot cover all edge cases of JavaScript,
988 // but only aims to correctly handle certain well known cases. It *must not*
989 // return true in speculative cases.
readTokenWithJavaScriptASI()990 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
991 FormatToken *Previous = FormatTok;
992 readToken();
993 FormatToken *Next = FormatTok;
994
995 bool IsOnSameLine =
996 CommentsBeforeNextToken.empty()
997 ? Next->NewlinesBefore == 0
998 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
999 if (IsOnSameLine)
1000 return;
1001
1002 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1003 bool PreviousStartsTemplateExpr =
1004 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1005 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1006 // If the line contains an '@' sign, the previous token might be an
1007 // annotation, which can precede another identifier/value.
1008 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1009 [](UnwrappedLineNode &LineNode) {
1010 return LineNode.Tok->is(tok::at);
1011 }) != Line->Tokens.end();
1012 if (HasAt)
1013 return;
1014 }
1015 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1016 return addUnwrappedLine();
1017 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1018 bool NextEndsTemplateExpr =
1019 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1020 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1021 (PreviousMustBeValue ||
1022 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1023 tok::minusminus)))
1024 return addUnwrappedLine();
1025 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1026 isJSDeclOrStmt(Keywords, Next))
1027 return addUnwrappedLine();
1028 }
1029
/// Parses one structural element starting at the current token and emits the
/// unwrapped line(s) it consists of.
///
/// The first switch dispatches on the leading token to dedicated parsers
/// (namespaces, control flow, labels, extern blocks, ...). Cases that `return`
/// have fully handled the element; cases that `break` fall into the generic
/// loop below, which consumes tokens one at a time until a case ends the
/// element (typically via addUnwrappedLine() followed by `return`) or eof()
/// is reached.
///
/// Must not be called with the current token being a '{'.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen 'include "file"' is emitted as a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Braced inline assembly: both braces are typed as TT_InlineASMBrace
      // and every token in between is marked as finalized.
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are plain modifiers on a declaration;
    // otherwise they are parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; any other use of 'extern' is
    // left to the generic loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (!Style.IndentExternBlock) {
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.BraceWrapping.AfterExternBlock);
        } else {
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.IndentExternBlock ==
                         FormatStyle::IEBS_Indent);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: 'import [public] "file" [;]'.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style section markers (and their Q_ variants)
    // followed by a colon form a line of their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic path: consume the element token by token. Each iteration handles
  // the construct that starts at the current token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      break;
    case tok::kw_requires:
      parseRequires();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and friends are parsed like an enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type or identifier, a parameter list and
      // a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          //
          // Peek at the following token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The checks below only apply when the identifier was the first token
      // of the line, optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or
        // at least five characters long, followed by a newline and a token
        // that can start a line, is treated as a macro invocation.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1515
tryToParsePropertyAccessor()1516 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1517 assert(FormatTok->is(tok::l_brace));
1518 if (!Style.isCSharp())
1519 return false;
1520 // See if it's a property accessor.
1521 if (FormatTok->Previous->isNot(tok::identifier))
1522 return false;
1523
1524 // See if we are inside a property accessor.
1525 //
1526 // Record the current tokenPosition so that we can advance and
1527 // reset the current token. `Next` is not set yet so we need
1528 // another way to advance along the token stream.
1529 unsigned int StoredPosition = Tokens->getPosition();
1530 FormatToken *Tok = Tokens->getNextToken();
1531
1532 // A trivial property accessor is of the form:
1533 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1534 // Track these as they do not require line breaks to be introduced.
1535 bool HasGetOrSet = false;
1536 bool IsTrivialPropertyAccessor = true;
1537 while (!eof()) {
1538 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1539 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1540 Keywords.kw_set)) {
1541 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1542 HasGetOrSet = true;
1543 Tok = Tokens->getNextToken();
1544 continue;
1545 }
1546 if (Tok->isNot(tok::r_brace))
1547 IsTrivialPropertyAccessor = false;
1548 break;
1549 }
1550
1551 if (!HasGetOrSet) {
1552 Tokens->setPosition(StoredPosition);
1553 return false;
1554 }
1555
1556 // Try to parse the property accessor:
1557 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1558 Tokens->setPosition(StoredPosition);
1559 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1560 addUnwrappedLine();
1561 nextToken();
1562 do {
1563 switch (FormatTok->Tok.getKind()) {
1564 case tok::r_brace:
1565 nextToken();
1566 if (FormatTok->is(tok::equal)) {
1567 while (!eof() && FormatTok->isNot(tok::semi))
1568 nextToken();
1569 nextToken();
1570 }
1571 addUnwrappedLine();
1572 return true;
1573 case tok::l_brace:
1574 ++Line->Level;
1575 parseBlock(/*MustBeDeclaration=*/true);
1576 addUnwrappedLine();
1577 --Line->Level;
1578 break;
1579 case tok::equal:
1580 if (FormatTok->is(TT_JsFatArrow)) {
1581 ++Line->Level;
1582 do {
1583 nextToken();
1584 } while (!eof() && FormatTok->isNot(tok::semi));
1585 nextToken();
1586 addUnwrappedLine();
1587 --Line->Level;
1588 break;
1589 }
1590 nextToken();
1591 break;
1592 default:
1593 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1594 !IsTrivialPropertyAccessor) {
1595 // Non-trivial get/set needs to be on its own line.
1596 addUnwrappedLine();
1597 }
1598 nextToken();
1599 }
1600 } while (!eof());
1601
1602 // Unreachable for well-formed code (paired '{' and '}').
1603 return true;
1604 }
1605
tryToParseLambda()1606 bool UnwrappedLineParser::tryToParseLambda() {
1607 if (!Style.isCpp()) {
1608 nextToken();
1609 return false;
1610 }
1611 assert(FormatTok->is(tok::l_square));
1612 FormatToken &LSquare = *FormatTok;
1613 if (!tryToParseLambdaIntroducer())
1614 return false;
1615
1616 bool SeenArrow = false;
1617
1618 while (FormatTok->isNot(tok::l_brace)) {
1619 if (FormatTok->isSimpleTypeSpecifier()) {
1620 nextToken();
1621 continue;
1622 }
1623 switch (FormatTok->Tok.getKind()) {
1624 case tok::l_brace:
1625 break;
1626 case tok::l_paren:
1627 parseParens();
1628 break;
1629 case tok::amp:
1630 case tok::star:
1631 case tok::kw_const:
1632 case tok::comma:
1633 case tok::less:
1634 case tok::greater:
1635 case tok::identifier:
1636 case tok::numeric_constant:
1637 case tok::coloncolon:
1638 case tok::kw_class:
1639 case tok::kw_mutable:
1640 case tok::kw_noexcept:
1641 case tok::kw_template:
1642 case tok::kw_typename:
1643 nextToken();
1644 break;
1645 // Specialization of a template with an integer parameter can contain
1646 // arithmetic, logical, comparison and ternary operators.
1647 //
1648 // FIXME: This also accepts sequences of operators that are not in the scope
1649 // of a template argument list.
1650 //
1651 // In a C++ lambda a template type can only occur after an arrow. We use
1652 // this as an heuristic to distinguish between Objective-C expressions
1653 // followed by an `a->b` expression, such as:
1654 // ([obj func:arg] + a->b)
1655 // Otherwise the code below would parse as a lambda.
1656 //
1657 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1658 // explicit template lists: []<bool b = true && false>(U &&u){}
1659 case tok::plus:
1660 case tok::minus:
1661 case tok::exclaim:
1662 case tok::tilde:
1663 case tok::slash:
1664 case tok::percent:
1665 case tok::lessless:
1666 case tok::pipe:
1667 case tok::pipepipe:
1668 case tok::ampamp:
1669 case tok::caret:
1670 case tok::equalequal:
1671 case tok::exclaimequal:
1672 case tok::greaterequal:
1673 case tok::lessequal:
1674 case tok::question:
1675 case tok::colon:
1676 case tok::ellipsis:
1677 case tok::kw_true:
1678 case tok::kw_false:
1679 if (SeenArrow) {
1680 nextToken();
1681 break;
1682 }
1683 return true;
1684 case tok::arrow:
1685 // This might or might not actually be a lambda arrow (this could be an
1686 // ObjC method invocation followed by a dereferencing arrow). We might
1687 // reset this back to TT_Unknown in TokenAnnotator.
1688 FormatTok->setType(TT_LambdaArrow);
1689 SeenArrow = true;
1690 nextToken();
1691 break;
1692 default:
1693 return true;
1694 }
1695 }
1696 FormatTok->setType(TT_LambdaLBrace);
1697 LSquare.setType(TT_LambdaLSquare);
1698 parseChildBlock();
1699 return true;
1700 }
1701
tryToParseLambdaIntroducer()1702 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1703 const FormatToken *Previous = FormatTok->Previous;
1704 if (Previous &&
1705 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1706 tok::kw_delete, tok::l_square) ||
1707 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1708 Previous->isSimpleTypeSpecifier())) {
1709 nextToken();
1710 return false;
1711 }
1712 nextToken();
1713 if (FormatTok->is(tok::l_square)) {
1714 return false;
1715 }
1716 parseSquare(/*LambdaIntroducer=*/true);
1717 return true;
1718 }
1719
tryToParseJSFunction()1720 void UnwrappedLineParser::tryToParseJSFunction() {
1721 assert(FormatTok->is(Keywords.kw_function) ||
1722 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1723 if (FormatTok->is(Keywords.kw_async))
1724 nextToken();
1725 // Consume "function".
1726 nextToken();
1727
1728 // Consume * (generator function). Treat it like C++'s overloaded operators.
1729 if (FormatTok->is(tok::star)) {
1730 FormatTok->setType(TT_OverloadedOperator);
1731 nextToken();
1732 }
1733
1734 // Consume function name.
1735 if (FormatTok->is(tok::identifier))
1736 nextToken();
1737
1738 if (FormatTok->isNot(tok::l_paren))
1739 return;
1740
1741 // Parse formal parameter list.
1742 parseParens();
1743
1744 if (FormatTok->is(tok::colon)) {
1745 // Parse a type definition.
1746 nextToken();
1747
1748 // Eat the type declaration. For braced inline object types, balance braces,
1749 // otherwise just parse until finding an l_brace for the function body.
1750 if (FormatTok->is(tok::l_brace))
1751 tryToParseBracedList();
1752 else
1753 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1754 nextToken();
1755 }
1756
1757 if (FormatTok->is(tok::semi))
1758 return;
1759
1760 parseChildBlock();
1761 }
1762
tryToParseBracedList()1763 bool UnwrappedLineParser::tryToParseBracedList() {
1764 if (FormatTok->is(BK_Unknown))
1765 calculateBraceTypes();
1766 assert(FormatTok->isNot(BK_Unknown));
1767 if (FormatTok->is(BK_Block))
1768 return false;
1769 nextToken();
1770 parseBracedList();
1771 return true;
1772 }
1773
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)1774 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1775 bool IsEnum,
1776 tok::TokenKind ClosingBraceKind) {
1777 bool HasError = false;
1778
1779 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1780 // replace this by using parseAssigmentExpression() inside.
1781 do {
1782 if (Style.isCSharp()) {
1783 if (FormatTok->is(TT_JsFatArrow)) {
1784 nextToken();
1785 // Fat arrows can be followed by simple expressions or by child blocks
1786 // in curly braces.
1787 if (FormatTok->is(tok::l_brace)) {
1788 parseChildBlock();
1789 continue;
1790 }
1791 }
1792 }
1793 if (Style.Language == FormatStyle::LK_JavaScript) {
1794 if (FormatTok->is(Keywords.kw_function) ||
1795 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1796 tryToParseJSFunction();
1797 continue;
1798 }
1799 if (FormatTok->is(TT_JsFatArrow)) {
1800 nextToken();
1801 // Fat arrows can be followed by simple expressions or by child blocks
1802 // in curly braces.
1803 if (FormatTok->is(tok::l_brace)) {
1804 parseChildBlock();
1805 continue;
1806 }
1807 }
1808 if (FormatTok->is(tok::l_brace)) {
1809 // Could be a method inside of a braced list `{a() { return 1; }}`.
1810 if (tryToParseBracedList())
1811 continue;
1812 parseChildBlock();
1813 }
1814 }
1815 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1816 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1817 addUnwrappedLine();
1818 nextToken();
1819 return !HasError;
1820 }
1821 switch (FormatTok->Tok.getKind()) {
1822 case tok::caret:
1823 nextToken();
1824 if (FormatTok->is(tok::l_brace)) {
1825 parseChildBlock();
1826 }
1827 break;
1828 case tok::l_square:
1829 if (Style.isCSharp())
1830 parseSquare();
1831 else
1832 tryToParseLambda();
1833 break;
1834 case tok::l_paren:
1835 parseParens();
1836 // JavaScript can just have free standing methods and getters/setters in
1837 // object literals. Detect them by a "{" following ")".
1838 if (Style.Language == FormatStyle::LK_JavaScript) {
1839 if (FormatTok->is(tok::l_brace))
1840 parseChildBlock();
1841 break;
1842 }
1843 break;
1844 case tok::l_brace:
1845 // Assume there are no blocks inside a braced init list apart
1846 // from the ones we explicitly parse out (like lambdas).
1847 FormatTok->setBlockKind(BK_BracedInit);
1848 nextToken();
1849 parseBracedList();
1850 break;
1851 case tok::less:
1852 if (Style.Language == FormatStyle::LK_Proto) {
1853 nextToken();
1854 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1855 /*ClosingBraceKind=*/tok::greater);
1856 } else {
1857 nextToken();
1858 }
1859 break;
1860 case tok::semi:
1861 // JavaScript (or more precisely TypeScript) can have semicolons in braced
1862 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1863 // used for error recovery if we have otherwise determined that this is
1864 // a braced list.
1865 if (Style.Language == FormatStyle::LK_JavaScript) {
1866 nextToken();
1867 break;
1868 }
1869 HasError = true;
1870 if (!ContinueOnSemicolons)
1871 return !HasError;
1872 nextToken();
1873 break;
1874 case tok::comma:
1875 nextToken();
1876 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1877 addUnwrappedLine();
1878 break;
1879 default:
1880 nextToken();
1881 break;
1882 }
1883 } while (!eof());
1884 return false;
1885 }
1886
parseParens()1887 void UnwrappedLineParser::parseParens() {
1888 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1889 nextToken();
1890 do {
1891 switch (FormatTok->Tok.getKind()) {
1892 case tok::l_paren:
1893 parseParens();
1894 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1895 parseChildBlock();
1896 break;
1897 case tok::r_paren:
1898 nextToken();
1899 return;
1900 case tok::r_brace:
1901 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1902 return;
1903 case tok::l_square:
1904 tryToParseLambda();
1905 break;
1906 case tok::l_brace:
1907 if (!tryToParseBracedList())
1908 parseChildBlock();
1909 break;
1910 case tok::at:
1911 nextToken();
1912 if (FormatTok->Tok.is(tok::l_brace)) {
1913 nextToken();
1914 parseBracedList();
1915 }
1916 break;
1917 case tok::kw_class:
1918 if (Style.Language == FormatStyle::LK_JavaScript)
1919 parseRecord(/*ParseAsExpr=*/true);
1920 else
1921 nextToken();
1922 break;
1923 case tok::identifier:
1924 if (Style.Language == FormatStyle::LK_JavaScript &&
1925 (FormatTok->is(Keywords.kw_function) ||
1926 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1927 tryToParseJSFunction();
1928 else
1929 nextToken();
1930 break;
1931 default:
1932 nextToken();
1933 break;
1934 }
1935 } while (!eof());
1936 }
1937
parseSquare(bool LambdaIntroducer)1938 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1939 if (!LambdaIntroducer) {
1940 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1941 if (tryToParseLambda())
1942 return;
1943 }
1944 do {
1945 switch (FormatTok->Tok.getKind()) {
1946 case tok::l_paren:
1947 parseParens();
1948 break;
1949 case tok::r_square:
1950 nextToken();
1951 return;
1952 case tok::r_brace:
1953 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1954 return;
1955 case tok::l_square:
1956 parseSquare();
1957 break;
1958 case tok::l_brace: {
1959 if (!tryToParseBracedList())
1960 parseChildBlock();
1961 break;
1962 }
1963 case tok::at:
1964 nextToken();
1965 if (FormatTok->Tok.is(tok::l_brace)) {
1966 nextToken();
1967 parseBracedList();
1968 }
1969 break;
1970 default:
1971 nextToken();
1972 break;
1973 }
1974 } while (!eof());
1975 }
1976
parseIfThenElse()1977 void UnwrappedLineParser::parseIfThenElse() {
1978 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1979 nextToken();
1980 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1981 nextToken();
1982 if (FormatTok->Tok.is(tok::l_paren))
1983 parseParens();
1984 // handle [[likely]] / [[unlikely]]
1985 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
1986 parseSquare();
1987 bool NeedsUnwrappedLine = false;
1988 if (FormatTok->Tok.is(tok::l_brace)) {
1989 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1990 parseBlock(/*MustBeDeclaration=*/false);
1991 if (Style.BraceWrapping.BeforeElse)
1992 addUnwrappedLine();
1993 else
1994 NeedsUnwrappedLine = true;
1995 } else {
1996 addUnwrappedLine();
1997 ++Line->Level;
1998 parseStructuralElement();
1999 --Line->Level;
2000 }
2001 if (FormatTok->Tok.is(tok::kw_else)) {
2002 nextToken();
2003 // handle [[likely]] / [[unlikely]]
2004 if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2005 parseSquare();
2006 if (FormatTok->Tok.is(tok::l_brace)) {
2007 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2008 parseBlock(/*MustBeDeclaration=*/false);
2009 addUnwrappedLine();
2010 } else if (FormatTok->Tok.is(tok::kw_if)) {
2011 parseIfThenElse();
2012 } else {
2013 addUnwrappedLine();
2014 ++Line->Level;
2015 parseStructuralElement();
2016 if (FormatTok->is(tok::eof))
2017 addUnwrappedLine();
2018 --Line->Level;
2019 }
2020 } else if (NeedsUnwrappedLine) {
2021 addUnwrappedLine();
2022 }
2023 }
2024
parseTryCatch()2025 void UnwrappedLineParser::parseTryCatch() {
2026 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2027 nextToken();
2028 bool NeedsUnwrappedLine = false;
2029 if (FormatTok->is(tok::colon)) {
2030 // We are in a function try block, what comes is an initializer list.
2031 nextToken();
2032
2033 // In case identifiers were removed by clang-tidy, what might follow is
2034 // multiple commas in sequence - before the first identifier.
2035 while (FormatTok->is(tok::comma))
2036 nextToken();
2037
2038 while (FormatTok->is(tok::identifier)) {
2039 nextToken();
2040 if (FormatTok->is(tok::l_paren))
2041 parseParens();
2042
2043 // In case identifiers were removed by clang-tidy, what might follow is
2044 // multiple commas in sequence - after the first identifier.
2045 while (FormatTok->is(tok::comma))
2046 nextToken();
2047 }
2048 }
2049 // Parse try with resource.
2050 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2051 parseParens();
2052 }
2053 if (FormatTok->is(tok::l_brace)) {
2054 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2055 parseBlock(/*MustBeDeclaration=*/false);
2056 if (Style.BraceWrapping.BeforeCatch) {
2057 addUnwrappedLine();
2058 } else {
2059 NeedsUnwrappedLine = true;
2060 }
2061 } else if (!FormatTok->is(tok::kw_catch)) {
2062 // The C++ standard requires a compound-statement after a try.
2063 // If there's none, we try to assume there's a structuralElement
2064 // and try to continue.
2065 addUnwrappedLine();
2066 ++Line->Level;
2067 parseStructuralElement();
2068 --Line->Level;
2069 }
2070 while (1) {
2071 if (FormatTok->is(tok::at))
2072 nextToken();
2073 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2074 tok::kw___finally) ||
2075 ((Style.Language == FormatStyle::LK_Java ||
2076 Style.Language == FormatStyle::LK_JavaScript) &&
2077 FormatTok->is(Keywords.kw_finally)) ||
2078 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2079 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2080 break;
2081 nextToken();
2082 while (FormatTok->isNot(tok::l_brace)) {
2083 if (FormatTok->is(tok::l_paren)) {
2084 parseParens();
2085 continue;
2086 }
2087 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2088 return;
2089 nextToken();
2090 }
2091 NeedsUnwrappedLine = false;
2092 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2093 parseBlock(/*MustBeDeclaration=*/false);
2094 if (Style.BraceWrapping.BeforeCatch)
2095 addUnwrappedLine();
2096 else
2097 NeedsUnwrappedLine = true;
2098 }
2099 if (NeedsUnwrappedLine)
2100 addUnwrappedLine();
2101 }
2102
parseNamespace()2103 void UnwrappedLineParser::parseNamespace() {
2104 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2105 "'namespace' expected");
2106
2107 const FormatToken &InitialToken = *FormatTok;
2108 nextToken();
2109 if (InitialToken.is(TT_NamespaceMacro)) {
2110 parseParens();
2111 } else {
2112 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2113 tok::l_square)) {
2114 if (FormatTok->is(tok::l_square))
2115 parseSquare();
2116 else
2117 nextToken();
2118 }
2119 }
2120 if (FormatTok->Tok.is(tok::l_brace)) {
2121 if (ShouldBreakBeforeBrace(Style, InitialToken))
2122 addUnwrappedLine();
2123
2124 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
2125 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2126 DeclarationScopeStack.size() > 1);
2127 parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2128 // Munch the semicolon after a namespace. This is more common than one would
2129 // think. Putting the semicolon into its own line is very ugly.
2130 if (FormatTok->Tok.is(tok::semi))
2131 nextToken();
2132 addUnwrappedLine();
2133 }
2134 // FIXME: Add error handling.
2135 }
2136
parseNew()2137 void UnwrappedLineParser::parseNew() {
2138 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2139 nextToken();
2140
2141 if (Style.isCSharp()) {
2142 do {
2143 if (FormatTok->is(tok::l_brace))
2144 parseBracedList();
2145
2146 if (FormatTok->isOneOf(tok::semi, tok::comma))
2147 return;
2148
2149 nextToken();
2150 } while (!eof());
2151 }
2152
2153 if (Style.Language != FormatStyle::LK_Java)
2154 return;
2155
2156 // In Java, we can parse everything up to the parens, which aren't optional.
2157 do {
2158 // There should not be a ;, { or } before the new's open paren.
2159 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2160 return;
2161
2162 // Consume the parens.
2163 if (FormatTok->is(tok::l_paren)) {
2164 parseParens();
2165
2166 // If there is a class body of an anonymous class, consume that as child.
2167 if (FormatTok->is(tok::l_brace))
2168 parseChildBlock();
2169 return;
2170 }
2171 nextToken();
2172 } while (!eof());
2173 }
2174
parseForOrWhileLoop()2175 void UnwrappedLineParser::parseForOrWhileLoop() {
2176 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2177 "'for', 'while' or foreach macro expected");
2178 nextToken();
2179 // JS' for await ( ...
2180 if (Style.Language == FormatStyle::LK_JavaScript &&
2181 FormatTok->is(Keywords.kw_await))
2182 nextToken();
2183 if (FormatTok->Tok.is(tok::l_paren))
2184 parseParens();
2185 if (FormatTok->Tok.is(tok::l_brace)) {
2186 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2187 parseBlock(/*MustBeDeclaration=*/false);
2188 addUnwrappedLine();
2189 } else {
2190 addUnwrappedLine();
2191 ++Line->Level;
2192 parseStructuralElement();
2193 --Line->Level;
2194 }
2195 }
2196
parseDoWhile()2197 void UnwrappedLineParser::parseDoWhile() {
2198 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2199 nextToken();
2200 if (FormatTok->Tok.is(tok::l_brace)) {
2201 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2202 parseBlock(/*MustBeDeclaration=*/false);
2203 if (Style.BraceWrapping.BeforeWhile)
2204 addUnwrappedLine();
2205 } else {
2206 addUnwrappedLine();
2207 ++Line->Level;
2208 parseStructuralElement();
2209 --Line->Level;
2210 }
2211
2212 // FIXME: Add error handling.
2213 if (!FormatTok->Tok.is(tok::kw_while)) {
2214 addUnwrappedLine();
2215 return;
2216 }
2217
2218 nextToken();
2219 parseStructuralElement();
2220 }
2221
parseLabel(bool LeftAlignLabel)2222 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2223 nextToken();
2224 unsigned OldLineLevel = Line->Level;
2225 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2226 --Line->Level;
2227 if (LeftAlignLabel)
2228 Line->Level = 0;
2229 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2230 FormatTok->Tok.is(tok::l_brace)) {
2231 CompoundStatementIndenter Indenter(this, Line->Level,
2232 Style.BraceWrapping.AfterCaseLabel,
2233 Style.BraceWrapping.IndentBraces);
2234 parseBlock(/*MustBeDeclaration=*/false);
2235 if (FormatTok->Tok.is(tok::kw_break)) {
2236 if (Style.BraceWrapping.AfterControlStatement ==
2237 FormatStyle::BWACS_Always) {
2238 addUnwrappedLine();
2239 if (!Style.IndentCaseBlocks &&
2240 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2241 Line->Level++;
2242 }
2243 }
2244 parseStructuralElement();
2245 }
2246 addUnwrappedLine();
2247 } else {
2248 if (FormatTok->is(tok::semi))
2249 nextToken();
2250 addUnwrappedLine();
2251 }
2252 Line->Level = OldLineLevel;
2253 if (FormatTok->isNot(tok::l_brace)) {
2254 parseStructuralElement();
2255 addUnwrappedLine();
2256 }
2257 }
2258
parseCaseLabel()2259 void UnwrappedLineParser::parseCaseLabel() {
2260 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2261 // FIXME: fix handling of complex expressions here.
2262 do {
2263 nextToken();
2264 } while (!eof() && !FormatTok->Tok.is(tok::colon));
2265 parseLabel();
2266 }
2267
parseSwitch()2268 void UnwrappedLineParser::parseSwitch() {
2269 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2270 nextToken();
2271 if (FormatTok->Tok.is(tok::l_paren))
2272 parseParens();
2273 if (FormatTok->Tok.is(tok::l_brace)) {
2274 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2275 parseBlock(/*MustBeDeclaration=*/false);
2276 addUnwrappedLine();
2277 } else {
2278 addUnwrappedLine();
2279 ++Line->Level;
2280 parseStructuralElement();
2281 --Line->Level;
2282 }
2283 }
2284
parseAccessSpecifier()2285 void UnwrappedLineParser::parseAccessSpecifier() {
2286 nextToken();
2287 // Understand Qt's slots.
2288 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2289 nextToken();
2290 // Otherwise, we don't know what it is, and we'd better keep the next token.
2291 if (FormatTok->Tok.is(tok::colon))
2292 nextToken();
2293 addUnwrappedLine();
2294 }
2295
parseConcept()2296 void UnwrappedLineParser::parseConcept() {
2297 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2298 nextToken();
2299 if (!FormatTok->Tok.is(tok::identifier))
2300 return;
2301 nextToken();
2302 if (!FormatTok->Tok.is(tok::equal))
2303 return;
2304 nextToken();
2305 if (FormatTok->Tok.is(tok::kw_requires)) {
2306 nextToken();
2307 parseRequiresExpression(Line->Level);
2308 } else {
2309 parseConstraintExpression(Line->Level);
2310 }
2311 }
2312
parseRequiresExpression(unsigned int OriginalLevel)2313 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2314 // requires (R range)
2315 if (FormatTok->Tok.is(tok::l_paren)) {
2316 parseParens();
2317 if (Style.IndentRequires && OriginalLevel != Line->Level) {
2318 addUnwrappedLine();
2319 --Line->Level;
2320 }
2321 }
2322
2323 if (FormatTok->Tok.is(tok::l_brace)) {
2324 if (Style.BraceWrapping.AfterFunction)
2325 addUnwrappedLine();
2326 FormatTok->setType(TT_FunctionLBrace);
2327 parseBlock(/*MustBeDeclaration=*/false);
2328 addUnwrappedLine();
2329 } else {
2330 parseConstraintExpression(OriginalLevel);
2331 }
2332 }
2333
// Parses a constraint expression: a sequence of terms joined by "&&" or "||",
// where each term starts with an identifier, "requires" or "::". Each joining
// operator is typed as TT_ConstraintJunctions. \p OriginalLevel is the line
// level to compare against when undoing the extra indentation applied under
// Style.IndentRequires.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the rest of a (possibly qualified, possibly templated) name.
    // Template argument lists are parsed as braced lists closed by ">".
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // A nested "requires" is handed over without being consumed here.
    if (FormatTok->Tok.is(tok::kw_requires)) {
      parseRequiresExpression(OriginalLevel);
    }
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock(/*MustBeDeclaration=*/false);
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ":" ends the expression; it is left for the caller.
    if (FormatTok->Tok.is(tok::colon)) {
      return;
    }
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No further "&&"/"||": the expression is complete. Emit the line
      // unless the previous token is one that starts a term, and undo the
      // requires indentation if the level differs from the original one.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon)) {
        addUnwrappedLine();
      }
      if (Style.IndentRequires && OriginalLevel != Line->Level) {
        --Line->Level;
      }
      break;
    } else {
      FormatTok->setType(TT_ConstraintJunctions);
    }

    // Consume the "&&" / "||" and continue with the next term.
    nextToken();
  }
}
2391
parseRequires()2392 void UnwrappedLineParser::parseRequires() {
2393 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2394
2395 unsigned OriginalLevel = Line->Level;
2396 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2397 addUnwrappedLine();
2398 if (Style.IndentRequires) {
2399 Line->Level++;
2400 }
2401 }
2402 nextToken();
2403
2404 parseRequiresExpression(OriginalLevel);
2405 }
2406
parseEnum()2407 bool UnwrappedLineParser::parseEnum() {
2408 // Won't be 'enum' for NS_ENUMs.
2409 if (FormatTok->Tok.is(tok::kw_enum))
2410 nextToken();
2411
2412 // In TypeScript, "enum" can also be used as property name, e.g. in interface
2413 // declarations. An "enum" keyword followed by a colon would be a syntax
2414 // error and thus assume it is just an identifier.
2415 if (Style.Language == FormatStyle::LK_JavaScript &&
2416 FormatTok->isOneOf(tok::colon, tok::question))
2417 return false;
2418
2419 // In protobuf, "enum" can be used as a field name.
2420 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2421 return false;
2422
2423 // Eat up enum class ...
2424 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2425 nextToken();
2426
2427 while (FormatTok->Tok.getIdentifierInfo() ||
2428 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2429 tok::greater, tok::comma, tok::question)) {
2430 nextToken();
2431 // We can have macros or attributes in between 'enum' and the enum name.
2432 if (FormatTok->is(tok::l_paren))
2433 parseParens();
2434 if (FormatTok->is(tok::identifier)) {
2435 nextToken();
2436 // If there are two identifiers in a row, this is likely an elaborate
2437 // return type. In Java, this can be "implements", etc.
2438 if (Style.isCpp() && FormatTok->is(tok::identifier))
2439 return false;
2440 }
2441 }
2442
2443 // Just a declaration or something is wrong.
2444 if (FormatTok->isNot(tok::l_brace))
2445 return true;
2446 FormatTok->setBlockKind(BK_Block);
2447
2448 if (Style.Language == FormatStyle::LK_Java) {
2449 // Java enums are different.
2450 parseJavaEnumBody();
2451 return true;
2452 }
2453 if (Style.Language == FormatStyle::LK_Proto) {
2454 parseBlock(/*MustBeDeclaration=*/true);
2455 return true;
2456 }
2457
2458 if (!Style.AllowShortEnumsOnASingleLine)
2459 addUnwrappedLine();
2460 // Parse enum body.
2461 nextToken();
2462 if (!Style.AllowShortEnumsOnASingleLine) {
2463 addUnwrappedLine();
2464 Line->Level += 1;
2465 }
2466 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2467 /*IsEnum=*/true);
2468 if (!Style.AllowShortEnumsOnASingleLine)
2469 Line->Level -= 1;
2470 if (HasError) {
2471 if (FormatTok->is(tok::semi))
2472 nextToken();
2473 addUnwrappedLine();
2474 }
2475 return true;
2476
2477 // There is no addUnwrappedLine() here so that we fall through to parsing a
2478 // structural element afterwards. Thus, in "enum A {} n, m;",
2479 // "} n, m;" will end up in one unwrapped line.
2480 }
2481
2482 namespace {
2483 // A class used to set and restore the Token position when peeking
2484 // ahead in the token source.
2485 class ScopedTokenPosition {
2486 unsigned StoredPosition;
2487 FormatTokenSource *Tokens;
2488
2489 public:
ScopedTokenPosition(FormatTokenSource * Tokens)2490 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2491 assert(Tokens && "Tokens expected to not be null");
2492 StoredPosition = Tokens->getPosition();
2493 }
2494
~ScopedTokenPosition()2495 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2496 };
2497 } // namespace
2498
2499 // Look to see if we have [[ by looking ahead, if
2500 // its not then rewind to the original position.
tryToParseSimpleAttribute()2501 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2502 ScopedTokenPosition AutoPosition(Tokens);
2503 FormatToken *Tok = Tokens->getNextToken();
2504 // We already read the first [ check for the second.
2505 if (Tok && !Tok->is(tok::l_square)) {
2506 return false;
2507 }
2508 // Double check that the attribute is just something
2509 // fairly simple.
2510 while (Tok) {
2511 if (Tok->is(tok::r_square)) {
2512 break;
2513 }
2514 Tok = Tokens->getNextToken();
2515 }
2516 Tok = Tokens->getNextToken();
2517 if (Tok && !Tok->is(tok::r_square)) {
2518 return false;
2519 }
2520 Tok = Tokens->getNextToken();
2521 if (Tok && Tok->is(tok::semi)) {
2522 return false;
2523 }
2524 return true;
2525 }
2526
parseJavaEnumBody()2527 void UnwrappedLineParser::parseJavaEnumBody() {
2528 // Determine whether the enum is simple, i.e. does not have a semicolon or
2529 // constants with class bodies. Simple enums can be formatted like braced
2530 // lists, contracted to a single line, etc.
2531 unsigned StoredPosition = Tokens->getPosition();
2532 bool IsSimple = true;
2533 FormatToken *Tok = Tokens->getNextToken();
2534 while (Tok) {
2535 if (Tok->is(tok::r_brace))
2536 break;
2537 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2538 IsSimple = false;
2539 break;
2540 }
2541 // FIXME: This will also mark enums with braces in the arguments to enum
2542 // constants as "not simple". This is probably fine in practice, though.
2543 Tok = Tokens->getNextToken();
2544 }
2545 FormatTok = Tokens->setPosition(StoredPosition);
2546
2547 if (IsSimple) {
2548 nextToken();
2549 parseBracedList();
2550 addUnwrappedLine();
2551 return;
2552 }
2553
2554 // Parse the body of a more complex enum.
2555 // First add a line for everything up to the "{".
2556 nextToken();
2557 addUnwrappedLine();
2558 ++Line->Level;
2559
2560 // Parse the enum constants.
2561 while (FormatTok) {
2562 if (FormatTok->is(tok::l_brace)) {
2563 // Parse the constant's class body.
2564 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2565 /*MunchSemi=*/false);
2566 } else if (FormatTok->is(tok::l_paren)) {
2567 parseParens();
2568 } else if (FormatTok->is(tok::comma)) {
2569 nextToken();
2570 addUnwrappedLine();
2571 } else if (FormatTok->is(tok::semi)) {
2572 nextToken();
2573 addUnwrappedLine();
2574 break;
2575 } else if (FormatTok->is(tok::r_brace)) {
2576 addUnwrappedLine();
2577 break;
2578 } else {
2579 nextToken();
2580 }
2581 }
2582
2583 // Parse the class body after the enum's ";" if any.
2584 parseLevel(/*HasOpeningBrace=*/true);
2585 nextToken();
2586 --Line->Level;
2587 addUnwrappedLine();
2588 }
2589
// Parses a record (the current token is its introducing keyword): the name
// with any macros/attributes in front of it, an optional base/template part,
// and the body if a "{" follows. When \p ParseAsExpr is set the body is
// parsed as a child block instead of a declaration block.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the introducing token; it decides below whether to break before
  // the opening brace.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier that is not entirely upper case is taken to be a regular
    // name rather than a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip the base clause / template arguments until the class body, a ";"
    // or the end of the input is reached.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list starts the body.
        if (!tryToParseBracedList())
          break;
      }
      // A ";" means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2674
parseObjCMethod()2675 void UnwrappedLineParser::parseObjCMethod() {
2676 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2677 "'(' or identifier expected.");
2678 do {
2679 if (FormatTok->Tok.is(tok::semi)) {
2680 nextToken();
2681 addUnwrappedLine();
2682 return;
2683 } else if (FormatTok->Tok.is(tok::l_brace)) {
2684 if (Style.BraceWrapping.AfterFunction)
2685 addUnwrappedLine();
2686 parseBlock(/*MustBeDeclaration=*/false);
2687 addUnwrappedLine();
2688 return;
2689 } else {
2690 nextToken();
2691 }
2692 } while (!eof());
2693 }
2694
parseObjCProtocolList()2695 void UnwrappedLineParser::parseObjCProtocolList() {
2696 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2697 do {
2698 nextToken();
2699 // Early exit in case someone forgot a close angle.
2700 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2701 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2702 return;
2703 } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2704 nextToken(); // Skip '>'.
2705 }
2706
parseObjCUntilAtEnd()2707 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2708 do {
2709 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2710 nextToken();
2711 addUnwrappedLine();
2712 break;
2713 }
2714 if (FormatTok->is(tok::l_brace)) {
2715 parseBlock(/*MustBeDeclaration=*/false);
2716 // In ObjC interfaces, nothing should be following the "}".
2717 addUnwrappedLine();
2718 } else if (FormatTok->is(tok::r_brace)) {
2719 // Ignore stray "}". parseStructuralElement doesn't consume them.
2720 nextToken();
2721 addUnwrappedLine();
2722 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2723 nextToken();
2724 parseObjCMethod();
2725 } else {
2726 parseStructuralElement();
2727 }
2728 } while (!eof());
2729 }
2730
parseObjCInterfaceOrImplementation()2731 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2732 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2733 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2734 nextToken();
2735 nextToken(); // interface name
2736
2737 // @interface can be followed by a lightweight generic
2738 // specialization list, then either a base class or a category.
2739 if (FormatTok->Tok.is(tok::less)) {
2740 parseObjCLightweightGenerics();
2741 }
2742 if (FormatTok->Tok.is(tok::colon)) {
2743 nextToken();
2744 nextToken(); // base class name
2745 // The base class can also have lightweight generics applied to it.
2746 if (FormatTok->Tok.is(tok::less)) {
2747 parseObjCLightweightGenerics();
2748 }
2749 } else if (FormatTok->Tok.is(tok::l_paren))
2750 // Skip category, if present.
2751 parseParens();
2752
2753 if (FormatTok->Tok.is(tok::less))
2754 parseObjCProtocolList();
2755
2756 if (FormatTok->Tok.is(tok::l_brace)) {
2757 if (Style.BraceWrapping.AfterObjCDeclaration)
2758 addUnwrappedLine();
2759 parseBlock(/*MustBeDeclaration=*/true);
2760 }
2761
2762 // With instance variables, this puts '}' on its own line. Without instance
2763 // variables, this ends the @interface line.
2764 addUnwrappedLine();
2765
2766 parseObjCUntilAtEnd();
2767 }
2768
parseObjCLightweightGenerics()2769 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2770 assert(FormatTok->Tok.is(tok::less));
2771 // Unlike protocol lists, generic parameterizations support
2772 // nested angles:
2773 //
2774 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2775 // NSObject <NSCopying, NSSecureCoding>
2776 //
2777 // so we need to count how many open angles we have left.
2778 unsigned NumOpenAngles = 1;
2779 do {
2780 nextToken();
2781 // Early exit in case someone forgot a close angle.
2782 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2783 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2784 break;
2785 if (FormatTok->Tok.is(tok::less))
2786 ++NumOpenAngles;
2787 else if (FormatTok->Tok.is(tok::greater)) {
2788 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2789 --NumOpenAngles;
2790 }
2791 } while (!eof() && NumOpenAngles != 0);
2792 nextToken(); // Skip '>'.
2793 }
2794
2795 // Returns true for the declaration/definition form of @protocol,
2796 // false for the expression form.
parseObjCProtocol()2797 bool UnwrappedLineParser::parseObjCProtocol() {
2798 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2799 nextToken();
2800
2801 if (FormatTok->is(tok::l_paren))
2802 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2803 return false;
2804
2805 // The definition/declaration form,
2806 // @protocol Foo
2807 // - (int)someMethod;
2808 // @end
2809
2810 nextToken(); // protocol name
2811
2812 if (FormatTok->Tok.is(tok::less))
2813 parseObjCProtocolList();
2814
2815 // Check for protocol declaration.
2816 if (FormatTok->Tok.is(tok::semi)) {
2817 nextToken();
2818 addUnwrappedLine();
2819 return true;
2820 }
2821
2822 addUnwrappedLine();
2823 parseObjCUntilAtEnd();
2824 return true;
2825 }
2826
parseJavaScriptEs6ImportExport()2827 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2828 bool IsImport = FormatTok->is(Keywords.kw_import);
2829 assert(IsImport || FormatTok->is(tok::kw_export));
2830 nextToken();
2831
2832 // Consume the "default" in "export default class/function".
2833 if (FormatTok->is(tok::kw_default))
2834 nextToken();
2835
2836 // Consume "async function", "function" and "default function", so that these
2837 // get parsed as free-standing JS functions, i.e. do not require a trailing
2838 // semicolon.
2839 if (FormatTok->is(Keywords.kw_async))
2840 nextToken();
2841 if (FormatTok->is(Keywords.kw_function)) {
2842 nextToken();
2843 return;
2844 }
2845
2846 // For imports, `export *`, `export {...}`, consume the rest of the line up
2847 // to the terminating `;`. For everything else, just return and continue
2848 // parsing the structural element, i.e. the declaration or expression for
2849 // `export default`.
2850 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2851 !FormatTok->isStringLiteral())
2852 return;
2853
2854 while (!eof()) {
2855 if (FormatTok->is(tok::semi))
2856 return;
2857 if (Line->Tokens.empty()) {
2858 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2859 // import statement should terminate.
2860 return;
2861 }
2862 if (FormatTok->is(tok::l_brace)) {
2863 FormatTok->setBlockKind(BK_Block);
2864 nextToken();
2865 parseBracedList();
2866 } else {
2867 nextToken();
2868 }
2869 }
2870 }
2871
parseStatementMacro()2872 void UnwrappedLineParser::parseStatementMacro() {
2873 nextToken();
2874 if (FormatTok->is(tok::l_paren))
2875 parseParens();
2876 if (FormatTok->is(tok::semi))
2877 nextToken();
2878 addUnwrappedLine();
2879 }
2880
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")2881 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2882 StringRef Prefix = "") {
2883 llvm::dbgs() << Prefix << "Line(" << Line.Level
2884 << ", FSC=" << Line.FirstStartColumn << ")"
2885 << (Line.InPPDirective ? " MACRO" : "") << ": ";
2886 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2887 E = Line.Tokens.end();
2888 I != E; ++I) {
2889 llvm::dbgs() << I->Tok->Tok.getName() << "["
2890 << "T=" << (unsigned)I->Tok->getType()
2891 << ", OC=" << I->Tok->OriginalColumn << "] ";
2892 }
2893 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2894 E = Line.Tokens.end();
2895 I != E; ++I) {
2896 const UnwrappedLineNode &Node = *I;
2897 for (SmallVectorImpl<UnwrappedLine>::const_iterator
2898 I = Node.Children.begin(),
2899 E = Node.Children.end();
2900 I != E; ++I) {
2901 printDebugInfo(*I, "\nChild: ");
2902 }
2903 }
2904 llvm::dbgs() << "\n";
2905 }
2906
addUnwrappedLine()2907 void UnwrappedLineParser::addUnwrappedLine() {
2908 if (Line->Tokens.empty())
2909 return;
2910 LLVM_DEBUG({
2911 if (CurrentLines == &Lines)
2912 printDebugInfo(*Line);
2913 });
2914 CurrentLines->push_back(std::move(*Line));
2915 Line->Tokens.clear();
2916 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2917 Line->FirstStartColumn = 0;
2918 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2919 CurrentLines->append(
2920 std::make_move_iterator(PreprocessorDirectives.begin()),
2921 std::make_move_iterator(PreprocessorDirectives.end()));
2922 PreprocessorDirectives.clear();
2923 }
2924 // Disconnect the current token from the last token on the previous line.
2925 FormatTok->Previous = nullptr;
2926 }
2927
eof() const2928 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2929
isOnNewLine(const FormatToken & FormatTok)2930 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2931 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2932 FormatTok.NewlinesBefore > 0;
2933 }
2934
2935 // Checks if \p FormatTok is a line comment that continues the line comment
2936 // section on \p Line.
2937 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)2938 continuesLineCommentSection(const FormatToken &FormatTok,
2939 const UnwrappedLine &Line,
2940 const llvm::Regex &CommentPragmasRegex) {
2941 if (Line.Tokens.empty())
2942 return false;
2943
2944 StringRef IndentContent = FormatTok.TokenText;
2945 if (FormatTok.TokenText.startswith("//") ||
2946 FormatTok.TokenText.startswith("/*"))
2947 IndentContent = FormatTok.TokenText.substr(2);
2948 if (CommentPragmasRegex.match(IndentContent))
2949 return false;
2950
2951 // If Line starts with a line comment, then FormatTok continues the comment
2952 // section if its original column is greater or equal to the original start
2953 // column of the line.
2954 //
2955 // Define the min column token of a line as follows: if a line ends in '{' or
2956 // contains a '{' followed by a line comment, then the min column token is
2957 // that '{'. Otherwise, the min column token of the line is the first token of
2958 // the line.
2959 //
2960 // If Line starts with a token other than a line comment, then FormatTok
2961 // continues the comment section if its original column is greater than the
2962 // original start column of the min column token of the line.
2963 //
2964 // For example, the second line comment continues the first in these cases:
2965 //
2966 // // first line
2967 // // second line
2968 //
2969 // and:
2970 //
2971 // // first line
2972 // // second line
2973 //
2974 // and:
2975 //
2976 // int i; // first line
2977 // // second line
2978 //
2979 // and:
2980 //
2981 // do { // first line
2982 // // second line
2983 // int i;
2984 // } while (true);
2985 //
2986 // and:
2987 //
2988 // enum {
2989 // a, // first line
2990 // // second line
2991 // b
2992 // };
2993 //
2994 // The second line comment doesn't continue the first in these cases:
2995 //
2996 // // first line
2997 // // second line
2998 //
2999 // and:
3000 //
3001 // int i; // first line
3002 // // second line
3003 //
3004 // and:
3005 //
3006 // do { // first line
3007 // // second line
3008 // int i;
3009 // } while (true);
3010 //
3011 // and:
3012 //
3013 // enum {
3014 // a, // first line
3015 // // second line
3016 // };
3017 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3018
3019 // Scan for '{//'. If found, use the column of '{' as a min column for line
3020 // comment section continuation.
3021 const FormatToken *PreviousToken = nullptr;
3022 for (const UnwrappedLineNode &Node : Line.Tokens) {
3023 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3024 isLineComment(*Node.Tok)) {
3025 MinColumnToken = PreviousToken;
3026 break;
3027 }
3028 PreviousToken = Node.Tok;
3029
3030 // Grab the last newline preceding a token in this unwrapped line.
3031 if (Node.Tok->NewlinesBefore > 0) {
3032 MinColumnToken = Node.Tok;
3033 }
3034 }
3035 if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3036 MinColumnToken = PreviousToken;
3037 }
3038
3039 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3040 MinColumnToken);
3041 }
3042
flushComments(bool NewlineBeforeNext)3043 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3044 bool JustComments = Line->Tokens.empty();
3045 for (SmallVectorImpl<FormatToken *>::const_iterator
3046 I = CommentsBeforeNextToken.begin(),
3047 E = CommentsBeforeNextToken.end();
3048 I != E; ++I) {
3049 // Line comments that belong to the same line comment section are put on the
3050 // same line since later we might want to reflow content between them.
3051 // Additional fine-grained breaking of line comment sections is controlled
3052 // by the class BreakableLineCommentSection in case it is desirable to keep
3053 // several line comment sections in the same unwrapped line.
3054 //
3055 // FIXME: Consider putting separate line comment sections as children to the
3056 // unwrapped line instead.
3057 (*I)->ContinuesLineCommentSection =
3058 continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3059 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3060 addUnwrappedLine();
3061 pushToken(*I);
3062 }
3063 if (NewlineBeforeNext && JustComments)
3064 addUnwrappedLine();
3065 CommentsBeforeNextToken.clear();
3066 }
3067
nextToken(int LevelDifference)3068 void UnwrappedLineParser::nextToken(int LevelDifference) {
3069 if (eof())
3070 return;
3071 flushComments(isOnNewLine(*FormatTok));
3072 pushToken(FormatTok);
3073 FormatToken *Previous = FormatTok;
3074 if (Style.Language != FormatStyle::LK_JavaScript)
3075 readToken(LevelDifference);
3076 else
3077 readTokenWithJavaScriptASI();
3078 FormatTok->Previous = Previous;
3079 }
3080
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)3081 void UnwrappedLineParser::distributeComments(
3082 const SmallVectorImpl<FormatToken *> &Comments,
3083 const FormatToken *NextTok) {
3084 // Whether or not a line comment token continues a line is controlled by
3085 // the method continuesLineCommentSection, with the following caveat:
3086 //
3087 // Define a trail of Comments to be a nonempty proper postfix of Comments such
3088 // that each comment line from the trail is aligned with the next token, if
3089 // the next token exists. If a trail exists, the beginning of the maximal
3090 // trail is marked as a start of a new comment section.
3091 //
3092 // For example in this code:
3093 //
3094 // int a; // line about a
3095 // // line 1 about b
3096 // // line 2 about b
3097 // int b;
3098 //
3099 // the two lines about b form a maximal trail, so there are two sections, the
3100 // first one consisting of the single comment "// line about a" and the
3101 // second one consisting of the next two comments.
3102 if (Comments.empty())
3103 return;
3104 bool ShouldPushCommentsInCurrentLine = true;
3105 bool HasTrailAlignedWithNextToken = false;
3106 unsigned StartOfTrailAlignedWithNextToken = 0;
3107 if (NextTok) {
3108 // We are skipping the first element intentionally.
3109 for (unsigned i = Comments.size() - 1; i > 0; --i) {
3110 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3111 HasTrailAlignedWithNextToken = true;
3112 StartOfTrailAlignedWithNextToken = i;
3113 }
3114 }
3115 }
3116 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3117 FormatToken *FormatTok = Comments[i];
3118 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3119 FormatTok->ContinuesLineCommentSection = false;
3120 } else {
3121 FormatTok->ContinuesLineCommentSection =
3122 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3123 }
3124 if (!FormatTok->ContinuesLineCommentSection &&
3125 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3126 ShouldPushCommentsInCurrentLine = false;
3127 }
3128 if (ShouldPushCommentsInCurrentLine) {
3129 pushToken(FormatTok);
3130 } else {
3131 CommentsBeforeNextToken.push_back(FormatTok);
3132 }
3133 }
3134 }
3135
readToken(int LevelDifference)3136 void UnwrappedLineParser::readToken(int LevelDifference) {
3137 SmallVector<FormatToken *, 1> Comments;
3138 do {
3139 FormatTok = Tokens->getNextToken();
3140 assert(FormatTok);
3141 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3142 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3143 distributeComments(Comments, FormatTok);
3144 Comments.clear();
3145 // If there is an unfinished unwrapped line, we flush the preprocessor
3146 // directives only after that unwrapped line was finished later.
3147 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3148 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3149 assert((LevelDifference >= 0 ||
3150 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3151 "LevelDifference makes Line->Level negative");
3152 Line->Level += LevelDifference;
3153 // Comments stored before the preprocessor directive need to be output
3154 // before the preprocessor directive, at the same level as the
3155 // preprocessor directive, as we consider them to apply to the directive.
3156 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3157 PPBranchLevel > 0)
3158 Line->Level += PPBranchLevel;
3159 flushComments(isOnNewLine(*FormatTok));
3160 parsePPDirective();
3161 }
3162 while (FormatTok->getType() == TT_ConflictStart ||
3163 FormatTok->getType() == TT_ConflictEnd ||
3164 FormatTok->getType() == TT_ConflictAlternative) {
3165 if (FormatTok->getType() == TT_ConflictStart) {
3166 conditionalCompilationStart(/*Unreachable=*/false);
3167 } else if (FormatTok->getType() == TT_ConflictAlternative) {
3168 conditionalCompilationAlternative();
3169 } else if (FormatTok->getType() == TT_ConflictEnd) {
3170 conditionalCompilationEnd();
3171 }
3172 FormatTok = Tokens->getNextToken();
3173 FormatTok->MustBreakBefore = true;
3174 }
3175
3176 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3177 !Line->InPPDirective) {
3178 continue;
3179 }
3180
3181 if (!FormatTok->Tok.is(tok::comment)) {
3182 distributeComments(Comments, FormatTok);
3183 Comments.clear();
3184 return;
3185 }
3186
3187 Comments.push_back(FormatTok);
3188 } while (!eof());
3189
3190 distributeComments(Comments, nullptr);
3191 Comments.clear();
3192 }
3193
pushToken(FormatToken * Tok)3194 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3195 Line->Tokens.push_back(UnwrappedLineNode(Tok));
3196 if (MustBreakBeforeNextToken) {
3197 Line->Tokens.back().Tok->MustBreakBefore = true;
3198 MustBreakBeforeNextToken = false;
3199 }
3200 }
3201
3202 } // end namespace format
3203 } // end namespace clang
3204