1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #define DEBUG_TYPE "format-parser"
17
18 #include "UnwrappedLineParser.h"
19 #include "llvm/Support/Debug.h"
20
21 namespace clang {
22 namespace format {
23
24 class FormatTokenSource {
25 public:
~FormatTokenSource()26 virtual ~FormatTokenSource() {}
27 virtual FormatToken *getNextToken() = 0;
28
29 virtual unsigned getPosition() = 0;
30 virtual FormatToken *setPosition(unsigned Position) = 0;
31 };
32
33 namespace {
34
35 class ScopedDeclarationState {
36 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38 bool MustBeDeclaration)
39 : Line(Line), Stack(Stack) {
40 Line.MustBeDeclaration = MustBeDeclaration;
41 Stack.push_back(MustBeDeclaration);
42 }
~ScopedDeclarationState()43 ~ScopedDeclarationState() {
44 Stack.pop_back();
45 if (!Stack.empty())
46 Line.MustBeDeclaration = Stack.back();
47 else
48 Line.MustBeDeclaration = true;
49 }
50
51 private:
52 UnwrappedLine &Line;
53 std::vector<bool> &Stack;
54 };
55
56 class ScopedMacroState : public FormatTokenSource {
57 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken,bool & StructuralError)58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59 FormatToken *&ResetToken, bool &StructuralError)
60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62 StructuralError(StructuralError),
63 PreviousStructuralError(StructuralError), Token(NULL) {
64 TokenSource = this;
65 Line.Level = 0;
66 Line.InPPDirective = true;
67 }
68
~ScopedMacroState()69 ~ScopedMacroState() {
70 TokenSource = PreviousTokenSource;
71 ResetToken = Token;
72 Line.InPPDirective = false;
73 Line.Level = PreviousLineLevel;
74 StructuralError = PreviousStructuralError;
75 }
76
getNextToken()77 virtual FormatToken *getNextToken() {
78 // The \c UnwrappedLineParser guards against this by never calling
79 // \c getNextToken() after it has encountered the first eof token.
80 assert(!eof());
81 Token = PreviousTokenSource->getNextToken();
82 if (eof())
83 return getFakeEOF();
84 return Token;
85 }
86
getPosition()87 virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
88
setPosition(unsigned Position)89 virtual FormatToken *setPosition(unsigned Position) {
90 Token = PreviousTokenSource->setPosition(Position);
91 return Token;
92 }
93
94 private:
eof()95 bool eof() { return Token && Token->HasUnescapedNewline; }
96
getFakeEOF()97 FormatToken *getFakeEOF() {
98 static bool EOFInitialized = false;
99 static FormatToken FormatTok;
100 if (!EOFInitialized) {
101 FormatTok.Tok.startToken();
102 FormatTok.Tok.setKind(tok::eof);
103 EOFInitialized = true;
104 }
105 return &FormatTok;
106 }
107
108 UnwrappedLine &Line;
109 FormatTokenSource *&TokenSource;
110 FormatToken *&ResetToken;
111 unsigned PreviousLineLevel;
112 FormatTokenSource *PreviousTokenSource;
113 bool &StructuralError;
114 bool PreviousStructuralError;
115
116 FormatToken *Token;
117 };
118
119 } // end anonymous namespace
120
121 class ScopedLineState {
122 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)123 ScopedLineState(UnwrappedLineParser &Parser,
124 bool SwitchToPreprocessorLines = false)
125 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
126 if (SwitchToPreprocessorLines)
127 Parser.CurrentLines = &Parser.PreprocessorDirectives;
128 PreBlockLine = Parser.Line.take();
129 Parser.Line.reset(new UnwrappedLine());
130 Parser.Line->Level = PreBlockLine->Level;
131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132 }
133
~ScopedLineState()134 ~ScopedLineState() {
135 if (!Parser.Line->Tokens.empty()) {
136 Parser.addUnwrappedLine();
137 }
138 assert(Parser.Line->Tokens.empty());
139 Parser.Line.reset(PreBlockLine);
140 Parser.MustBreakBeforeNextToken = true;
141 if (SwitchToPreprocessorLines)
142 Parser.CurrentLines = &Parser.Lines;
143 }
144
145 private:
146 UnwrappedLineParser &Parser;
147 const bool SwitchToPreprocessorLines;
148
149 UnwrappedLine *PreBlockLine;
150 };
151
152 namespace {
153
154 class IndexedTokenSource : public FormatTokenSource {
155 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)156 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
157 : Tokens(Tokens), Position(-1) {}
158
getNextToken()159 virtual FormatToken *getNextToken() {
160 ++Position;
161 return Tokens[Position];
162 }
163
getPosition()164 virtual unsigned getPosition() {
165 assert(Position >= 0);
166 return Position;
167 }
168
setPosition(unsigned P)169 virtual FormatToken *setPosition(unsigned P) {
170 Position = P;
171 return Tokens[Position];
172 }
173
174 private:
175 ArrayRef<FormatToken *> Tokens;
176 int Position;
177 };
178
179 } // end anonymous namespace
180
UnwrappedLineParser(const FormatStyle & Style,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)181 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
182 ArrayRef<FormatToken *> Tokens,
183 UnwrappedLineConsumer &Callback)
184 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
185 CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
186 Callback(Callback), AllTokens(Tokens) {}
187
parse()188 bool UnwrappedLineParser::parse() {
189 DEBUG(llvm::dbgs() << "----\n");
190 IndexedTokenSource TokenSource(AllTokens);
191 Tokens = &TokenSource;
192 readToken();
193 parseFile();
194 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
195 I != E; ++I) {
196 Callback.consumeUnwrappedLine(*I);
197 }
198
199 // Create line with eof token.
200 pushToken(FormatTok);
201 Callback.consumeUnwrappedLine(*Line);
202 return StructuralError;
203 }
204
parseFile()205 void UnwrappedLineParser::parseFile() {
206 ScopedDeclarationState DeclarationState(
207 *Line, DeclarationScopeStack,
208 /*MustBeDeclaration=*/ !Line->InPPDirective);
209 parseLevel(/*HasOpeningBrace=*/false);
210 // Make sure to format the remaining tokens.
211 flushComments(true);
212 addUnwrappedLine();
213 }
214
parseLevel(bool HasOpeningBrace)215 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
216 bool SwitchLabelEncountered = false;
217 do {
218 switch (FormatTok->Tok.getKind()) {
219 case tok::comment:
220 nextToken();
221 addUnwrappedLine();
222 break;
223 case tok::l_brace:
224 // FIXME: Add parameter whether this can happen - if this happens, we must
225 // be in a non-declaration context.
226 parseBlock(/*MustBeDeclaration=*/false);
227 addUnwrappedLine();
228 break;
229 case tok::r_brace:
230 if (HasOpeningBrace)
231 return;
232 StructuralError = true;
233 nextToken();
234 addUnwrappedLine();
235 break;
236 case tok::kw_default:
237 case tok::kw_case:
238 if (!SwitchLabelEncountered)
239 Line->Level += Style.IndentCaseLabels;
240 SwitchLabelEncountered = true;
241 parseStructuralElement();
242 break;
243 default:
244 parseStructuralElement();
245 break;
246 }
247 } while (!eof());
248 }
249
calculateBraceTypes()250 void UnwrappedLineParser::calculateBraceTypes() {
251 // We'll parse forward through the tokens until we hit
252 // a closing brace or eof - note that getNextToken() will
253 // parse macros, so this will magically work inside macro
254 // definitions, too.
255 unsigned StoredPosition = Tokens->getPosition();
256 unsigned Position = StoredPosition;
257 FormatToken *Tok = FormatTok;
258 // Keep a stack of positions of lbrace tokens. We will
259 // update information about whether an lbrace starts a
260 // braced init list or a different block during the loop.
261 SmallVector<FormatToken *, 8> LBraceStack;
262 assert(Tok->Tok.is(tok::l_brace));
263 do {
264 // Get next none-comment token.
265 FormatToken *NextTok;
266 unsigned ReadTokens = 0;
267 do {
268 NextTok = Tokens->getNextToken();
269 ++ReadTokens;
270 } while (NextTok->is(tok::comment));
271
272 switch (Tok->Tok.getKind()) {
273 case tok::l_brace:
274 LBraceStack.push_back(Tok);
275 break;
276 case tok::r_brace:
277 if (!LBraceStack.empty()) {
278 if (LBraceStack.back()->BlockKind == BK_Unknown) {
279 // If there is a comma, semicolon or right paren after the closing
280 // brace, we assume this is a braced initializer list.
281
282 // FIXME: Note that this currently works only because we do not
283 // use the brace information while inside a braced init list.
284 // Thus, if the parent is a braced init list, we consider all
285 // brace blocks inside it braced init list. That works good enough
286 // for now, but we will need to fix it to correctly handle lambdas.
287 if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
288 tok::l_brace, tok::colon)) {
289 Tok->BlockKind = BK_BracedInit;
290 LBraceStack.back()->BlockKind = BK_BracedInit;
291 } else {
292 Tok->BlockKind = BK_Block;
293 LBraceStack.back()->BlockKind = BK_Block;
294 }
295 }
296 LBraceStack.pop_back();
297 }
298 break;
299 case tok::semi:
300 case tok::kw_if:
301 case tok::kw_while:
302 case tok::kw_for:
303 case tok::kw_switch:
304 case tok::kw_try:
305 if (!LBraceStack.empty())
306 LBraceStack.back()->BlockKind = BK_Block;
307 break;
308 default:
309 break;
310 }
311 Tok = NextTok;
312 Position += ReadTokens;
313 } while (Tok->Tok.isNot(tok::eof));
314 // Assume other blocks for all unclosed opening braces.
315 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
316 if (LBraceStack[i]->BlockKind == BK_Unknown)
317 LBraceStack[i]->BlockKind = BK_Block;
318 }
319 FormatTok = Tokens->setPosition(StoredPosition);
320 }
321
parseBlock(bool MustBeDeclaration,bool AddLevel)322 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel) {
323 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
324 unsigned InitialLevel = Line->Level;
325 nextToken();
326
327 addUnwrappedLine();
328
329 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
330 MustBeDeclaration);
331 if (AddLevel)
332 ++Line->Level;
333 parseLevel(/*HasOpeningBrace=*/true);
334
335 if (!FormatTok->Tok.is(tok::r_brace)) {
336 Line->Level = InitialLevel;
337 StructuralError = true;
338 return;
339 }
340
341 nextToken(); // Munch the closing brace.
342 Line->Level = InitialLevel;
343 }
344
parsePPDirective()345 void UnwrappedLineParser::parsePPDirective() {
346 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
347 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
348 nextToken();
349
350 if (FormatTok->Tok.getIdentifierInfo() == NULL) {
351 parsePPUnknown();
352 return;
353 }
354
355 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
356 case tok::pp_define:
357 parsePPDefine();
358 return;
359 case tok::pp_if:
360 parsePPIf();
361 break;
362 case tok::pp_ifdef:
363 case tok::pp_ifndef:
364 parsePPIfdef();
365 break;
366 case tok::pp_else:
367 parsePPElse();
368 break;
369 case tok::pp_elif:
370 parsePPElIf();
371 break;
372 case tok::pp_endif:
373 parsePPEndIf();
374 break;
375 default:
376 parsePPUnknown();
377 break;
378 }
379 }
380
pushPPConditional()381 void UnwrappedLineParser::pushPPConditional() {
382 if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
383 PPStack.push_back(PP_Unreachable);
384 else
385 PPStack.push_back(PP_Conditional);
386 }
387
parsePPIf()388 void UnwrappedLineParser::parsePPIf() {
389 nextToken();
390 if ((FormatTok->Tok.isLiteral() &&
391 StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
392 "0") ||
393 FormatTok->Tok.is(tok::kw_false)) {
394 PPStack.push_back(PP_Unreachable);
395 } else {
396 pushPPConditional();
397 }
398 parsePPUnknown();
399 }
400
parsePPIfdef()401 void UnwrappedLineParser::parsePPIfdef() {
402 pushPPConditional();
403 parsePPUnknown();
404 }
405
parsePPElse()406 void UnwrappedLineParser::parsePPElse() {
407 if (!PPStack.empty())
408 PPStack.pop_back();
409 pushPPConditional();
410 parsePPUnknown();
411 }
412
parsePPElIf()413 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
414
parsePPEndIf()415 void UnwrappedLineParser::parsePPEndIf() {
416 if (!PPStack.empty())
417 PPStack.pop_back();
418 parsePPUnknown();
419 }
420
parsePPDefine()421 void UnwrappedLineParser::parsePPDefine() {
422 nextToken();
423
424 if (FormatTok->Tok.getKind() != tok::identifier) {
425 parsePPUnknown();
426 return;
427 }
428 nextToken();
429 if (FormatTok->Tok.getKind() == tok::l_paren &&
430 FormatTok->WhitespaceRange.getBegin() ==
431 FormatTok->WhitespaceRange.getEnd()) {
432 parseParens();
433 }
434 addUnwrappedLine();
435 Line->Level = 1;
436
437 // Errors during a preprocessor directive can only affect the layout of the
438 // preprocessor directive, and thus we ignore them. An alternative approach
439 // would be to use the same approach we use on the file level (no
440 // re-indentation if there was a structural error) within the macro
441 // definition.
442 parseFile();
443 }
444
parsePPUnknown()445 void UnwrappedLineParser::parsePPUnknown() {
446 do {
447 nextToken();
448 } while (!eof());
449 addUnwrappedLine();
450 }
451
452 // Here we blacklist certain tokens that are not usually the first token in an
453 // unwrapped line. This is used in attempt to distinguish macro calls without
454 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(clang::Token Tok)455 bool tokenCanStartNewLine(clang::Token Tok) {
456 // Semicolon can be a null-statement, l_square can be a start of a macro or
457 // a C++11 attribute, but this doesn't seem to be common.
458 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
459 Tok.isNot(tok::l_square) &&
460 // Tokens that can only be used as binary operators and a part of
461 // overloaded operator names.
462 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
463 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
464 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
465 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
466 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
467 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
468 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
469 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
470 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
471 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
472 Tok.isNot(tok::lesslessequal) &&
473 // Colon is used in labels, base class lists, initializer lists,
474 // range-based for loops, ternary operator, but should never be the
475 // first token in an unwrapped line.
476 Tok.isNot(tok::colon);
477 }
478
parseStructuralElement()479 void UnwrappedLineParser::parseStructuralElement() {
480 assert(!FormatTok->Tok.is(tok::l_brace));
481 switch (FormatTok->Tok.getKind()) {
482 case tok::at:
483 nextToken();
484 if (FormatTok->Tok.is(tok::l_brace)) {
485 parseBracedList();
486 break;
487 }
488 switch (FormatTok->Tok.getObjCKeywordID()) {
489 case tok::objc_public:
490 case tok::objc_protected:
491 case tok::objc_package:
492 case tok::objc_private:
493 return parseAccessSpecifier();
494 case tok::objc_interface:
495 case tok::objc_implementation:
496 return parseObjCInterfaceOrImplementation();
497 case tok::objc_protocol:
498 return parseObjCProtocol();
499 case tok::objc_end:
500 return; // Handled by the caller.
501 case tok::objc_optional:
502 case tok::objc_required:
503 nextToken();
504 addUnwrappedLine();
505 return;
506 default:
507 break;
508 }
509 break;
510 case tok::kw_namespace:
511 parseNamespace();
512 return;
513 case tok::kw_inline:
514 nextToken();
515 if (FormatTok->Tok.is(tok::kw_namespace)) {
516 parseNamespace();
517 return;
518 }
519 break;
520 case tok::kw_public:
521 case tok::kw_protected:
522 case tok::kw_private:
523 parseAccessSpecifier();
524 return;
525 case tok::kw_if:
526 parseIfThenElse();
527 return;
528 case tok::kw_for:
529 case tok::kw_while:
530 parseForOrWhileLoop();
531 return;
532 case tok::kw_do:
533 parseDoWhile();
534 return;
535 case tok::kw_switch:
536 parseSwitch();
537 return;
538 case tok::kw_default:
539 nextToken();
540 parseLabel();
541 return;
542 case tok::kw_case:
543 parseCaseLabel();
544 return;
545 case tok::kw_return:
546 parseReturn();
547 return;
548 case tok::kw_extern:
549 nextToken();
550 if (FormatTok->Tok.is(tok::string_literal)) {
551 nextToken();
552 if (FormatTok->Tok.is(tok::l_brace)) {
553 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
554 addUnwrappedLine();
555 return;
556 }
557 }
558 // In all other cases, parse the declaration.
559 break;
560 default:
561 break;
562 }
563 do {
564 switch (FormatTok->Tok.getKind()) {
565 case tok::at:
566 nextToken();
567 if (FormatTok->Tok.is(tok::l_brace))
568 parseBracedList();
569 break;
570 case tok::kw_enum:
571 parseEnum();
572 break;
573 case tok::kw_struct:
574 case tok::kw_union:
575 case tok::kw_class:
576 parseRecord();
577 // A record declaration or definition is always the start of a structural
578 // element.
579 break;
580 case tok::semi:
581 nextToken();
582 addUnwrappedLine();
583 return;
584 case tok::r_brace:
585 addUnwrappedLine();
586 return;
587 case tok::l_paren:
588 parseParens();
589 break;
590 case tok::l_brace:
591 if (!tryToParseBracedList()) {
592 // A block outside of parentheses must be the last part of a
593 // structural element.
594 // FIXME: Figure out cases where this is not true, and add projections
595 // for them (the one we know is missing are lambdas).
596 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
597 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
598 Style.BreakBeforeBraces == FormatStyle::BS_Allman)
599 addUnwrappedLine();
600 parseBlock(/*MustBeDeclaration=*/false);
601 addUnwrappedLine();
602 return;
603 }
604 // Otherwise this was a braced init list, and the structural
605 // element continues.
606 break;
607 case tok::identifier: {
608 StringRef Text = FormatTok->TokenText;
609 nextToken();
610 if (Line->Tokens.size() == 1) {
611 if (FormatTok->Tok.is(tok::colon)) {
612 parseLabel();
613 return;
614 }
615 // Recognize function-like macro usages without trailing semicolon.
616 if (FormatTok->Tok.is(tok::l_paren)) {
617 parseParens();
618 if (FormatTok->HasUnescapedNewline &&
619 tokenCanStartNewLine(FormatTok->Tok)) {
620 addUnwrappedLine();
621 return;
622 }
623 } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
624 Text == Text.upper()) {
625 // Recognize free-standing macros like Q_OBJECT.
626 addUnwrappedLine();
627 return;
628 }
629 }
630 break;
631 }
632 case tok::equal:
633 nextToken();
634 if (FormatTok->Tok.is(tok::l_brace)) {
635 parseBracedList();
636 }
637 break;
638 default:
639 nextToken();
640 break;
641 }
642 } while (!eof());
643 }
644
tryToParseBracedList()645 bool UnwrappedLineParser::tryToParseBracedList() {
646 if (FormatTok->BlockKind == BK_Unknown)
647 calculateBraceTypes();
648 assert(FormatTok->BlockKind != BK_Unknown);
649 if (FormatTok->BlockKind == BK_Block)
650 return false;
651 parseBracedList();
652 return true;
653 }
654
parseBracedList()655 void UnwrappedLineParser::parseBracedList() {
656 nextToken();
657
658 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
659 // replace this by using parseAssigmentExpression() inside.
660 do {
661 // FIXME: When we start to support lambdas, we'll want to parse them away
662 // here, otherwise our bail-out scenarios below break. The better solution
663 // might be to just implement a more or less complete expression parser.
664 switch (FormatTok->Tok.getKind()) {
665 case tok::l_brace:
666 parseBracedList();
667 break;
668 case tok::r_brace:
669 nextToken();
670 return;
671 case tok::semi:
672 // Probably a missing closing brace. Bail out.
673 return;
674 case tok::comma:
675 nextToken();
676 break;
677 default:
678 nextToken();
679 break;
680 }
681 } while (!eof());
682 }
683
parseReturn()684 void UnwrappedLineParser::parseReturn() {
685 nextToken();
686
687 do {
688 switch (FormatTok->Tok.getKind()) {
689 case tok::l_brace:
690 parseBracedList();
691 if (FormatTok->Tok.isNot(tok::semi)) {
692 // Assume missing ';'.
693 addUnwrappedLine();
694 return;
695 }
696 break;
697 case tok::l_paren:
698 parseParens();
699 break;
700 case tok::r_brace:
701 // Assume missing ';'.
702 addUnwrappedLine();
703 return;
704 case tok::semi:
705 nextToken();
706 addUnwrappedLine();
707 return;
708 default:
709 nextToken();
710 break;
711 }
712 } while (!eof());
713 }
714
parseParens()715 void UnwrappedLineParser::parseParens() {
716 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
717 nextToken();
718 do {
719 switch (FormatTok->Tok.getKind()) {
720 case tok::l_paren:
721 parseParens();
722 break;
723 case tok::r_paren:
724 nextToken();
725 return;
726 case tok::r_brace:
727 // A "}" inside parenthesis is an error if there wasn't a matching "{".
728 return;
729 case tok::l_brace: {
730 if (!tryToParseBracedList()) {
731 nextToken();
732 {
733 ScopedLineState LineState(*this);
734 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
735 /*MustBeDeclaration=*/false);
736 Line->Level += 1;
737 parseLevel(/*HasOpeningBrace=*/true);
738 Line->Level -= 1;
739 }
740 nextToken();
741 }
742 break;
743 }
744 case tok::at:
745 nextToken();
746 if (FormatTok->Tok.is(tok::l_brace))
747 parseBracedList();
748 break;
749 default:
750 nextToken();
751 break;
752 }
753 } while (!eof());
754 }
755
parseIfThenElse()756 void UnwrappedLineParser::parseIfThenElse() {
757 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
758 nextToken();
759 if (FormatTok->Tok.is(tok::l_paren))
760 parseParens();
761 bool NeedsUnwrappedLine = false;
762 if (FormatTok->Tok.is(tok::l_brace)) {
763 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
764 addUnwrappedLine();
765 parseBlock(/*MustBeDeclaration=*/false);
766 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
767 addUnwrappedLine();
768 else
769 NeedsUnwrappedLine = true;
770 } else {
771 addUnwrappedLine();
772 ++Line->Level;
773 parseStructuralElement();
774 --Line->Level;
775 }
776 if (FormatTok->Tok.is(tok::kw_else)) {
777 nextToken();
778 if (FormatTok->Tok.is(tok::l_brace)) {
779 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
780 addUnwrappedLine();
781 parseBlock(/*MustBeDeclaration=*/false);
782 addUnwrappedLine();
783 } else if (FormatTok->Tok.is(tok::kw_if)) {
784 parseIfThenElse();
785 } else {
786 addUnwrappedLine();
787 ++Line->Level;
788 parseStructuralElement();
789 --Line->Level;
790 }
791 } else if (NeedsUnwrappedLine) {
792 addUnwrappedLine();
793 }
794 }
795
parseNamespace()796 void UnwrappedLineParser::parseNamespace() {
797 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
798 nextToken();
799 if (FormatTok->Tok.is(tok::identifier))
800 nextToken();
801 if (FormatTok->Tok.is(tok::l_brace)) {
802 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
803 Style.BreakBeforeBraces == FormatStyle::BS_Allman)
804 addUnwrappedLine();
805
806 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
807 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
808 DeclarationScopeStack.size() > 1);
809 parseBlock(/*MustBeDeclaration=*/true, AddLevel);
810 // Munch the semicolon after a namespace. This is more common than one would
811 // think. Puttin the semicolon into its own line is very ugly.
812 if (FormatTok->Tok.is(tok::semi))
813 nextToken();
814 addUnwrappedLine();
815 }
816 // FIXME: Add error handling.
817 }
818
parseForOrWhileLoop()819 void UnwrappedLineParser::parseForOrWhileLoop() {
820 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
821 "'for' or 'while' expected");
822 nextToken();
823 if (FormatTok->Tok.is(tok::l_paren))
824 parseParens();
825 if (FormatTok->Tok.is(tok::l_brace)) {
826 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
827 addUnwrappedLine();
828 parseBlock(/*MustBeDeclaration=*/false);
829 addUnwrappedLine();
830 } else {
831 addUnwrappedLine();
832 ++Line->Level;
833 parseStructuralElement();
834 --Line->Level;
835 }
836 }
837
parseDoWhile()838 void UnwrappedLineParser::parseDoWhile() {
839 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
840 nextToken();
841 if (FormatTok->Tok.is(tok::l_brace)) {
842 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
843 addUnwrappedLine();
844 parseBlock(/*MustBeDeclaration=*/false);
845 } else {
846 addUnwrappedLine();
847 ++Line->Level;
848 parseStructuralElement();
849 --Line->Level;
850 }
851
852 // FIXME: Add error handling.
853 if (!FormatTok->Tok.is(tok::kw_while)) {
854 addUnwrappedLine();
855 return;
856 }
857
858 nextToken();
859 parseStructuralElement();
860 }
861
parseLabel()862 void UnwrappedLineParser::parseLabel() {
863 if (FormatTok->Tok.isNot(tok::colon))
864 return;
865 nextToken();
866 unsigned OldLineLevel = Line->Level;
867 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
868 --Line->Level;
869 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
870 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
871 addUnwrappedLine();
872 parseBlock(/*MustBeDeclaration=*/false);
873 if (FormatTok->Tok.is(tok::kw_break)) {
874 // "break;" after "}" on its own line only for BS_Allman
875 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
876 addUnwrappedLine();
877 parseStructuralElement();
878 }
879 }
880 addUnwrappedLine();
881 Line->Level = OldLineLevel;
882 }
883
parseCaseLabel()884 void UnwrappedLineParser::parseCaseLabel() {
885 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
886 // FIXME: fix handling of complex expressions here.
887 do {
888 nextToken();
889 } while (!eof() && !FormatTok->Tok.is(tok::colon));
890 parseLabel();
891 }
892
parseSwitch()893 void UnwrappedLineParser::parseSwitch() {
894 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
895 nextToken();
896 if (FormatTok->Tok.is(tok::l_paren))
897 parseParens();
898 if (FormatTok->Tok.is(tok::l_brace)) {
899 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
900 addUnwrappedLine();
901 parseBlock(/*MustBeDeclaration=*/false);
902 addUnwrappedLine();
903 } else {
904 addUnwrappedLine();
905 ++Line->Level;
906 parseStructuralElement();
907 --Line->Level;
908 }
909 }
910
parseAccessSpecifier()911 void UnwrappedLineParser::parseAccessSpecifier() {
912 nextToken();
913 // Otherwise, we don't know what it is, and we'd better keep the next token.
914 if (FormatTok->Tok.is(tok::colon))
915 nextToken();
916 addUnwrappedLine();
917 }
918
parseEnum()919 void UnwrappedLineParser::parseEnum() {
920 nextToken();
921 if (FormatTok->Tok.is(tok::identifier) ||
922 FormatTok->Tok.is(tok::kw___attribute) ||
923 FormatTok->Tok.is(tok::kw___declspec)) {
924 nextToken();
925 // We can have macros or attributes in between 'enum' and the enum name.
926 if (FormatTok->Tok.is(tok::l_paren)) {
927 parseParens();
928 }
929 if (FormatTok->Tok.is(tok::identifier))
930 nextToken();
931 }
932 if (FormatTok->Tok.is(tok::l_brace)) {
933 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
934 addUnwrappedLine();
935 nextToken();
936 addUnwrappedLine();
937 ++Line->Level;
938 do {
939 switch (FormatTok->Tok.getKind()) {
940 case tok::l_paren:
941 parseParens();
942 break;
943 case tok::r_brace:
944 addUnwrappedLine();
945 nextToken();
946 --Line->Level;
947 return;
948 case tok::comma:
949 nextToken();
950 addUnwrappedLine();
951 break;
952 default:
953 nextToken();
954 break;
955 }
956 } while (!eof());
957 }
958 // We fall through to parsing a structural element afterwards, so that in
959 // enum A {} n, m;
960 // "} n, m;" will end up in one unwrapped line.
961 }
962
parseRecord()963 void UnwrappedLineParser::parseRecord() {
964 nextToken();
965 if (FormatTok->Tok.is(tok::identifier) ||
966 FormatTok->Tok.is(tok::kw___attribute) ||
967 FormatTok->Tok.is(tok::kw___declspec)) {
968 nextToken();
969 // We can have macros or attributes in between 'class' and the class name.
970 if (FormatTok->Tok.is(tok::l_paren)) {
971 parseParens();
972 }
973 // The actual identifier can be a nested name specifier, and in macros
974 // it is often token-pasted.
975 while (FormatTok->Tok.is(tok::identifier) ||
976 FormatTok->Tok.is(tok::coloncolon) ||
977 FormatTok->Tok.is(tok::hashhash))
978 nextToken();
979
980 // Note that parsing away template declarations here leads to incorrectly
981 // accepting function declarations as record declarations.
982 // In general, we cannot solve this problem. Consider:
983 // class A<int> B() {}
984 // which can be a function definition or a class definition when B() is a
985 // macro. If we find enough real-world cases where this is a problem, we
986 // can parse for the 'template' keyword in the beginning of the statement,
987 // and thus rule out the record production in case there is no template
988 // (this would still leave us with an ambiguity between template function
989 // and class declarations).
990 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
991 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
992 if (FormatTok->Tok.is(tok::semi))
993 return;
994 nextToken();
995 }
996 }
997 }
998 if (FormatTok->Tok.is(tok::l_brace)) {
999 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
1000 Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1001 addUnwrappedLine();
1002
1003 parseBlock(/*MustBeDeclaration=*/true);
1004 }
1005 // We fall through to parsing a structural element afterwards, so
1006 // class A {} n, m;
1007 // will end up in one unwrapped line.
1008 }
1009
parseObjCProtocolList()1010 void UnwrappedLineParser::parseObjCProtocolList() {
1011 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1012 do
1013 nextToken();
1014 while (!eof() && FormatTok->Tok.isNot(tok::greater));
1015 nextToken(); // Skip '>'.
1016 }
1017
parseObjCUntilAtEnd()1018 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1019 do {
1020 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1021 nextToken();
1022 addUnwrappedLine();
1023 break;
1024 }
1025 parseStructuralElement();
1026 } while (!eof());
1027 }
1028
parseObjCInterfaceOrImplementation()1029 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1030 nextToken();
1031 nextToken(); // interface name
1032
1033 // @interface can be followed by either a base class, or a category.
1034 if (FormatTok->Tok.is(tok::colon)) {
1035 nextToken();
1036 nextToken(); // base class name
1037 } else if (FormatTok->Tok.is(tok::l_paren))
1038 // Skip category, if present.
1039 parseParens();
1040
1041 if (FormatTok->Tok.is(tok::less))
1042 parseObjCProtocolList();
1043
1044 // If instance variables are present, keep the '{' on the first line too.
1045 if (FormatTok->Tok.is(tok::l_brace))
1046 parseBlock(/*MustBeDeclaration=*/true);
1047
1048 // With instance variables, this puts '}' on its own line. Without instance
1049 // variables, this ends the @interface line.
1050 addUnwrappedLine();
1051
1052 parseObjCUntilAtEnd();
1053 }
1054
parseObjCProtocol()1055 void UnwrappedLineParser::parseObjCProtocol() {
1056 nextToken();
1057 nextToken(); // protocol name
1058
1059 if (FormatTok->Tok.is(tok::less))
1060 parseObjCProtocolList();
1061
1062 // Check for protocol declaration.
1063 if (FormatTok->Tok.is(tok::semi)) {
1064 nextToken();
1065 return addUnwrappedLine();
1066 }
1067
1068 addUnwrappedLine();
1069 parseObjCUntilAtEnd();
1070 }
1071
addUnwrappedLine()1072 void UnwrappedLineParser::addUnwrappedLine() {
1073 if (Line->Tokens.empty())
1074 return;
1075 DEBUG({
1076 llvm::dbgs() << "Line(" << Line->Level << ")"
1077 << (Line->InPPDirective ? " MACRO" : "") << ": ";
1078 for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(),
1079 E = Line->Tokens.end();
1080 I != E; ++I) {
1081 llvm::dbgs() << (*I)->Tok.getName() << " ";
1082 }
1083 llvm::dbgs() << "\n";
1084 });
1085 CurrentLines->push_back(*Line);
1086 Line->Tokens.clear();
1087 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1088 for (std::vector<UnwrappedLine>::iterator
1089 I = PreprocessorDirectives.begin(),
1090 E = PreprocessorDirectives.end();
1091 I != E; ++I) {
1092 CurrentLines->push_back(*I);
1093 }
1094 PreprocessorDirectives.clear();
1095 }
1096 }
1097
eof() const1098 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1099
flushComments(bool NewlineBeforeNext)1100 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1101 bool JustComments = Line->Tokens.empty();
1102 for (SmallVectorImpl<FormatToken *>::const_iterator
1103 I = CommentsBeforeNextToken.begin(),
1104 E = CommentsBeforeNextToken.end();
1105 I != E; ++I) {
1106 if ((*I)->NewlinesBefore && JustComments) {
1107 addUnwrappedLine();
1108 }
1109 pushToken(*I);
1110 }
1111 if (NewlineBeforeNext && JustComments) {
1112 addUnwrappedLine();
1113 }
1114 CommentsBeforeNextToken.clear();
1115 }
1116
nextToken()1117 void UnwrappedLineParser::nextToken() {
1118 if (eof())
1119 return;
1120 flushComments(FormatTok->NewlinesBefore > 0);
1121 pushToken(FormatTok);
1122 readToken();
1123 }
1124
readToken()1125 void UnwrappedLineParser::readToken() {
1126 bool CommentsInCurrentLine = true;
1127 do {
1128 FormatTok = Tokens->getNextToken();
1129 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1130 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1131 // If there is an unfinished unwrapped line, we flush the preprocessor
1132 // directives only after that unwrapped line was finished later.
1133 bool SwitchToPreprocessorLines =
1134 !Line->Tokens.empty() && CurrentLines == &Lines;
1135 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1136 // Comments stored before the preprocessor directive need to be output
1137 // before the preprocessor directive, at the same level as the
1138 // preprocessor directive, as we consider them to apply to the directive.
1139 flushComments(FormatTok->NewlinesBefore > 0);
1140 parsePPDirective();
1141 }
1142
1143 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1144 !Line->InPPDirective) {
1145 continue;
1146 }
1147
1148 if (!FormatTok->Tok.is(tok::comment))
1149 return;
1150 if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
1151 CommentsInCurrentLine = false;
1152 }
1153 if (CommentsInCurrentLine) {
1154 pushToken(FormatTok);
1155 } else {
1156 CommentsBeforeNextToken.push_back(FormatTok);
1157 }
1158 } while (!eof());
1159 }
1160
pushToken(FormatToken * Tok)1161 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1162 Line->Tokens.push_back(Tok);
1163 if (MustBreakBeforeNextToken) {
1164 Line->Tokens.back()->MustBreakBefore = true;
1165 MustBreakBeforeNextToken = false;
1166 }
1167 }
1168
1169 } // end namespace format
1170 } // end namespace clang
1171