1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/Support/Debug.h"
19
20 #define DEBUG_TYPE "format-token-annotator"
21
22 namespace clang {
23 namespace format {
24
25 namespace {
26
27 /// \brief A parser that gathers additional information about tokens.
28 ///
29 /// The \c TokenAnnotator tries to match parentheses and square brackets and
30 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
31 /// into template parameter lists.
32 class AnnotatingParser {
33 public:
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,IdentifierInfo & Ident_in)34 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
35 IdentifierInfo &Ident_in)
36 : Style(Style), Line(Line), CurrentToken(Line.First),
37 KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
38 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
39 resetTokenMetadata(CurrentToken);
40 }
41
42 private:
parseAngle()43 bool parseAngle() {
44 if (!CurrentToken)
45 return false;
46 ScopedContextCreator ContextCreator(*this, tok::less, 10);
47 FormatToken *Left = CurrentToken->Previous;
48 Contexts.back().IsExpression = false;
49 // If there's a template keyword before the opening angle bracket, this is a
50 // template parameter, not an argument.
51 Contexts.back().InTemplateArgument =
52 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
53
54 while (CurrentToken) {
55 if (CurrentToken->is(tok::greater)) {
56 Left->MatchingParen = CurrentToken;
57 CurrentToken->MatchingParen = Left;
58 CurrentToken->Type = TT_TemplateCloser;
59 next();
60 return true;
61 }
62 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
63 tok::question, tok::colon))
64 return false;
65 // If a && or || is found and interpreted as a binary operator, this set
66 // of angles is likely part of something like "a < b && c > d". If the
67 // angles are inside an expression, the ||/&& might also be a binary
68 // operator that was misinterpreted because we are parsing template
69 // parameters.
70 // FIXME: This is getting out of hand, write a decent parser.
71 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
72 ((CurrentToken->Previous->Type == TT_BinaryOperator &&
73 // Toplevel bool expressions do not make lots of sense;
74 // If we're on the top level, it contains only the base context and
75 // the context for the current opening angle bracket.
76 Contexts.size() > 2) ||
77 Contexts[Contexts.size() - 2].IsExpression) &&
78 Line.First->isNot(tok::kw_template))
79 return false;
80 updateParameterCount(Left, CurrentToken);
81 if (!consumeToken())
82 return false;
83 }
84 return false;
85 }
86
/// \brief Parses the contents of a parenthesized group up to its ')'.
///
/// Expects \c CurrentToken to be the token right after the '(' (the '(' is
/// \c CurrentToken->Previous). A \c tok::l_paren context is open for the
/// duration of the call.
///
/// Along the way it classifies the '(' (\c TT_ObjCBlockLParen,
/// \c TT_AttributeParen, \c TT_ObjCMethodExpr, \c TT_FunctionTypeLParen),
/// decides whether the contents are an expression, links both parentheses
/// through \c MatchingParen and records the \c PackingKind of the '('.
///
/// \param LookForDecls If true, an 'identifier' ('*' | '&' | '&&')
/// 'identifier' sequence that is not followed by '=' gets its operator
/// retyped as \c TT_BinaryOperator (at most once per call).
///
/// \returns true once the matching ')' has been found and consumed; false if
/// there are no tokens, a ']' or '}' is hit first, or \c consumeToken() fails.
parseParens(bool LookForDecls=false)87 bool parseParens(bool LookForDecls = false) {
88 if (!CurrentToken)
89 return false;
90 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
91
92 // FIXME: This is a bit of a hack. Do better.
// Only parentheses directly below the top-level context inherit the
// "colon belongs to a range-based for" flag.
93 Contexts.back().ColonIsForRangeExpr =
94 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
95
96 bool StartsObjCMethodExpr = false;
97 FormatToken *Left = CurrentToken->Previous;
98 if (CurrentToken->is(tok::caret)) {
99 // (^ can start a block type.
100 Left->Type = TT_ObjCBlockLParen;
101 } else if (FormatToken *MaybeSel = Left->Previous) {
102 // @selector( starts a selector.
103 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
104 MaybeSel->Previous->is(tok::at)) {
105 StartsObjCMethodExpr = true;
106 }
107 }
108
// Decide from the token in front of the '(' whether the contents are an
// expression; the first matching branch wins.
109 if (Left->Previous &&
110 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if,
111 tok::kw_while, tok::l_paren, tok::comma) ||
112 Left->Previous->Type == TT_BinaryOperator)) {
113 // static_assert, if and while usually contain expressions.
114 Contexts.back().IsExpression = true;
115 } else if (Line.InPPDirective &&
116 (!Left->Previous ||
117 (Left->Previous->isNot(tok::identifier) &&
118 Left->Previous->Type != TT_OverloadedOperator))) {
119 Contexts.back().IsExpression = true;
120 } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
121 Left->Previous->MatchingParen &&
122 Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
123 // This is a parameter list of a lambda expression.
124 Contexts.back().IsExpression = false;
125 } else if (Contexts[Contexts.size() - 2].CaretFound) {
126 // This is the parameter list of an ObjC block.
127 Contexts.back().IsExpression = false;
128 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
129 Left->Type = TT_AttributeParen;
130 } else if (Left->Previous && Left->Previous->IsForEachMacro) {
131 // The first argument to a foreach macro is a declaration.
132 Contexts.back().IsForEachMacro = true;
133 Contexts.back().IsExpression = false;
134 }
135
136 if (StartsObjCMethodExpr) {
137 Contexts.back().ColonIsObjCMethodExpr = true;
138 Left->Type = TT_ObjCMethodExpr;
139 }
140
// A '*' right after the '(' hints at a function (pointer) type.
141 bool MightBeFunctionType = CurrentToken->is(tok::star);
// Inputs for the PackingKind decision made when the ')' is reached.
142 bool HasMultipleLines = false;
143 bool HasMultipleParametersOnALine = false;
144 while (CurrentToken) {
145 // LookForDecls is set when "if (" has been seen. Check for
146 // 'identifier' '*' 'identifier' followed by not '=' -- this
147 // '*' has to be a binary operator but determineStarAmpUsage() will
148 // categorize it as an unary operator, so set the right type here.
149 if (LookForDecls && CurrentToken->Next) {
150 FormatToken *Prev = CurrentToken->getPreviousNonComment();
151 if (Prev) {
152 FormatToken *PrevPrev = Prev->getPreviousNonComment();
153 FormatToken *Next = CurrentToken->Next;
154 if (PrevPrev && PrevPrev->is(tok::identifier) &&
155 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
156 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
157 Prev->Type = TT_BinaryOperator;
158 LookForDecls = false;
159 }
160 }
161 }
162
163 if (CurrentToken->Previous->Type == TT_PointerOrReference &&
164 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
165 tok::coloncolon))
166 MightBeFunctionType = true;
167 if (CurrentToken->Previous->Type == TT_BinaryOperator)
168 Contexts.back().IsExpression = true;
169 if (CurrentToken->is(tok::r_paren)) {
// "(*...)(" -- or "(*...)[" outside an expression -- is a function type.
170 if (MightBeFunctionType && CurrentToken->Next &&
171 (CurrentToken->Next->is(tok::l_paren) ||
172 (CurrentToken->Next->is(tok::l_square) &&
173 !Contexts.back().IsExpression)))
174 Left->Type = TT_FunctionTypeLParen;
175 Left->MatchingParen = CurrentToken;
176 CurrentToken->MatchingParen = Left;
177
178 if (StartsObjCMethodExpr) {
179 CurrentToken->Type = TT_ObjCMethodExpr;
180 if (Contexts.back().FirstObjCSelectorName) {
181 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
182 Contexts.back().LongestObjCSelectorName;
183 }
184 }
185
186 if (Left->Type == TT_AttributeParen)
187 CurrentToken->Type = TT_AttributeParen;
188
189 if (!HasMultipleLines)
190 Left->PackingKind = PPK_Inconclusive;
191 else if (HasMultipleParametersOnALine)
192 Left->PackingKind = PPK_BinPacked;
193 else
194 Left->PackingKind = PPK_OnePerLine;
195
196 next();
197 return true;
198 }
// A closing bracket of another kind means the parentheses are unbalanced.
199 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
200 return false;
201 else if (CurrentToken->is(tok::l_brace))
202 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
// A comma whose successor stays on the same line (and is not a trailing
// comment) means at least two parameters share a line.
203 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
204 !CurrentToken->Next->HasUnescapedNewline &&
205 !CurrentToken->Next->isTrailingComment())
206 HasMultipleParametersOnALine = true;
207 if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
208 CurrentToken->isSimpleTypeSpecifier())
209 Contexts.back().IsExpression = false;
// Remember the token before consuming it: consumeToken() may advance past a
// whole nested group, and the count update needs the token itself.
210 FormatToken *Tok = CurrentToken;
211 if (!consumeToken())
212 return false;
213 updateParameterCount(Left, Tok);
214 if (CurrentToken && CurrentToken->HasUnescapedNewline)
215 HasMultipleLines = true;
216 }
217 return false;
218 }
219
/// \brief Parses the contents of a square-bracketed group up to its ']'.
///
/// Expects \c CurrentToken to be the token right after the '[' (the '[' is
/// \c CurrentToken->Previous). A \c tok::l_square context, marked as an
/// expression, is open for the duration of the call.
///
/// The '[' is classified as \c TT_ObjCMethodExpr,
/// \c TT_ArrayInitializerLSquare or \c TT_ArraySubscriptLSquare; the
/// classification may be revised while the contents are scanned and when the
/// ']' is reached.
///
/// \returns true once the matching ']' has been found and consumed; false if
/// there are no tokens, a ')' or '}' is hit first, or \c consumeToken() fails.
parseSquare()220 bool parseSquare() {
221 if (!CurrentToken)
222 return false;
223
224 // A '[' could be an index subscript (after an identifier or after
225 // ')' or ']'), it could be the start of an Objective-C method
226 // expression, or it could be the start of an Objective-C array literal.
227 FormatToken *Left = CurrentToken->Previous;
228 FormatToken *Parent = Left->getPreviousNonComment();
// Evaluated against the enclosing context, i.e. before the context for this
// '[' is pushed below.
229 bool StartsObjCMethodExpr =
230 Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
231 CurrentToken->isNot(tok::l_brace) &&
232 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
233 tok::kw_return, tok::kw_throw) ||
234 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
235 Parent->Type == TT_CastRParen ||
236 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
237 ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
238 Contexts.back().IsExpression = true;
239 bool ColonFound = false;
240
241 if (StartsObjCMethodExpr) {
242 Contexts.back().ColonIsObjCMethodExpr = true;
243 Left->Type = TT_ObjCMethodExpr;
244 } else if (Parent && Parent->is(tok::at)) {
245 Left->Type = TT_ArrayInitializerLSquare;
246 } else if (Left->Type == TT_Unknown) {
247 Left->Type = TT_ArraySubscriptLSquare;
248 }
249
250 while (CurrentToken) {
251 if (CurrentToken->is(tok::r_square)) {
252 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
253 Left->Type == TT_ObjCMethodExpr) {
254 // An ObjC method call is rarely followed by an open parenthesis.
255 // FIXME: Do we incorrectly label ":" with this?
256 StartsObjCMethodExpr = false;
257 Left->Type = TT_Unknown;
258 }
// "CurrentToken->Previous != Left" excludes an empty "[]".
259 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
260 CurrentToken->Type = TT_ObjCMethodExpr;
261 // determineStarAmpUsage() thinks that '*' '[' is allocating an
262 // array of pointers, but if '[' starts a selector then '*' is a
263 // binary operator.
264 if (Parent && Parent->Type == TT_PointerOrReference)
265 Parent->Type = TT_BinaryOperator;
266 }
267 Left->MatchingParen = CurrentToken;
268 CurrentToken->MatchingParen = Left;
// Publish the longest selector name on the first selector name token; it is
// reset to 0 when the '[' contains more than one block parameter.
269 if (Contexts.back().FirstObjCSelectorName) {
270 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
271 Contexts.back().LongestObjCSelectorName;
272 if (Left->BlockParameterCount > 1)
273 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
274 }
275 next();
276 return true;
277 }
// A closing bracket of another kind means the brackets are unbalanced.
278 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
279 return false;
280 if (CurrentToken->is(tok::colon))
281 ColonFound = true;
// Outside of proto files, a comma turns a subscript -- or a method
// expression in which no ':' has been seen yet -- into an array initializer.
282 if (CurrentToken->is(tok::comma) &&
283 Style.Language != FormatStyle::LK_Proto &&
284 (Left->Type == TT_ArraySubscriptLSquare ||
285 (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
286 Left->Type = TT_ArrayInitializerLSquare;
// Remember the token before consuming it; consumeToken() advances
// CurrentToken, possibly past a whole nested group.
287 FormatToken* Tok = CurrentToken;
288 if (!consumeToken())
289 return false;
290 updateParameterCount(Left, Tok);
291 }
292 return false;
293 }
294
parseBrace()295 bool parseBrace() {
296 if (CurrentToken) {
297 FormatToken *Left = CurrentToken->Previous;
298
299 if (Contexts.back().CaretFound)
300 Left->Type = TT_ObjCBlockLBrace;
301 Contexts.back().CaretFound = false;
302
303 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
304 Contexts.back().ColonIsDictLiteral = true;
305 if (Left->BlockKind == BK_BracedInit)
306 Contexts.back().IsExpression = true;
307
308 while (CurrentToken) {
309 if (CurrentToken->is(tok::r_brace)) {
310 Left->MatchingParen = CurrentToken;
311 CurrentToken->MatchingParen = Left;
312 next();
313 return true;
314 }
315 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
316 return false;
317 updateParameterCount(Left, CurrentToken);
318 if (CurrentToken->is(tok::colon) &&
319 Style.Language != FormatStyle::LK_Proto) {
320 if (CurrentToken->getPreviousNonComment()->is(tok::identifier))
321 CurrentToken->getPreviousNonComment()->Type = TT_SelectorName;
322 Left->Type = TT_DictLiteral;
323 }
324 if (!consumeToken())
325 return false;
326 }
327 }
328 return true;
329 }
330
updateParameterCount(FormatToken * Left,FormatToken * Current)331 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
332 if (Current->Type == TT_LambdaLSquare ||
333 (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) ||
334 (Style.Language == FormatStyle::LK_JavaScript &&
335 Current->TokenText == "function")) {
336 ++Left->BlockParameterCount;
337 }
338 if (Current->is(tok::comma)) {
339 ++Left->ParameterCount;
340 if (!Left->Role)
341 Left->Role.reset(new CommaSeparatedList(Style));
342 Left->Role->CommaFound(Current);
343 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
344 Left->ParameterCount = 1;
345 }
346 }
347
parseConditional()348 bool parseConditional() {
349 while (CurrentToken) {
350 if (CurrentToken->is(tok::colon)) {
351 CurrentToken->Type = TT_ConditionalExpr;
352 next();
353 return true;
354 }
355 if (!consumeToken())
356 return false;
357 }
358 return false;
359 }
360
parseTemplateDeclaration()361 bool parseTemplateDeclaration() {
362 if (CurrentToken && CurrentToken->is(tok::less)) {
363 CurrentToken->Type = TT_TemplateOpener;
364 next();
365 if (!parseAngle())
366 return false;
367 if (CurrentToken)
368 CurrentToken->Previous->ClosesTemplateDeclaration = true;
369 return true;
370 }
371 return false;
372 }
373
/// \brief Consumes \c CurrentToken and annotates it according to its kind.
///
/// The parser first advances past the token and then dispatches on its kind.
/// Opening brackets are handed to the matching parse*() routine, which
/// consumes everything up to and including the closing bracket.
///
/// Expects \c CurrentToken to be non-null; it is dereferenced
/// unconditionally.
///
/// \returns false if the token sequence cannot be annotated: a ':' that is
/// the first token, a stray ')' or ']', a '}' that is not the first token, or
/// a failed nested parseParens()/parseSquare()/parseBrace(). Returns true
/// otherwise.
consumeToken()374 bool consumeToken() {
375 FormatToken *Tok = CurrentToken;
376 next();
377 switch (Tok->Tok.getKind()) {
378 case tok::plus:
379 case tok::minus:
// A leading '+'/'-' on a declaration line introduces an ObjC method.
380 if (!Tok->Previous && Line.MustBeDeclaration)
381 Tok->Type = TT_ObjCMethodSpecifier;
382 break;
383 case tok::colon:
384 if (!Tok->Previous)
385 return false;
386 // Colons from ?: are handled in parseConditional().
// The checks below are ordered by priority; the first match decides the type.
387 if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 &&
388 Line.First->isNot(tok::kw_case)) {
389 Tok->Type = TT_CtorInitializerColon;
390 } else if (Contexts.back().ColonIsDictLiteral) {
391 Tok->Type = TT_DictLiteral;
392 } else if (Contexts.back().ColonIsObjCMethodExpr ||
393 Line.First->Type == TT_ObjCMethodSpecifier) {
394 Tok->Type = TT_ObjCMethodExpr;
395 Tok->Previous->Type = TT_SelectorName;
// Track the widest selector name and the first one seen in this context.
396 if (Tok->Previous->ColumnWidth >
397 Contexts.back().LongestObjCSelectorName) {
398 Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
399 }
400 if (!Contexts.back().FirstObjCSelectorName)
401 Contexts.back().FirstObjCSelectorName = Tok->Previous;
402 } else if (Contexts.back().ColonIsForRangeExpr) {
403 Tok->Type = TT_RangeBasedForLoopColon;
404 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
405 Tok->Type = TT_BitFieldColon;
406 } else if (Contexts.size() == 1 &&
407 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
408 Tok->Type = TT_InheritanceColon;
409 } else if (Contexts.back().ContextKind == tok::l_paren) {
410 Tok->Type = TT_InlineASMColon;
411 }
412 break;
413 case tok::kw_if:
414 case tok::kw_while:
// Step over the '(' and parse the condition, looking for declarations.
415 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
416 next();
417 if (!parseParens(/*LookForDecls=*/true))
418 return false;
419 }
420 break;
421 case tok::kw_for:
// NOTE(review): unlike the if/while case above, the token after "for" is
// skipped without checking that it is a '(' -- confirm this is intended.
422 Contexts.back().ColonIsForRangeExpr = true;
423 next();
424 if (!parseParens())
425 return false;
426 break;
427 case tok::l_paren:
428 if (!parseParens())
429 return false;
// Top-level parentheses on a declaration line that do not hold an expression
// (and do not follow decltype) suggest a function declaration.
430 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
431 !Contexts.back().IsExpression &&
432 Line.First->Type != TT_ObjCProperty &&
433 (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype)))
434 Line.MightBeFunctionDecl = true;
435 break;
436 case tok::l_square:
437 if (!parseSquare())
438 return false;
439 break;
440 case tok::l_brace:
441 if (!parseBrace())
442 return false;
443 break;
444 case tok::less:
445 if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
446 Tok->Type = TT_TemplateOpener;
447 else {
// Not a template list: treat '<' as a binary operator and resume parsing
// right after it.
448 Tok->Type = TT_BinaryOperator;
449 CurrentToken = Tok;
450 next();
451 }
452 break;
453 case tok::r_paren:
454 case tok::r_square:
// Closers are consumed by the matching parse*() routine; seeing one here
// means it is unbalanced.
455 return false;
456 case tok::r_brace:
457 // Lines can start with '}'.
458 if (Tok->Previous)
459 return false;
460 break;
461 case tok::greater:
// A '>' reaching this point was not consumed as a closer by parseAngle().
462 Tok->Type = TT_BinaryOperator;
463 break;
464 case tok::kw_operator:
// Consume the operator's name up to the parameter list; the result of the
// nested consumeToken() call is ignored.
465 while (CurrentToken &&
466 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
467 if (CurrentToken->isOneOf(tok::star, tok::amp))
468 CurrentToken->Type = TT_PointerOrReference;
469 consumeToken();
470 if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
471 CurrentToken->Previous->Type = TT_OverloadedOperator;
472 }
473 if (CurrentToken) {
474 CurrentToken->Type = TT_OverloadedOperatorLParen;
475 if (CurrentToken->Previous->Type == TT_BinaryOperator)
476 CurrentToken->Previous->Type = TT_OverloadedOperator;
477 }
478 break;
479 case tok::question:
// The result is ignored: a missing ':' does not invalidate the line here.
480 parseConditional();
481 break;
482 case tok::kw_template:
// The result is ignored here as well.
483 parseTemplateDeclaration();
484 break;
485 case tok::identifier:
// The identifier "in" on a line starting with "for".
486 if (Line.First->is(tok::kw_for) &&
487 Tok->Tok.getIdentifierInfo() == &Ident_in)
488 Tok->Type = TT_ObjCForIn;
489 break;
490 case tok::comma:
491 if (Contexts.back().FirstStartOfName)
492 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
493 if (Contexts.back().InCtorInitializer)
494 Tok->Type = TT_CtorInitializerComma;
// Whatever follows the first comma of a foreach macro is an expression.
495 if (Contexts.back().IsForEachMacro)
496 Contexts.back().IsExpression = true;
497 break;
498 default:
499 break;
500 }
501 return true;
502 }
503
parseIncludeDirective()504 void parseIncludeDirective() {
505 next();
506 if (CurrentToken && CurrentToken->is(tok::less)) {
507 next();
508 while (CurrentToken) {
509 if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
510 CurrentToken->Type = TT_ImplicitStringLiteral;
511 next();
512 }
513 } else {
514 while (CurrentToken) {
515 if (CurrentToken->is(tok::string_literal))
516 // Mark these string literals as "implicit" literals, too, so that
517 // they are not split or line-wrapped.
518 CurrentToken->Type = TT_ImplicitStringLiteral;
519 next();
520 }
521 }
522 }
523
parseWarningOrError()524 void parseWarningOrError() {
525 next();
526 // We still want to format the whitespace left of the first token of the
527 // warning or error.
528 next();
529 while (CurrentToken) {
530 CurrentToken->Type = TT_ImplicitStringLiteral;
531 next();
532 }
533 }
534
parsePragma()535 void parsePragma() {
536 next(); // Consume "pragma".
537 if (CurrentToken && CurrentToken->TokenText == "mark") {
538 next(); // Consume "mark".
539 next(); // Consume first token (so we fix leading whitespace).
540 while (CurrentToken) {
541 CurrentToken->Type = TT_ImplicitStringLiteral;
542 next();
543 }
544 }
545 }
546
parsePreprocessorDirective()547 void parsePreprocessorDirective() {
548 next();
549 if (!CurrentToken)
550 return;
551 if (CurrentToken->Tok.is(tok::numeric_constant)) {
552 CurrentToken->SpacesRequiredBefore = 1;
553 return;
554 }
555 // Hashes in the middle of a line can lead to any strange token
556 // sequence.
557 if (!CurrentToken->Tok.getIdentifierInfo())
558 return;
559 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
560 case tok::pp_include:
561 case tok::pp_import:
562 parseIncludeDirective();
563 break;
564 case tok::pp_error:
565 case tok::pp_warning:
566 parseWarningOrError();
567 break;
568 case tok::pp_pragma:
569 parsePragma();
570 break;
571 case tok::pp_if:
572 case tok::pp_elif:
573 Contexts.back().IsExpression = true;
574 parseLine();
575 break;
576 default:
577 break;
578 }
579 while (CurrentToken)
580 next();
581 }
582
583 public:
parseLine()584 LineType parseLine() {
585 if (CurrentToken->is(tok::hash)) {
586 parsePreprocessorDirective();
587 return LT_PreprocessorDirective;
588 }
589
590 // Directly allow to 'import <string-literal>' to support protocol buffer
591 // definitions (code.google.com/p/protobuf) or missing "#" (either way we
592 // should not break the line).
593 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
594 if (Info && Info->getPPKeywordID() == tok::pp_import &&
595 CurrentToken->Next && CurrentToken->Next->is(tok::string_literal))
596 parseIncludeDirective();
597
598 while (CurrentToken) {
599 if (CurrentToken->is(tok::kw_virtual))
600 KeywordVirtualFound = true;
601 if (!consumeToken())
602 return LT_Invalid;
603 }
604 if (KeywordVirtualFound)
605 return LT_VirtualFunctionDecl;
606
607 if (Line.First->Type == TT_ObjCMethodSpecifier) {
608 if (Contexts.back().FirstObjCSelectorName)
609 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
610 Contexts.back().LongestObjCSelectorName;
611 return LT_ObjCMethodDecl;
612 }
613
614 return LT_Other;
615 }
616
617 private:
resetTokenMetadata(FormatToken * Token)618 void resetTokenMetadata(FormatToken *Token) {
619 if (!Token)
620 return;
621
622 // Reset token type in case we have already looked at it and then
623 // recovered from an error (e.g. failure to find the matching >).
624 if (CurrentToken->Type != TT_LambdaLSquare &&
625 CurrentToken->Type != TT_FunctionLBrace &&
626 CurrentToken->Type != TT_ImplicitStringLiteral &&
627 CurrentToken->Type != TT_RegexLiteral &&
628 CurrentToken->Type != TT_TrailingReturnArrow)
629 CurrentToken->Type = TT_Unknown;
630 if (CurrentToken->Role)
631 CurrentToken->Role.reset(nullptr);
632 CurrentToken->FakeLParens.clear();
633 CurrentToken->FakeRParens = 0;
634 }
635
next()636 void next() {
637 if (CurrentToken) {
638 determineTokenType(*CurrentToken);
639 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
640 CurrentToken->NestingLevel = Contexts.size() - 1;
641 CurrentToken = CurrentToken->Next;
642 }
643
644 resetTokenMetadata(CurrentToken);
645 }
646
647 /// \brief A struct to hold information valid in a specific context, e.g.
648 /// a pair of parenthesis.
649 struct Context {
Contextclang::format::__anonef4f45af0111::AnnotatingParser::Context650 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
651 bool IsExpression)
652 : ContextKind(ContextKind), BindingStrength(BindingStrength),
653 LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
654 ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false),
655 FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr),
656 IsExpression(IsExpression), CanBeExpression(true),
657 InTemplateArgument(false), InCtorInitializer(false),
658 CaretFound(false), IsForEachMacro(false) {}
659
660 tok::TokenKind ContextKind;
661 unsigned BindingStrength;
662 unsigned LongestObjCSelectorName;
663 bool ColonIsForRangeExpr;
664 bool ColonIsDictLiteral;
665 bool ColonIsObjCMethodExpr;
666 FormatToken *FirstObjCSelectorName;
667 FormatToken *FirstStartOfName;
668 bool IsExpression;
669 bool CanBeExpression;
670 bool InTemplateArgument;
671 bool InCtorInitializer;
672 bool CaretFound;
673 bool IsForEachMacro;
674 };
675
676 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
677 /// of each instance.
678 struct ScopedContextCreator {
679 AnnotatingParser &P;
680
ScopedContextCreatorclang::format::__anonef4f45af0111::AnnotatingParser::ScopedContextCreator681 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
682 unsigned Increase)
683 : P(P) {
684 P.Contexts.push_back(Context(ContextKind,
685 P.Contexts.back().BindingStrength + Increase,
686 P.Contexts.back().IsExpression));
687 }
688
~ScopedContextCreatorclang::format::__anonef4f45af0111::AnnotatingParser::ScopedContextCreator689 ~ScopedContextCreator() { P.Contexts.pop_back(); }
690 };
691
determineTokenType(FormatToken & Current)692 void determineTokenType(FormatToken &Current) {
693 if (Current.getPrecedence() == prec::Assignment &&
694 !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
695 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
696 Contexts.back().IsExpression = true;
697 for (FormatToken *Previous = Current.Previous;
698 Previous && !Previous->isOneOf(tok::comma, tok::semi);
699 Previous = Previous->Previous) {
700 if (Previous->isOneOf(tok::r_square, tok::r_paren))
701 Previous = Previous->MatchingParen;
702 if (Previous->Type == TT_BinaryOperator &&
703 Previous->isOneOf(tok::star, tok::amp)) {
704 Previous->Type = TT_PointerOrReference;
705 }
706 }
707 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
708 Contexts.back().IsExpression = true;
709 } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
710 !Line.InPPDirective &&
711 (!Current.Previous ||
712 Current.Previous->isNot(tok::kw_decltype))) {
713 bool ParametersOfFunctionType =
714 Current.Previous && Current.Previous->is(tok::r_paren) &&
715 Current.Previous->MatchingParen &&
716 Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen;
717 bool IsForOrCatch = Current.Previous &&
718 Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
719 Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
720 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
721 for (FormatToken *Previous = Current.Previous;
722 Previous && Previous->isOneOf(tok::star, tok::amp);
723 Previous = Previous->Previous)
724 Previous->Type = TT_PointerOrReference;
725 } else if (Current.Previous &&
726 Current.Previous->Type == TT_CtorInitializerColon) {
727 Contexts.back().IsExpression = true;
728 Contexts.back().InCtorInitializer = true;
729 } else if (Current.is(tok::kw_new)) {
730 Contexts.back().CanBeExpression = false;
731 } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
732 // This should be the condition or increment in a for-loop.
733 Contexts.back().IsExpression = true;
734 }
735
736 if (Current.Type == TT_Unknown) {
737 // Line.MightBeFunctionDecl can only be true after the parentheses of a
738 // function declaration have been found. In this case, 'Current' is a
739 // trailing token of this declaration and thus cannot be a name.
740 if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
741 Contexts.back().FirstStartOfName = &Current;
742 Current.Type = TT_StartOfName;
743 } else if (Current.is(tok::kw_auto)) {
744 AutoFound = true;
745 } else if (Current.is(tok::arrow) && AutoFound &&
746 Line.MustBeDeclaration) {
747 Current.Type = TT_TrailingReturnArrow;
748 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
749 Current.Type =
750 determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
751 Contexts.back().IsExpression,
752 Contexts.back().InTemplateArgument);
753 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
754 Current.Type = determinePlusMinusCaretUsage(Current);
755 if (Current.Type == TT_UnaryOperator && Current.is(tok::caret))
756 Contexts.back().CaretFound = true;
757 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
758 Current.Type = determineIncrementUsage(Current);
759 } else if (Current.is(tok::exclaim)) {
760 Current.Type = TT_UnaryOperator;
761 } else if (Current.is(tok::question)) {
762 Current.Type = TT_ConditionalExpr;
763 } else if (Current.isBinaryOperator() &&
764 (!Current.Previous ||
765 Current.Previous->isNot(tok::l_square))) {
766 Current.Type = TT_BinaryOperator;
767 } else if (Current.is(tok::comment)) {
768 if (Current.TokenText.startswith("//"))
769 Current.Type = TT_LineComment;
770 else
771 Current.Type = TT_BlockComment;
772 } else if (Current.is(tok::r_paren)) {
773 if (rParenEndsCast(Current))
774 Current.Type = TT_CastRParen;
775 } else if (Current.is(tok::at) && Current.Next) {
776 switch (Current.Next->Tok.getObjCKeywordID()) {
777 case tok::objc_interface:
778 case tok::objc_implementation:
779 case tok::objc_protocol:
780 Current.Type = TT_ObjCDecl;
781 break;
782 case tok::objc_property:
783 Current.Type = TT_ObjCProperty;
784 break;
785 default:
786 break;
787 }
788 } else if (Current.is(tok::period)) {
789 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
790 if (PreviousNoComment &&
791 PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
792 Current.Type = TT_DesignatedInitializerPeriod;
793 } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
794 Current.Previous && Current.Previous->isNot(tok::equal) &&
795 Line.MightBeFunctionDecl && Contexts.size() == 1) {
796 // Line.MightBeFunctionDecl can only be true after the parentheses of a
797 // function declaration have been found.
798 Current.Type = TT_TrailingAnnotation;
799 }
800 }
801 }
802
803 /// \brief Take a guess at whether \p Tok starts a name of a function or
804 /// variable declaration.
805 ///
806 /// This is a heuristic based on whether \p Tok is an identifier following
807 /// something that is likely a type.
isStartOfName(const FormatToken & Tok)808 bool isStartOfName(const FormatToken &Tok) {
809 if (Tok.isNot(tok::identifier) || !Tok.Previous)
810 return false;
811
812 // Skip "const" as it does not have an influence on whether this is a name.
813 FormatToken *PreviousNotConst = Tok.Previous;
814 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
815 PreviousNotConst = PreviousNotConst->Previous;
816
817 if (!PreviousNotConst)
818 return false;
819
820 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
821 PreviousNotConst->Previous &&
822 PreviousNotConst->Previous->is(tok::hash);
823
824 if (PreviousNotConst->Type == TT_TemplateCloser)
825 return PreviousNotConst && PreviousNotConst->MatchingParen &&
826 PreviousNotConst->MatchingParen->Previous &&
827 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
828
829 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
830 PreviousNotConst->MatchingParen->Previous &&
831 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
832 return true;
833
834 return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
835 PreviousNotConst->Type == TT_PointerOrReference ||
836 PreviousNotConst->isSimpleTypeSpecifier();
837 }
838
839 /// \brief Determine whether ')' is ending a cast.
///
/// Heuristic. \p Tok is the ')' to classify; its \c MatchingParen (if set)
/// is used to find the token in front of the parentheses. Never reports a
/// cast when \c Tok.Previous is \c Tok.MatchingParen, i.e. for empty
/// parentheses.
///
/// \returns true if the parentheses closed by \p Tok look like a cast.
rParenEndsCast(const FormatToken & Tok)840 bool rParenEndsCast(const FormatToken &Tok) {
841 FormatToken *LeftOfParens = nullptr;
842 if (Tok.MatchingParen)
843 LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
844 bool IsCast = false;
845 bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen;
// The last token inside the parentheses looks like the end of a type.
846 bool ParensAreType = !Tok.Previous ||
847 Tok.Previous->Type == TT_PointerOrReference ||
848 Tok.Previous->Type == TT_TemplateCloser ||
849 Tok.Previous->isSimpleTypeSpecifier();
// ")" followed by '=', ';' or '{' could be the end of a declaration.
850 bool ParensCouldEndDecl =
851 Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
852 bool IsSizeOfOrAlignOf =
853 LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
// Case 1: the parentheses end in a type, and either the enclosing context is
// an expression or a binary operator follows.
854 if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
855 ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) ||
856 (Tok.Next && Tok.Next->isBinaryOperator())))
857 IsCast = true;
// Case 2: a literal other than a string literal, or sizeof/alignof, follows.
858 else if (Tok.Next && Tok.Next->isNot(tok::string_literal) &&
859 (Tok.Next->Tok.isLiteral() ||
860 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
861 IsCast = true;
862 // If there is an identifier after the (), it is likely a cast, unless
863 // there is also an identifier before the ().
864 else if (LeftOfParens &&
865 (LeftOfParens->Tok.getIdentifierInfo() == nullptr ||
866 LeftOfParens->is(tok::kw_return)) &&
867 LeftOfParens->Type != TT_OverloadedOperator &&
868 LeftOfParens->isNot(tok::at) &&
869 LeftOfParens->Type != TT_TemplateCloser && Tok.Next) {
870 if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) {
871 IsCast = true;
872 } else {
873 // Use heuristics to recognize c style casting.
874 FormatToken *Prev = Tok.Previous;
// Skip one trailing '&' or '*' inside the parentheses.
875 if (Prev && Prev->isOneOf(tok::amp, tok::star))
876 Prev = Prev->Previous;
877
// ") <unary op> identifier/number" looks like a cast of a unary expression.
878 if (Prev && Tok.Next && Tok.Next->Next) {
879 bool NextIsUnary = Tok.Next->isUnaryOperator() ||
880 Tok.Next->isOneOf(tok::amp, tok::star);
881 IsCast = NextIsUnary && Tok.Next->Next->isOneOf(
882 tok::identifier, tok::numeric_constant);
883 }
884
// Everything between the '(' and Prev must be "const" or an identifier;
// anything else (or running out of tokens) rules the cast out.
885 for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) {
886 if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) {
887 IsCast = false;
888 break;
889 }
890 }
891 }
892 }
893 return IsCast && !ParensAreEmpty;
894 }
895
896 /// \brief Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)897 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
898 bool InTemplateArgument) {
899 const FormatToken *PrevToken = Tok.getPreviousNonComment();
900 if (!PrevToken)
901 return TT_UnaryOperator;
902
903 const FormatToken *NextToken = Tok.getNextNonComment();
904 if (!NextToken || NextToken->is(tok::l_brace))
905 return TT_Unknown;
906
907 if (PrevToken->is(tok::coloncolon) ||
908 (PrevToken->is(tok::l_paren) && !IsExpression))
909 return TT_PointerOrReference;
910
911 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
912 tok::comma, tok::semi, tok::kw_return, tok::colon,
913 tok::equal, tok::kw_delete, tok::kw_sizeof) ||
914 PrevToken->Type == TT_BinaryOperator ||
915 PrevToken->Type == TT_ConditionalExpr ||
916 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
917 return TT_UnaryOperator;
918
919 if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare)
920 return TT_PointerOrReference;
921 if (NextToken->is(tok::kw_operator))
922 return TT_PointerOrReference;
923
924 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
925 PrevToken->MatchingParen->Previous &&
926 PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
927 tok::kw_decltype))
928 return TT_PointerOrReference;
929
930 if (PrevToken->Tok.isLiteral() ||
931 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
932 tok::kw_false) ||
933 NextToken->Tok.isLiteral() ||
934 NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
935 NextToken->isUnaryOperator() ||
936 // If we know we're in a template argument, there are no named
937 // declarations. Thus, having an identifier on the right-hand side
938 // indicates a binary operator.
939 (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
940 return TT_BinaryOperator;
941
942 // This catches some cases where evaluation order is used as control flow:
943 // aaa && aaa->f();
944 const FormatToken *NextNextToken = NextToken->getNextNonComment();
945 if (NextNextToken && NextNextToken->is(tok::arrow))
946 return TT_BinaryOperator;
947
948 // It is very unlikely that we are going to find a pointer or reference type
949 // definition on the RHS of an assignment.
950 if (IsExpression)
951 return TT_BinaryOperator;
952
953 return TT_PointerOrReference;
954 }
955
determinePlusMinusCaretUsage(const FormatToken & Tok)956 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
957 const FormatToken *PrevToken = Tok.getPreviousNonComment();
958 if (!PrevToken || PrevToken->Type == TT_CastRParen)
959 return TT_UnaryOperator;
960
961 // Use heuristics to recognize unary operators.
962 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
963 tok::question, tok::colon, tok::kw_return,
964 tok::kw_case, tok::at, tok::l_brace))
965 return TT_UnaryOperator;
966
967 // There can't be two consecutive binary operators.
968 if (PrevToken->Type == TT_BinaryOperator)
969 return TT_UnaryOperator;
970
971 // Fall back to marking the token as binary operator.
972 return TT_BinaryOperator;
973 }
974
975 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)976 TokenType determineIncrementUsage(const FormatToken &Tok) {
977 const FormatToken *PrevToken = Tok.getPreviousNonComment();
978 if (!PrevToken || PrevToken->Type == TT_CastRParen)
979 return TT_UnaryOperator;
980 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
981 return TT_TrailingUnaryOperator;
982
983 return TT_UnaryOperator;
984 }
985
986 SmallVector<Context, 8> Contexts;
987
988 const FormatStyle &Style;
989 AnnotatedLine &Line;
990 FormatToken *CurrentToken;
991 bool KeywordVirtualFound;
992 bool AutoFound;
993 IdentifierInfo &Ident_in;
994 };
995
996 static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
997 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
998
999 /// \brief Parses binary expressions by inserting fake parenthesis based on
1000 /// operator precedence.
1001 class ExpressionParser {
1002 public:
ExpressionParser(AnnotatedLine & Line)1003 ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
1004 // Skip leading "}", e.g. in "} else if (...) {".
1005 if (Current->is(tok::r_brace))
1006 next();
1007 }
1008
1009 /// \brief Parse expressions with the given operatore precedence.
parse(int Precedence=0)1010 void parse(int Precedence = 0) {
1011 // Skip 'return' and ObjC selector colons as they are not part of a binary
1012 // expression.
1013 while (Current &&
1014 (Current->is(tok::kw_return) ||
1015 (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr ||
1016 Current->Type == TT_DictLiteral))))
1017 next();
1018
1019 if (!Current || Precedence > PrecedenceArrowAndPeriod)
1020 return;
1021
1022 // Conditional expressions need to be parsed separately for proper nesting.
1023 if (Precedence == prec::Conditional) {
1024 parseConditionalExpr();
1025 return;
1026 }
1027
1028 // Parse unary operators, which all have a higher precedence than binary
1029 // operators.
1030 if (Precedence == PrecedenceUnaryOperator) {
1031 parseUnaryOperator();
1032 return;
1033 }
1034
1035 FormatToken *Start = Current;
1036 FormatToken *LatestOperator = nullptr;
1037 unsigned OperatorIndex = 0;
1038
1039 while (Current) {
1040 // Consume operators with higher precedence.
1041 parse(Precedence + 1);
1042
1043 int CurrentPrecedence = getCurrentPrecedence();
1044
1045 if (Current && Current->Type == TT_SelectorName &&
1046 Precedence == CurrentPrecedence) {
1047 if (LatestOperator)
1048 addFakeParenthesis(Start, prec::Level(Precedence));
1049 Start = Current;
1050 }
1051
1052 // At the end of the line or when an operator with higher precedence is
1053 // found, insert fake parenthesis and return.
1054 if (!Current || Current->closesScope() ||
1055 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
1056 if (LatestOperator) {
1057 LatestOperator->LastOperator = true;
1058 if (Precedence == PrecedenceArrowAndPeriod) {
1059 // Call expressions don't have a binary operator precedence.
1060 addFakeParenthesis(Start, prec::Unknown);
1061 } else {
1062 addFakeParenthesis(Start, prec::Level(Precedence));
1063 }
1064 }
1065 return;
1066 }
1067
1068 // Consume scopes: (), [], <> and {}
1069 if (Current->opensScope()) {
1070 while (Current && !Current->closesScope()) {
1071 next();
1072 parse();
1073 }
1074 next();
1075 } else {
1076 // Operator found.
1077 if (CurrentPrecedence == Precedence) {
1078 LatestOperator = Current;
1079 Current->OperatorIndex = OperatorIndex;
1080 ++OperatorIndex;
1081 }
1082
1083 next();
1084 }
1085 }
1086 }
1087
1088 private:
1089 /// \brief Gets the precedence (+1) of the given token for binary operators
1090 /// and other tokens that we treat like binary operators.
getCurrentPrecedence()1091 int getCurrentPrecedence() {
1092 if (Current) {
1093 if (Current->Type == TT_ConditionalExpr)
1094 return prec::Conditional;
1095 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
1096 Current->Type == TT_SelectorName)
1097 return 0;
1098 else if (Current->Type == TT_RangeBasedForLoopColon)
1099 return prec::Comma;
1100 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
1101 return Current->getPrecedence();
1102 else if (Current->isOneOf(tok::period, tok::arrow))
1103 return PrecedenceArrowAndPeriod;
1104 }
1105 return -1;
1106 }
1107
addFakeParenthesis(FormatToken * Start,prec::Level Precedence)1108 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1109 Start->FakeLParens.push_back(Precedence);
1110 if (Precedence > prec::Unknown)
1111 Start->StartsBinaryExpression = true;
1112 if (Current) {
1113 ++Current->Previous->FakeRParens;
1114 if (Precedence > prec::Unknown)
1115 Current->Previous->EndsBinaryExpression = true;
1116 }
1117 }
1118
1119 /// \brief Parse unary operator expressions and surround them with fake
1120 /// parentheses if appropriate.
parseUnaryOperator()1121 void parseUnaryOperator() {
1122 if (!Current || Current->Type != TT_UnaryOperator) {
1123 parse(PrecedenceArrowAndPeriod);
1124 return;
1125 }
1126
1127 FormatToken *Start = Current;
1128 next();
1129 parseUnaryOperator();
1130
1131 // The actual precedence doesn't matter.
1132 addFakeParenthesis(Start, prec::Unknown);
1133 }
1134
parseConditionalExpr()1135 void parseConditionalExpr() {
1136 FormatToken *Start = Current;
1137 parse(prec::LogicalOr);
1138 if (!Current || !Current->is(tok::question))
1139 return;
1140 next();
1141 parse(prec::LogicalOr);
1142 if (!Current || Current->Type != TT_ConditionalExpr)
1143 return;
1144 next();
1145 parseConditionalExpr();
1146 addFakeParenthesis(Start, prec::Conditional);
1147 }
1148
next()1149 void next() {
1150 if (Current)
1151 Current = Current->Next;
1152 while (Current && Current->isTrailingComment())
1153 Current = Current->Next;
1154 }
1155
1156 FormatToken *Current;
1157 };
1158
1159 } // end anonymous namespace
1160
1161 void
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines)1162 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
1163 const AnnotatedLine *NextNonCommentLine = nullptr;
1164 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1165 E = Lines.rend();
1166 I != E; ++I) {
1167 if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1168 (*I)->First->Next == nullptr)
1169 (*I)->Level = NextNonCommentLine->Level;
1170 else
1171 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1172
1173 setCommentLineLevels((*I)->Children);
1174 }
1175 }
1176
annotate(AnnotatedLine & Line)1177 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1178 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1179 E = Line.Children.end();
1180 I != E; ++I) {
1181 annotate(**I);
1182 }
1183 AnnotatingParser Parser(Style, Line, Ident_in);
1184 Line.Type = Parser.parseLine();
1185 if (Line.Type == LT_Invalid)
1186 return;
1187
1188 ExpressionParser ExprParser(Line);
1189 ExprParser.parse();
1190
1191 if (Line.First->Type == TT_ObjCMethodSpecifier)
1192 Line.Type = LT_ObjCMethodDecl;
1193 else if (Line.First->Type == TT_ObjCDecl)
1194 Line.Type = LT_ObjCDecl;
1195 else if (Line.First->Type == TT_ObjCProperty)
1196 Line.Type = LT_ObjCProperty;
1197
1198 Line.First->SpacesRequiredBefore = 1;
1199 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1200 }
1201
1202 // This function heuristically determines whether 'Current' starts the name of a
1203 // function declaration.
isFunctionDeclarationName(const FormatToken & Current)1204 static bool isFunctionDeclarationName(const FormatToken &Current) {
1205 if (Current.Type != TT_StartOfName ||
1206 Current.NestingLevel != 0 ||
1207 Current.Previous->Type == TT_StartOfName)
1208 return false;
1209 const FormatToken *Next = Current.Next;
1210 for (; Next; Next = Next->Next) {
1211 if (Next->Type == TT_TemplateOpener) {
1212 Next = Next->MatchingParen;
1213 } else if (Next->is(tok::coloncolon)) {
1214 Next = Next->Next;
1215 if (!Next || !Next->is(tok::identifier))
1216 return false;
1217 } else if (Next->is(tok::l_paren)) {
1218 break;
1219 } else {
1220 return false;
1221 }
1222 }
1223 if (!Next)
1224 return false;
1225 assert(Next->is(tok::l_paren));
1226 if (Next->Next == Next->MatchingParen)
1227 return true;
1228 for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen;
1229 Tok = Tok->Next) {
1230 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1231 Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName)
1232 return true;
1233 if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral())
1234 return false;
1235 }
1236 return false;
1237 }
1238
calculateFormattingInformation(AnnotatedLine & Line)1239 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1240 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1241 E = Line.Children.end();
1242 I != E; ++I) {
1243 calculateFormattingInformation(**I);
1244 }
1245
1246 Line.First->TotalLength =
1247 Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1248 if (!Line.First->Next)
1249 return;
1250 FormatToken *Current = Line.First->Next;
1251 bool InFunctionDecl = Line.MightBeFunctionDecl;
1252 while (Current) {
1253 if (isFunctionDeclarationName(*Current))
1254 Current->Type = TT_FunctionDeclarationName;
1255 if (Current->Type == TT_LineComment) {
1256 if (Current->Previous->BlockKind == BK_BracedInit &&
1257 Current->Previous->opensScope())
1258 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1259 else
1260 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1261
1262 // If we find a trailing comment, iterate backwards to determine whether
1263 // it seems to relate to a specific parameter. If so, break before that
1264 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1265 // to the previous line in:
1266 // SomeFunction(a,
1267 // b, // comment
1268 // c);
1269 if (!Current->HasUnescapedNewline) {
1270 for (FormatToken *Parameter = Current->Previous; Parameter;
1271 Parameter = Parameter->Previous) {
1272 if (Parameter->isOneOf(tok::comment, tok::r_brace))
1273 break;
1274 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1275 if (Parameter->Previous->Type != TT_CtorInitializerComma &&
1276 Parameter->HasUnescapedNewline)
1277 Parameter->MustBreakBefore = true;
1278 break;
1279 }
1280 }
1281 }
1282 } else if (Current->SpacesRequiredBefore == 0 &&
1283 spaceRequiredBefore(Line, *Current)) {
1284 Current->SpacesRequiredBefore = 1;
1285 }
1286
1287 Current->MustBreakBefore =
1288 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1289
1290 Current->CanBreakBefore =
1291 Current->MustBreakBefore || canBreakBefore(Line, *Current);
1292 unsigned ChildSize = 0;
1293 if (Current->Previous->Children.size() == 1) {
1294 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1295 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1296 : LastOfChild.TotalLength + 1;
1297 }
1298 if (Current->MustBreakBefore || Current->Previous->Children.size() > 1 ||
1299 Current->IsMultiline)
1300 Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
1301 else
1302 Current->TotalLength = Current->Previous->TotalLength +
1303 Current->ColumnWidth + ChildSize +
1304 Current->SpacesRequiredBefore;
1305
1306 if (Current->Type == TT_CtorInitializerColon)
1307 InFunctionDecl = false;
1308
1309 // FIXME: Only calculate this if CanBreakBefore is true once static
1310 // initializers etc. are sorted out.
1311 // FIXME: Move magic numbers to a better place.
1312 Current->SplitPenalty = 20 * Current->BindingStrength +
1313 splitPenalty(Line, *Current, InFunctionDecl);
1314
1315 Current = Current->Next;
1316 }
1317
1318 calculateUnbreakableTailLengths(Line);
1319 for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1320 if (Current->Role)
1321 Current->Role->precomputeFormattingInfos(Current);
1322 }
1323
1324 DEBUG({ printDebugInfo(Line); });
1325 }
1326
calculateUnbreakableTailLengths(AnnotatedLine & Line)1327 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1328 unsigned UnbreakableTailLength = 0;
1329 FormatToken *Current = Line.Last;
1330 while (Current) {
1331 Current->UnbreakableTailLength = UnbreakableTailLength;
1332 if (Current->CanBreakBefore ||
1333 Current->isOneOf(tok::comment, tok::string_literal)) {
1334 UnbreakableTailLength = 0;
1335 } else {
1336 UnbreakableTailLength +=
1337 Current->ColumnWidth + Current->SpacesRequiredBefore;
1338 }
1339 Current = Current->Previous;
1340 }
1341 }
1342
splitPenalty(const AnnotatedLine & Line,const FormatToken & Tok,bool InFunctionDecl)1343 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1344 const FormatToken &Tok,
1345 bool InFunctionDecl) {
1346 const FormatToken &Left = *Tok.Previous;
1347 const FormatToken &Right = Tok;
1348
1349 if (Left.is(tok::semi))
1350 return 0;
1351 if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
1352 Right.Next->Type == TT_DictLiteral))
1353 return 1;
1354 if (Right.is(tok::l_square)) {
1355 if (Style.Language == FormatStyle::LK_Proto)
1356 return 1;
1357 if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare)
1358 return 500;
1359 }
1360 if (Right.Type == TT_StartOfName ||
1361 Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) {
1362 if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1363 return 3;
1364 if (Left.Type == TT_StartOfName)
1365 return 20;
1366 if (InFunctionDecl && Right.NestingLevel == 0)
1367 return Style.PenaltyReturnTypeOnItsOwnLine;
1368 return 200;
1369 }
1370 if (Left.is(tok::equal) && Right.is(tok::l_brace))
1371 return 150;
1372 if (Left.Type == TT_CastRParen)
1373 return 100;
1374 if (Left.is(tok::coloncolon) ||
1375 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1376 return 500;
1377 if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1378 return 5000;
1379
1380 if (Left.Type == TT_RangeBasedForLoopColon ||
1381 Left.Type == TT_InheritanceColon)
1382 return 2;
1383
1384 if (Right.isMemberAccess()) {
1385 if (Left.is(tok::r_paren) && Left.MatchingParen &&
1386 Left.MatchingParen->ParameterCount > 0)
1387 return 20; // Should be smaller than breaking at a nested comma.
1388 return 150;
1389 }
1390
1391 if (Right.Type == TT_TrailingAnnotation &&
1392 (!Right.Next || Right.Next->isNot(tok::l_paren))) {
1393 // Generally, breaking before a trailing annotation is bad unless it is
1394 // function-like. It seems to be especially preferable to keep standard
1395 // annotations (i.e. "const", "final" and "override") on the same line.
1396 // Use a slightly higher penalty after ")" so that annotations like
1397 // "const override" are kept together.
1398 bool is_short_annotation = Right.TokenText.size() < 10;
1399 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
1400 }
1401
1402 // In for-loops, prefer breaking at ',' and ';'.
1403 if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
1404 return 4;
1405
1406 // In Objective-C method expressions, prefer breaking before "param:" over
1407 // breaking after it.
1408 if (Right.Type == TT_SelectorName)
1409 return 0;
1410 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1411 return Line.MightBeFunctionDecl ? 50 : 500;
1412
1413 if (Left.is(tok::l_paren) && InFunctionDecl)
1414 return 100;
1415 if (Left.is(tok::equal) && InFunctionDecl)
1416 return 110;
1417 if (Left.opensScope())
1418 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1419 : 19;
1420
1421 if (Right.is(tok::lessless)) {
1422 if (Left.is(tok::string_literal)) {
1423 StringRef Content = Left.TokenText;
1424 if (Content.startswith("\""))
1425 Content = Content.drop_front(1);
1426 if (Content.endswith("\""))
1427 Content = Content.drop_back(1);
1428 Content = Content.trim();
1429 if (Content.size() > 1 &&
1430 (Content.back() == ':' || Content.back() == '='))
1431 return 25;
1432 }
1433 return 1; // Breaking at a << is really cheap.
1434 }
1435 if (Left.Type == TT_ConditionalExpr)
1436 return prec::Conditional;
1437 prec::Level Level = Left.getPrecedence();
1438
1439 if (Level != prec::Unknown)
1440 return Level;
1441
1442 return 3;
1443 }
1444
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right)1445 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1446 const FormatToken &Left,
1447 const FormatToken &Right) {
1448 if (Style.Language == FormatStyle::LK_Proto) {
1449 if (Right.is(tok::period) &&
1450 (Left.TokenText == "optional" || Left.TokenText == "required" ||
1451 Left.TokenText == "repeated"))
1452 return true;
1453 if (Right.is(tok::l_paren) &&
1454 (Left.TokenText == "returns" || Left.TokenText == "option"))
1455 return true;
1456 } else if (Style.Language == FormatStyle::LK_JavaScript) {
1457 if (Left.TokenText == "var")
1458 return true;
1459 }
1460 if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
1461 return true;
1462 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1463 Left.Tok.getObjCKeywordID() == tok::objc_property)
1464 return true;
1465 if (Right.is(tok::hashhash))
1466 return Left.is(tok::hash);
1467 if (Left.isOneOf(tok::hashhash, tok::hash))
1468 return Right.is(tok::hash);
1469 if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1470 return Style.SpaceInEmptyParentheses;
1471 if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1472 return (Right.Type == TT_CastRParen ||
1473 (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
1474 ? Style.SpacesInCStyleCastParentheses
1475 : Style.SpacesInParentheses;
1476 if (Style.SpacesInAngles &&
1477 ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
1478 return true;
1479 if (Right.isOneOf(tok::semi, tok::comma))
1480 return false;
1481 if (Right.is(tok::less) &&
1482 (Left.is(tok::kw_template) ||
1483 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1484 return true;
1485 if (Left.is(tok::arrow) || Right.is(tok::arrow))
1486 return false;
1487 if (Left.isOneOf(tok::exclaim, tok::tilde))
1488 return false;
1489 if (Left.is(tok::at) &&
1490 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1491 tok::numeric_constant, tok::l_paren, tok::l_brace,
1492 tok::kw_true, tok::kw_false))
1493 return false;
1494 if (Left.is(tok::coloncolon))
1495 return false;
1496 if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
1497 return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
1498 !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
1499 tok::r_paren, tok::less);
1500 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1501 return false;
1502 if (Right.is(tok::ellipsis))
1503 return Left.Tok.isLiteral();
1504 if (Left.is(tok::l_square) && Right.is(tok::amp))
1505 return false;
1506 if (Right.Type == TT_PointerOrReference)
1507 return Left.Tok.isLiteral() ||
1508 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1509 Style.PointerAlignment != FormatStyle::PAS_Left);
1510 if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
1511 (Left.Type != TT_PointerOrReference || Style.PointerAlignment != FormatStyle::PAS_Right))
1512 return true;
1513 if (Left.Type == TT_PointerOrReference)
1514 return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
1515 ((Right.Type != TT_PointerOrReference) &&
1516 Right.isNot(tok::l_paren) && Style.PointerAlignment != FormatStyle::PAS_Right &&
1517 Left.Previous &&
1518 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1519 if (Right.is(tok::star) && Left.is(tok::l_paren))
1520 return false;
1521 if (Left.is(tok::l_square))
1522 return Left.Type == TT_ArrayInitializerLSquare &&
1523 Style.SpacesInContainerLiterals && Right.isNot(tok::r_square);
1524 if (Right.is(tok::r_square))
1525 return Right.MatchingParen && Style.SpacesInContainerLiterals &&
1526 Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
1527 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
1528 Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) &&
1529 Left.Type != TT_DictLiteral)
1530 return false;
1531 if (Left.is(tok::colon))
1532 return Left.Type != TT_ObjCMethodExpr;
1533 if (Left.Type == TT_BlockComment)
1534 return !Left.TokenText.endswith("=*/");
1535 if (Right.is(tok::l_paren)) {
1536 if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen)
1537 return true;
1538 return Line.Type == LT_ObjCDecl ||
1539 Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) ||
1540 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1541 (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
1542 tok::kw_switch, tok::kw_catch, tok::kw_case) ||
1543 Left.IsForEachMacro)) ||
1544 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1545 Left.isOneOf(tok::identifier, tok::kw___attribute) &&
1546 Line.Type != LT_PreprocessorDirective);
1547 }
1548 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1549 return false;
1550 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1551 return !Left.Children.empty(); // No spaces in "{}".
1552 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1553 (Right.is(tok::r_brace) && Right.MatchingParen &&
1554 Right.MatchingParen->BlockKind != BK_Block))
1555 return !Style.Cpp11BracedListStyle;
1556 if (Right.Type == TT_UnaryOperator)
1557 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1558 (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
1559 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
1560 tok::r_paren) ||
1561 Left.isSimpleTypeSpecifier()) &&
1562 Right.is(tok::l_brace) && Right.getNextNonComment() &&
1563 Right.BlockKind != BK_Block)
1564 return false;
1565 if (Left.is(tok::period) || Right.is(tok::period))
1566 return false;
1567 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1568 return false;
1569 return true;
1570 }
1571
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Tok)1572 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1573 const FormatToken &Tok) {
1574 if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
1575 return true; // Never ever merge two identifiers.
1576 if (Tok.Previous->Type == TT_ImplicitStringLiteral)
1577 return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
1578 if (Line.Type == LT_ObjCMethodDecl) {
1579 if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
1580 return true;
1581 if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
1582 // Don't space between ')' and <id>
1583 return false;
1584 }
1585 if (Line.Type == LT_ObjCProperty &&
1586 (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
1587 return false;
1588
1589 if (Tok.Type == TT_TrailingReturnArrow ||
1590 Tok.Previous->Type == TT_TrailingReturnArrow)
1591 return true;
1592 if (Tok.Previous->is(tok::comma))
1593 return true;
1594 if (Tok.is(tok::comma))
1595 return false;
1596 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1597 return true;
1598 if (Tok.Previous->Tok.is(tok::kw_operator))
1599 return Tok.is(tok::coloncolon);
1600 if (Tok.Type == TT_OverloadedOperatorLParen)
1601 return false;
1602 if (Tok.is(tok::colon))
1603 return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
1604 Tok.getNextNonComment() && Tok.Type != TT_ObjCMethodExpr &&
1605 !Tok.Previous->is(tok::question) &&
1606 (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
1607 if (Tok.Previous->Type == TT_UnaryOperator ||
1608 Tok.Previous->Type == TT_CastRParen)
1609 return Tok.Type == TT_BinaryOperator;
1610 if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
1611 return Tok.Type == TT_TemplateCloser &&
1612 Tok.Previous->Type == TT_TemplateCloser &&
1613 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
1614 }
1615 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1616 Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
1617 return false;
1618 if (!Style.SpaceBeforeAssignmentOperators &&
1619 Tok.getPrecedence() == prec::Assignment)
1620 return false;
1621 if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
1622 Tok.Previous->Type == TT_BinaryOperator ||
1623 Tok.Previous->Type == TT_ConditionalExpr)
1624 return true;
1625 if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1626 return false;
1627 if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
1628 Line.First->is(tok::hash))
1629 return true;
1630 if (Tok.Type == TT_TrailingUnaryOperator)
1631 return false;
1632 if (Tok.Previous->Type == TT_RegexLiteral)
1633 return false;
1634 return spaceRequiredBetween(Line, *Tok.Previous, Tok);
1635 }
1636
1637 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)1638 static bool isAllmanBrace(const FormatToken &Tok) {
1639 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
1640 Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral;
1641 }
1642
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right)1643 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
1644 const FormatToken &Right) {
1645 const FormatToken &Left = *Right.Previous;
1646 if (Right.NewlinesBefore > 1)
1647 return true;
1648 if (Right.is(tok::comment)) {
1649 return Right.Previous->BlockKind != BK_BracedInit &&
1650 Right.Previous->Type != TT_CtorInitializerColon &&
1651 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
1652 } else if (Right.Previous->isTrailingComment() ||
1653 (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
1654 return true;
1655 } else if (Right.Previous->IsUnterminatedLiteral) {
1656 return true;
1657 } else if (Right.is(tok::lessless) && Right.Next &&
1658 Right.Previous->is(tok::string_literal) &&
1659 Right.Next->is(tok::string_literal)) {
1660 return true;
1661 } else if (Right.Previous->ClosesTemplateDeclaration &&
1662 Right.Previous->MatchingParen &&
1663 Right.Previous->MatchingParen->NestingLevel == 0 &&
1664 Style.AlwaysBreakTemplateDeclarations) {
1665 return true;
1666 } else if ((Right.Type == TT_CtorInitializerComma ||
1667 Right.Type == TT_CtorInitializerColon) &&
1668 Style.BreakConstructorInitializersBeforeComma &&
1669 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
1670 return true;
1671 } else if (Right.is(tok::string_literal) &&
1672 Right.TokenText.startswith("R\"")) {
1673 // Raw string literals are special wrt. line breaks. The author has made a
1674 // deliberate choice and might have aligned the contents of the string
1675 // literal accordingly. Thus, we try keep existing line breaks.
1676 return Right.NewlinesBefore > 0;
1677 } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
1678 Style.Language == FormatStyle::LK_Proto) {
1679 // Don't enums onto single lines in protocol buffers.
1680 return true;
1681 } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
1682 return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1683 Style.BreakBeforeBraces == FormatStyle::BS_GNU;
1684 }
1685
1686 // If the last token before a '}' is a comma or a comment, the intention is to
1687 // insert a line break after it in order to make shuffling around entries
1688 // easier.
1689 const FormatToken *BeforeClosingBrace = nullptr;
1690 if (Left.is(tok::l_brace) && Left.MatchingParen)
1691 BeforeClosingBrace = Left.MatchingParen->Previous;
1692 else if (Right.is(tok::r_brace))
1693 BeforeClosingBrace = Right.Previous;
1694 if (BeforeClosingBrace &&
1695 BeforeClosingBrace->isOneOf(tok::comma, tok::comment))
1696 return true;
1697
1698 if (Style.Language == FormatStyle::LK_JavaScript) {
1699 // FIXME: This might apply to other languages and token kinds.
1700 if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
1701 Left.Previous->is(tok::char_constant))
1702 return true;
1703 }
1704
1705 return false;
1706 }
1707
canBreakBefore(const AnnotatedLine & Line,const FormatToken & Right)1708 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1709 const FormatToken &Right) {
1710 const FormatToken &Left = *Right.Previous;
1711 if (Left.is(tok::at))
1712 return false;
1713 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
1714 return false;
1715 if (Right.Type == TT_StartOfName ||
1716 Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator))
1717 return true;
1718 if (Right.isTrailingComment())
1719 // We rely on MustBreakBefore being set correctly here as we should not
1720 // change the "binding" behavior of a comment.
1721 // The first comment in a braced lists is always interpreted as belonging to
1722 // the first list element. Otherwise, it should be placed outside of the
1723 // list.
1724 return Left.BlockKind == BK_BracedInit;
1725 if (Left.is(tok::question) && Right.is(tok::colon))
1726 return false;
1727 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1728 return Style.BreakBeforeTernaryOperators;
1729 if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
1730 return !Style.BreakBeforeTernaryOperators;
1731 if (Right.Type == TT_InheritanceColon)
1732 return true;
1733 if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon &&
1734 Right.Type != TT_InlineASMColon))
1735 return false;
1736 if (Left.is(tok::colon) &&
1737 (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
1738 return true;
1739 if (Right.Type == TT_SelectorName)
1740 return true;
1741 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
1742 return true;
1743 if (Left.ClosesTemplateDeclaration)
1744 return true;
1745 if (Right.Type == TT_RangeBasedForLoopColon ||
1746 Right.Type == TT_OverloadedOperatorLParen ||
1747 Right.Type == TT_OverloadedOperator)
1748 return false;
1749 if (Left.Type == TT_RangeBasedForLoopColon)
1750 return true;
1751 if (Right.Type == TT_RangeBasedForLoopColon)
1752 return false;
1753 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1754 Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
1755 return false;
1756 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1757 return false;
1758 if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen)
1759 return false;
1760 if (Left.is(tok::l_paren) && Left.Previous &&
1761 (Left.Previous->Type == TT_BinaryOperator ||
1762 Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if)))
1763 return false;
1764 if (Right.Type == TT_ImplicitStringLiteral)
1765 return false;
1766
1767 if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
1768 return false;
1769
1770 // We only break before r_brace if there was a corresponding break before
1771 // the l_brace, which is tracked by BreakBeforeClosingBrace.
1772 if (Right.is(tok::r_brace))
1773 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
1774
1775 // Allow breaking after a trailing annotation, e.g. after a method
1776 // declaration.
1777 if (Left.Type == TT_TrailingAnnotation)
1778 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
1779 tok::less, tok::coloncolon);
1780
1781 if (Right.is(tok::kw___attribute))
1782 return true;
1783
1784 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1785 return true;
1786
1787 if (Right.is(tok::identifier) && Right.Next &&
1788 Right.Next->Type == TT_DictLiteral)
1789 return true;
1790
1791 if (Left.Type == TT_CtorInitializerComma &&
1792 Style.BreakConstructorInitializersBeforeComma)
1793 return false;
1794 if (Right.Type == TT_CtorInitializerComma &&
1795 Style.BreakConstructorInitializersBeforeComma)
1796 return true;
1797 if (Left.is(tok::greater) && Right.is(tok::greater) &&
1798 Left.Type != TT_TemplateCloser)
1799 return false;
1800 if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators)
1801 return true;
1802 if (Left.Type == TT_ArrayInitializerLSquare)
1803 return true;
1804 return (Left.isBinaryOperator() &&
1805 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
1806 !Style.BreakBeforeBinaryOperators) ||
1807 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1808 tok::kw_class, tok::kw_struct) ||
1809 Right.isMemberAccess() ||
1810 Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) ||
1811 (Left.is(tok::r_paren) &&
1812 Right.isOneOf(tok::identifier, tok::kw_const)) ||
1813 (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1814 }
1815
printDebugInfo(const AnnotatedLine & Line)1816 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1817 llvm::errs() << "AnnotatedTokens:\n";
1818 const FormatToken *Tok = Line.First;
1819 while (Tok) {
1820 llvm::errs() << " M=" << Tok->MustBreakBefore
1821 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1822 << " S=" << Tok->SpacesRequiredBefore
1823 << " B=" << Tok->BlockParameterCount
1824 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
1825 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
1826 << " FakeLParens=";
1827 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1828 llvm::errs() << Tok->FakeLParens[i] << "/";
1829 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1830 if (!Tok->Next)
1831 assert(Tok == Line.Last);
1832 Tok = Tok->Next;
1833 }
1834 llvm::errs() << "----\n";
1835 }
1836
1837 } // namespace format
1838 } // namespace clang
1839