• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/Support/Debug.h"
19 
20 #define DEBUG_TYPE "format-token-annotator"
21 
22 namespace clang {
23 namespace format {
24 
25 namespace {
26 
27 /// \brief A parser that gathers additional information about tokens.
28 ///
29 /// The \c TokenAnnotator tries to match parentheses and square brackets and
30 /// store parenthesis levels. It also tries to resolve matching "<" and ">"
31 /// into template parameter lists.
32 class AnnotatingParser {
33 public:
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,IdentifierInfo & Ident_in)34   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
35                    IdentifierInfo &Ident_in)
36       : Style(Style), Line(Line), CurrentToken(Line.First),
37         KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
38     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
39     resetTokenMetadata(CurrentToken);
40   }
41 
42 private:
parseAngle()43   bool parseAngle() {
44     if (!CurrentToken)
45       return false;
46     ScopedContextCreator ContextCreator(*this, tok::less, 10);
47     FormatToken *Left = CurrentToken->Previous;
48     Contexts.back().IsExpression = false;
49     // If there's a template keyword before the opening angle bracket, this is a
50     // template parameter, not an argument.
51     Contexts.back().InTemplateArgument =
52         Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
53 
54     while (CurrentToken) {
55       if (CurrentToken->is(tok::greater)) {
56         Left->MatchingParen = CurrentToken;
57         CurrentToken->MatchingParen = Left;
58         CurrentToken->Type = TT_TemplateCloser;
59         next();
60         return true;
61       }
62       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
63                                 tok::question, tok::colon))
64         return false;
65       // If a && or || is found and interpreted as a binary operator, this set
66       // of angles is likely part of something like "a < b && c > d". If the
67       // angles are inside an expression, the ||/&& might also be a binary
68       // operator that was misinterpreted because we are parsing template
69       // parameters.
70       // FIXME: This is getting out of hand, write a decent parser.
71       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
72           ((CurrentToken->Previous->Type == TT_BinaryOperator &&
73             // Toplevel bool expressions do not make lots of sense;
74             // If we're on the top level, it contains only the base context and
75             // the context for the current opening angle bracket.
76             Contexts.size() > 2) ||
77            Contexts[Contexts.size() - 2].IsExpression) &&
78           Line.First->isNot(tok::kw_template))
79         return false;
80       updateParameterCount(Left, CurrentToken);
81       if (!consumeToken())
82         return false;
83     }
84     return false;
85   }
86 
parseParens(bool LookForDecls=false)87   bool parseParens(bool LookForDecls = false) {
88     if (!CurrentToken)
89       return false;
90     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
91 
92     // FIXME: This is a bit of a hack. Do better.
93     Contexts.back().ColonIsForRangeExpr =
94         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
95 
96     bool StartsObjCMethodExpr = false;
97     FormatToken *Left = CurrentToken->Previous;
98     if (CurrentToken->is(tok::caret)) {
99       // (^ can start a block type.
100       Left->Type = TT_ObjCBlockLParen;
101     } else if (FormatToken *MaybeSel = Left->Previous) {
102       // @selector( starts a selector.
103       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
104           MaybeSel->Previous->is(tok::at)) {
105         StartsObjCMethodExpr = true;
106       }
107     }
108 
109     if (Left->Previous &&
110         (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if,
111                                  tok::kw_while, tok::l_paren, tok::comma) ||
112          Left->Previous->Type == TT_BinaryOperator)) {
113       // static_assert, if and while usually contain expressions.
114       Contexts.back().IsExpression = true;
115     } else if (Line.InPPDirective &&
116                (!Left->Previous ||
117                 (Left->Previous->isNot(tok::identifier) &&
118                  Left->Previous->Type != TT_OverloadedOperator))) {
119       Contexts.back().IsExpression = true;
120     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
121                Left->Previous->MatchingParen &&
122                Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
123       // This is a parameter list of a lambda expression.
124       Contexts.back().IsExpression = false;
125     } else if (Contexts[Contexts.size() - 2].CaretFound) {
126       // This is the parameter list of an ObjC block.
127       Contexts.back().IsExpression = false;
128     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
129       Left->Type = TT_AttributeParen;
130     } else if (Left->Previous && Left->Previous->IsForEachMacro) {
131       // The first argument to a foreach macro is a declaration.
132       Contexts.back().IsForEachMacro = true;
133       Contexts.back().IsExpression = false;
134     }
135 
136     if (StartsObjCMethodExpr) {
137       Contexts.back().ColonIsObjCMethodExpr = true;
138       Left->Type = TT_ObjCMethodExpr;
139     }
140 
141     bool MightBeFunctionType = CurrentToken->is(tok::star);
142     bool HasMultipleLines = false;
143     bool HasMultipleParametersOnALine = false;
144     while (CurrentToken) {
145       // LookForDecls is set when "if (" has been seen. Check for
146       // 'identifier' '*' 'identifier' followed by not '=' -- this
147       // '*' has to be a binary operator but determineStarAmpUsage() will
148       // categorize it as an unary operator, so set the right type here.
149       if (LookForDecls && CurrentToken->Next) {
150         FormatToken *Prev = CurrentToken->getPreviousNonComment();
151         if (Prev) {
152           FormatToken *PrevPrev = Prev->getPreviousNonComment();
153           FormatToken *Next = CurrentToken->Next;
154           if (PrevPrev && PrevPrev->is(tok::identifier) &&
155               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
156               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
157             Prev->Type = TT_BinaryOperator;
158             LookForDecls = false;
159           }
160         }
161       }
162 
163       if (CurrentToken->Previous->Type == TT_PointerOrReference &&
164           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
165                                                     tok::coloncolon))
166         MightBeFunctionType = true;
167       if (CurrentToken->Previous->Type == TT_BinaryOperator)
168         Contexts.back().IsExpression = true;
169       if (CurrentToken->is(tok::r_paren)) {
170         if (MightBeFunctionType && CurrentToken->Next &&
171             (CurrentToken->Next->is(tok::l_paren) ||
172              (CurrentToken->Next->is(tok::l_square) &&
173               !Contexts.back().IsExpression)))
174           Left->Type = TT_FunctionTypeLParen;
175         Left->MatchingParen = CurrentToken;
176         CurrentToken->MatchingParen = Left;
177 
178         if (StartsObjCMethodExpr) {
179           CurrentToken->Type = TT_ObjCMethodExpr;
180           if (Contexts.back().FirstObjCSelectorName) {
181             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
182                 Contexts.back().LongestObjCSelectorName;
183           }
184         }
185 
186         if (Left->Type == TT_AttributeParen)
187           CurrentToken->Type = TT_AttributeParen;
188 
189         if (!HasMultipleLines)
190           Left->PackingKind = PPK_Inconclusive;
191         else if (HasMultipleParametersOnALine)
192           Left->PackingKind = PPK_BinPacked;
193         else
194           Left->PackingKind = PPK_OnePerLine;
195 
196         next();
197         return true;
198       }
199       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
200         return false;
201       else if (CurrentToken->is(tok::l_brace))
202         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
203       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
204           !CurrentToken->Next->HasUnescapedNewline &&
205           !CurrentToken->Next->isTrailingComment())
206         HasMultipleParametersOnALine = true;
207       if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
208           CurrentToken->isSimpleTypeSpecifier())
209         Contexts.back().IsExpression = false;
210       FormatToken *Tok = CurrentToken;
211       if (!consumeToken())
212         return false;
213       updateParameterCount(Left, Tok);
214       if (CurrentToken && CurrentToken->HasUnescapedNewline)
215         HasMultipleLines = true;
216     }
217     return false;
218   }
219 
parseSquare()220   bool parseSquare() {
221     if (!CurrentToken)
222       return false;
223 
224     // A '[' could be an index subscript (after an identifier or after
225     // ')' or ']'), it could be the start of an Objective-C method
226     // expression, or it could the the start of an Objective-C array literal.
227     FormatToken *Left = CurrentToken->Previous;
228     FormatToken *Parent = Left->getPreviousNonComment();
229     bool StartsObjCMethodExpr =
230         Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
231         CurrentToken->isNot(tok::l_brace) &&
232         (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
233                                     tok::kw_return, tok::kw_throw) ||
234          Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
235          Parent->Type == TT_CastRParen ||
236          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
237     ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
238     Contexts.back().IsExpression = true;
239     bool ColonFound = false;
240 
241     if (StartsObjCMethodExpr) {
242       Contexts.back().ColonIsObjCMethodExpr = true;
243       Left->Type = TT_ObjCMethodExpr;
244     } else if (Parent && Parent->is(tok::at)) {
245       Left->Type = TT_ArrayInitializerLSquare;
246     } else if (Left->Type == TT_Unknown) {
247       Left->Type = TT_ArraySubscriptLSquare;
248     }
249 
250     while (CurrentToken) {
251       if (CurrentToken->is(tok::r_square)) {
252         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
253             Left->Type == TT_ObjCMethodExpr) {
254           // An ObjC method call is rarely followed by an open parenthesis.
255           // FIXME: Do we incorrectly label ":" with this?
256           StartsObjCMethodExpr = false;
257           Left->Type = TT_Unknown;
258         }
259         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
260           CurrentToken->Type = TT_ObjCMethodExpr;
261           // determineStarAmpUsage() thinks that '*' '[' is allocating an
262           // array of pointers, but if '[' starts a selector then '*' is a
263           // binary operator.
264           if (Parent && Parent->Type == TT_PointerOrReference)
265             Parent->Type = TT_BinaryOperator;
266         }
267         Left->MatchingParen = CurrentToken;
268         CurrentToken->MatchingParen = Left;
269         if (Contexts.back().FirstObjCSelectorName) {
270           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
271               Contexts.back().LongestObjCSelectorName;
272           if (Left->BlockParameterCount > 1)
273             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
274         }
275         next();
276         return true;
277       }
278       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
279         return false;
280       if (CurrentToken->is(tok::colon))
281         ColonFound = true;
282       if (CurrentToken->is(tok::comma) &&
283           Style.Language != FormatStyle::LK_Proto &&
284           (Left->Type == TT_ArraySubscriptLSquare ||
285            (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
286         Left->Type = TT_ArrayInitializerLSquare;
287       FormatToken* Tok = CurrentToken;
288       if (!consumeToken())
289         return false;
290       updateParameterCount(Left, Tok);
291     }
292     return false;
293   }
294 
parseBrace()295   bool parseBrace() {
296     if (CurrentToken) {
297       FormatToken *Left = CurrentToken->Previous;
298 
299       if (Contexts.back().CaretFound)
300         Left->Type = TT_ObjCBlockLBrace;
301       Contexts.back().CaretFound = false;
302 
303       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
304       Contexts.back().ColonIsDictLiteral = true;
305       if (Left->BlockKind == BK_BracedInit)
306         Contexts.back().IsExpression = true;
307 
308       while (CurrentToken) {
309         if (CurrentToken->is(tok::r_brace)) {
310           Left->MatchingParen = CurrentToken;
311           CurrentToken->MatchingParen = Left;
312           next();
313           return true;
314         }
315         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
316           return false;
317         updateParameterCount(Left, CurrentToken);
318         if (CurrentToken->is(tok::colon) &&
319             Style.Language != FormatStyle::LK_Proto) {
320           if (CurrentToken->getPreviousNonComment()->is(tok::identifier))
321             CurrentToken->getPreviousNonComment()->Type = TT_SelectorName;
322           Left->Type = TT_DictLiteral;
323         }
324         if (!consumeToken())
325           return false;
326       }
327     }
328     return true;
329   }
330 
updateParameterCount(FormatToken * Left,FormatToken * Current)331   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
332     if (Current->Type == TT_LambdaLSquare ||
333         (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) ||
334         (Style.Language == FormatStyle::LK_JavaScript &&
335          Current->TokenText == "function")) {
336       ++Left->BlockParameterCount;
337     }
338     if (Current->is(tok::comma)) {
339       ++Left->ParameterCount;
340       if (!Left->Role)
341         Left->Role.reset(new CommaSeparatedList(Style));
342       Left->Role->CommaFound(Current);
343     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
344       Left->ParameterCount = 1;
345     }
346   }
347 
parseConditional()348   bool parseConditional() {
349     while (CurrentToken) {
350       if (CurrentToken->is(tok::colon)) {
351         CurrentToken->Type = TT_ConditionalExpr;
352         next();
353         return true;
354       }
355       if (!consumeToken())
356         return false;
357     }
358     return false;
359   }
360 
parseTemplateDeclaration()361   bool parseTemplateDeclaration() {
362     if (CurrentToken && CurrentToken->is(tok::less)) {
363       CurrentToken->Type = TT_TemplateOpener;
364       next();
365       if (!parseAngle())
366         return false;
367       if (CurrentToken)
368         CurrentToken->Previous->ClosesTemplateDeclaration = true;
369       return true;
370     }
371     return false;
372   }
373 
consumeToken()374   bool consumeToken() {
375     FormatToken *Tok = CurrentToken;
376     next();
377     switch (Tok->Tok.getKind()) {
378     case tok::plus:
379     case tok::minus:
380       if (!Tok->Previous && Line.MustBeDeclaration)
381         Tok->Type = TT_ObjCMethodSpecifier;
382       break;
383     case tok::colon:
384       if (!Tok->Previous)
385         return false;
386       // Colons from ?: are handled in parseConditional().
387       if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 &&
388           Line.First->isNot(tok::kw_case)) {
389         Tok->Type = TT_CtorInitializerColon;
390       } else if (Contexts.back().ColonIsDictLiteral) {
391         Tok->Type = TT_DictLiteral;
392       } else if (Contexts.back().ColonIsObjCMethodExpr ||
393                  Line.First->Type == TT_ObjCMethodSpecifier) {
394         Tok->Type = TT_ObjCMethodExpr;
395         Tok->Previous->Type = TT_SelectorName;
396         if (Tok->Previous->ColumnWidth >
397             Contexts.back().LongestObjCSelectorName) {
398           Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
399         }
400         if (!Contexts.back().FirstObjCSelectorName)
401           Contexts.back().FirstObjCSelectorName = Tok->Previous;
402       } else if (Contexts.back().ColonIsForRangeExpr) {
403         Tok->Type = TT_RangeBasedForLoopColon;
404       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
405         Tok->Type = TT_BitFieldColon;
406       } else if (Contexts.size() == 1 &&
407                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
408         Tok->Type = TT_InheritanceColon;
409       } else if (Contexts.back().ContextKind == tok::l_paren) {
410         Tok->Type = TT_InlineASMColon;
411       }
412       break;
413     case tok::kw_if:
414     case tok::kw_while:
415       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
416         next();
417         if (!parseParens(/*LookForDecls=*/true))
418           return false;
419       }
420       break;
421     case tok::kw_for:
422       Contexts.back().ColonIsForRangeExpr = true;
423       next();
424       if (!parseParens())
425         return false;
426       break;
427     case tok::l_paren:
428       if (!parseParens())
429         return false;
430       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
431           !Contexts.back().IsExpression &&
432           Line.First->Type != TT_ObjCProperty &&
433           (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype)))
434         Line.MightBeFunctionDecl = true;
435       break;
436     case tok::l_square:
437       if (!parseSquare())
438         return false;
439       break;
440     case tok::l_brace:
441       if (!parseBrace())
442         return false;
443       break;
444     case tok::less:
445       if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
446         Tok->Type = TT_TemplateOpener;
447       else {
448         Tok->Type = TT_BinaryOperator;
449         CurrentToken = Tok;
450         next();
451       }
452       break;
453     case tok::r_paren:
454     case tok::r_square:
455       return false;
456     case tok::r_brace:
457       // Lines can start with '}'.
458       if (Tok->Previous)
459         return false;
460       break;
461     case tok::greater:
462       Tok->Type = TT_BinaryOperator;
463       break;
464     case tok::kw_operator:
465       while (CurrentToken &&
466              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
467         if (CurrentToken->isOneOf(tok::star, tok::amp))
468           CurrentToken->Type = TT_PointerOrReference;
469         consumeToken();
470         if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
471           CurrentToken->Previous->Type = TT_OverloadedOperator;
472       }
473       if (CurrentToken) {
474         CurrentToken->Type = TT_OverloadedOperatorLParen;
475         if (CurrentToken->Previous->Type == TT_BinaryOperator)
476           CurrentToken->Previous->Type = TT_OverloadedOperator;
477       }
478       break;
479     case tok::question:
480       parseConditional();
481       break;
482     case tok::kw_template:
483       parseTemplateDeclaration();
484       break;
485     case tok::identifier:
486       if (Line.First->is(tok::kw_for) &&
487           Tok->Tok.getIdentifierInfo() == &Ident_in)
488         Tok->Type = TT_ObjCForIn;
489       break;
490     case tok::comma:
491       if (Contexts.back().FirstStartOfName)
492         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
493       if (Contexts.back().InCtorInitializer)
494         Tok->Type = TT_CtorInitializerComma;
495       if (Contexts.back().IsForEachMacro)
496         Contexts.back().IsExpression = true;
497       break;
498     default:
499       break;
500     }
501     return true;
502   }
503 
parseIncludeDirective()504   void parseIncludeDirective() {
505     next();
506     if (CurrentToken && CurrentToken->is(tok::less)) {
507       next();
508       while (CurrentToken) {
509         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
510           CurrentToken->Type = TT_ImplicitStringLiteral;
511         next();
512       }
513     } else {
514       while (CurrentToken) {
515         if (CurrentToken->is(tok::string_literal))
516           // Mark these string literals as "implicit" literals, too, so that
517           // they are not split or line-wrapped.
518           CurrentToken->Type = TT_ImplicitStringLiteral;
519         next();
520       }
521     }
522   }
523 
parseWarningOrError()524   void parseWarningOrError() {
525     next();
526     // We still want to format the whitespace left of the first token of the
527     // warning or error.
528     next();
529     while (CurrentToken) {
530       CurrentToken->Type = TT_ImplicitStringLiteral;
531       next();
532     }
533   }
534 
parsePragma()535   void parsePragma() {
536     next(); // Consume "pragma".
537     if (CurrentToken && CurrentToken->TokenText == "mark") {
538       next(); // Consume "mark".
539       next(); // Consume first token (so we fix leading whitespace).
540       while (CurrentToken) {
541         CurrentToken->Type = TT_ImplicitStringLiteral;
542         next();
543       }
544     }
545   }
546 
parsePreprocessorDirective()547   void parsePreprocessorDirective() {
548     next();
549     if (!CurrentToken)
550       return;
551     if (CurrentToken->Tok.is(tok::numeric_constant)) {
552       CurrentToken->SpacesRequiredBefore = 1;
553       return;
554     }
555     // Hashes in the middle of a line can lead to any strange token
556     // sequence.
557     if (!CurrentToken->Tok.getIdentifierInfo())
558       return;
559     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
560     case tok::pp_include:
561     case tok::pp_import:
562       parseIncludeDirective();
563       break;
564     case tok::pp_error:
565     case tok::pp_warning:
566       parseWarningOrError();
567       break;
568     case tok::pp_pragma:
569       parsePragma();
570       break;
571     case tok::pp_if:
572     case tok::pp_elif:
573       Contexts.back().IsExpression = true;
574       parseLine();
575       break;
576     default:
577       break;
578     }
579     while (CurrentToken)
580       next();
581   }
582 
583 public:
parseLine()584   LineType parseLine() {
585     if (CurrentToken->is(tok::hash)) {
586       parsePreprocessorDirective();
587       return LT_PreprocessorDirective;
588     }
589 
590     // Directly allow to 'import <string-literal>' to support protocol buffer
591     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
592     // should not break the line).
593     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
594     if (Info && Info->getPPKeywordID() == tok::pp_import &&
595         CurrentToken->Next && CurrentToken->Next->is(tok::string_literal))
596       parseIncludeDirective();
597 
598     while (CurrentToken) {
599       if (CurrentToken->is(tok::kw_virtual))
600         KeywordVirtualFound = true;
601       if (!consumeToken())
602         return LT_Invalid;
603     }
604     if (KeywordVirtualFound)
605       return LT_VirtualFunctionDecl;
606 
607     if (Line.First->Type == TT_ObjCMethodSpecifier) {
608       if (Contexts.back().FirstObjCSelectorName)
609         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
610             Contexts.back().LongestObjCSelectorName;
611       return LT_ObjCMethodDecl;
612     }
613 
614     return LT_Other;
615   }
616 
617 private:
resetTokenMetadata(FormatToken * Token)618   void resetTokenMetadata(FormatToken *Token) {
619     if (!Token)
620       return;
621 
622     // Reset token type in case we have already looked at it and then
623     // recovered from an error (e.g. failure to find the matching >).
624     if (CurrentToken->Type != TT_LambdaLSquare &&
625         CurrentToken->Type != TT_FunctionLBrace &&
626         CurrentToken->Type != TT_ImplicitStringLiteral &&
627         CurrentToken->Type != TT_RegexLiteral &&
628         CurrentToken->Type != TT_TrailingReturnArrow)
629       CurrentToken->Type = TT_Unknown;
630     if (CurrentToken->Role)
631       CurrentToken->Role.reset(nullptr);
632     CurrentToken->FakeLParens.clear();
633     CurrentToken->FakeRParens = 0;
634   }
635 
next()636   void next() {
637     if (CurrentToken) {
638       determineTokenType(*CurrentToken);
639       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
640       CurrentToken->NestingLevel = Contexts.size() - 1;
641       CurrentToken = CurrentToken->Next;
642     }
643 
644     resetTokenMetadata(CurrentToken);
645   }
646 
647   /// \brief A struct to hold information valid in a specific context, e.g.
648   /// a pair of parenthesis.
649   struct Context {
Contextclang::format::__anonef4f45af0111::AnnotatingParser::Context650     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
651             bool IsExpression)
652         : ContextKind(ContextKind), BindingStrength(BindingStrength),
653           LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
654           ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false),
655           FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr),
656           IsExpression(IsExpression), CanBeExpression(true),
657           InTemplateArgument(false), InCtorInitializer(false),
658           CaretFound(false), IsForEachMacro(false) {}
659 
660     tok::TokenKind ContextKind;
661     unsigned BindingStrength;
662     unsigned LongestObjCSelectorName;
663     bool ColonIsForRangeExpr;
664     bool ColonIsDictLiteral;
665     bool ColonIsObjCMethodExpr;
666     FormatToken *FirstObjCSelectorName;
667     FormatToken *FirstStartOfName;
668     bool IsExpression;
669     bool CanBeExpression;
670     bool InTemplateArgument;
671     bool InCtorInitializer;
672     bool CaretFound;
673     bool IsForEachMacro;
674   };
675 
676   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
677   /// of each instance.
678   struct ScopedContextCreator {
679     AnnotatingParser &P;
680 
ScopedContextCreatorclang::format::__anonef4f45af0111::AnnotatingParser::ScopedContextCreator681     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
682                          unsigned Increase)
683         : P(P) {
684       P.Contexts.push_back(Context(ContextKind,
685                                    P.Contexts.back().BindingStrength + Increase,
686                                    P.Contexts.back().IsExpression));
687     }
688 
~ScopedContextCreatorclang::format::__anonef4f45af0111::AnnotatingParser::ScopedContextCreator689     ~ScopedContextCreator() { P.Contexts.pop_back(); }
690   };
691 
determineTokenType(FormatToken & Current)692   void determineTokenType(FormatToken &Current) {
693     if (Current.getPrecedence() == prec::Assignment &&
694         !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
695         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
696       Contexts.back().IsExpression = true;
697       for (FormatToken *Previous = Current.Previous;
698            Previous && !Previous->isOneOf(tok::comma, tok::semi);
699            Previous = Previous->Previous) {
700         if (Previous->isOneOf(tok::r_square, tok::r_paren))
701           Previous = Previous->MatchingParen;
702         if (Previous->Type == TT_BinaryOperator &&
703             Previous->isOneOf(tok::star, tok::amp)) {
704           Previous->Type = TT_PointerOrReference;
705         }
706       }
707     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
708       Contexts.back().IsExpression = true;
709     } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
710                !Line.InPPDirective &&
711                (!Current.Previous ||
712                 Current.Previous->isNot(tok::kw_decltype))) {
713       bool ParametersOfFunctionType =
714           Current.Previous && Current.Previous->is(tok::r_paren) &&
715           Current.Previous->MatchingParen &&
716           Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen;
717       bool IsForOrCatch = Current.Previous &&
718                           Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
719       Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
720     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
721       for (FormatToken *Previous = Current.Previous;
722            Previous && Previous->isOneOf(tok::star, tok::amp);
723            Previous = Previous->Previous)
724         Previous->Type = TT_PointerOrReference;
725     } else if (Current.Previous &&
726                Current.Previous->Type == TT_CtorInitializerColon) {
727       Contexts.back().IsExpression = true;
728       Contexts.back().InCtorInitializer = true;
729     } else if (Current.is(tok::kw_new)) {
730       Contexts.back().CanBeExpression = false;
731     } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
732       // This should be the condition or increment in a for-loop.
733       Contexts.back().IsExpression = true;
734     }
735 
736     if (Current.Type == TT_Unknown) {
737       // Line.MightBeFunctionDecl can only be true after the parentheses of a
738       // function declaration have been found. In this case, 'Current' is a
739       // trailing token of this declaration and thus cannot be a name.
740       if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
741         Contexts.back().FirstStartOfName = &Current;
742         Current.Type = TT_StartOfName;
743       } else if (Current.is(tok::kw_auto)) {
744         AutoFound = true;
745       } else if (Current.is(tok::arrow) && AutoFound &&
746                  Line.MustBeDeclaration) {
747         Current.Type = TT_TrailingReturnArrow;
748       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
749         Current.Type =
750             determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
751                                                Contexts.back().IsExpression,
752                                   Contexts.back().InTemplateArgument);
753       } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
754         Current.Type = determinePlusMinusCaretUsage(Current);
755         if (Current.Type == TT_UnaryOperator && Current.is(tok::caret))
756           Contexts.back().CaretFound = true;
757       } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
758         Current.Type = determineIncrementUsage(Current);
759       } else if (Current.is(tok::exclaim)) {
760         Current.Type = TT_UnaryOperator;
761       } else if (Current.is(tok::question)) {
762         Current.Type = TT_ConditionalExpr;
763       } else if (Current.isBinaryOperator() &&
764                  (!Current.Previous ||
765                   Current.Previous->isNot(tok::l_square))) {
766         Current.Type = TT_BinaryOperator;
767       } else if (Current.is(tok::comment)) {
768         if (Current.TokenText.startswith("//"))
769           Current.Type = TT_LineComment;
770         else
771           Current.Type = TT_BlockComment;
772       } else if (Current.is(tok::r_paren)) {
773         if (rParenEndsCast(Current))
774           Current.Type = TT_CastRParen;
775       } else if (Current.is(tok::at) && Current.Next) {
776         switch (Current.Next->Tok.getObjCKeywordID()) {
777         case tok::objc_interface:
778         case tok::objc_implementation:
779         case tok::objc_protocol:
780           Current.Type = TT_ObjCDecl;
781           break;
782         case tok::objc_property:
783           Current.Type = TT_ObjCProperty;
784           break;
785         default:
786           break;
787         }
788       } else if (Current.is(tok::period)) {
789         FormatToken *PreviousNoComment = Current.getPreviousNonComment();
790         if (PreviousNoComment &&
791             PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
792           Current.Type = TT_DesignatedInitializerPeriod;
793       } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
794                  Current.Previous && Current.Previous->isNot(tok::equal) &&
795                  Line.MightBeFunctionDecl && Contexts.size() == 1) {
796         // Line.MightBeFunctionDecl can only be true after the parentheses of a
797         // function declaration have been found.
798         Current.Type = TT_TrailingAnnotation;
799       }
800     }
801   }
802 
803   /// \brief Take a guess at whether \p Tok starts a name of a function or
804   /// variable declaration.
805   ///
806   /// This is a heuristic based on whether \p Tok is an identifier following
807   /// something that is likely a type.
isStartOfName(const FormatToken & Tok)808   bool isStartOfName(const FormatToken &Tok) {
809     if (Tok.isNot(tok::identifier) || !Tok.Previous)
810       return false;
811 
812     // Skip "const" as it does not have an influence on whether this is a name.
813     FormatToken *PreviousNotConst = Tok.Previous;
814     while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
815       PreviousNotConst = PreviousNotConst->Previous;
816 
817     if (!PreviousNotConst)
818       return false;
819 
820     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
821                        PreviousNotConst->Previous &&
822                        PreviousNotConst->Previous->is(tok::hash);
823 
824     if (PreviousNotConst->Type == TT_TemplateCloser)
825       return PreviousNotConst && PreviousNotConst->MatchingParen &&
826              PreviousNotConst->MatchingParen->Previous &&
827              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
828 
829     if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
830         PreviousNotConst->MatchingParen->Previous &&
831         PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
832       return true;
833 
834     return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
835            PreviousNotConst->Type == TT_PointerOrReference ||
836            PreviousNotConst->isSimpleTypeSpecifier();
837   }
838 
839   /// \brief Determine whether ')' is ending a cast.
rParenEndsCast(const FormatToken & Tok)840   bool rParenEndsCast(const FormatToken &Tok) {
841     FormatToken *LeftOfParens = nullptr;
842     if (Tok.MatchingParen)
843       LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
844     bool IsCast = false;
845     bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen;
846     bool ParensAreType = !Tok.Previous ||
847                          Tok.Previous->Type == TT_PointerOrReference ||
848                          Tok.Previous->Type == TT_TemplateCloser ||
849                          Tok.Previous->isSimpleTypeSpecifier();
850     bool ParensCouldEndDecl =
851         Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
852     bool IsSizeOfOrAlignOf =
853         LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
854     if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
855         ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) ||
856          (Tok.Next && Tok.Next->isBinaryOperator())))
857       IsCast = true;
858     else if (Tok.Next && Tok.Next->isNot(tok::string_literal) &&
859              (Tok.Next->Tok.isLiteral() ||
860               Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
861       IsCast = true;
862     // If there is an identifier after the (), it is likely a cast, unless
863     // there is also an identifier before the ().
864     else if (LeftOfParens &&
865              (LeftOfParens->Tok.getIdentifierInfo() == nullptr ||
866               LeftOfParens->is(tok::kw_return)) &&
867              LeftOfParens->Type != TT_OverloadedOperator &&
868              LeftOfParens->isNot(tok::at) &&
869              LeftOfParens->Type != TT_TemplateCloser && Tok.Next) {
870       if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) {
871         IsCast = true;
872       } else {
873         // Use heuristics to recognize c style casting.
874         FormatToken *Prev = Tok.Previous;
875         if (Prev && Prev->isOneOf(tok::amp, tok::star))
876           Prev = Prev->Previous;
877 
878         if (Prev && Tok.Next && Tok.Next->Next) {
879           bool NextIsUnary = Tok.Next->isUnaryOperator() ||
880                              Tok.Next->isOneOf(tok::amp, tok::star);
881           IsCast = NextIsUnary && Tok.Next->Next->isOneOf(
882                                       tok::identifier, tok::numeric_constant);
883         }
884 
885         for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) {
886           if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) {
887             IsCast = false;
888             break;
889           }
890         }
891       }
892     }
893     return IsCast && !ParensAreEmpty;
894   }
895 
896   /// \brief Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)897   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
898                                   bool InTemplateArgument) {
899     const FormatToken *PrevToken = Tok.getPreviousNonComment();
900     if (!PrevToken)
901       return TT_UnaryOperator;
902 
903     const FormatToken *NextToken = Tok.getNextNonComment();
904     if (!NextToken || NextToken->is(tok::l_brace))
905       return TT_Unknown;
906 
907     if (PrevToken->is(tok::coloncolon) ||
908         (PrevToken->is(tok::l_paren) && !IsExpression))
909       return TT_PointerOrReference;
910 
911     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
912                            tok::comma, tok::semi, tok::kw_return, tok::colon,
913                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
914         PrevToken->Type == TT_BinaryOperator ||
915         PrevToken->Type == TT_ConditionalExpr ||
916         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
917       return TT_UnaryOperator;
918 
919     if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare)
920       return TT_PointerOrReference;
921     if (NextToken->is(tok::kw_operator))
922       return TT_PointerOrReference;
923 
924     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
925         PrevToken->MatchingParen->Previous &&
926         PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
927                                                     tok::kw_decltype))
928       return TT_PointerOrReference;
929 
930     if (PrevToken->Tok.isLiteral() ||
931         PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
932                            tok::kw_false) ||
933         NextToken->Tok.isLiteral() ||
934         NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
935         NextToken->isUnaryOperator() ||
936         // If we know we're in a template argument, there are no named
937         // declarations. Thus, having an identifier on the right-hand side
938         // indicates a binary operator.
939         (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
940       return TT_BinaryOperator;
941 
942     // This catches some cases where evaluation order is used as control flow:
943     //   aaa && aaa->f();
944     const FormatToken *NextNextToken = NextToken->getNextNonComment();
945     if (NextNextToken && NextNextToken->is(tok::arrow))
946       return TT_BinaryOperator;
947 
948     // It is very unlikely that we are going to find a pointer or reference type
949     // definition on the RHS of an assignment.
950     if (IsExpression)
951       return TT_BinaryOperator;
952 
953     return TT_PointerOrReference;
954   }
955 
determinePlusMinusCaretUsage(const FormatToken & Tok)956   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
957     const FormatToken *PrevToken = Tok.getPreviousNonComment();
958     if (!PrevToken || PrevToken->Type == TT_CastRParen)
959       return TT_UnaryOperator;
960 
961     // Use heuristics to recognize unary operators.
962     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
963                            tok::question, tok::colon, tok::kw_return,
964                            tok::kw_case, tok::at, tok::l_brace))
965       return TT_UnaryOperator;
966 
967     // There can't be two consecutive binary operators.
968     if (PrevToken->Type == TT_BinaryOperator)
969       return TT_UnaryOperator;
970 
971     // Fall back to marking the token as binary operator.
972     return TT_BinaryOperator;
973   }
974 
975   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)976   TokenType determineIncrementUsage(const FormatToken &Tok) {
977     const FormatToken *PrevToken = Tok.getPreviousNonComment();
978     if (!PrevToken || PrevToken->Type == TT_CastRParen)
979       return TT_UnaryOperator;
980     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
981       return TT_TrailingUnaryOperator;
982 
983     return TT_UnaryOperator;
984   }
985 
986   SmallVector<Context, 8> Contexts;
987 
988   const FormatStyle &Style;
989   AnnotatedLine &Line;
990   FormatToken *CurrentToken;
991   bool KeywordVirtualFound;
992   bool AutoFound;
993   IdentifierInfo &Ident_in;
994 };
995 
996 static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
997 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
998 
999 /// \brief Parses binary expressions by inserting fake parenthesis based on
1000 /// operator precedence.
1001 class ExpressionParser {
1002 public:
ExpressionParser(AnnotatedLine & Line)1003   ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
1004     // Skip leading "}", e.g. in "} else if (...) {".
1005     if (Current->is(tok::r_brace))
1006       next();
1007   }
1008 
1009   /// \brief Parse expressions with the given operatore precedence.
parse(int Precedence=0)1010   void parse(int Precedence = 0) {
1011     // Skip 'return' and ObjC selector colons as they are not part of a binary
1012     // expression.
1013     while (Current &&
1014            (Current->is(tok::kw_return) ||
1015             (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr ||
1016                                          Current->Type == TT_DictLiteral))))
1017       next();
1018 
1019     if (!Current || Precedence > PrecedenceArrowAndPeriod)
1020       return;
1021 
1022     // Conditional expressions need to be parsed separately for proper nesting.
1023     if (Precedence == prec::Conditional) {
1024       parseConditionalExpr();
1025       return;
1026     }
1027 
1028     // Parse unary operators, which all have a higher precedence than binary
1029     // operators.
1030     if (Precedence == PrecedenceUnaryOperator) {
1031       parseUnaryOperator();
1032       return;
1033     }
1034 
1035     FormatToken *Start = Current;
1036     FormatToken *LatestOperator = nullptr;
1037     unsigned OperatorIndex = 0;
1038 
1039     while (Current) {
1040       // Consume operators with higher precedence.
1041       parse(Precedence + 1);
1042 
1043       int CurrentPrecedence = getCurrentPrecedence();
1044 
1045       if (Current && Current->Type == TT_SelectorName &&
1046           Precedence == CurrentPrecedence) {
1047         if (LatestOperator)
1048           addFakeParenthesis(Start, prec::Level(Precedence));
1049         Start = Current;
1050       }
1051 
1052       // At the end of the line or when an operator with higher precedence is
1053       // found, insert fake parenthesis and return.
1054       if (!Current || Current->closesScope() ||
1055           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
1056         if (LatestOperator) {
1057           LatestOperator->LastOperator = true;
1058           if (Precedence == PrecedenceArrowAndPeriod) {
1059             // Call expressions don't have a binary operator precedence.
1060             addFakeParenthesis(Start, prec::Unknown);
1061           } else {
1062             addFakeParenthesis(Start, prec::Level(Precedence));
1063           }
1064         }
1065         return;
1066       }
1067 
1068       // Consume scopes: (), [], <> and {}
1069       if (Current->opensScope()) {
1070         while (Current && !Current->closesScope()) {
1071           next();
1072           parse();
1073         }
1074         next();
1075       } else {
1076         // Operator found.
1077         if (CurrentPrecedence == Precedence) {
1078           LatestOperator = Current;
1079           Current->OperatorIndex = OperatorIndex;
1080           ++OperatorIndex;
1081         }
1082 
1083         next();
1084       }
1085     }
1086   }
1087 
1088 private:
1089   /// \brief Gets the precedence (+1) of the given token for binary operators
1090   /// and other tokens that we treat like binary operators.
getCurrentPrecedence()1091   int getCurrentPrecedence() {
1092     if (Current) {
1093       if (Current->Type == TT_ConditionalExpr)
1094         return prec::Conditional;
1095       else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
1096                Current->Type == TT_SelectorName)
1097         return 0;
1098       else if (Current->Type == TT_RangeBasedForLoopColon)
1099         return prec::Comma;
1100       else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
1101         return Current->getPrecedence();
1102       else if (Current->isOneOf(tok::period, tok::arrow))
1103         return PrecedenceArrowAndPeriod;
1104     }
1105     return -1;
1106   }
1107 
addFakeParenthesis(FormatToken * Start,prec::Level Precedence)1108   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1109     Start->FakeLParens.push_back(Precedence);
1110     if (Precedence > prec::Unknown)
1111       Start->StartsBinaryExpression = true;
1112     if (Current) {
1113       ++Current->Previous->FakeRParens;
1114       if (Precedence > prec::Unknown)
1115         Current->Previous->EndsBinaryExpression = true;
1116     }
1117   }
1118 
1119   /// \brief Parse unary operator expressions and surround them with fake
1120   /// parentheses if appropriate.
parseUnaryOperator()1121   void parseUnaryOperator() {
1122     if (!Current || Current->Type != TT_UnaryOperator) {
1123       parse(PrecedenceArrowAndPeriod);
1124       return;
1125     }
1126 
1127     FormatToken *Start = Current;
1128     next();
1129     parseUnaryOperator();
1130 
1131     // The actual precedence doesn't matter.
1132     addFakeParenthesis(Start, prec::Unknown);
1133   }
1134 
parseConditionalExpr()1135   void parseConditionalExpr() {
1136     FormatToken *Start = Current;
1137     parse(prec::LogicalOr);
1138     if (!Current || !Current->is(tok::question))
1139       return;
1140     next();
1141     parse(prec::LogicalOr);
1142     if (!Current || Current->Type != TT_ConditionalExpr)
1143       return;
1144     next();
1145     parseConditionalExpr();
1146     addFakeParenthesis(Start, prec::Conditional);
1147   }
1148 
next()1149   void next() {
1150     if (Current)
1151       Current = Current->Next;
1152     while (Current && Current->isTrailingComment())
1153       Current = Current->Next;
1154   }
1155 
1156   FormatToken *Current;
1157 };
1158 
1159 } // end anonymous namespace
1160 
1161 void
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines)1162 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
1163   const AnnotatedLine *NextNonCommentLine = nullptr;
1164   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1165                                                           E = Lines.rend();
1166        I != E; ++I) {
1167     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1168         (*I)->First->Next == nullptr)
1169       (*I)->Level = NextNonCommentLine->Level;
1170     else
1171       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1172 
1173     setCommentLineLevels((*I)->Children);
1174   }
1175 }
1176 
annotate(AnnotatedLine & Line)1177 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1178   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1179                                                   E = Line.Children.end();
1180        I != E; ++I) {
1181     annotate(**I);
1182   }
1183   AnnotatingParser Parser(Style, Line, Ident_in);
1184   Line.Type = Parser.parseLine();
1185   if (Line.Type == LT_Invalid)
1186     return;
1187 
1188   ExpressionParser ExprParser(Line);
1189   ExprParser.parse();
1190 
1191   if (Line.First->Type == TT_ObjCMethodSpecifier)
1192     Line.Type = LT_ObjCMethodDecl;
1193   else if (Line.First->Type == TT_ObjCDecl)
1194     Line.Type = LT_ObjCDecl;
1195   else if (Line.First->Type == TT_ObjCProperty)
1196     Line.Type = LT_ObjCProperty;
1197 
1198   Line.First->SpacesRequiredBefore = 1;
1199   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1200 }
1201 
1202 // This function heuristically determines whether 'Current' starts the name of a
1203 // function declaration.
isFunctionDeclarationName(const FormatToken & Current)1204 static bool isFunctionDeclarationName(const FormatToken &Current) {
1205   if (Current.Type != TT_StartOfName ||
1206       Current.NestingLevel != 0 ||
1207       Current.Previous->Type == TT_StartOfName)
1208     return false;
1209   const FormatToken *Next = Current.Next;
1210   for (; Next; Next = Next->Next) {
1211     if (Next->Type == TT_TemplateOpener) {
1212       Next = Next->MatchingParen;
1213     } else if (Next->is(tok::coloncolon)) {
1214       Next = Next->Next;
1215       if (!Next || !Next->is(tok::identifier))
1216         return false;
1217     } else if (Next->is(tok::l_paren)) {
1218       break;
1219     } else {
1220       return false;
1221     }
1222   }
1223   if (!Next)
1224     return false;
1225   assert(Next->is(tok::l_paren));
1226   if (Next->Next == Next->MatchingParen)
1227     return true;
1228   for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen;
1229        Tok = Tok->Next) {
1230     if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1231         Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName)
1232       return true;
1233     if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral())
1234       return false;
1235   }
1236   return false;
1237 }
1238 
calculateFormattingInformation(AnnotatedLine & Line)1239 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1240   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1241                                                   E = Line.Children.end();
1242        I != E; ++I) {
1243     calculateFormattingInformation(**I);
1244   }
1245 
1246   Line.First->TotalLength =
1247       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1248   if (!Line.First->Next)
1249     return;
1250   FormatToken *Current = Line.First->Next;
1251   bool InFunctionDecl = Line.MightBeFunctionDecl;
1252   while (Current) {
1253     if (isFunctionDeclarationName(*Current))
1254       Current->Type = TT_FunctionDeclarationName;
1255     if (Current->Type == TT_LineComment) {
1256       if (Current->Previous->BlockKind == BK_BracedInit &&
1257           Current->Previous->opensScope())
1258         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1259       else
1260         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1261 
1262       // If we find a trailing comment, iterate backwards to determine whether
1263       // it seems to relate to a specific parameter. If so, break before that
1264       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1265       // to the previous line in:
1266       //   SomeFunction(a,
1267       //                b, // comment
1268       //                c);
1269       if (!Current->HasUnescapedNewline) {
1270         for (FormatToken *Parameter = Current->Previous; Parameter;
1271              Parameter = Parameter->Previous) {
1272           if (Parameter->isOneOf(tok::comment, tok::r_brace))
1273             break;
1274           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1275             if (Parameter->Previous->Type != TT_CtorInitializerComma &&
1276                 Parameter->HasUnescapedNewline)
1277               Parameter->MustBreakBefore = true;
1278             break;
1279           }
1280         }
1281       }
1282     } else if (Current->SpacesRequiredBefore == 0 &&
1283                spaceRequiredBefore(Line, *Current)) {
1284       Current->SpacesRequiredBefore = 1;
1285     }
1286 
1287     Current->MustBreakBefore =
1288         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1289 
1290     Current->CanBreakBefore =
1291         Current->MustBreakBefore || canBreakBefore(Line, *Current);
1292     unsigned ChildSize = 0;
1293     if (Current->Previous->Children.size() == 1) {
1294       FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1295       ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1296                                                   : LastOfChild.TotalLength + 1;
1297     }
1298     if (Current->MustBreakBefore || Current->Previous->Children.size() > 1 ||
1299         Current->IsMultiline)
1300       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
1301     else
1302       Current->TotalLength = Current->Previous->TotalLength +
1303                              Current->ColumnWidth + ChildSize +
1304                              Current->SpacesRequiredBefore;
1305 
1306     if (Current->Type == TT_CtorInitializerColon)
1307       InFunctionDecl = false;
1308 
1309     // FIXME: Only calculate this if CanBreakBefore is true once static
1310     // initializers etc. are sorted out.
1311     // FIXME: Move magic numbers to a better place.
1312     Current->SplitPenalty = 20 * Current->BindingStrength +
1313                             splitPenalty(Line, *Current, InFunctionDecl);
1314 
1315     Current = Current->Next;
1316   }
1317 
1318   calculateUnbreakableTailLengths(Line);
1319   for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1320     if (Current->Role)
1321       Current->Role->precomputeFormattingInfos(Current);
1322   }
1323 
1324   DEBUG({ printDebugInfo(Line); });
1325 }
1326 
calculateUnbreakableTailLengths(AnnotatedLine & Line)1327 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1328   unsigned UnbreakableTailLength = 0;
1329   FormatToken *Current = Line.Last;
1330   while (Current) {
1331     Current->UnbreakableTailLength = UnbreakableTailLength;
1332     if (Current->CanBreakBefore ||
1333         Current->isOneOf(tok::comment, tok::string_literal)) {
1334       UnbreakableTailLength = 0;
1335     } else {
1336       UnbreakableTailLength +=
1337           Current->ColumnWidth + Current->SpacesRequiredBefore;
1338     }
1339     Current = Current->Previous;
1340   }
1341 }
1342 
splitPenalty(const AnnotatedLine & Line,const FormatToken & Tok,bool InFunctionDecl)1343 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1344                                       const FormatToken &Tok,
1345                                       bool InFunctionDecl) {
1346   const FormatToken &Left = *Tok.Previous;
1347   const FormatToken &Right = Tok;
1348 
1349   if (Left.is(tok::semi))
1350     return 0;
1351   if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
1352                               Right.Next->Type == TT_DictLiteral))
1353     return 1;
1354   if (Right.is(tok::l_square)) {
1355     if (Style.Language == FormatStyle::LK_Proto)
1356       return 1;
1357     if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare)
1358       return 500;
1359   }
1360   if (Right.Type == TT_StartOfName ||
1361       Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) {
1362     if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1363       return 3;
1364     if (Left.Type == TT_StartOfName)
1365       return 20;
1366     if (InFunctionDecl && Right.NestingLevel == 0)
1367       return Style.PenaltyReturnTypeOnItsOwnLine;
1368     return 200;
1369   }
1370   if (Left.is(tok::equal) && Right.is(tok::l_brace))
1371     return 150;
1372   if (Left.Type == TT_CastRParen)
1373     return 100;
1374   if (Left.is(tok::coloncolon) ||
1375       (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1376     return 500;
1377   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1378     return 5000;
1379 
1380   if (Left.Type == TT_RangeBasedForLoopColon ||
1381       Left.Type == TT_InheritanceColon)
1382     return 2;
1383 
1384   if (Right.isMemberAccess()) {
1385     if (Left.is(tok::r_paren) && Left.MatchingParen &&
1386         Left.MatchingParen->ParameterCount > 0)
1387       return 20; // Should be smaller than breaking at a nested comma.
1388     return 150;
1389   }
1390 
1391   if (Right.Type == TT_TrailingAnnotation &&
1392       (!Right.Next || Right.Next->isNot(tok::l_paren))) {
1393     // Generally, breaking before a trailing annotation is bad unless it is
1394     // function-like. It seems to be especially preferable to keep standard
1395     // annotations (i.e. "const", "final" and "override") on the same line.
1396     // Use a slightly higher penalty after ")" so that annotations like
1397     // "const override" are kept together.
1398     bool is_short_annotation = Right.TokenText.size() < 10;
1399     return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
1400   }
1401 
1402   // In for-loops, prefer breaking at ',' and ';'.
1403   if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
1404     return 4;
1405 
1406   // In Objective-C method expressions, prefer breaking before "param:" over
1407   // breaking after it.
1408   if (Right.Type == TT_SelectorName)
1409     return 0;
1410   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1411     return Line.MightBeFunctionDecl ? 50 : 500;
1412 
1413   if (Left.is(tok::l_paren) && InFunctionDecl)
1414     return 100;
1415   if (Left.is(tok::equal) && InFunctionDecl)
1416     return 110;
1417   if (Left.opensScope())
1418     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1419                                    : 19;
1420 
1421   if (Right.is(tok::lessless)) {
1422     if (Left.is(tok::string_literal)) {
1423       StringRef Content = Left.TokenText;
1424       if (Content.startswith("\""))
1425         Content = Content.drop_front(1);
1426       if (Content.endswith("\""))
1427         Content = Content.drop_back(1);
1428       Content = Content.trim();
1429       if (Content.size() > 1 &&
1430           (Content.back() == ':' || Content.back() == '='))
1431         return 25;
1432     }
1433     return 1; // Breaking at a << is really cheap.
1434   }
1435   if (Left.Type == TT_ConditionalExpr)
1436     return prec::Conditional;
1437   prec::Level Level = Left.getPrecedence();
1438 
1439   if (Level != prec::Unknown)
1440     return Level;
1441 
1442   return 3;
1443 }
1444 
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right)1445 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1446                                           const FormatToken &Left,
1447                                           const FormatToken &Right) {
1448   if (Style.Language == FormatStyle::LK_Proto) {
1449     if (Right.is(tok::period) &&
1450         (Left.TokenText == "optional" || Left.TokenText == "required" ||
1451          Left.TokenText == "repeated"))
1452       return true;
1453     if (Right.is(tok::l_paren) &&
1454         (Left.TokenText == "returns" || Left.TokenText == "option"))
1455       return true;
1456   } else if (Style.Language == FormatStyle::LK_JavaScript) {
1457     if (Left.TokenText == "var")
1458       return true;
1459   }
1460   if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
1461     return true;
1462   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1463       Left.Tok.getObjCKeywordID() == tok::objc_property)
1464     return true;
1465   if (Right.is(tok::hashhash))
1466     return Left.is(tok::hash);
1467   if (Left.isOneOf(tok::hashhash, tok::hash))
1468     return Right.is(tok::hash);
1469   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1470     return Style.SpaceInEmptyParentheses;
1471   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1472     return (Right.Type == TT_CastRParen ||
1473             (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
1474                ? Style.SpacesInCStyleCastParentheses
1475                : Style.SpacesInParentheses;
1476   if (Style.SpacesInAngles &&
1477       ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
1478     return true;
1479   if (Right.isOneOf(tok::semi, tok::comma))
1480     return false;
1481   if (Right.is(tok::less) &&
1482       (Left.is(tok::kw_template) ||
1483        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1484     return true;
1485   if (Left.is(tok::arrow) || Right.is(tok::arrow))
1486     return false;
1487   if (Left.isOneOf(tok::exclaim, tok::tilde))
1488     return false;
1489   if (Left.is(tok::at) &&
1490       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1491                     tok::numeric_constant, tok::l_paren, tok::l_brace,
1492                     tok::kw_true, tok::kw_false))
1493     return false;
1494   if (Left.is(tok::coloncolon))
1495     return false;
1496   if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
1497     return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
1498            !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
1499                          tok::r_paren, tok::less);
1500   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1501     return false;
1502   if (Right.is(tok::ellipsis))
1503     return Left.Tok.isLiteral();
1504   if (Left.is(tok::l_square) && Right.is(tok::amp))
1505     return false;
1506   if (Right.Type == TT_PointerOrReference)
1507     return Left.Tok.isLiteral() ||
1508            ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1509             Style.PointerAlignment != FormatStyle::PAS_Left);
1510   if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
1511       (Left.Type != TT_PointerOrReference || Style.PointerAlignment != FormatStyle::PAS_Right))
1512     return true;
1513   if (Left.Type == TT_PointerOrReference)
1514     return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
1515            ((Right.Type != TT_PointerOrReference) &&
1516             Right.isNot(tok::l_paren) && Style.PointerAlignment != FormatStyle::PAS_Right &&
1517             Left.Previous &&
1518             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1519   if (Right.is(tok::star) && Left.is(tok::l_paren))
1520     return false;
1521   if (Left.is(tok::l_square))
1522     return Left.Type == TT_ArrayInitializerLSquare &&
1523            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square);
1524   if (Right.is(tok::r_square))
1525     return Right.MatchingParen && Style.SpacesInContainerLiterals &&
1526            Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
1527   if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
1528       Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) &&
1529       Left.Type != TT_DictLiteral)
1530     return false;
1531   if (Left.is(tok::colon))
1532     return Left.Type != TT_ObjCMethodExpr;
1533   if (Left.Type == TT_BlockComment)
1534     return !Left.TokenText.endswith("=*/");
1535   if (Right.is(tok::l_paren)) {
1536     if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen)
1537       return true;
1538     return Line.Type == LT_ObjCDecl ||
1539            Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) ||
1540            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1541             (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
1542                           tok::kw_switch, tok::kw_catch, tok::kw_case) ||
1543              Left.IsForEachMacro)) ||
1544            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1545             Left.isOneOf(tok::identifier, tok::kw___attribute) &&
1546             Line.Type != LT_PreprocessorDirective);
1547   }
1548   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1549     return false;
1550   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1551     return !Left.Children.empty(); // No spaces in "{}".
1552   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1553       (Right.is(tok::r_brace) && Right.MatchingParen &&
1554        Right.MatchingParen->BlockKind != BK_Block))
1555     return !Style.Cpp11BracedListStyle;
1556   if (Right.Type == TT_UnaryOperator)
1557     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1558            (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
1559   if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
1560                     tok::r_paren) ||
1561        Left.isSimpleTypeSpecifier()) &&
1562       Right.is(tok::l_brace) && Right.getNextNonComment() &&
1563       Right.BlockKind != BK_Block)
1564     return false;
1565   if (Left.is(tok::period) || Right.is(tok::period))
1566     return false;
1567   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1568     return false;
1569   return true;
1570 }
1571 
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Tok)1572 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1573                                          const FormatToken &Tok) {
1574   if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
1575     return true; // Never ever merge two identifiers.
1576   if (Tok.Previous->Type == TT_ImplicitStringLiteral)
1577     return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
1578   if (Line.Type == LT_ObjCMethodDecl) {
1579     if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
1580       return true;
1581     if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
1582       // Don't space between ')' and <id>
1583       return false;
1584   }
1585   if (Line.Type == LT_ObjCProperty &&
1586       (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
1587     return false;
1588 
1589   if (Tok.Type == TT_TrailingReturnArrow ||
1590       Tok.Previous->Type == TT_TrailingReturnArrow)
1591     return true;
1592   if (Tok.Previous->is(tok::comma))
1593     return true;
1594   if (Tok.is(tok::comma))
1595     return false;
1596   if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1597     return true;
1598   if (Tok.Previous->Tok.is(tok::kw_operator))
1599     return Tok.is(tok::coloncolon);
1600   if (Tok.Type == TT_OverloadedOperatorLParen)
1601     return false;
1602   if (Tok.is(tok::colon))
1603     return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
1604            Tok.getNextNonComment() && Tok.Type != TT_ObjCMethodExpr &&
1605            !Tok.Previous->is(tok::question) &&
1606            (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
1607   if (Tok.Previous->Type == TT_UnaryOperator ||
1608       Tok.Previous->Type == TT_CastRParen)
1609     return Tok.Type == TT_BinaryOperator;
1610   if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
1611     return Tok.Type == TT_TemplateCloser &&
1612            Tok.Previous->Type == TT_TemplateCloser &&
1613            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
1614   }
1615   if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1616       Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
1617     return false;
1618   if (!Style.SpaceBeforeAssignmentOperators &&
1619       Tok.getPrecedence() == prec::Assignment)
1620     return false;
1621   if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
1622       Tok.Previous->Type == TT_BinaryOperator ||
1623       Tok.Previous->Type == TT_ConditionalExpr)
1624     return true;
1625   if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1626     return false;
1627   if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
1628       Line.First->is(tok::hash))
1629     return true;
1630   if (Tok.Type == TT_TrailingUnaryOperator)
1631     return false;
1632   if (Tok.Previous->Type == TT_RegexLiteral)
1633     return false;
1634   return spaceRequiredBetween(Line, *Tok.Previous, Tok);
1635 }
1636 
1637 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)1638 static bool isAllmanBrace(const FormatToken &Tok) {
1639   return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
1640          Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral;
1641 }
1642 
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right)1643 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
1644                                      const FormatToken &Right) {
1645   const FormatToken &Left = *Right.Previous;
1646   if (Right.NewlinesBefore > 1)
1647     return true;
1648   if (Right.is(tok::comment)) {
1649     return Right.Previous->BlockKind != BK_BracedInit &&
1650            Right.Previous->Type != TT_CtorInitializerColon &&
1651            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
1652   } else if (Right.Previous->isTrailingComment() ||
1653              (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
1654     return true;
1655   } else if (Right.Previous->IsUnterminatedLiteral) {
1656     return true;
1657   } else if (Right.is(tok::lessless) && Right.Next &&
1658              Right.Previous->is(tok::string_literal) &&
1659              Right.Next->is(tok::string_literal)) {
1660     return true;
1661   } else if (Right.Previous->ClosesTemplateDeclaration &&
1662              Right.Previous->MatchingParen &&
1663              Right.Previous->MatchingParen->NestingLevel == 0 &&
1664              Style.AlwaysBreakTemplateDeclarations) {
1665     return true;
1666   } else if ((Right.Type == TT_CtorInitializerComma ||
1667               Right.Type == TT_CtorInitializerColon) &&
1668              Style.BreakConstructorInitializersBeforeComma &&
1669              !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
1670     return true;
1671   } else if (Right.is(tok::string_literal) &&
1672              Right.TokenText.startswith("R\"")) {
1673     // Raw string literals are special wrt. line breaks. The author has made a
1674     // deliberate choice and might have aligned the contents of the string
1675     // literal accordingly. Thus, we try keep existing line breaks.
1676     return Right.NewlinesBefore > 0;
1677   } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
1678              Style.Language == FormatStyle::LK_Proto) {
1679     // Don't enums onto single lines in protocol buffers.
1680     return true;
1681   } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
1682     return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1683            Style.BreakBeforeBraces == FormatStyle::BS_GNU;
1684   }
1685 
1686   // If the last token before a '}' is a comma or a comment, the intention is to
1687   // insert a line break after it in order to make shuffling around entries
1688   // easier.
1689   const FormatToken *BeforeClosingBrace = nullptr;
1690   if (Left.is(tok::l_brace) && Left.MatchingParen)
1691     BeforeClosingBrace = Left.MatchingParen->Previous;
1692   else if (Right.is(tok::r_brace))
1693     BeforeClosingBrace = Right.Previous;
1694   if (BeforeClosingBrace &&
1695       BeforeClosingBrace->isOneOf(tok::comma, tok::comment))
1696     return true;
1697 
1698   if (Style.Language == FormatStyle::LK_JavaScript) {
1699     // FIXME: This might apply to other languages and token kinds.
1700     if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
1701         Left.Previous->is(tok::char_constant))
1702       return true;
1703   }
1704 
1705   return false;
1706 }
1707 
canBreakBefore(const AnnotatedLine & Line,const FormatToken & Right)1708 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1709                                     const FormatToken &Right) {
1710   const FormatToken &Left = *Right.Previous;
1711   if (Left.is(tok::at))
1712     return false;
1713   if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
1714     return false;
1715   if (Right.Type == TT_StartOfName ||
1716       Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator))
1717     return true;
1718   if (Right.isTrailingComment())
1719     // We rely on MustBreakBefore being set correctly here as we should not
1720     // change the "binding" behavior of a comment.
1721     // The first comment in a braced lists is always interpreted as belonging to
1722     // the first list element. Otherwise, it should be placed outside of the
1723     // list.
1724     return Left.BlockKind == BK_BracedInit;
1725   if (Left.is(tok::question) && Right.is(tok::colon))
1726     return false;
1727   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1728     return Style.BreakBeforeTernaryOperators;
1729   if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
1730     return !Style.BreakBeforeTernaryOperators;
1731   if (Right.Type == TT_InheritanceColon)
1732     return true;
1733   if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon &&
1734                                Right.Type != TT_InlineASMColon))
1735     return false;
1736   if (Left.is(tok::colon) &&
1737       (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
1738     return true;
1739   if (Right.Type == TT_SelectorName)
1740     return true;
1741   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
1742     return true;
1743   if (Left.ClosesTemplateDeclaration)
1744     return true;
1745   if (Right.Type == TT_RangeBasedForLoopColon ||
1746       Right.Type == TT_OverloadedOperatorLParen ||
1747       Right.Type == TT_OverloadedOperator)
1748     return false;
1749   if (Left.Type == TT_RangeBasedForLoopColon)
1750     return true;
1751   if (Right.Type == TT_RangeBasedForLoopColon)
1752     return false;
1753   if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1754       Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
1755     return false;
1756   if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1757     return false;
1758   if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen)
1759     return false;
1760   if (Left.is(tok::l_paren) && Left.Previous &&
1761       (Left.Previous->Type == TT_BinaryOperator ||
1762        Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if)))
1763     return false;
1764   if (Right.Type == TT_ImplicitStringLiteral)
1765     return false;
1766 
1767   if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
1768     return false;
1769 
1770   // We only break before r_brace if there was a corresponding break before
1771   // the l_brace, which is tracked by BreakBeforeClosingBrace.
1772   if (Right.is(tok::r_brace))
1773     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
1774 
1775   // Allow breaking after a trailing annotation, e.g. after a method
1776   // declaration.
1777   if (Left.Type == TT_TrailingAnnotation)
1778     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
1779                           tok::less, tok::coloncolon);
1780 
1781   if (Right.is(tok::kw___attribute))
1782     return true;
1783 
1784   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1785     return true;
1786 
1787   if (Right.is(tok::identifier) && Right.Next &&
1788       Right.Next->Type == TT_DictLiteral)
1789     return true;
1790 
1791   if (Left.Type == TT_CtorInitializerComma &&
1792       Style.BreakConstructorInitializersBeforeComma)
1793     return false;
1794   if (Right.Type == TT_CtorInitializerComma &&
1795       Style.BreakConstructorInitializersBeforeComma)
1796     return true;
1797   if (Left.is(tok::greater) && Right.is(tok::greater) &&
1798       Left.Type != TT_TemplateCloser)
1799     return false;
1800   if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators)
1801     return true;
1802   if (Left.Type == TT_ArrayInitializerLSquare)
1803     return true;
1804   return (Left.isBinaryOperator() &&
1805           !Left.isOneOf(tok::arrowstar, tok::lessless) &&
1806           !Style.BreakBeforeBinaryOperators) ||
1807          Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1808                       tok::kw_class, tok::kw_struct) ||
1809          Right.isMemberAccess() ||
1810          Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) ||
1811          (Left.is(tok::r_paren) &&
1812           Right.isOneOf(tok::identifier, tok::kw_const)) ||
1813          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1814 }
1815 
printDebugInfo(const AnnotatedLine & Line)1816 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1817   llvm::errs() << "AnnotatedTokens:\n";
1818   const FormatToken *Tok = Line.First;
1819   while (Tok) {
1820     llvm::errs() << " M=" << Tok->MustBreakBefore
1821                  << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1822                  << " S=" << Tok->SpacesRequiredBefore
1823                  << " B=" << Tok->BlockParameterCount
1824                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
1825                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
1826                  << " FakeLParens=";
1827     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1828       llvm::errs() << Tok->FakeLParens[i] << "/";
1829     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1830     if (!Tok->Next)
1831       assert(Tok == Line.Last);
1832     Tok = Tok->Next;
1833   }
1834   llvm::errs() << "----\n";
1835 }
1836 
1837 } // namespace format
1838 } // namespace clang
1839