• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "TokenAnnotator.h"
19 #include "UnwrappedLineParser.h"
20 #include "clang/Basic/Diagnostic.h"
21 #include "clang/Basic/OperatorPrecedence.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Frontend/TextDiagnosticPrinter.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Debug.h"
28 #include <queue>
29 #include <string>
30 
31 namespace clang {
32 namespace format {
33 
getLLVMStyle()34 FormatStyle getLLVMStyle() {
35   FormatStyle LLVMStyle;
36   LLVMStyle.ColumnLimit = 80;
37   LLVMStyle.MaxEmptyLinesToKeep = 1;
38   LLVMStyle.PointerBindsToType = false;
39   LLVMStyle.DerivePointerBinding = false;
40   LLVMStyle.AccessModifierOffset = -2;
41   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
42   LLVMStyle.IndentCaseLabels = false;
43   LLVMStyle.SpacesBeforeTrailingComments = 1;
44   LLVMStyle.BinPackParameters = true;
45   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
46   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
47   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
48   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
49   LLVMStyle.PenaltyExcessCharacter = 1000000;
50   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5;
51   return LLVMStyle;
52 }
53 
getGoogleStyle()54 FormatStyle getGoogleStyle() {
55   FormatStyle GoogleStyle;
56   GoogleStyle.ColumnLimit = 80;
57   GoogleStyle.MaxEmptyLinesToKeep = 1;
58   GoogleStyle.PointerBindsToType = true;
59   GoogleStyle.DerivePointerBinding = true;
60   GoogleStyle.AccessModifierOffset = -1;
61   GoogleStyle.Standard = FormatStyle::LS_Auto;
62   GoogleStyle.IndentCaseLabels = true;
63   GoogleStyle.SpacesBeforeTrailingComments = 2;
64   GoogleStyle.BinPackParameters = true;
65   GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
66   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
67   GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
68   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
69   GoogleStyle.PenaltyExcessCharacter = 1000000;
70   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100;
71   return GoogleStyle;
72 }
73 
getChromiumStyle()74 FormatStyle getChromiumStyle() {
75   FormatStyle ChromiumStyle = getGoogleStyle();
76   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
77   ChromiumStyle.BinPackParameters = false;
78   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
79   ChromiumStyle.DerivePointerBinding = false;
80   return ChromiumStyle;
81 }
82 
isTrailingComment(const AnnotatedToken & Tok)83 static bool isTrailingComment(const AnnotatedToken &Tok) {
84   return Tok.is(tok::comment) &&
85          (Tok.Children.empty() || Tok.Children[0].MustBreakBefore);
86 }
87 
88 // Returns the length of everything up to the first possible line break after
89 // the ), ], } or > matching \c Tok.
getLengthToMatchingParen(const AnnotatedToken & Tok)90 static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
91   if (Tok.MatchingParen == NULL)
92     return 0;
93   AnnotatedToken *End = Tok.MatchingParen;
94   while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
95     End = &End->Children[0];
96   }
97   return End->TotalLength - Tok.TotalLength + 1;
98 }
99 
100 /// \brief Manages the whitespaces around tokens and their replacements.
101 ///
102 /// This includes special handling for certain constructs, e.g. the alignment of
103 /// trailing line comments.
104 class WhitespaceManager {
105 public:
WhitespaceManager(SourceManager & SourceMgr)106   WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
107 
108   /// \brief Replaces the whitespace in front of \p Tok. Only call once for
109   /// each \c AnnotatedToken.
replaceWhitespace(const AnnotatedToken & Tok,unsigned NewLines,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)110   void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
111                          unsigned Spaces, unsigned WhitespaceStartColumn,
112                          const FormatStyle &Style) {
113     // 2+ newlines mean an empty line separating logic scopes.
114     if (NewLines >= 2)
115       alignComments();
116 
117     // Align line comments if they are trailing or if they continue other
118     // trailing comments.
119     if (isTrailingComment(Tok)) {
120       // Remove the comment's trailing whitespace.
121       if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
122         Replaces.insert(tooling::Replacement(
123             SourceMgr, Tok.FormatTok.Tok.getLocation().getLocWithOffset(
124                            Tok.FormatTok.TokenLength),
125             Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
126 
127       // Align comment with other comments.
128       if (Tok.Parent != NULL || !Comments.empty()) {
129         if (Style.ColumnLimit >=
130             Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) {
131           StoredComment Comment;
132           Comment.Tok = Tok.FormatTok;
133           Comment.Spaces = Spaces;
134           Comment.NewLines = NewLines;
135           Comment.MinColumn =
136               NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
137           Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
138           Comments.push_back(Comment);
139           return;
140         }
141       }
142     }
143 
144     // If this line does not have a trailing comment, align the stored comments.
145     if (Tok.Children.empty() && !isTrailingComment(Tok))
146       alignComments();
147 
148     if (Tok.Type == TT_BlockComment)
149       indentBlockComment(Tok.FormatTok, Spaces);
150 
151     storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
152   }
153 
154   /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
155   /// backslashes to escape newlines inside a preprocessor directive.
156   ///
157   /// This function and \c replaceWhitespace have the same behavior if
158   /// \c Newlines == 0.
replacePPWhitespace(const AnnotatedToken & Tok,unsigned NewLines,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)159   void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
160                            unsigned Spaces, unsigned WhitespaceStartColumn,
161                            const FormatStyle &Style) {
162     storeReplacement(
163         Tok.FormatTok,
164         getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style));
165   }
166 
167   /// \brief Inserts a line break into the middle of a token.
168   ///
169   /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
170   /// break and \p Postfix before the rest of the token starts in the next line.
171   ///
172   /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
173   /// used to generate the correct line break.
breakToken(const AnnotatedToken & Tok,unsigned Offset,StringRef Prefix,StringRef Postfix,bool InPPDirective,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)174   void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix,
175                   StringRef Postfix, bool InPPDirective, unsigned Spaces,
176                   unsigned WhitespaceStartColumn, const FormatStyle &Style) {
177     std::string NewLineText;
178     if (!InPPDirective)
179       NewLineText = getNewLineText(1, Spaces);
180     else
181       NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
182     std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
183     SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
184         .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
185     Replaces.insert(
186         tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
187   }
188 
189   /// \brief Returns all the \c Replacements created during formatting.
generateReplacements()190   const tooling::Replacements &generateReplacements() {
191     alignComments();
192     return Replaces;
193   }
194 
195 private:
indentBlockComment(const FormatToken & Tok,int Indent)196   void indentBlockComment(const FormatToken &Tok, int Indent) {
197     SourceLocation TokenLoc = Tok.Tok.getLocation();
198     int IndentDelta = Indent - SourceMgr.getSpellingColumnNumber(TokenLoc) + 1;
199     const char *Start = SourceMgr.getCharacterData(TokenLoc);
200     const char *Current = Start;
201     const char *TokEnd = Current + Tok.TokenLength;
202     llvm::SmallVector<SourceLocation, 16> LineStarts;
203     while (Current < TokEnd) {
204       if (*Current == '\n') {
205         ++Current;
206         LineStarts.push_back(TokenLoc.getLocWithOffset(Current - Start));
207         // If we need to outdent the line, check that it's indented enough.
208         for (int i = 0; i < -IndentDelta; ++i, ++Current)
209           if (Current >= TokEnd || *Current != ' ')
210             return;
211       } else {
212         ++Current;
213       }
214     }
215 
216     for (size_t i = 0; i < LineStarts.size(); ++i) {
217       if (IndentDelta > 0)
218         Replaces.insert(tooling::Replacement(SourceMgr, LineStarts[i], 0,
219                                              std::string(IndentDelta, ' ')));
220       else if (IndentDelta < 0)
221         Replaces.insert(
222             tooling::Replacement(SourceMgr, LineStarts[i], -IndentDelta, ""));
223     }
224   }
225 
getNewLineText(unsigned NewLines,unsigned Spaces)226   std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
227     return std::string(NewLines, '\n') + std::string(Spaces, ' ');
228   }
229 
230   std::string
getNewLineText(unsigned NewLines,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)231   getNewLineText(unsigned NewLines, unsigned Spaces,
232                  unsigned WhitespaceStartColumn, const FormatStyle &Style) {
233     std::string NewLineText;
234     if (NewLines > 0) {
235       unsigned Offset =
236           std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
237       for (unsigned i = 0; i < NewLines; ++i) {
238         NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
239         NewLineText += "\\\n";
240         Offset = 0;
241       }
242     }
243     return NewLineText + std::string(Spaces, ' ');
244   }
245 
246   /// \brief Structure to store a comment for later layout and alignment.
247   struct StoredComment {
248     FormatToken Tok;
249     unsigned MinColumn;
250     unsigned MaxColumn;
251     unsigned NewLines;
252     unsigned Spaces;
253   };
254   SmallVector<StoredComment, 16> Comments;
255   typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
256 
257   /// \brief Try to align all stashed comments.
alignComments()258   void alignComments() {
259     unsigned MinColumn = 0;
260     unsigned MaxColumn = UINT_MAX;
261     comment_iterator Start = Comments.begin();
262     for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
263       if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
264         alignComments(Start, I, MinColumn);
265         MinColumn = I->MinColumn;
266         MaxColumn = I->MaxColumn;
267         Start = I;
268       } else {
269         MinColumn = std::max(MinColumn, I->MinColumn);
270         MaxColumn = std::min(MaxColumn, I->MaxColumn);
271       }
272     }
273     alignComments(Start, Comments.end(), MinColumn);
274     Comments.clear();
275   }
276 
277   /// \brief Put all the comments between \p I and \p E into \p Column.
alignComments(comment_iterator I,comment_iterator E,unsigned Column)278   void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
279     while (I != E) {
280       unsigned Spaces = I->Spaces + Column - I->MinColumn;
281       storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
282                                std::string(Spaces, ' '));
283       ++I;
284     }
285   }
286 
287   /// \brief Stores \p Text as the replacement for the whitespace in front of
288   /// \p Tok.
storeReplacement(const FormatToken & Tok,const std::string Text)289   void storeReplacement(const FormatToken &Tok, const std::string Text) {
290     // Don't create a replacement, if it does not change anything.
291     if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
292                   Tok.WhiteSpaceLength) == Text)
293       return;
294 
295     Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
296                                          Tok.WhiteSpaceLength, Text));
297   }
298 
299   SourceManager &SourceMgr;
300   tooling::Replacements Replaces;
301 };
302 
303 class UnwrappedLineFormatter {
304 public:
UnwrappedLineFormatter(const FormatStyle & Style,SourceManager & SourceMgr,const AnnotatedLine & Line,unsigned FirstIndent,const AnnotatedToken & RootToken,WhitespaceManager & Whitespaces,bool StructuralError)305   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
306                          const AnnotatedLine &Line, unsigned FirstIndent,
307                          const AnnotatedToken &RootToken,
308                          WhitespaceManager &Whitespaces, bool StructuralError)
309       : Style(Style), SourceMgr(SourceMgr), Line(Line),
310         FirstIndent(FirstIndent), RootToken(RootToken),
311         Whitespaces(Whitespaces), Count(0) {}
312 
313   /// \brief Formats an \c UnwrappedLine.
314   ///
315   /// \returns The column after the last token in the last line of the
316   /// \c UnwrappedLine.
format(const AnnotatedLine * NextLine)317   unsigned format(const AnnotatedLine *NextLine) {
318     // Initialize state dependent on indent.
319     LineState State;
320     State.Column = FirstIndent;
321     State.NextToken = &RootToken;
322     State.Stack.push_back(
323         ParenState(FirstIndent + 4, FirstIndent, !Style.BinPackParameters,
324                    /*HasMultiParameterLine=*/ false));
325     State.VariablePos = 0;
326     State.LineContainsContinuedForLoopSection = false;
327     State.ParenLevel = 0;
328     State.StartOfStringLiteral = 0;
329     State.StartOfLineLevel = State.ParenLevel;
330 
331     DEBUG({
332       DebugTokenState(*State.NextToken);
333     });
334 
335     // The first token has already been indented and thus consumed.
336     moveStateToNextToken(State, /*DryRun=*/ false);
337 
338     // If everything fits on a single line, just put it there.
339     unsigned ColumnLimit = Style.ColumnLimit;
340     if (NextLine && NextLine->InPPDirective &&
341         !NextLine->First.FormatTok.HasUnescapedNewline)
342       ColumnLimit = getColumnLimit();
343     if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
344       while (State.NextToken != NULL) {
345         addTokenToState(false, false, State);
346       }
347       return State.Column;
348     }
349 
350     // If the ObjC method declaration does not fit on a line, we should format
351     // it with one arg per line.
352     if (Line.Type == LT_ObjCMethodDecl)
353       State.Stack.back().BreakBeforeParameter = true;
354 
355     // Find best solution in solution space.
356     return analyzeSolutionSpace(State);
357   }
358 
359 private:
DebugTokenState(const AnnotatedToken & AnnotatedTok)360   void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
361     const Token &Tok = AnnotatedTok.FormatTok.Tok;
362     llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
363                               Tok.getLength());
364     llvm::errs();
365   }
366 
/// \brief Properties tracked for one nesting level while laying out a line.
struct ParenState {
  ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
             bool HasMultiParameterLine)
      : Indent(Indent),
        LastSpace(LastSpace),
        FirstLessLess(0),
        BreakBeforeClosingBrace(false),
        QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking),
        BreakBeforeParameter(false),
        HasMultiParameterLine(HasMultiParameterLine),
        ColonPos(0),
        StartOfFunctionCall(0) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// A newline before the closing brace is only wanted if there also was a
  /// newline after the opening left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief This context already has a line with more than one parameter.
  bool HasMultiParameterLine;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Lexicographic ordering over all fields, in declaration order.
  ///
  /// For the boolean fields, \c true orders before \c false.
  bool operator<(const ParenState &Other) const {
    return Indent != Other.Indent
               ? Indent < Other.Indent
           : LastSpace != Other.LastSpace
               ? LastSpace < Other.LastSpace
           : FirstLessLess != Other.FirstLessLess
               ? FirstLessLess < Other.FirstLessLess
           : BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace
               ? BreakBeforeClosingBrace
           : QuestionColumn != Other.QuestionColumn
               ? QuestionColumn < Other.QuestionColumn
           : AvoidBinPacking != Other.AvoidBinPacking
               ? AvoidBinPacking
           : BreakBeforeParameter != Other.BreakBeforeParameter
               ? BreakBeforeParameter
           : HasMultiParameterLine != Other.HasMultiParameterLine
               ? HasMultiParameterLine
           : ColonPos != Other.ColonPos
               ? ColonPos < Other.ColonPos
               : StartOfFunctionCall < Other.StartOfFunctionCall;
  }
};
444 
445   /// \brief The current state when indenting a unwrapped line.
446   ///
447   /// As the indenting tries different combinations this is copied by value.
448   struct LineState {
449     /// \brief The number of used columns in the current line.
450     unsigned Column;
451 
452     /// \brief The token that needs to be next formatted.
453     const AnnotatedToken *NextToken;
454 
455     /// \brief The column of the first variable name in a variable declaration.
456     ///
457     /// Used to align further variables if necessary.
458     unsigned VariablePos;
459 
460     /// \brief \c true if this line contains a continued for-loop section.
461     bool LineContainsContinuedForLoopSection;
462 
463     /// \brief The level of nesting inside (), [], <> and {}.
464     unsigned ParenLevel;
465 
466     /// \brief The \c ParenLevel at the start of this line.
467     unsigned StartOfLineLevel;
468 
469     /// \brief The start column of the string literal, if we're in a string
470     /// literal sequence, 0 otherwise.
471     unsigned StartOfStringLiteral;
472 
473     /// \brief A stack keeping track of properties applying to parenthesis
474     /// levels.
475     std::vector<ParenState> Stack;
476 
477     /// \brief Comparison operator to be able to used \c LineState in \c map.
operator <clang::format::UnwrappedLineFormatter::LineState478     bool operator<(const LineState &Other) const {
479       if (NextToken != Other.NextToken)
480         return NextToken < Other.NextToken;
481       if (Column != Other.Column)
482         return Column < Other.Column;
483       if (VariablePos != Other.VariablePos)
484         return VariablePos < Other.VariablePos;
485       if (LineContainsContinuedForLoopSection !=
486           Other.LineContainsContinuedForLoopSection)
487         return LineContainsContinuedForLoopSection;
488       if (ParenLevel != Other.ParenLevel)
489         return ParenLevel < Other.ParenLevel;
490       if (StartOfLineLevel != Other.StartOfLineLevel)
491         return StartOfLineLevel < Other.StartOfLineLevel;
492       if (StartOfStringLiteral != Other.StartOfStringLiteral)
493         return StartOfStringLiteral < Other.StartOfStringLiteral;
494       return Stack < Other.Stack;
495     }
496   };
497 
498   /// \brief Appends the next token to \p State and updates information
499   /// necessary for indentation.
500   ///
501   /// Puts the token on the current line if \p Newline is \c false and adds a
502   /// line break and necessary indentation otherwise.
503   ///
504   /// If \p DryRun is \c false, also creates and stores the required
505   /// \c Replacement.
      ///
      /// \returns 0 for a token of type \c TT_ImplicitStringLiteral; otherwise
      /// the value returned by \c moveStateToNextToken for the updated state.
addTokenToState(bool Newline,bool DryRun,LineState & State)506   unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
507     const AnnotatedToken &Current = *State.NextToken;
508     const AnnotatedToken &Previous = *State.NextToken->Parent;
509     assert(State.Stack.size());
510 
        // Implicit string literals get no replacement: advance the column by
        // the token's existing whitespace and length, step to the next token
        // (if any) and skip all further bookkeeping.
511     if (Current.Type == TT_ImplicitStringLiteral) {
512       State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
513                       State.NextToken->FormatTok.TokenLength;
514       if (State.NextToken->Children.empty())
515         State.NextToken = NULL;
516       else
517         State.NextToken = &State.NextToken->Children[0];
518       return 0;
519     }
520 
521     if (Newline) {
          // Remember where the replaced whitespace starts; State.Column is
          // overwritten below with the indent of the new line.
522       unsigned WhitespaceStartColumn = State.Column;
          // Pick the indent of the new line. The first matching case wins, so
          // the order of this chain matters.
523       if (Current.is(tok::r_brace)) {
524         State.Column = Line.Level * 2;
525       } else if (Current.is(tok::string_literal) &&
526                  State.StartOfStringLiteral != 0) {
527         State.Column = State.StartOfStringLiteral;
528         State.Stack.back().BreakBeforeParameter = true;
529       } else if (Current.is(tok::lessless) &&
530                  State.Stack.back().FirstLessLess != 0) {
531         State.Column = State.Stack.back().FirstLessLess;
532       } else if (State.ParenLevel != 0 &&
533                  (Previous.isOneOf(tok::equal, tok::coloncolon) ||
534                   Current.isOneOf(tok::period, tok::arrow, tok::question))) {
535         // Indent an extra 4 spaces if we know the current expression is
536         // continued.  Don't do that on the top level, as we already indent 4
537         // there.
538         State.Column = std::max(State.Stack.back().LastSpace,
539                                 State.Stack.back().Indent) + 4;
540       } else if (Current.Type == TT_ConditionalExpr) {
541         State.Column = State.Stack.back().QuestionColumn;
542       } else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
543                  ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
544                   State.ParenLevel == 0)) {
545         State.Column = State.VariablePos;
546       } else if (Previous.ClosesTemplateDeclaration ||
547                  (Current.Type == TT_StartOfName && State.ParenLevel == 0)) {
548         State.Column = State.Stack.back().Indent - 4;
549       } else if (Current.Type == TT_ObjCSelectorName) {
            // Right-align the selector name against the stored colon position
            // if it fits; otherwise start at the indent and record the new
            // colon position.
550         if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
551           State.Column =
552               State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
553         } else {
554           State.Column = State.Stack.back().Indent;
555           State.Stack.back().ColonPos =
556               State.Column + Current.FormatTok.TokenLength;
557         }
558       } else if (Previous.Type == TT_ObjCMethodExpr ||
559                  Current.Type == TT_StartOfName) {
560         State.Column = State.Stack.back().Indent + 4;
561       } else {
562         State.Column = State.Stack.back().Indent;
563       }
564 
565       if (Current.is(tok::question))
566         State.Stack.back().BreakBeforeParameter = true;
567       if (Previous.isOneOf(tok::comma, tok::semi) &&
568           !State.Stack.back().AvoidBinPacking)
569         State.Stack.back().BreakBeforeParameter = false;
570 
571       if (!DryRun) {
            // Emit one line break; a line comment additionally keeps the empty
            // lines that preceded it, capped at Style.MaxEmptyLinesToKeep.
572         unsigned NewLines = 1;
573         if (Current.Type == TT_LineComment)
574           NewLines =
575               std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
576                                           Style.MaxEmptyLinesToKeep + 1));
577         if (!Line.InPPDirective)
578           Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
579                                         WhitespaceStartColumn, Style);
580         else
581           Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
582                                           WhitespaceStartColumn, Style);
583       }
584 
585       State.Stack.back().LastSpace = State.Column;
586       State.StartOfLineLevel = State.ParenLevel;
587 
588       // Any break on this level means that the parent level has been broken
589       // and we need to avoid bin packing there.
590       for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
591         State.Stack[i].BreakBeforeParameter = true;
592       }
593       if (Current.isOneOf(tok::period, tok::arrow))
594         State.Stack.back().BreakBeforeParameter = true;
595 
596       // If we break after {, we should also break before the corresponding }.
597       if (Previous.is(tok::l_brace))
598         State.Stack.back().BreakBeforeClosingBrace = true;
599 
600       if (State.Stack.back().AvoidBinPacking) {
601         // If we are breaking after '(', '{', '<', this is not bin packing
602         // unless AllowAllParametersOfDeclarationOnNextLine is false.
603         if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) ||
604             (!Style.AllowAllParametersOfDeclarationOnNextLine &&
605              Line.MustBeDeclaration))
606           State.Stack.back().BreakBeforeParameter = true;
607       }
608     } else {
          // No line break: Current stays on the current line, preceded by the
          // number of spaces it requires.
609       // FIXME: Put VariablePos into ParenState and remove second part of if().
610       if (Current.is(tok::equal) &&
611           (RootToken.is(tok::kw_for) || State.ParenLevel == 0))
612         State.VariablePos = State.Column - Previous.FormatTok.TokenLength;
613 
614       unsigned Spaces = State.NextToken->SpacesRequiredBefore;
615 
616       if (!DryRun)
617         Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style);
618 
          // The first selector name on a level fixes the colon position used
          // to align the selector names that follow on new lines.
619       if (Current.Type == TT_ObjCSelectorName &&
620           State.Stack.back().ColonPos == 0) {
621         if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
622             State.Column + Spaces + Current.FormatTok.TokenLength)
623           State.Stack.back().ColonPos =
624               State.Stack.back().Indent + Current.LongestObjCSelectorName;
625         else
626           State.Stack.back().ColonPos =
627               State.Column + Spaces + Current.FormatTok.TokenLength;
628       }
629 
          // A token directly after an opening '(', '{' or template opener sets
          // the indent of this level, unless it is a line comment.
630       if (Current.Type != TT_LineComment &&
631           (Previous.isOneOf(tok::l_paren, tok::l_brace) ||
632            State.NextToken->Parent->Type == TT_TemplateOpener))
633         State.Stack.back().Indent = State.Column + Spaces;
634       if (Previous.is(tok::comma) && !isTrailingComment(Current))
635         State.Stack.back().HasMultiParameterLine = true;
636 
637       State.Column += Spaces;
638       if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
639         // Treat the condition inside an if as if it was a second function
640         // parameter, i.e. let nested calls have an indent of 4.
641         State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
642       else if (Previous.is(tok::comma) && State.ParenLevel != 0)
643         // Top-level spaces are exempt as that mostly leads to better results.
644         State.Stack.back().LastSpace = State.Column;
645       else if ((Previous.Type == TT_BinaryOperator ||
646                 Previous.Type == TT_ConditionalExpr ||
647                 Previous.Type == TT_CtorInitializerColon) &&
648                getPrecedence(Previous) != prec::Assignment)
649         State.Stack.back().LastSpace = State.Column;
650       else if (Previous.Type == TT_InheritanceColon)
651         State.Stack.back().Indent = State.Column;
652       else if (Previous.ParameterCount > 1 &&
653                (Previous.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) ||
654                 Previous.Type == TT_TemplateOpener))
655         // If this function has multiple parameters, indent nested calls from
656         // the start of the first parameter.
657         State.Stack.back().LastSpace = State.Column;
658     }
659 
660     return moveStateToNextToken(State, DryRun);
661   }
662 
663   /// \brief Mark the next token as consumed in \p State and modify its stacks
664   /// accordingly.
moveStateToNextToken(LineState & State,bool DryRun)665   unsigned moveStateToNextToken(LineState &State, bool DryRun) {
666     const AnnotatedToken &Current = *State.NextToken;
667     assert(State.Stack.size());
668 
669     if (Current.Type == TT_InheritanceColon)
670       State.Stack.back().AvoidBinPacking = true;
671     if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
672       State.Stack.back().FirstLessLess = State.Column;
673     if (Current.is(tok::question))
674       State.Stack.back().QuestionColumn = State.Column;
675     if (Current.isOneOf(tok::period, tok::arrow) &&
676         Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
677       State.Stack.back().StartOfFunctionCall =
678           Current.LastInChainOfCalls ? 0 : State.Column;
679     if (Current.Type == TT_CtorInitializerColon) {
680       if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
681         State.Stack.back().AvoidBinPacking = true;
682       State.Stack.back().BreakBeforeParameter = false;
683     }
684 
685     // Insert scopes created by fake parenthesis.
686     for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
687       ParenState NewParenState = State.Stack.back();
688       NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
689       NewParenState.BreakBeforeParameter = false;
690       State.Stack.push_back(NewParenState);
691     }
692 
693     // If we encounter an opening (, [, { or <, we add a level to our stacks to
694     // prepare for the following tokens.
695     if (Current.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) ||
696         State.NextToken->Type == TT_TemplateOpener) {
697       unsigned NewIndent;
698       bool AvoidBinPacking;
699       if (Current.is(tok::l_brace)) {
700         NewIndent = 2 + State.Stack.back().LastSpace;
701         AvoidBinPacking = false;
702       } else {
703         NewIndent = 4 + std::max(State.Stack.back().LastSpace,
704                                  State.Stack.back().StartOfFunctionCall);
705         AvoidBinPacking =
706             !Style.BinPackParameters || State.Stack.back().AvoidBinPacking;
707       }
708       State.Stack.push_back(
709           ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
710                      State.Stack.back().HasMultiParameterLine));
711       ++State.ParenLevel;
712     }
713 
714     // If this '[' opens an ObjC call, determine whether all parameters fit into
715     // one line and put one per line if they don't.
716     if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
717         Current.MatchingParen != NULL) {
718       if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
719         State.Stack.back().BreakBeforeParameter = true;
720     }
721 
722     // If we encounter a closing ), ], } or >, we can remove a level from our
723     // stacks.
724     if (Current.isOneOf(tok::r_paren, tok::r_square) ||
725         (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
726         State.NextToken->Type == TT_TemplateCloser) {
727       State.Stack.pop_back();
728       --State.ParenLevel;
729     }
730 
731     // Remove scopes created by fake parenthesis.
732     for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
733       State.Stack.pop_back();
734     }
735 
736     if (Current.is(tok::string_literal)) {
737       State.StartOfStringLiteral = State.Column;
738     } else if (Current.isNot(tok::comment)) {
739       State.StartOfStringLiteral = 0;
740     }
741 
742     State.Column += Current.FormatTok.TokenLength;
743 
744     if (State.NextToken->Children.empty())
745       State.NextToken = NULL;
746     else
747       State.NextToken = &State.NextToken->Children[0];
748 
749     return breakProtrudingToken(Current, State, DryRun);
750   }
751 
752   /// \brief If the current token sticks out over the end of the line, break
753   /// it if possible.
breakProtrudingToken(const AnnotatedToken & Current,LineState & State,bool DryRun)754   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
755                                 bool DryRun) {
756     if (Current.isNot(tok::string_literal))
757       return 0;
758     // Only break up default narrow strings.
759     if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0)
760       return 0;
761 
762     unsigned Penalty = 0;
763     unsigned TailOffset = 0;
764     unsigned TailLength = Current.FormatTok.TokenLength;
765     unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
766     unsigned OffsetFromStart = 0;
767     while (StartColumn + TailLength > getColumnLimit()) {
768       StringRef Text = StringRef(
769           Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength);
770       if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
771         break;
772       StringRef::size_type SplitPoint = getSplitPoint(
773           Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
774       if (SplitPoint == StringRef::npos)
775         break;
776       assert(SplitPoint != 0);
777       // +2, because 'Text' starts after the opening quotes, and does not
778       // include the closing quote we need to insert.
779       unsigned WhitespaceStartColumn =
780           StartColumn + OffsetFromStart + SplitPoint + 2;
781       State.Stack.back().LastSpace = StartColumn;
782       if (!DryRun) {
783         Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"",
784                                Line.InPPDirective, StartColumn,
785                                WhitespaceStartColumn, Style);
786       }
787       TailOffset += SplitPoint + 1;
788       TailLength -= SplitPoint + 1;
789       OffsetFromStart = 1;
790       Penalty += Style.PenaltyExcessCharacter;
791       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
792         State.Stack[i].BreakBeforeParameter = true;
793     }
794     State.Column = StartColumn + TailLength;
795     return Penalty;
796   }
797 
798   StringRef::size_type
getSplitPoint(StringRef Text,StringRef::size_type Offset)799   getSplitPoint(StringRef Text, StringRef::size_type Offset) {
800     StringRef::size_type SpaceOffset = Text.rfind(' ', Offset);
801     if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
802       return SpaceOffset;
803     StringRef::size_type SlashOffset = Text.rfind('/', Offset);
804     if (SlashOffset != StringRef::npos && SlashOffset != 0)
805       return SlashOffset;
806     StringRef::size_type Split = getStartOfCharacter(Text, Offset);
807     if (Split != StringRef::npos && Split > 1)
808       // Do not split at 0.
809       return Split - 1;
810     return StringRef::npos;
811   }
812 
813   StringRef::size_type
getStartOfCharacter(StringRef Text,StringRef::size_type Offset)814   getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
815     StringRef::size_type NextEscape = Text.find('\\');
816     while (NextEscape != StringRef::npos && NextEscape < Offset) {
817       StringRef::size_type SequenceLength =
818           getEscapeSequenceLength(Text.substr(NextEscape));
819       if (Offset < NextEscape + SequenceLength)
820         return NextEscape;
821       NextEscape = Text.find('\\', NextEscape + SequenceLength);
822     }
823     return Offset;
824   }
825 
getEscapeSequenceLength(StringRef Text)826   unsigned getEscapeSequenceLength(StringRef Text) {
827     assert(Text[0] == '\\');
828     if (Text.size() < 2)
829       return 1;
830 
831     switch (Text[1]) {
832     case 'u':
833       return 6;
834     case 'U':
835       return 10;
836     case 'x':
837       return getHexLength(Text);
838     default:
839       if (Text[1] >= '0' && Text[1] <= '7')
840         return getOctalLength(Text);
841       return 2;
842     }
843   }
844 
getHexLength(StringRef Text)845   unsigned getHexLength(StringRef Text) {
846     unsigned I = 2; // Point after '\x'.
847     while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
848                                (Text[I] >= 'a' && Text[I] <= 'f') ||
849                                (Text[I] >= 'A' && Text[I] <= 'F'))) {
850       ++I;
851     }
852     return I;
853   }
854 
getOctalLength(StringRef Text)855   unsigned getOctalLength(StringRef Text) {
856     unsigned I = 1;
857     while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
858       ++I;
859     }
860     return I;
861   }
862 
getColumnLimit()863   unsigned getColumnLimit() {
864     return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
865   }
866 
867   /// \brief An edge in the solution space from \c Previous->State to \c State,
868   /// inserting a newline dependent on the \c NewLine.
869   struct StateNode {
StateNodeclang::format::UnwrappedLineFormatter::StateNode870     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
871         : State(State), NewLine(NewLine), Previous(Previous) {}
872     LineState State;
873     bool NewLine;
874     StateNode *Previous;
875   };
876 
877   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
878   ///
879   /// In case of equal penalties, we want to prefer states that were inserted
880   /// first. During state generation we make sure that we insert states first
881   /// that break the line as late as possible.
882   typedef std::pair<unsigned, unsigned> OrderedPenalty;
883 
884   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
885   /// \c State has the given \c OrderedPenalty.
886   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
887 
888   /// \brief The BFS queue type.
889   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
890                               std::greater<QueueItem> > QueueType;
891 
892   /// \brief Analyze the entire solution space starting from \p InitialState.
893   ///
894   /// This implements a variant of Dijkstra's algorithm on the graph that spans
895   /// the solution space (\c LineStates are the nodes). The algorithm tries to
896   /// find the shortest path (the one with lowest penalty) from \p InitialState
897   /// to a state where all tokens are placed.
analyzeSolutionSpace(LineState & InitialState)898   unsigned analyzeSolutionSpace(LineState &InitialState) {
899     std::set<LineState> Seen;
900 
901     // Insert start element into queue.
902     StateNode *Node =
903         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
904     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
905     ++Count;
906 
907     // While not empty, take first element and follow edges.
908     while (!Queue.empty()) {
909       unsigned Penalty = Queue.top().first.first;
910       StateNode *Node = Queue.top().second;
911       if (Node->State.NextToken == NULL) {
912         DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
913         break;
914       }
915       Queue.pop();
916 
917       if (!Seen.insert(Node->State).second)
918         // State already examined with lower penalty.
919         continue;
920 
921       addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
922       addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
923     }
924 
925     if (Queue.empty())
926       // We were unable to find a solution, do nothing.
927       // FIXME: Add diagnostic?
928       return 0;
929 
930     // Reconstruct the solution.
931     reconstructPath(InitialState, Queue.top().second);
932     DEBUG(llvm::errs() << "---\n");
933 
934     // Return the column after the last token of the solution.
935     return Queue.top().second->State.Column;
936   }
937 
reconstructPath(LineState & State,StateNode * Current)938   void reconstructPath(LineState &State, StateNode *Current) {
939     // FIXME: This recursive implementation limits the possible number
940     // of tokens per line if compiled into a binary with small stack space.
941     // To become more independent of stack frame limitations we would need
942     // to also change the TokenAnnotator.
943     if (Current->Previous == NULL)
944       return;
945     reconstructPath(State, Current->Previous);
946     DEBUG({
947       if (Current->NewLine) {
948         llvm::errs()
949             << "Penalty for splitting before "
950             << Current->Previous->State.NextToken->FormatTok.Tok.getName()
951             << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
952       }
953     });
954     addTokenToState(Current->NewLine, false, State);
955   }
956 
957   /// \brief Add the following state to the analysis queue \c Queue.
958   ///
959   /// Assume the current state is \p PreviousNode and has been reached with a
960   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
addNextStateToQueue(unsigned Penalty,StateNode * PreviousNode,bool NewLine)961   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
962                            bool NewLine) {
963     if (NewLine && !canBreak(PreviousNode->State))
964       return;
965     if (!NewLine && mustBreak(PreviousNode->State))
966       return;
967     if (NewLine)
968       Penalty += PreviousNode->State.NextToken->SplitPenalty;
969 
970     StateNode *Node = new (Allocator.Allocate())
971         StateNode(PreviousNode->State, NewLine, PreviousNode);
972     Penalty += addTokenToState(NewLine, true, Node->State);
973     if (Node->State.Column > getColumnLimit()) {
974       unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
975       Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
976     }
977 
978     Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
979     ++Count;
980   }
981 
982   /// \brief Returns \c true, if a line break after \p State is allowed.
canBreak(const LineState & State)983   bool canBreak(const LineState &State) {
984     if (!State.NextToken->CanBreakBefore &&
985         !(State.NextToken->is(tok::r_brace) &&
986           State.Stack.back().BreakBeforeClosingBrace))
987       return false;
988     // Trying to insert a parameter on a new line if there are already more than
989     // one parameter on the current line is bin packing.
990     if (State.Stack.back().HasMultiParameterLine &&
991         State.Stack.back().AvoidBinPacking)
992       return false;
993     return true;
994   }
995 
996   /// \brief Returns \c true, if a line break after \p State is mandatory.
mustBreak(const LineState & State)997   bool mustBreak(const LineState &State) {
998     if (State.NextToken->MustBreakBefore)
999       return true;
1000     if (State.NextToken->is(tok::r_brace) &&
1001         State.Stack.back().BreakBeforeClosingBrace)
1002       return true;
1003     if (State.NextToken->Parent->is(tok::semi) &&
1004         State.LineContainsContinuedForLoopSection)
1005       return true;
1006     if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) ||
1007          State.NextToken->is(tok::question) ||
1008          State.NextToken->Type == TT_ConditionalExpr) &&
1009         State.Stack.back().BreakBeforeParameter &&
1010         !isTrailingComment(*State.NextToken) &&
1011         State.NextToken->isNot(tok::r_paren) &&
1012         State.NextToken->isNot(tok::r_brace))
1013       return true;
1014     // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
1015     // out whether it is the first parameter. Clean this up.
1016     if (State.NextToken->Type == TT_ObjCSelectorName &&
1017         State.NextToken->LongestObjCSelectorName == 0 &&
1018         State.Stack.back().BreakBeforeParameter)
1019       return true;
1020     if ((State.NextToken->Type == TT_CtorInitializerColon ||
1021          (State.NextToken->Parent->ClosesTemplateDeclaration &&
1022           State.ParenLevel == 0)))
1023       return true;
1024     if (State.NextToken->Type == TT_InlineASMColon)
1025       return true;
1026     // This prevents breaks like:
1027     //   ...
1028     //   SomeParameter, OtherParameter).DoSomething(
1029     //   ...
1030     // As they hide "DoSomething" and generally bad for readability.
1031     if (State.NextToken->isOneOf(tok::period, tok::arrow) &&
1032         getRemainingLength(State) + State.Column > getColumnLimit() &&
1033         State.ParenLevel < State.StartOfLineLevel)
1034       return true;
1035     return false;
1036   }
1037 
1038   // Returns the total number of columns required for the remaining tokens.
getRemainingLength(const LineState & State)1039   unsigned getRemainingLength(const LineState &State) {
1040     if (State.NextToken && State.NextToken->Parent)
1041       return Line.Last->TotalLength - State.NextToken->Parent->TotalLength;
1042     return 0;
1043   }
1044 
1045   FormatStyle Style;
1046   SourceManager &SourceMgr;
1047   const AnnotatedLine &Line;
1048   const unsigned FirstIndent;
1049   const AnnotatedToken &RootToken;
1050   WhitespaceManager &Whitespaces;
1051 
1052   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1053   QueueType Queue;
1054   // Increasing count of \c StateNode items we have created. This is used
1055   // to create a deterministic order independent of the container.
1056   unsigned Count;
1057 };
1058 
1059 class LexerBasedFormatTokenSource : public FormatTokenSource {
1060 public:
LexerBasedFormatTokenSource(Lexer & Lex,SourceManager & SourceMgr)1061   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1062       : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1063         IdentTable(Lex.getLangOpts()) {
1064     Lex.SetKeepWhitespaceMode(true);
1065   }
1066 
getNextToken()1067   virtual FormatToken getNextToken() {
1068     if (GreaterStashed) {
1069       FormatTok.NewlinesBefore = 0;
1070       FormatTok.WhiteSpaceStart =
1071           FormatTok.Tok.getLocation().getLocWithOffset(1);
1072       FormatTok.WhiteSpaceLength = 0;
1073       GreaterStashed = false;
1074       return FormatTok;
1075     }
1076 
1077     FormatTok = FormatToken();
1078     Lex.LexFromRawLexer(FormatTok.Tok);
1079     StringRef Text = rawTokenText(FormatTok.Tok);
1080     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1081     if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1082       FormatTok.IsFirst = true;
1083 
1084     // Consume and record whitespace until we find a significant token.
1085     while (FormatTok.Tok.is(tok::unknown)) {
1086       unsigned Newlines = Text.count('\n');
1087       if (Newlines > 0)
1088         FormatTok.LastNewlineOffset =
1089             FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
1090       unsigned EscapedNewlines = Text.count("\\\n");
1091       FormatTok.NewlinesBefore += Newlines;
1092       FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
1093       FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1094 
1095       if (FormatTok.Tok.is(tok::eof))
1096         return FormatTok;
1097       Lex.LexFromRawLexer(FormatTok.Tok);
1098       Text = rawTokenText(FormatTok.Tok);
1099     }
1100 
1101     // Now FormatTok is the next non-whitespace token.
1102     FormatTok.TokenLength = Text.size();
1103 
1104     // In case the token starts with escaped newlines, we want to
1105     // take them into account as whitespace - this pattern is quite frequent
1106     // in macro definitions.
1107     // FIXME: What do we want to do with other escaped spaces, and escaped
1108     // spaces or newlines in the middle of tokens?
1109     // FIXME: Add a more explicit test.
1110     unsigned i = 0;
1111     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1112       // FIXME: ++FormatTok.NewlinesBefore is missing...
1113       FormatTok.WhiteSpaceLength += 2;
1114       FormatTok.TokenLength -= 2;
1115       i += 2;
1116     }
1117 
1118     if (FormatTok.Tok.is(tok::raw_identifier)) {
1119       IdentifierInfo &Info = IdentTable.get(Text);
1120       FormatTok.Tok.setIdentifierInfo(&Info);
1121       FormatTok.Tok.setKind(Info.getTokenID());
1122     }
1123 
1124     if (FormatTok.Tok.is(tok::greatergreater)) {
1125       FormatTok.Tok.setKind(tok::greater);
1126       FormatTok.TokenLength = 1;
1127       GreaterStashed = true;
1128     }
1129 
1130     // If we reformat comments, we remove trailing whitespace. Update the length
1131     // accordingly.
1132     if (FormatTok.Tok.is(tok::comment))
1133       FormatTok.TokenLength = Text.rtrim().size();
1134 
1135     return FormatTok;
1136   }
1137 
getIdentTable()1138   IdentifierTable &getIdentTable() { return IdentTable; }
1139 
1140 private:
1141   FormatToken FormatTok;
1142   bool GreaterStashed;
1143   Lexer &Lex;
1144   SourceManager &SourceMgr;
1145   IdentifierTable IdentTable;
1146 
1147   /// Returns the text of \c FormatTok.
rawTokenText(Token & Tok)1148   StringRef rawTokenText(Token &Tok) {
1149     return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1150                      Tok.getLength());
1151   }
1152 };
1153 
1154 class Formatter : public UnwrappedLineConsumer {
1155 public:
Formatter(DiagnosticsEngine & Diag,const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,const std::vector<CharSourceRange> & Ranges)1156   Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1157             SourceManager &SourceMgr,
1158             const std::vector<CharSourceRange> &Ranges)
1159       : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1160         Whitespaces(SourceMgr), Ranges(Ranges) {}
1161 
~Formatter()1162   virtual ~Formatter() {}
1163 
format()1164   tooling::Replacements format() {
1165     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1166     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1167     StructuralError = Parser.parse();
1168     unsigned PreviousEndOfLineColumn = 0;
1169     TokenAnnotator Annotator(Style, SourceMgr, Lex,
1170                              Tokens.getIdentTable().get("in"));
1171     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1172       Annotator.annotate(AnnotatedLines[i]);
1173     }
1174     deriveLocalStyle();
1175     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1176       Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1177 
1178       // Adapt level to the next line if this is a comment.
1179       // FIXME: Can/should this be done in the UnwrappedLineParser?
1180       if (i + 1 != e && AnnotatedLines[i].First.is(tok::comment) &&
1181           AnnotatedLines[i].First.Children.empty() &&
1182           AnnotatedLines[i + 1].First.isNot(tok::r_brace))
1183         AnnotatedLines[i].Level = AnnotatedLines[i + 1].Level;
1184     }
1185     std::vector<int> IndentForLevel;
1186     bool PreviousLineWasTouched = false;
1187     for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1188                                               E = AnnotatedLines.end();
1189          I != E; ++I) {
1190       const AnnotatedLine &TheLine = *I;
1191       const FormatToken &FirstTok = TheLine.First.FormatTok;
1192       int Offset = getIndentOffset(TheLine.First);
1193       while (IndentForLevel.size() <= TheLine.Level)
1194         IndentForLevel.push_back(-1);
1195       IndentForLevel.resize(TheLine.Level + 1);
1196       bool WasMoved =
1197           PreviousLineWasTouched && FirstTok.NewlinesBefore == 0;
1198       if (TheLine.First.is(tok::eof)) {
1199         if (PreviousLineWasTouched) {
1200           unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
1201           Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
1202                                         /*WhitespaceStartColumn*/ 0, Style);
1203         }
1204       } else if (TheLine.Type != LT_Invalid &&
1205                  (WasMoved || touchesLine(TheLine))) {
1206         unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1207         unsigned Indent = LevelIndent;
1208         if (static_cast<int>(Indent) + Offset >= 0)
1209           Indent += Offset;
1210         if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
1211           Indent = LevelIndent = SourceMgr.getSpellingColumnNumber(
1212               FirstTok.Tok.getLocation()) - 1;
1213         } else {
1214           formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1215                            PreviousEndOfLineColumn);
1216         }
1217         tryFitMultipleLinesInOne(Indent, I, E);
1218         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1219                                          TheLine.First, Whitespaces,
1220                                          StructuralError);
1221         PreviousEndOfLineColumn =
1222             Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
1223         IndentForLevel[TheLine.Level] = LevelIndent;
1224         PreviousLineWasTouched = true;
1225       } else {
1226         if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
1227           unsigned Indent =
1228               SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
1229           unsigned LevelIndent = Indent;
1230           if (static_cast<int>(LevelIndent) - Offset >= 0)
1231             LevelIndent -= Offset;
1232           IndentForLevel[TheLine.Level] = LevelIndent;
1233 
1234           // Remove trailing whitespace of the previous line if it was touched.
1235           if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
1236             formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1237                              PreviousEndOfLineColumn);
1238         }
1239         // If we did not reformat this unwrapped line, the column at the end of
1240         // the last token is unchanged - thus, we can calculate the end of the
1241         // last token.
1242         SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
1243         PreviousEndOfLineColumn =
1244             SourceMgr.getSpellingColumnNumber(LastLoc) +
1245             Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
1246         PreviousLineWasTouched = false;
1247       }
1248     }
1249     return Whitespaces.generateReplacements();
1250   }
1251 
1252 private:
deriveLocalStyle()1253   void deriveLocalStyle() {
1254     unsigned CountBoundToVariable = 0;
1255     unsigned CountBoundToType = 0;
1256     bool HasCpp03IncompatibleFormat = false;
1257     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1258       if (AnnotatedLines[i].First.Children.empty())
1259         continue;
1260       AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
1261       while (!Tok->Children.empty()) {
1262         if (Tok->Type == TT_PointerOrReference) {
1263           bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
1264           bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
1265           if (SpacesBefore && !SpacesAfter)
1266             ++CountBoundToVariable;
1267           else if (!SpacesBefore && SpacesAfter)
1268             ++CountBoundToType;
1269         }
1270 
1271         if (Tok->Type == TT_TemplateCloser &&
1272             Tok->Parent->Type == TT_TemplateCloser &&
1273             Tok->FormatTok.WhiteSpaceLength == 0)
1274           HasCpp03IncompatibleFormat = true;
1275         Tok = &Tok->Children[0];
1276       }
1277     }
1278     if (Style.DerivePointerBinding) {
1279       if (CountBoundToType > CountBoundToVariable)
1280         Style.PointerBindsToType = true;
1281       else if (CountBoundToType < CountBoundToVariable)
1282         Style.PointerBindsToType = false;
1283     }
1284     if (Style.Standard == FormatStyle::LS_Auto) {
1285       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1286                                                   : FormatStyle::LS_Cpp03;
1287     }
1288   }
1289 
/// \brief Get the indent of \p Level from \p IndentForLevel.
///
/// \p IndentForLevel must contain the indent for the level \c l
/// at \p IndentForLevel[l], or -1 if the indent for that level is unknown.
/// \p Level must be a valid index into \p IndentForLevel.
///
/// If the indent of \p Level is unknown, it is derived from the closest
/// enclosing level with a known indent by adding 2 columns per level; if no
/// enclosing level is known either, level 0 is assumed to be at column 0.
///
/// The vector is taken by reference and walked iteratively, so it is not
/// copied once per level.
unsigned getIndent(const std::vector<int> &IndentForLevel, unsigned Level) {
  unsigned Extra = 0;
  while (IndentForLevel[Level] == -1) {
    if (Level == 0)
      return Extra;
    --Level;
    Extra += 2;
  }
  return IndentForLevel[Level] + Extra;
}
1302 
1303   /// \brief Get the offset of the line relatively to the level.
1304   ///
1305   /// For example, 'public:' labels in classes are offset by 1 or 2
1306   /// characters to the left from their level.
getIndentOffset(const AnnotatedToken & RootToken)1307   int getIndentOffset(const AnnotatedToken &RootToken) {
1308     bool IsAccessModifier = false;
1309     if (RootToken.isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private))
1310       IsAccessModifier = true;
1311     else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1312              (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1313               RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1314               RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1315               RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1316       IsAccessModifier = true;
1317 
1318     if (IsAccessModifier)
1319       return Style.AccessModifierOffset;
1320     return 0;
1321   }
1322 
1323   /// \brief Tries to merge lines into one.
1324   ///
1325   /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1326   /// if possible; note that \c I will be incremented when lines are merged.
1327   ///
1328   /// Returns whether the resulting \c Line can fit in a single line.
tryFitMultipleLinesInOne(unsigned Indent,std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E)1329   void tryFitMultipleLinesInOne(unsigned Indent,
1330                                 std::vector<AnnotatedLine>::iterator &I,
1331                                 std::vector<AnnotatedLine>::iterator E) {
1332     // We can never merge stuff if there are trailing line comments.
1333     if (I->Last->Type == TT_LineComment)
1334       return;
1335 
1336     unsigned Limit = Style.ColumnLimit - Indent;
1337     // If we already exceed the column limit, we set 'Limit' to 0. The different
1338     // tryMerge..() functions can then decide whether to still do merging.
1339     Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1340 
1341     if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1342       return;
1343 
1344     if (I->Last->is(tok::l_brace)) {
1345       tryMergeSimpleBlock(I, E, Limit);
1346     } else if (I->First.is(tok::kw_if)) {
1347       tryMergeSimpleIf(I, E, Limit);
1348     } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1349                                     I->First.FormatTok.IsFirst)) {
1350       tryMergeSimplePPDirective(I, E, Limit);
1351     }
1352     return;
1353   }
1354 
tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1355   void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1356                                  std::vector<AnnotatedLine>::iterator E,
1357                                  unsigned Limit) {
1358     if (Limit == 0)
1359       return;
1360     AnnotatedLine &Line = *I;
1361     if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1362       return;
1363     if (I + 2 != E && (I + 2)->InPPDirective &&
1364         !(I + 2)->First.FormatTok.HasUnescapedNewline)
1365       return;
1366     if (1 + (I + 1)->Last->TotalLength > Limit)
1367       return;
1368     join(Line, *(++I));
1369   }
1370 
tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1371   void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1372                         std::vector<AnnotatedLine>::iterator E,
1373                         unsigned Limit) {
1374     if (Limit == 0)
1375       return;
1376     if (!Style.AllowShortIfStatementsOnASingleLine)
1377       return;
1378     if ((I + 1)->InPPDirective != I->InPPDirective ||
1379         ((I + 1)->InPPDirective &&
1380          (I + 1)->First.FormatTok.HasUnescapedNewline))
1381       return;
1382     AnnotatedLine &Line = *I;
1383     if (Line.Last->isNot(tok::r_paren))
1384       return;
1385     if (1 + (I + 1)->Last->TotalLength > Limit)
1386       return;
1387     if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1388       return;
1389     // Only inline simple if's (no nested if or else).
1390     if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1391       return;
1392     join(Line, *(++I));
1393   }
1394 
tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1395   void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1396                            std::vector<AnnotatedLine>::iterator E,
1397                            unsigned Limit) {
1398     // First, check that the current line allows merging. This is the case if
1399     // we're not in a control flow statement and the last token is an opening
1400     // brace.
1401     AnnotatedLine &Line = *I;
1402     if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1403                            tok::kw_else, tok::kw_try, tok::kw_catch,
1404                            tok::kw_for,
1405                            // This gets rid of all ObjC @ keywords and methods.
1406                            tok::at, tok::minus, tok::plus))
1407       return;
1408 
1409     AnnotatedToken *Tok = &(I + 1)->First;
1410     if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1411         !Tok->MustBreakBefore) {
1412       // We merge empty blocks even if the line exceeds the column limit.
1413       Tok->SpacesRequiredBefore = 0;
1414       Tok->CanBreakBefore = true;
1415       join(Line, *(I + 1));
1416       I += 1;
1417     } else if (Limit != 0) {
1418       // Check that we still have three lines and they fit into the limit.
1419       if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1420           !nextTwoLinesFitInto(I, Limit))
1421         return;
1422 
1423       // Second, check that the next line does not contain any braces - if it
1424       // does, readability declines when putting it into a single line.
1425       if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1426         return;
1427       do {
1428         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1429           return;
1430         Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1431       } while (Tok != NULL);
1432 
1433       // Last, check that the third line contains a single closing brace.
1434       Tok = &(I + 2)->First;
1435       if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1436           Tok->MustBreakBefore)
1437         return;
1438 
1439       join(Line, *(I + 1));
1440       join(Line, *(I + 2));
1441       I += 2;
1442     }
1443   }
1444 
nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,unsigned Limit)1445   bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1446                            unsigned Limit) {
1447     return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1448            Limit;
1449   }
1450 
join(AnnotatedLine & A,const AnnotatedLine & B)1451   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1452     unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
1453     A.Last->Children.push_back(B.First);
1454     while (!A.Last->Children.empty()) {
1455       A.Last->Children[0].Parent = A.Last;
1456       A.Last->Children[0].TotalLength += LengthA;
1457       A.Last = &A.Last->Children[0];
1458     }
1459   }
1460 
touchesRanges(const CharSourceRange & Range)1461   bool touchesRanges(const CharSourceRange &Range) {
1462     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1463       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1464                                                Ranges[i].getBegin()) &&
1465           !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1466                                                Range.getBegin()))
1467         return true;
1468     }
1469     return false;
1470   }
1471 
touchesLine(const AnnotatedLine & TheLine)1472   bool touchesLine(const AnnotatedLine &TheLine) {
1473     const FormatToken *First = &TheLine.First.FormatTok;
1474     const FormatToken *Last = &TheLine.Last->FormatTok;
1475     CharSourceRange LineRange = CharSourceRange::getTokenRange(
1476         First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
1477         Last->Tok.getLocation());
1478     return touchesRanges(LineRange);
1479   }
1480 
touchesEmptyLineBefore(const AnnotatedLine & TheLine)1481   bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1482     const FormatToken *First = &TheLine.First.FormatTok;
1483     CharSourceRange LineRange = CharSourceRange::getCharRange(
1484         First->WhiteSpaceStart,
1485         First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
1486     return touchesRanges(LineRange);
1487   }
1488 
consumeUnwrappedLine(const UnwrappedLine & TheLine)1489   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1490     AnnotatedLines.push_back(AnnotatedLine(TheLine));
1491   }
1492 
1493   /// \brief Add a new line and the required indent before the first Token
1494   /// of the \c UnwrappedLine if there was no structural parsing error.
1495   /// Returns the indent level of the \c UnwrappedLine.
formatFirstToken(const AnnotatedToken & RootToken,unsigned Indent,bool InPPDirective,unsigned PreviousEndOfLineColumn)1496   void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent,
1497                         bool InPPDirective, unsigned PreviousEndOfLineColumn) {
1498     const FormatToken &Tok = RootToken.FormatTok;
1499 
1500     unsigned Newlines =
1501         std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1502     if (Newlines == 0 && !Tok.IsFirst)
1503       Newlines = 1;
1504 
1505     if (!InPPDirective || Tok.HasUnescapedNewline) {
1506       Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style);
1507     } else {
1508       Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
1509                                       PreviousEndOfLineColumn, Style);
1510     }
1511   }
1512 
1513   DiagnosticsEngine &Diag;
1514   FormatStyle Style;
1515   Lexer &Lex;
1516   SourceManager &SourceMgr;
1517   WhitespaceManager Whitespaces;
1518   std::vector<CharSourceRange> Ranges;
1519   std::vector<AnnotatedLine> AnnotatedLines;
1520   bool StructuralError;
1521 };
1522 
1523 tooling::Replacements
reformat(const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,std::vector<CharSourceRange> Ranges,DiagnosticConsumer * DiagClient)1524 reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1525          std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
1526   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1527   OwningPtr<DiagnosticConsumer> DiagPrinter;
1528   if (DiagClient == 0) {
1529     DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1530     DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1531     DiagClient = DiagPrinter.get();
1532   }
1533   DiagnosticsEngine Diagnostics(
1534       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1535       DiagClient, false);
1536   Diagnostics.setSourceManager(&SourceMgr);
1537   Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1538   return formatter.format();
1539 }
1540 
getFormattingLangOpts()1541 LangOptions getFormattingLangOpts() {
1542   LangOptions LangOpts;
1543   LangOpts.CPlusPlus = 1;
1544   LangOpts.CPlusPlus11 = 1;
1545   LangOpts.Bool = 1;
1546   LangOpts.ObjC1 = 1;
1547   LangOpts.ObjC2 = 1;
1548   return LangOpts;
1549 }
1550 
1551 } // namespace format
1552 } // namespace clang
1553