1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #define DEBUG_TYPE "format-formatter"
17
18 #include "TokenAnnotator.h"
19 #include "UnwrappedLineParser.h"
20 #include "clang/Basic/Diagnostic.h"
21 #include "clang/Basic/OperatorPrecedence.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Frontend/TextDiagnosticPrinter.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Debug.h"
28 #include <queue>
29 #include <string>
30
31 namespace clang {
32 namespace format {
33
getLLVMStyle()34 FormatStyle getLLVMStyle() {
35 FormatStyle LLVMStyle;
36 LLVMStyle.ColumnLimit = 80;
37 LLVMStyle.MaxEmptyLinesToKeep = 1;
38 LLVMStyle.PointerBindsToType = false;
39 LLVMStyle.DerivePointerBinding = false;
40 LLVMStyle.AccessModifierOffset = -2;
41 LLVMStyle.Standard = FormatStyle::LS_Cpp03;
42 LLVMStyle.IndentCaseLabels = false;
43 LLVMStyle.SpacesBeforeTrailingComments = 1;
44 LLVMStyle.BinPackParameters = true;
45 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
46 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
47 LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
48 LLVMStyle.ObjCSpaceBeforeProtocolList = true;
49 LLVMStyle.PenaltyExcessCharacter = 1000000;
50 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5;
51 return LLVMStyle;
52 }
53
getGoogleStyle()54 FormatStyle getGoogleStyle() {
55 FormatStyle GoogleStyle;
56 GoogleStyle.ColumnLimit = 80;
57 GoogleStyle.MaxEmptyLinesToKeep = 1;
58 GoogleStyle.PointerBindsToType = true;
59 GoogleStyle.DerivePointerBinding = true;
60 GoogleStyle.AccessModifierOffset = -1;
61 GoogleStyle.Standard = FormatStyle::LS_Auto;
62 GoogleStyle.IndentCaseLabels = true;
63 GoogleStyle.SpacesBeforeTrailingComments = 2;
64 GoogleStyle.BinPackParameters = true;
65 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
66 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
67 GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
68 GoogleStyle.ObjCSpaceBeforeProtocolList = false;
69 GoogleStyle.PenaltyExcessCharacter = 1000000;
70 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100;
71 return GoogleStyle;
72 }
73
getChromiumStyle()74 FormatStyle getChromiumStyle() {
75 FormatStyle ChromiumStyle = getGoogleStyle();
76 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
77 ChromiumStyle.BinPackParameters = false;
78 ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
79 ChromiumStyle.DerivePointerBinding = false;
80 return ChromiumStyle;
81 }
82
isTrailingComment(const AnnotatedToken & Tok)83 static bool isTrailingComment(const AnnotatedToken &Tok) {
84 return Tok.is(tok::comment) &&
85 (Tok.Children.empty() || Tok.Children[0].MustBreakBefore);
86 }
87
88 // Returns the length of everything up to the first possible line break after
89 // the ), ], } or > matching \c Tok.
getLengthToMatchingParen(const AnnotatedToken & Tok)90 static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
91 if (Tok.MatchingParen == NULL)
92 return 0;
93 AnnotatedToken *End = Tok.MatchingParen;
94 while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
95 End = &End->Children[0];
96 }
97 return End->TotalLength - Tok.TotalLength + 1;
98 }
99
100 /// \brief Manages the whitespaces around tokens and their replacements.
101 ///
102 /// This includes special handling for certain constructs, e.g. the alignment of
103 /// trailing line comments.
104 class WhitespaceManager {
105 public:
WhitespaceManager(SourceManager & SourceMgr)106 WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
107
108 /// \brief Replaces the whitespace in front of \p Tok. Only call once for
109 /// each \c AnnotatedToken.
replaceWhitespace(const AnnotatedToken & Tok,unsigned NewLines,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)110 void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
111 unsigned Spaces, unsigned WhitespaceStartColumn,
112 const FormatStyle &Style) {
113 // 2+ newlines mean an empty line separating logic scopes.
114 if (NewLines >= 2)
115 alignComments();
116
117 // Align line comments if they are trailing or if they continue other
118 // trailing comments.
119 if (isTrailingComment(Tok)) {
120 // Remove the comment's trailing whitespace.
121 if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
122 Replaces.insert(tooling::Replacement(
123 SourceMgr, Tok.FormatTok.Tok.getLocation().getLocWithOffset(
124 Tok.FormatTok.TokenLength),
125 Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
126
127 // Align comment with other comments.
128 if (Tok.Parent != NULL || !Comments.empty()) {
129 if (Style.ColumnLimit >=
130 Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) {
131 StoredComment Comment;
132 Comment.Tok = Tok.FormatTok;
133 Comment.Spaces = Spaces;
134 Comment.NewLines = NewLines;
135 Comment.MinColumn =
136 NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
137 Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
138 Comments.push_back(Comment);
139 return;
140 }
141 }
142 }
143
144 // If this line does not have a trailing comment, align the stored comments.
145 if (Tok.Children.empty() && !isTrailingComment(Tok))
146 alignComments();
147
148 if (Tok.Type == TT_BlockComment)
149 indentBlockComment(Tok.FormatTok, Spaces);
150
151 storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
152 }
153
154 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
155 /// backslashes to escape newlines inside a preprocessor directive.
156 ///
157 /// This function and \c replaceWhitespace have the same behavior if
158 /// \c Newlines == 0.
replacePPWhitespace(const AnnotatedToken & Tok,unsigned NewLines,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)159 void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
160 unsigned Spaces, unsigned WhitespaceStartColumn,
161 const FormatStyle &Style) {
162 storeReplacement(
163 Tok.FormatTok,
164 getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style));
165 }
166
167 /// \brief Inserts a line break into the middle of a token.
168 ///
169 /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
170 /// break and \p Postfix before the rest of the token starts in the next line.
171 ///
172 /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
173 /// used to generate the correct line break.
breakToken(const AnnotatedToken & Tok,unsigned Offset,StringRef Prefix,StringRef Postfix,bool InPPDirective,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)174 void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix,
175 StringRef Postfix, bool InPPDirective, unsigned Spaces,
176 unsigned WhitespaceStartColumn, const FormatStyle &Style) {
177 std::string NewLineText;
178 if (!InPPDirective)
179 NewLineText = getNewLineText(1, Spaces);
180 else
181 NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
182 std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
183 SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
184 .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
185 Replaces.insert(
186 tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
187 }
188
189 /// \brief Returns all the \c Replacements created during formatting.
generateReplacements()190 const tooling::Replacements &generateReplacements() {
191 alignComments();
192 return Replaces;
193 }
194
195 private:
indentBlockComment(const FormatToken & Tok,int Indent)196 void indentBlockComment(const FormatToken &Tok, int Indent) {
197 SourceLocation TokenLoc = Tok.Tok.getLocation();
198 int IndentDelta = Indent - SourceMgr.getSpellingColumnNumber(TokenLoc) + 1;
199 const char *Start = SourceMgr.getCharacterData(TokenLoc);
200 const char *Current = Start;
201 const char *TokEnd = Current + Tok.TokenLength;
202 llvm::SmallVector<SourceLocation, 16> LineStarts;
203 while (Current < TokEnd) {
204 if (*Current == '\n') {
205 ++Current;
206 LineStarts.push_back(TokenLoc.getLocWithOffset(Current - Start));
207 // If we need to outdent the line, check that it's indented enough.
208 for (int i = 0; i < -IndentDelta; ++i, ++Current)
209 if (Current >= TokEnd || *Current != ' ')
210 return;
211 } else {
212 ++Current;
213 }
214 }
215
216 for (size_t i = 0; i < LineStarts.size(); ++i) {
217 if (IndentDelta > 0)
218 Replaces.insert(tooling::Replacement(SourceMgr, LineStarts[i], 0,
219 std::string(IndentDelta, ' ')));
220 else if (IndentDelta < 0)
221 Replaces.insert(
222 tooling::Replacement(SourceMgr, LineStarts[i], -IndentDelta, ""));
223 }
224 }
225
getNewLineText(unsigned NewLines,unsigned Spaces)226 std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
227 return std::string(NewLines, '\n') + std::string(Spaces, ' ');
228 }
229
230 std::string
getNewLineText(unsigned NewLines,unsigned Spaces,unsigned WhitespaceStartColumn,const FormatStyle & Style)231 getNewLineText(unsigned NewLines, unsigned Spaces,
232 unsigned WhitespaceStartColumn, const FormatStyle &Style) {
233 std::string NewLineText;
234 if (NewLines > 0) {
235 unsigned Offset =
236 std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
237 for (unsigned i = 0; i < NewLines; ++i) {
238 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
239 NewLineText += "\\\n";
240 Offset = 0;
241 }
242 }
243 return NewLineText + std::string(Spaces, ' ');
244 }
245
246 /// \brief Structure to store a comment for later layout and alignment.
247 struct StoredComment {
248 FormatToken Tok;
249 unsigned MinColumn;
250 unsigned MaxColumn;
251 unsigned NewLines;
252 unsigned Spaces;
253 };
254 SmallVector<StoredComment, 16> Comments;
255 typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
256
257 /// \brief Try to align all stashed comments.
alignComments()258 void alignComments() {
259 unsigned MinColumn = 0;
260 unsigned MaxColumn = UINT_MAX;
261 comment_iterator Start = Comments.begin();
262 for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
263 if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
264 alignComments(Start, I, MinColumn);
265 MinColumn = I->MinColumn;
266 MaxColumn = I->MaxColumn;
267 Start = I;
268 } else {
269 MinColumn = std::max(MinColumn, I->MinColumn);
270 MaxColumn = std::min(MaxColumn, I->MaxColumn);
271 }
272 }
273 alignComments(Start, Comments.end(), MinColumn);
274 Comments.clear();
275 }
276
277 /// \brief Put all the comments between \p I and \p E into \p Column.
alignComments(comment_iterator I,comment_iterator E,unsigned Column)278 void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
279 while (I != E) {
280 unsigned Spaces = I->Spaces + Column - I->MinColumn;
281 storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
282 std::string(Spaces, ' '));
283 ++I;
284 }
285 }
286
287 /// \brief Stores \p Text as the replacement for the whitespace in front of
288 /// \p Tok.
storeReplacement(const FormatToken & Tok,const std::string Text)289 void storeReplacement(const FormatToken &Tok, const std::string Text) {
290 // Don't create a replacement, if it does not change anything.
291 if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
292 Tok.WhiteSpaceLength) == Text)
293 return;
294
295 Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
296 Tok.WhiteSpaceLength, Text));
297 }
298
299 SourceManager &SourceMgr;
300 tooling::Replacements Replaces;
301 };
302
303 class UnwrappedLineFormatter {
304 public:
UnwrappedLineFormatter(const FormatStyle & Style,SourceManager & SourceMgr,const AnnotatedLine & Line,unsigned FirstIndent,const AnnotatedToken & RootToken,WhitespaceManager & Whitespaces,bool StructuralError)305 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
306 const AnnotatedLine &Line, unsigned FirstIndent,
307 const AnnotatedToken &RootToken,
308 WhitespaceManager &Whitespaces, bool StructuralError)
309 : Style(Style), SourceMgr(SourceMgr), Line(Line),
310 FirstIndent(FirstIndent), RootToken(RootToken),
311 Whitespaces(Whitespaces), Count(0) {}
312
313 /// \brief Formats an \c UnwrappedLine.
314 ///
315 /// \returns The column after the last token in the last line of the
316 /// \c UnwrappedLine.
format(const AnnotatedLine * NextLine)317 unsigned format(const AnnotatedLine *NextLine) {
318 // Initialize state dependent on indent.
319 LineState State;
320 State.Column = FirstIndent;
321 State.NextToken = &RootToken;
322 State.Stack.push_back(
323 ParenState(FirstIndent + 4, FirstIndent, !Style.BinPackParameters,
324 /*HasMultiParameterLine=*/ false));
325 State.VariablePos = 0;
326 State.LineContainsContinuedForLoopSection = false;
327 State.ParenLevel = 0;
328 State.StartOfStringLiteral = 0;
329 State.StartOfLineLevel = State.ParenLevel;
330
331 DEBUG({
332 DebugTokenState(*State.NextToken);
333 });
334
335 // The first token has already been indented and thus consumed.
336 moveStateToNextToken(State, /*DryRun=*/ false);
337
338 // If everything fits on a single line, just put it there.
339 unsigned ColumnLimit = Style.ColumnLimit;
340 if (NextLine && NextLine->InPPDirective &&
341 !NextLine->First.FormatTok.HasUnescapedNewline)
342 ColumnLimit = getColumnLimit();
343 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
344 while (State.NextToken != NULL) {
345 addTokenToState(false, false, State);
346 }
347 return State.Column;
348 }
349
350 // If the ObjC method declaration does not fit on a line, we should format
351 // it with one arg per line.
352 if (Line.Type == LT_ObjCMethodDecl)
353 State.Stack.back().BreakBeforeParameter = true;
354
355 // Find best solution in solution space.
356 return analyzeSolutionSpace(State);
357 }
358
359 private:
DebugTokenState(const AnnotatedToken & AnnotatedTok)360 void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
361 const Token &Tok = AnnotatedTok.FormatTok.Tok;
362 llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
363 Tok.getLength());
364 llvm::errs();
365 }
366
/// \brief Formatting properties tracked for one nesting level.
struct ParenState {
  /// \brief Creates a level with the given indent, last space position and
  /// packing flags; every other property starts at 0 / \c false.
  ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
             bool HasMultiParameterLine)
      : Indent(Indent),
        LastSpace(LastSpace),
        FirstLessLess(0),
        BreakBeforeClosingBrace(false),
        QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking),
        BreakBeforeParameter(false),
        HasMultiParameterLine(HasMultiParameterLine),
        ColonPos(0),
        StartOfFunctionCall(0) {
  }

  /// \brief Column to which content of this nesting level is indented.
  unsigned Indent;

  /// \brief Column of the last space on this level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief Column of the first "<<" operator seen on this level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been seen on
  /// this level.
  unsigned FirstLessLess;

  /// \brief Whether a newline has to precede the block's closing brace.
  ///
  /// A newline before the closing brace is only wanted if there was also a
  /// newline after the opening brace.
  bool BreakBeforeClosingBrace;

  /// \brief Column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or after all commas of this context
  /// if \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief This context already has a line with more than one parameter.
  bool HasMultiParameterLine;

  /// \brief Column of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief Start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Orders states member by member, in declaration order.
  ///
  /// For the boolean members, \c true sorts before \c false.
  bool operator<(const ParenState &Other) const {
    return Indent != Other.Indent
               ? Indent < Other.Indent
           : LastSpace != Other.LastSpace
               ? LastSpace < Other.LastSpace
           : FirstLessLess != Other.FirstLessLess
               ? FirstLessLess < Other.FirstLessLess
           : BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace
               ? BreakBeforeClosingBrace
           : QuestionColumn != Other.QuestionColumn
               ? QuestionColumn < Other.QuestionColumn
           : AvoidBinPacking != Other.AvoidBinPacking
               ? AvoidBinPacking
           : BreakBeforeParameter != Other.BreakBeforeParameter
               ? BreakBeforeParameter
           : HasMultiParameterLine != Other.HasMultiParameterLine
               ? HasMultiParameterLine
           : ColonPos != Other.ColonPos
               ? ColonPos < Other.ColonPos
               : StartOfFunctionCall < Other.StartOfFunctionCall;
  }
};
444
445 /// \brief The current state when indenting a unwrapped line.
446 ///
447 /// As the indenting tries different combinations this is copied by value.
448 struct LineState {
449 /// \brief The number of used columns in the current line.
450 unsigned Column;
451
452 /// \brief The token that needs to be next formatted.
453 const AnnotatedToken *NextToken;
454
455 /// \brief The column of the first variable name in a variable declaration.
456 ///
457 /// Used to align further variables if necessary.
458 unsigned VariablePos;
459
460 /// \brief \c true if this line contains a continued for-loop section.
461 bool LineContainsContinuedForLoopSection;
462
463 /// \brief The level of nesting inside (), [], <> and {}.
464 unsigned ParenLevel;
465
466 /// \brief The \c ParenLevel at the start of this line.
467 unsigned StartOfLineLevel;
468
469 /// \brief The start column of the string literal, if we're in a string
470 /// literal sequence, 0 otherwise.
471 unsigned StartOfStringLiteral;
472
473 /// \brief A stack keeping track of properties applying to parenthesis
474 /// levels.
475 std::vector<ParenState> Stack;
476
477 /// \brief Comparison operator to be able to used \c LineState in \c map.
operator <clang::format::UnwrappedLineFormatter::LineState478 bool operator<(const LineState &Other) const {
479 if (NextToken != Other.NextToken)
480 return NextToken < Other.NextToken;
481 if (Column != Other.Column)
482 return Column < Other.Column;
483 if (VariablePos != Other.VariablePos)
484 return VariablePos < Other.VariablePos;
485 if (LineContainsContinuedForLoopSection !=
486 Other.LineContainsContinuedForLoopSection)
487 return LineContainsContinuedForLoopSection;
488 if (ParenLevel != Other.ParenLevel)
489 return ParenLevel < Other.ParenLevel;
490 if (StartOfLineLevel != Other.StartOfLineLevel)
491 return StartOfLineLevel < Other.StartOfLineLevel;
492 if (StartOfStringLiteral != Other.StartOfStringLiteral)
493 return StartOfStringLiteral < Other.StartOfStringLiteral;
494 return Stack < Other.Stack;
495 }
496 };
497
498 /// \brief Appends the next token to \p State and updates information
499 /// necessary for indentation.
500 ///
501 /// Puts the token on the current line if \p Newline is \c true and adds a
502 /// line break and necessary indentation otherwise.
503 ///
504 /// If \p DryRun is \c false, also creates and stores the required
505 /// \c Replacement.
addTokenToState(bool Newline,bool DryRun,LineState & State)506 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
507 const AnnotatedToken &Current = *State.NextToken;
508 const AnnotatedToken &Previous = *State.NextToken->Parent;
509 assert(State.Stack.size());
510
511 if (Current.Type == TT_ImplicitStringLiteral) {
512 State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
513 State.NextToken->FormatTok.TokenLength;
514 if (State.NextToken->Children.empty())
515 State.NextToken = NULL;
516 else
517 State.NextToken = &State.NextToken->Children[0];
518 return 0;
519 }
520
521 if (Newline) {
522 unsigned WhitespaceStartColumn = State.Column;
523 if (Current.is(tok::r_brace)) {
524 State.Column = Line.Level * 2;
525 } else if (Current.is(tok::string_literal) &&
526 State.StartOfStringLiteral != 0) {
527 State.Column = State.StartOfStringLiteral;
528 State.Stack.back().BreakBeforeParameter = true;
529 } else if (Current.is(tok::lessless) &&
530 State.Stack.back().FirstLessLess != 0) {
531 State.Column = State.Stack.back().FirstLessLess;
532 } else if (State.ParenLevel != 0 &&
533 (Previous.isOneOf(tok::equal, tok::coloncolon) ||
534 Current.isOneOf(tok::period, tok::arrow, tok::question))) {
535 // Indent and extra 4 spaces after if we know the current expression is
536 // continued. Don't do that on the top level, as we already indent 4
537 // there.
538 State.Column = std::max(State.Stack.back().LastSpace,
539 State.Stack.back().Indent) + 4;
540 } else if (Current.Type == TT_ConditionalExpr) {
541 State.Column = State.Stack.back().QuestionColumn;
542 } else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
543 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
544 State.ParenLevel == 0)) {
545 State.Column = State.VariablePos;
546 } else if (Previous.ClosesTemplateDeclaration ||
547 (Current.Type == TT_StartOfName && State.ParenLevel == 0)) {
548 State.Column = State.Stack.back().Indent - 4;
549 } else if (Current.Type == TT_ObjCSelectorName) {
550 if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
551 State.Column =
552 State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
553 } else {
554 State.Column = State.Stack.back().Indent;
555 State.Stack.back().ColonPos =
556 State.Column + Current.FormatTok.TokenLength;
557 }
558 } else if (Previous.Type == TT_ObjCMethodExpr ||
559 Current.Type == TT_StartOfName) {
560 State.Column = State.Stack.back().Indent + 4;
561 } else {
562 State.Column = State.Stack.back().Indent;
563 }
564
565 if (Current.is(tok::question))
566 State.Stack.back().BreakBeforeParameter = true;
567 if (Previous.isOneOf(tok::comma, tok::semi) &&
568 !State.Stack.back().AvoidBinPacking)
569 State.Stack.back().BreakBeforeParameter = false;
570
571 if (!DryRun) {
572 unsigned NewLines = 1;
573 if (Current.Type == TT_LineComment)
574 NewLines =
575 std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
576 Style.MaxEmptyLinesToKeep + 1));
577 if (!Line.InPPDirective)
578 Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
579 WhitespaceStartColumn, Style);
580 else
581 Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
582 WhitespaceStartColumn, Style);
583 }
584
585 State.Stack.back().LastSpace = State.Column;
586 State.StartOfLineLevel = State.ParenLevel;
587
588 // Any break on this level means that the parent level has been broken
589 // and we need to avoid bin packing there.
590 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
591 State.Stack[i].BreakBeforeParameter = true;
592 }
593 if (Current.isOneOf(tok::period, tok::arrow))
594 State.Stack.back().BreakBeforeParameter = true;
595
596 // If we break after {, we should also break before the corresponding }.
597 if (Previous.is(tok::l_brace))
598 State.Stack.back().BreakBeforeClosingBrace = true;
599
600 if (State.Stack.back().AvoidBinPacking) {
601 // If we are breaking after '(', '{', '<', this is not bin packing
602 // unless AllowAllParametersOfDeclarationOnNextLine is false.
603 if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) ||
604 (!Style.AllowAllParametersOfDeclarationOnNextLine &&
605 Line.MustBeDeclaration))
606 State.Stack.back().BreakBeforeParameter = true;
607 }
608 } else {
609 // FIXME: Put VariablePos into ParenState and remove second part of if().
610 if (Current.is(tok::equal) &&
611 (RootToken.is(tok::kw_for) || State.ParenLevel == 0))
612 State.VariablePos = State.Column - Previous.FormatTok.TokenLength;
613
614 unsigned Spaces = State.NextToken->SpacesRequiredBefore;
615
616 if (!DryRun)
617 Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style);
618
619 if (Current.Type == TT_ObjCSelectorName &&
620 State.Stack.back().ColonPos == 0) {
621 if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
622 State.Column + Spaces + Current.FormatTok.TokenLength)
623 State.Stack.back().ColonPos =
624 State.Stack.back().Indent + Current.LongestObjCSelectorName;
625 else
626 State.Stack.back().ColonPos =
627 State.Column + Spaces + Current.FormatTok.TokenLength;
628 }
629
630 if (Current.Type != TT_LineComment &&
631 (Previous.isOneOf(tok::l_paren, tok::l_brace) ||
632 State.NextToken->Parent->Type == TT_TemplateOpener))
633 State.Stack.back().Indent = State.Column + Spaces;
634 if (Previous.is(tok::comma) && !isTrailingComment(Current))
635 State.Stack.back().HasMultiParameterLine = true;
636
637 State.Column += Spaces;
638 if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
639 // Treat the condition inside an if as if it was a second function
640 // parameter, i.e. let nested calls have an indent of 4.
641 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
642 else if (Previous.is(tok::comma) && State.ParenLevel != 0)
643 // Top-level spaces are exempt as that mostly leads to better results.
644 State.Stack.back().LastSpace = State.Column;
645 else if ((Previous.Type == TT_BinaryOperator ||
646 Previous.Type == TT_ConditionalExpr ||
647 Previous.Type == TT_CtorInitializerColon) &&
648 getPrecedence(Previous) != prec::Assignment)
649 State.Stack.back().LastSpace = State.Column;
650 else if (Previous.Type == TT_InheritanceColon)
651 State.Stack.back().Indent = State.Column;
652 else if (Previous.ParameterCount > 1 &&
653 (Previous.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) ||
654 Previous.Type == TT_TemplateOpener))
655 // If this function has multiple parameters, indent nested calls from
656 // the start of the first parameter.
657 State.Stack.back().LastSpace = State.Column;
658 }
659
660 return moveStateToNextToken(State, DryRun);
661 }
662
663 /// \brief Mark the next token as consumed in \p State and modify its stacks
664 /// accordingly.
moveStateToNextToken(LineState & State,bool DryRun)665 unsigned moveStateToNextToken(LineState &State, bool DryRun) {
666 const AnnotatedToken &Current = *State.NextToken;
667 assert(State.Stack.size());
668
669 if (Current.Type == TT_InheritanceColon)
670 State.Stack.back().AvoidBinPacking = true;
671 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
672 State.Stack.back().FirstLessLess = State.Column;
673 if (Current.is(tok::question))
674 State.Stack.back().QuestionColumn = State.Column;
675 if (Current.isOneOf(tok::period, tok::arrow) &&
676 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
677 State.Stack.back().StartOfFunctionCall =
678 Current.LastInChainOfCalls ? 0 : State.Column;
679 if (Current.Type == TT_CtorInitializerColon) {
680 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
681 State.Stack.back().AvoidBinPacking = true;
682 State.Stack.back().BreakBeforeParameter = false;
683 }
684
685 // Insert scopes created by fake parenthesis.
686 for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
687 ParenState NewParenState = State.Stack.back();
688 NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
689 NewParenState.BreakBeforeParameter = false;
690 State.Stack.push_back(NewParenState);
691 }
692
693 // If we encounter an opening (, [, { or <, we add a level to our stacks to
694 // prepare for the following tokens.
695 if (Current.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) ||
696 State.NextToken->Type == TT_TemplateOpener) {
697 unsigned NewIndent;
698 bool AvoidBinPacking;
699 if (Current.is(tok::l_brace)) {
700 NewIndent = 2 + State.Stack.back().LastSpace;
701 AvoidBinPacking = false;
702 } else {
703 NewIndent = 4 + std::max(State.Stack.back().LastSpace,
704 State.Stack.back().StartOfFunctionCall);
705 AvoidBinPacking =
706 !Style.BinPackParameters || State.Stack.back().AvoidBinPacking;
707 }
708 State.Stack.push_back(
709 ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
710 State.Stack.back().HasMultiParameterLine));
711 ++State.ParenLevel;
712 }
713
714 // If this '[' opens an ObjC call, determine whether all parameters fit into
715 // one line and put one per line if they don't.
716 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
717 Current.MatchingParen != NULL) {
718 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
719 State.Stack.back().BreakBeforeParameter = true;
720 }
721
722 // If we encounter a closing ), ], } or >, we can remove a level from our
723 // stacks.
724 if (Current.isOneOf(tok::r_paren, tok::r_square) ||
725 (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
726 State.NextToken->Type == TT_TemplateCloser) {
727 State.Stack.pop_back();
728 --State.ParenLevel;
729 }
730
731 // Remove scopes created by fake parenthesis.
732 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
733 State.Stack.pop_back();
734 }
735
736 if (Current.is(tok::string_literal)) {
737 State.StartOfStringLiteral = State.Column;
738 } else if (Current.isNot(tok::comment)) {
739 State.StartOfStringLiteral = 0;
740 }
741
742 State.Column += Current.FormatTok.TokenLength;
743
744 if (State.NextToken->Children.empty())
745 State.NextToken = NULL;
746 else
747 State.NextToken = &State.NextToken->Children[0];
748
749 return breakProtrudingToken(Current, State, DryRun);
750 }
751
752 /// \brief If the current token sticks out over the end of the line, break
753 /// it if possible.
breakProtrudingToken(const AnnotatedToken & Current,LineState & State,bool DryRun)754 unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
755 bool DryRun) {
756 if (Current.isNot(tok::string_literal))
757 return 0;
758 // Only break up default narrow strings.
759 if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0)
760 return 0;
761
762 unsigned Penalty = 0;
763 unsigned TailOffset = 0;
764 unsigned TailLength = Current.FormatTok.TokenLength;
765 unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
766 unsigned OffsetFromStart = 0;
767 while (StartColumn + TailLength > getColumnLimit()) {
768 StringRef Text = StringRef(
769 Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength);
770 if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
771 break;
772 StringRef::size_type SplitPoint = getSplitPoint(
773 Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
774 if (SplitPoint == StringRef::npos)
775 break;
776 assert(SplitPoint != 0);
777 // +2, because 'Text' starts after the opening quotes, and does not
778 // include the closing quote we need to insert.
779 unsigned WhitespaceStartColumn =
780 StartColumn + OffsetFromStart + SplitPoint + 2;
781 State.Stack.back().LastSpace = StartColumn;
782 if (!DryRun) {
783 Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"",
784 Line.InPPDirective, StartColumn,
785 WhitespaceStartColumn, Style);
786 }
787 TailOffset += SplitPoint + 1;
788 TailLength -= SplitPoint + 1;
789 OffsetFromStart = 1;
790 Penalty += Style.PenaltyExcessCharacter;
791 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
792 State.Stack[i].BreakBeforeParameter = true;
793 }
794 State.Column = StartColumn + TailLength;
795 return Penalty;
796 }
797
798 StringRef::size_type
getSplitPoint(StringRef Text,StringRef::size_type Offset)799 getSplitPoint(StringRef Text, StringRef::size_type Offset) {
800 StringRef::size_type SpaceOffset = Text.rfind(' ', Offset);
801 if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
802 return SpaceOffset;
803 StringRef::size_type SlashOffset = Text.rfind('/', Offset);
804 if (SlashOffset != StringRef::npos && SlashOffset != 0)
805 return SlashOffset;
806 StringRef::size_type Split = getStartOfCharacter(Text, Offset);
807 if (Split != StringRef::npos && Split > 1)
808 // Do not split at 0.
809 return Split - 1;
810 return StringRef::npos;
811 }
812
813 StringRef::size_type
getStartOfCharacter(StringRef Text,StringRef::size_type Offset)814 getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
815 StringRef::size_type NextEscape = Text.find('\\');
816 while (NextEscape != StringRef::npos && NextEscape < Offset) {
817 StringRef::size_type SequenceLength =
818 getEscapeSequenceLength(Text.substr(NextEscape));
819 if (Offset < NextEscape + SequenceLength)
820 return NextEscape;
821 NextEscape = Text.find('\\', NextEscape + SequenceLength);
822 }
823 return Offset;
824 }
825
getEscapeSequenceLength(StringRef Text)826 unsigned getEscapeSequenceLength(StringRef Text) {
827 assert(Text[0] == '\\');
828 if (Text.size() < 2)
829 return 1;
830
831 switch (Text[1]) {
832 case 'u':
833 return 6;
834 case 'U':
835 return 10;
836 case 'x':
837 return getHexLength(Text);
838 default:
839 if (Text[1] >= '0' && Text[1] <= '7')
840 return getOctalLength(Text);
841 return 2;
842 }
843 }
844
getHexLength(StringRef Text)845 unsigned getHexLength(StringRef Text) {
846 unsigned I = 2; // Point after '\x'.
847 while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
848 (Text[I] >= 'a' && Text[I] <= 'f') ||
849 (Text[I] >= 'A' && Text[I] <= 'F'))) {
850 ++I;
851 }
852 return I;
853 }
854
getOctalLength(StringRef Text)855 unsigned getOctalLength(StringRef Text) {
856 unsigned I = 1;
857 while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
858 ++I;
859 }
860 return I;
861 }
862
getColumnLimit()863 unsigned getColumnLimit() {
864 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
865 }
866
867 /// \brief An edge in the solution space from \c Previous->State to \c State,
868 /// inserting a newline dependent on the \c NewLine.
869 struct StateNode {
StateNodeclang::format::UnwrappedLineFormatter::StateNode870 StateNode(const LineState &State, bool NewLine, StateNode *Previous)
871 : State(State), NewLine(NewLine), Previous(Previous) {}
872 LineState State;
873 bool NewLine;
874 StateNode *Previous;
875 };
876
877 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
878 ///
879 /// In case of equal penalties, we want to prefer states that were inserted
880 /// first. During state generation we make sure that we insert states first
881 /// that break the line as late as possible.
882 typedef std::pair<unsigned, unsigned> OrderedPenalty;
883
884 /// \brief An item in the prioritized BFS search queue. The \c StateNode's
885 /// \c State has the given \c OrderedPenalty.
886 typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
887
888 /// \brief The BFS queue type.
889 typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
890 std::greater<QueueItem> > QueueType;
891
892 /// \brief Analyze the entire solution space starting from \p InitialState.
893 ///
894 /// This implements a variant of Dijkstra's algorithm on the graph that spans
895 /// the solution space (\c LineStates are the nodes). The algorithm tries to
896 /// find the shortest path (the one with lowest penalty) from \p InitialState
897 /// to a state where all tokens are placed.
analyzeSolutionSpace(LineState & InitialState)898 unsigned analyzeSolutionSpace(LineState &InitialState) {
899 std::set<LineState> Seen;
900
901 // Insert start element into queue.
902 StateNode *Node =
903 new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
904 Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
905 ++Count;
906
907 // While not empty, take first element and follow edges.
908 while (!Queue.empty()) {
909 unsigned Penalty = Queue.top().first.first;
910 StateNode *Node = Queue.top().second;
911 if (Node->State.NextToken == NULL) {
912 DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
913 break;
914 }
915 Queue.pop();
916
917 if (!Seen.insert(Node->State).second)
918 // State already examined with lower penalty.
919 continue;
920
921 addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
922 addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
923 }
924
925 if (Queue.empty())
926 // We were unable to find a solution, do nothing.
927 // FIXME: Add diagnostic?
928 return 0;
929
930 // Reconstruct the solution.
931 reconstructPath(InitialState, Queue.top().second);
932 DEBUG(llvm::errs() << "---\n");
933
934 // Return the column after the last token of the solution.
935 return Queue.top().second->State.Column;
936 }
937
reconstructPath(LineState & State,StateNode * Current)938 void reconstructPath(LineState &State, StateNode *Current) {
939 // FIXME: This recursive implementation limits the possible number
940 // of tokens per line if compiled into a binary with small stack space.
941 // To become more independent of stack frame limitations we would need
942 // to also change the TokenAnnotator.
943 if (Current->Previous == NULL)
944 return;
945 reconstructPath(State, Current->Previous);
946 DEBUG({
947 if (Current->NewLine) {
948 llvm::errs()
949 << "Penalty for splitting before "
950 << Current->Previous->State.NextToken->FormatTok.Tok.getName()
951 << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
952 }
953 });
954 addTokenToState(Current->NewLine, false, State);
955 }
956
957 /// \brief Add the following state to the analysis queue \c Queue.
958 ///
959 /// Assume the current state is \p PreviousNode and has been reached with a
960 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
addNextStateToQueue(unsigned Penalty,StateNode * PreviousNode,bool NewLine)961 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
962 bool NewLine) {
963 if (NewLine && !canBreak(PreviousNode->State))
964 return;
965 if (!NewLine && mustBreak(PreviousNode->State))
966 return;
967 if (NewLine)
968 Penalty += PreviousNode->State.NextToken->SplitPenalty;
969
970 StateNode *Node = new (Allocator.Allocate())
971 StateNode(PreviousNode->State, NewLine, PreviousNode);
972 Penalty += addTokenToState(NewLine, true, Node->State);
973 if (Node->State.Column > getColumnLimit()) {
974 unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
975 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
976 }
977
978 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
979 ++Count;
980 }
981
982 /// \brief Returns \c true, if a line break after \p State is allowed.
canBreak(const LineState & State)983 bool canBreak(const LineState &State) {
984 if (!State.NextToken->CanBreakBefore &&
985 !(State.NextToken->is(tok::r_brace) &&
986 State.Stack.back().BreakBeforeClosingBrace))
987 return false;
988 // Trying to insert a parameter on a new line if there are already more than
989 // one parameter on the current line is bin packing.
990 if (State.Stack.back().HasMultiParameterLine &&
991 State.Stack.back().AvoidBinPacking)
992 return false;
993 return true;
994 }
995
996 /// \brief Returns \c true, if a line break after \p State is mandatory.
mustBreak(const LineState & State)997 bool mustBreak(const LineState &State) {
998 if (State.NextToken->MustBreakBefore)
999 return true;
1000 if (State.NextToken->is(tok::r_brace) &&
1001 State.Stack.back().BreakBeforeClosingBrace)
1002 return true;
1003 if (State.NextToken->Parent->is(tok::semi) &&
1004 State.LineContainsContinuedForLoopSection)
1005 return true;
1006 if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) ||
1007 State.NextToken->is(tok::question) ||
1008 State.NextToken->Type == TT_ConditionalExpr) &&
1009 State.Stack.back().BreakBeforeParameter &&
1010 !isTrailingComment(*State.NextToken) &&
1011 State.NextToken->isNot(tok::r_paren) &&
1012 State.NextToken->isNot(tok::r_brace))
1013 return true;
1014 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
1015 // out whether it is the first parameter. Clean this up.
1016 if (State.NextToken->Type == TT_ObjCSelectorName &&
1017 State.NextToken->LongestObjCSelectorName == 0 &&
1018 State.Stack.back().BreakBeforeParameter)
1019 return true;
1020 if ((State.NextToken->Type == TT_CtorInitializerColon ||
1021 (State.NextToken->Parent->ClosesTemplateDeclaration &&
1022 State.ParenLevel == 0)))
1023 return true;
1024 if (State.NextToken->Type == TT_InlineASMColon)
1025 return true;
1026 // This prevents breaks like:
1027 // ...
1028 // SomeParameter, OtherParameter).DoSomething(
1029 // ...
1030 // As they hide "DoSomething" and generally bad for readability.
1031 if (State.NextToken->isOneOf(tok::period, tok::arrow) &&
1032 getRemainingLength(State) + State.Column > getColumnLimit() &&
1033 State.ParenLevel < State.StartOfLineLevel)
1034 return true;
1035 return false;
1036 }
1037
1038 // Returns the total number of columns required for the remaining tokens.
getRemainingLength(const LineState & State)1039 unsigned getRemainingLength(const LineState &State) {
1040 if (State.NextToken && State.NextToken->Parent)
1041 return Line.Last->TotalLength - State.NextToken->Parent->TotalLength;
1042 return 0;
1043 }
1044
1045 FormatStyle Style;
1046 SourceManager &SourceMgr;
1047 const AnnotatedLine &Line;
1048 const unsigned FirstIndent;
1049 const AnnotatedToken &RootToken;
1050 WhitespaceManager &Whitespaces;
1051
1052 llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1053 QueueType Queue;
1054 // Increasing count of \c StateNode items we have created. This is used
1055 // to create a deterministic order independent of the container.
1056 unsigned Count;
1057 };
1058
1059 class LexerBasedFormatTokenSource : public FormatTokenSource {
1060 public:
LexerBasedFormatTokenSource(Lexer & Lex,SourceManager & SourceMgr)1061 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1062 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1063 IdentTable(Lex.getLangOpts()) {
1064 Lex.SetKeepWhitespaceMode(true);
1065 }
1066
getNextToken()1067 virtual FormatToken getNextToken() {
1068 if (GreaterStashed) {
1069 FormatTok.NewlinesBefore = 0;
1070 FormatTok.WhiteSpaceStart =
1071 FormatTok.Tok.getLocation().getLocWithOffset(1);
1072 FormatTok.WhiteSpaceLength = 0;
1073 GreaterStashed = false;
1074 return FormatTok;
1075 }
1076
1077 FormatTok = FormatToken();
1078 Lex.LexFromRawLexer(FormatTok.Tok);
1079 StringRef Text = rawTokenText(FormatTok.Tok);
1080 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1081 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1082 FormatTok.IsFirst = true;
1083
1084 // Consume and record whitespace until we find a significant token.
1085 while (FormatTok.Tok.is(tok::unknown)) {
1086 unsigned Newlines = Text.count('\n');
1087 if (Newlines > 0)
1088 FormatTok.LastNewlineOffset =
1089 FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
1090 unsigned EscapedNewlines = Text.count("\\\n");
1091 FormatTok.NewlinesBefore += Newlines;
1092 FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
1093 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1094
1095 if (FormatTok.Tok.is(tok::eof))
1096 return FormatTok;
1097 Lex.LexFromRawLexer(FormatTok.Tok);
1098 Text = rawTokenText(FormatTok.Tok);
1099 }
1100
1101 // Now FormatTok is the next non-whitespace token.
1102 FormatTok.TokenLength = Text.size();
1103
1104 // In case the token starts with escaped newlines, we want to
1105 // take them into account as whitespace - this pattern is quite frequent
1106 // in macro definitions.
1107 // FIXME: What do we want to do with other escaped spaces, and escaped
1108 // spaces or newlines in the middle of tokens?
1109 // FIXME: Add a more explicit test.
1110 unsigned i = 0;
1111 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1112 // FIXME: ++FormatTok.NewlinesBefore is missing...
1113 FormatTok.WhiteSpaceLength += 2;
1114 FormatTok.TokenLength -= 2;
1115 i += 2;
1116 }
1117
1118 if (FormatTok.Tok.is(tok::raw_identifier)) {
1119 IdentifierInfo &Info = IdentTable.get(Text);
1120 FormatTok.Tok.setIdentifierInfo(&Info);
1121 FormatTok.Tok.setKind(Info.getTokenID());
1122 }
1123
1124 if (FormatTok.Tok.is(tok::greatergreater)) {
1125 FormatTok.Tok.setKind(tok::greater);
1126 FormatTok.TokenLength = 1;
1127 GreaterStashed = true;
1128 }
1129
1130 // If we reformat comments, we remove trailing whitespace. Update the length
1131 // accordingly.
1132 if (FormatTok.Tok.is(tok::comment))
1133 FormatTok.TokenLength = Text.rtrim().size();
1134
1135 return FormatTok;
1136 }
1137
getIdentTable()1138 IdentifierTable &getIdentTable() { return IdentTable; }
1139
1140 private:
1141 FormatToken FormatTok;
1142 bool GreaterStashed;
1143 Lexer &Lex;
1144 SourceManager &SourceMgr;
1145 IdentifierTable IdentTable;
1146
1147 /// Returns the text of \c FormatTok.
rawTokenText(Token & Tok)1148 StringRef rawTokenText(Token &Tok) {
1149 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1150 Tok.getLength());
1151 }
1152 };
1153
1154 class Formatter : public UnwrappedLineConsumer {
1155 public:
Formatter(DiagnosticsEngine & Diag,const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,const std::vector<CharSourceRange> & Ranges)1156 Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1157 SourceManager &SourceMgr,
1158 const std::vector<CharSourceRange> &Ranges)
1159 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1160 Whitespaces(SourceMgr), Ranges(Ranges) {}
1161
~Formatter()1162 virtual ~Formatter() {}
1163
format()1164 tooling::Replacements format() {
1165 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1166 UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1167 StructuralError = Parser.parse();
1168 unsigned PreviousEndOfLineColumn = 0;
1169 TokenAnnotator Annotator(Style, SourceMgr, Lex,
1170 Tokens.getIdentTable().get("in"));
1171 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1172 Annotator.annotate(AnnotatedLines[i]);
1173 }
1174 deriveLocalStyle();
1175 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1176 Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1177
1178 // Adapt level to the next line if this is a comment.
1179 // FIXME: Can/should this be done in the UnwrappedLineParser?
1180 if (i + 1 != e && AnnotatedLines[i].First.is(tok::comment) &&
1181 AnnotatedLines[i].First.Children.empty() &&
1182 AnnotatedLines[i + 1].First.isNot(tok::r_brace))
1183 AnnotatedLines[i].Level = AnnotatedLines[i + 1].Level;
1184 }
1185 std::vector<int> IndentForLevel;
1186 bool PreviousLineWasTouched = false;
1187 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1188 E = AnnotatedLines.end();
1189 I != E; ++I) {
1190 const AnnotatedLine &TheLine = *I;
1191 const FormatToken &FirstTok = TheLine.First.FormatTok;
1192 int Offset = getIndentOffset(TheLine.First);
1193 while (IndentForLevel.size() <= TheLine.Level)
1194 IndentForLevel.push_back(-1);
1195 IndentForLevel.resize(TheLine.Level + 1);
1196 bool WasMoved =
1197 PreviousLineWasTouched && FirstTok.NewlinesBefore == 0;
1198 if (TheLine.First.is(tok::eof)) {
1199 if (PreviousLineWasTouched) {
1200 unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
1201 Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
1202 /*WhitespaceStartColumn*/ 0, Style);
1203 }
1204 } else if (TheLine.Type != LT_Invalid &&
1205 (WasMoved || touchesLine(TheLine))) {
1206 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1207 unsigned Indent = LevelIndent;
1208 if (static_cast<int>(Indent) + Offset >= 0)
1209 Indent += Offset;
1210 if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
1211 Indent = LevelIndent = SourceMgr.getSpellingColumnNumber(
1212 FirstTok.Tok.getLocation()) - 1;
1213 } else {
1214 formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1215 PreviousEndOfLineColumn);
1216 }
1217 tryFitMultipleLinesInOne(Indent, I, E);
1218 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1219 TheLine.First, Whitespaces,
1220 StructuralError);
1221 PreviousEndOfLineColumn =
1222 Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
1223 IndentForLevel[TheLine.Level] = LevelIndent;
1224 PreviousLineWasTouched = true;
1225 } else {
1226 if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
1227 unsigned Indent =
1228 SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
1229 unsigned LevelIndent = Indent;
1230 if (static_cast<int>(LevelIndent) - Offset >= 0)
1231 LevelIndent -= Offset;
1232 IndentForLevel[TheLine.Level] = LevelIndent;
1233
1234 // Remove trailing whitespace of the previous line if it was touched.
1235 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
1236 formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1237 PreviousEndOfLineColumn);
1238 }
1239 // If we did not reformat this unwrapped line, the column at the end of
1240 // the last token is unchanged - thus, we can calculate the end of the
1241 // last token.
1242 SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
1243 PreviousEndOfLineColumn =
1244 SourceMgr.getSpellingColumnNumber(LastLoc) +
1245 Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
1246 PreviousLineWasTouched = false;
1247 }
1248 }
1249 return Whitespaces.generateReplacements();
1250 }
1251
1252 private:
deriveLocalStyle()1253 void deriveLocalStyle() {
1254 unsigned CountBoundToVariable = 0;
1255 unsigned CountBoundToType = 0;
1256 bool HasCpp03IncompatibleFormat = false;
1257 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1258 if (AnnotatedLines[i].First.Children.empty())
1259 continue;
1260 AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
1261 while (!Tok->Children.empty()) {
1262 if (Tok->Type == TT_PointerOrReference) {
1263 bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
1264 bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
1265 if (SpacesBefore && !SpacesAfter)
1266 ++CountBoundToVariable;
1267 else if (!SpacesBefore && SpacesAfter)
1268 ++CountBoundToType;
1269 }
1270
1271 if (Tok->Type == TT_TemplateCloser &&
1272 Tok->Parent->Type == TT_TemplateCloser &&
1273 Tok->FormatTok.WhiteSpaceLength == 0)
1274 HasCpp03IncompatibleFormat = true;
1275 Tok = &Tok->Children[0];
1276 }
1277 }
1278 if (Style.DerivePointerBinding) {
1279 if (CountBoundToType > CountBoundToVariable)
1280 Style.PointerBindsToType = true;
1281 else if (CountBoundToType < CountBoundToVariable)
1282 Style.PointerBindsToType = false;
1283 }
1284 if (Style.Standard == FormatStyle::LS_Auto) {
1285 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1286 : FormatStyle::LS_Cpp03;
1287 }
1288 }
1289
/// \brief Get the indent of \p Level from \p IndentForLevel.
///
/// \p IndentForLevel must contain the indent for the level \c l at
/// \p IndentForLevel[l], or -1 if the indent for that level is unknown;
/// \p Level must be a valid index into it. For an unknown level the indent
/// is derived from the closest enclosing level whose indent is known, adding
/// 2 columns per level in between. If no enclosing level is known either,
/// level 0 is taken to be at column 0.
///
/// The vector is taken by reference and the lookup is iterative, so no copy
/// of the vector is made.
unsigned getIndent(const std::vector<int> &IndentForLevel, unsigned Level) {
  unsigned Extra = 0; // Columns added for the unknown levels skipped so far.
  for (;;) {
    if (IndentForLevel[Level] != -1)
      return IndentForLevel[Level] + Extra;
    if (Level == 0)
      return Extra;
    --Level;
    Extra += 2;
  }
}
1302
1303 /// \brief Get the offset of the line relatively to the level.
1304 ///
1305 /// For example, 'public:' labels in classes are offset by 1 or 2
1306 /// characters to the left from their level.
getIndentOffset(const AnnotatedToken & RootToken)1307 int getIndentOffset(const AnnotatedToken &RootToken) {
1308 bool IsAccessModifier = false;
1309 if (RootToken.isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private))
1310 IsAccessModifier = true;
1311 else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1312 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1313 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1314 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1315 RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1316 IsAccessModifier = true;
1317
1318 if (IsAccessModifier)
1319 return Style.AccessModifierOffset;
1320 return 0;
1321 }
1322
1323 /// \brief Tries to merge lines into one.
1324 ///
1325 /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1326 /// if possible; note that \c I will be incremented when lines are merged.
1327 ///
1328 /// Returns whether the resulting \c Line can fit in a single line.
tryFitMultipleLinesInOne(unsigned Indent,std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E)1329 void tryFitMultipleLinesInOne(unsigned Indent,
1330 std::vector<AnnotatedLine>::iterator &I,
1331 std::vector<AnnotatedLine>::iterator E) {
1332 // We can never merge stuff if there are trailing line comments.
1333 if (I->Last->Type == TT_LineComment)
1334 return;
1335
1336 unsigned Limit = Style.ColumnLimit - Indent;
1337 // If we already exceed the column limit, we set 'Limit' to 0. The different
1338 // tryMerge..() functions can then decide whether to still do merging.
1339 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1340
1341 if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1342 return;
1343
1344 if (I->Last->is(tok::l_brace)) {
1345 tryMergeSimpleBlock(I, E, Limit);
1346 } else if (I->First.is(tok::kw_if)) {
1347 tryMergeSimpleIf(I, E, Limit);
1348 } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1349 I->First.FormatTok.IsFirst)) {
1350 tryMergeSimplePPDirective(I, E, Limit);
1351 }
1352 return;
1353 }
1354
tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1355 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1356 std::vector<AnnotatedLine>::iterator E,
1357 unsigned Limit) {
1358 if (Limit == 0)
1359 return;
1360 AnnotatedLine &Line = *I;
1361 if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1362 return;
1363 if (I + 2 != E && (I + 2)->InPPDirective &&
1364 !(I + 2)->First.FormatTok.HasUnescapedNewline)
1365 return;
1366 if (1 + (I + 1)->Last->TotalLength > Limit)
1367 return;
1368 join(Line, *(++I));
1369 }
1370
tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1371 void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1372 std::vector<AnnotatedLine>::iterator E,
1373 unsigned Limit) {
1374 if (Limit == 0)
1375 return;
1376 if (!Style.AllowShortIfStatementsOnASingleLine)
1377 return;
1378 if ((I + 1)->InPPDirective != I->InPPDirective ||
1379 ((I + 1)->InPPDirective &&
1380 (I + 1)->First.FormatTok.HasUnescapedNewline))
1381 return;
1382 AnnotatedLine &Line = *I;
1383 if (Line.Last->isNot(tok::r_paren))
1384 return;
1385 if (1 + (I + 1)->Last->TotalLength > Limit)
1386 return;
1387 if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1388 return;
1389 // Only inline simple if's (no nested if or else).
1390 if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1391 return;
1392 join(Line, *(++I));
1393 }
1394
tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1395 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1396 std::vector<AnnotatedLine>::iterator E,
1397 unsigned Limit) {
1398 // First, check that the current line allows merging. This is the case if
1399 // we're not in a control flow statement and the last token is an opening
1400 // brace.
1401 AnnotatedLine &Line = *I;
1402 if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1403 tok::kw_else, tok::kw_try, tok::kw_catch,
1404 tok::kw_for,
1405 // This gets rid of all ObjC @ keywords and methods.
1406 tok::at, tok::minus, tok::plus))
1407 return;
1408
1409 AnnotatedToken *Tok = &(I + 1)->First;
1410 if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1411 !Tok->MustBreakBefore) {
1412 // We merge empty blocks even if the line exceeds the column limit.
1413 Tok->SpacesRequiredBefore = 0;
1414 Tok->CanBreakBefore = true;
1415 join(Line, *(I + 1));
1416 I += 1;
1417 } else if (Limit != 0) {
1418 // Check that we still have three lines and they fit into the limit.
1419 if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1420 !nextTwoLinesFitInto(I, Limit))
1421 return;
1422
1423 // Second, check that the next line does not contain any braces - if it
1424 // does, readability declines when putting it into a single line.
1425 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1426 return;
1427 do {
1428 if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1429 return;
1430 Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1431 } while (Tok != NULL);
1432
1433 // Last, check that the third line contains a single closing brace.
1434 Tok = &(I + 2)->First;
1435 if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1436 Tok->MustBreakBefore)
1437 return;
1438
1439 join(Line, *(I + 1));
1440 join(Line, *(I + 2));
1441 I += 2;
1442 }
1443 }
1444
nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,unsigned Limit)1445 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1446 unsigned Limit) {
1447 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1448 Limit;
1449 }
1450
join(AnnotatedLine & A,const AnnotatedLine & B)1451 void join(AnnotatedLine &A, const AnnotatedLine &B) {
1452 unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
1453 A.Last->Children.push_back(B.First);
1454 while (!A.Last->Children.empty()) {
1455 A.Last->Children[0].Parent = A.Last;
1456 A.Last->Children[0].TotalLength += LengthA;
1457 A.Last = &A.Last->Children[0];
1458 }
1459 }
1460
touchesRanges(const CharSourceRange & Range)1461 bool touchesRanges(const CharSourceRange &Range) {
1462 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1463 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1464 Ranges[i].getBegin()) &&
1465 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1466 Range.getBegin()))
1467 return true;
1468 }
1469 return false;
1470 }
1471
touchesLine(const AnnotatedLine & TheLine)1472 bool touchesLine(const AnnotatedLine &TheLine) {
1473 const FormatToken *First = &TheLine.First.FormatTok;
1474 const FormatToken *Last = &TheLine.Last->FormatTok;
1475 CharSourceRange LineRange = CharSourceRange::getTokenRange(
1476 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
1477 Last->Tok.getLocation());
1478 return touchesRanges(LineRange);
1479 }
1480
touchesEmptyLineBefore(const AnnotatedLine & TheLine)1481 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1482 const FormatToken *First = &TheLine.First.FormatTok;
1483 CharSourceRange LineRange = CharSourceRange::getCharRange(
1484 First->WhiteSpaceStart,
1485 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
1486 return touchesRanges(LineRange);
1487 }
1488
consumeUnwrappedLine(const UnwrappedLine & TheLine)1489 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1490 AnnotatedLines.push_back(AnnotatedLine(TheLine));
1491 }
1492
1493 /// \brief Add a new line and the required indent before the first Token
1494 /// of the \c UnwrappedLine if there was no structural parsing error.
1495 /// Returns the indent level of the \c UnwrappedLine.
formatFirstToken(const AnnotatedToken & RootToken,unsigned Indent,bool InPPDirective,unsigned PreviousEndOfLineColumn)1496 void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent,
1497 bool InPPDirective, unsigned PreviousEndOfLineColumn) {
1498 const FormatToken &Tok = RootToken.FormatTok;
1499
1500 unsigned Newlines =
1501 std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1502 if (Newlines == 0 && !Tok.IsFirst)
1503 Newlines = 1;
1504
1505 if (!InPPDirective || Tok.HasUnescapedNewline) {
1506 Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style);
1507 } else {
1508 Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
1509 PreviousEndOfLineColumn, Style);
1510 }
1511 }
1512
1513 DiagnosticsEngine &Diag;
1514 FormatStyle Style;
1515 Lexer &Lex;
1516 SourceManager &SourceMgr;
1517 WhitespaceManager Whitespaces;
1518 std::vector<CharSourceRange> Ranges;
1519 std::vector<AnnotatedLine> AnnotatedLines;
1520 bool StructuralError;
1521 };
1522
1523 tooling::Replacements
reformat(const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,std::vector<CharSourceRange> Ranges,DiagnosticConsumer * DiagClient)1524 reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1525 std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
1526 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1527 OwningPtr<DiagnosticConsumer> DiagPrinter;
1528 if (DiagClient == 0) {
1529 DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1530 DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1531 DiagClient = DiagPrinter.get();
1532 }
1533 DiagnosticsEngine Diagnostics(
1534 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1535 DiagClient, false);
1536 Diagnostics.setSourceManager(&SourceMgr);
1537 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1538 return formatter.format();
1539 }
1540
getFormattingLangOpts()1541 LangOptions getFormattingLangOpts() {
1542 LangOptions LangOpts;
1543 LangOpts.CPlusPlus = 1;
1544 LangOpts.CPlusPlus11 = 1;
1545 LangOpts.Bool = 1;
1546 LangOpts.ObjC1 = 1;
1547 LangOpts.ObjC2 = 1;
1548 return LangOpts;
1549 }
1550
1551 } // namespace format
1552 } // namespace clang
1553