1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #define DEBUG_TYPE "format-formatter"
17
18 #include "BreakableToken.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/OperatorPrecedence.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Format/Format.h"
26 #include "clang/Lex/Lexer.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/Support/Allocator.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33
34 namespace llvm {
35 namespace yaml {
36 template <>
37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
enumerationllvm::yaml::ScalarEnumerationTraits38 static void enumeration(IO &IO,
39 clang::format::FormatStyle::LanguageStandard &Value) {
40 IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
41 IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
42 IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
43 }
44 };
45
46 template <>
47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
48 static void
enumerationllvm::yaml::ScalarEnumerationTraits49 enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
50 IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
51 IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
52 IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
53 IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
54 }
55 };
56
57 template <>
58 struct ScalarEnumerationTraits<
59 clang::format::FormatStyle::NamespaceIndentationKind> {
60 static void
enumerationllvm::yaml::ScalarEnumerationTraits61 enumeration(IO &IO,
62 clang::format::FormatStyle::NamespaceIndentationKind &Value) {
63 IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
64 IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
65 IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
66 }
67 };
68
69 template <> struct MappingTraits<clang::format::FormatStyle> {
mappingllvm::yaml::MappingTraits70 static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
71 if (IO.outputting()) {
72 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" };
73 ArrayRef<StringRef> Styles(StylesArray);
74 for (size_t i = 0, e = Styles.size(); i < e; ++i) {
75 StringRef StyleName(Styles[i]);
76 clang::format::FormatStyle PredefinedStyle;
77 if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
78 Style == PredefinedStyle) {
79 IO.mapOptional("# BasedOnStyle", StyleName);
80 break;
81 }
82 }
83 } else {
84 StringRef BasedOnStyle;
85 IO.mapOptional("BasedOnStyle", BasedOnStyle);
86 if (!BasedOnStyle.empty())
87 if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
88 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
89 return;
90 }
91 }
92
93 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
94 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
95 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
96 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
97 Style.AllowAllParametersOfDeclarationOnNextLine);
98 IO.mapOptional("AllowShortIfStatementsOnASingleLine",
99 Style.AllowShortIfStatementsOnASingleLine);
100 IO.mapOptional("AllowShortLoopsOnASingleLine",
101 Style.AllowShortLoopsOnASingleLine);
102 IO.mapOptional("AlwaysBreakTemplateDeclarations",
103 Style.AlwaysBreakTemplateDeclarations);
104 IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
105 Style.AlwaysBreakBeforeMultilineStrings);
106 IO.mapOptional("BreakBeforeBinaryOperators",
107 Style.BreakBeforeBinaryOperators);
108 IO.mapOptional("BreakConstructorInitializersBeforeComma",
109 Style.BreakConstructorInitializersBeforeComma);
110 IO.mapOptional("BinPackParameters", Style.BinPackParameters);
111 IO.mapOptional("ColumnLimit", Style.ColumnLimit);
112 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
113 Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
114 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
115 IO.mapOptional("ExperimentalAutoDetectBinPacking",
116 Style.ExperimentalAutoDetectBinPacking);
117 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
118 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
119 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
120 IO.mapOptional("ObjCSpaceBeforeProtocolList",
121 Style.ObjCSpaceBeforeProtocolList);
122 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
123 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
124 IO.mapOptional("PenaltyBreakFirstLessLess",
125 Style.PenaltyBreakFirstLessLess);
126 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
127 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
128 Style.PenaltyReturnTypeOnItsOwnLine);
129 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
130 IO.mapOptional("SpacesBeforeTrailingComments",
131 Style.SpacesBeforeTrailingComments);
132 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
133 IO.mapOptional("Standard", Style.Standard);
134 IO.mapOptional("IndentWidth", Style.IndentWidth);
135 IO.mapOptional("UseTab", Style.UseTab);
136 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
137 IO.mapOptional("IndentFunctionDeclarationAfterType",
138 Style.IndentFunctionDeclarationAfterType);
139 }
140 };
141 }
142 }
143
144 namespace clang {
145 namespace format {
146
setDefaultPenalties(FormatStyle & Style)147 void setDefaultPenalties(FormatStyle &Style) {
148 Style.PenaltyBreakComment = 45;
149 Style.PenaltyBreakFirstLessLess = 120;
150 Style.PenaltyBreakString = 1000;
151 Style.PenaltyExcessCharacter = 1000000;
152 }
153
getLLVMStyle()154 FormatStyle getLLVMStyle() {
155 FormatStyle LLVMStyle;
156 LLVMStyle.AccessModifierOffset = -2;
157 LLVMStyle.AlignEscapedNewlinesLeft = false;
158 LLVMStyle.AlignTrailingComments = true;
159 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
160 LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
161 LLVMStyle.AllowShortLoopsOnASingleLine = false;
162 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
163 LLVMStyle.AlwaysBreakTemplateDeclarations = false;
164 LLVMStyle.BinPackParameters = true;
165 LLVMStyle.BreakBeforeBinaryOperators = false;
166 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
167 LLVMStyle.BreakConstructorInitializersBeforeComma = false;
168 LLVMStyle.ColumnLimit = 80;
169 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
170 LLVMStyle.Cpp11BracedListStyle = false;
171 LLVMStyle.DerivePointerBinding = false;
172 LLVMStyle.ExperimentalAutoDetectBinPacking = false;
173 LLVMStyle.IndentCaseLabels = false;
174 LLVMStyle.IndentFunctionDeclarationAfterType = false;
175 LLVMStyle.IndentWidth = 2;
176 LLVMStyle.MaxEmptyLinesToKeep = 1;
177 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
178 LLVMStyle.ObjCSpaceBeforeProtocolList = true;
179 LLVMStyle.PointerBindsToType = false;
180 LLVMStyle.SpacesBeforeTrailingComments = 1;
181 LLVMStyle.Standard = FormatStyle::LS_Cpp03;
182 LLVMStyle.UseTab = false;
183
184 setDefaultPenalties(LLVMStyle);
185 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
186
187 return LLVMStyle;
188 }
189
getGoogleStyle()190 FormatStyle getGoogleStyle() {
191 FormatStyle GoogleStyle;
192 GoogleStyle.AccessModifierOffset = -1;
193 GoogleStyle.AlignEscapedNewlinesLeft = true;
194 GoogleStyle.AlignTrailingComments = true;
195 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
196 GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
197 GoogleStyle.AllowShortLoopsOnASingleLine = true;
198 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
199 GoogleStyle.AlwaysBreakTemplateDeclarations = true;
200 GoogleStyle.BinPackParameters = true;
201 GoogleStyle.BreakBeforeBinaryOperators = false;
202 GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
203 GoogleStyle.BreakConstructorInitializersBeforeComma = false;
204 GoogleStyle.ColumnLimit = 80;
205 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
206 GoogleStyle.Cpp11BracedListStyle = true;
207 GoogleStyle.DerivePointerBinding = true;
208 GoogleStyle.ExperimentalAutoDetectBinPacking = false;
209 GoogleStyle.IndentCaseLabels = true;
210 GoogleStyle.IndentFunctionDeclarationAfterType = true;
211 GoogleStyle.IndentWidth = 2;
212 GoogleStyle.MaxEmptyLinesToKeep = 1;
213 GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
214 GoogleStyle.ObjCSpaceBeforeProtocolList = false;
215 GoogleStyle.PointerBindsToType = true;
216 GoogleStyle.SpacesBeforeTrailingComments = 2;
217 GoogleStyle.Standard = FormatStyle::LS_Auto;
218 GoogleStyle.UseTab = false;
219
220 setDefaultPenalties(GoogleStyle);
221 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
222
223 return GoogleStyle;
224 }
225
getChromiumStyle()226 FormatStyle getChromiumStyle() {
227 FormatStyle ChromiumStyle = getGoogleStyle();
228 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
229 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
230 ChromiumStyle.AllowShortLoopsOnASingleLine = false;
231 ChromiumStyle.BinPackParameters = false;
232 ChromiumStyle.DerivePointerBinding = false;
233 ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
234 return ChromiumStyle;
235 }
236
getMozillaStyle()237 FormatStyle getMozillaStyle() {
238 FormatStyle MozillaStyle = getLLVMStyle();
239 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
240 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
241 MozillaStyle.DerivePointerBinding = true;
242 MozillaStyle.IndentCaseLabels = true;
243 MozillaStyle.ObjCSpaceBeforeProtocolList = false;
244 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
245 MozillaStyle.PointerBindsToType = true;
246 return MozillaStyle;
247 }
248
getWebKitStyle()249 FormatStyle getWebKitStyle() {
250 FormatStyle Style = getLLVMStyle();
251 Style.AccessModifierOffset = -4;
252 Style.AlignTrailingComments = false;
253 Style.BreakBeforeBinaryOperators = true;
254 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
255 Style.BreakConstructorInitializersBeforeComma = true;
256 Style.ColumnLimit = 0;
257 Style.IndentWidth = 4;
258 Style.NamespaceIndentation = FormatStyle::NI_Inner;
259 Style.PointerBindsToType = true;
260 return Style;
261 }
262
getPredefinedStyle(StringRef Name,FormatStyle * Style)263 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
264 if (Name.equals_lower("llvm"))
265 *Style = getLLVMStyle();
266 else if (Name.equals_lower("chromium"))
267 *Style = getChromiumStyle();
268 else if (Name.equals_lower("mozilla"))
269 *Style = getMozillaStyle();
270 else if (Name.equals_lower("google"))
271 *Style = getGoogleStyle();
272 else if (Name.equals_lower("webkit"))
273 *Style = getWebKitStyle();
274 else
275 return false;
276
277 return true;
278 }
279
parseConfiguration(StringRef Text,FormatStyle * Style)280 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
281 if (Text.trim().empty())
282 return llvm::make_error_code(llvm::errc::invalid_argument);
283 llvm::yaml::Input Input(Text);
284 Input >> *Style;
285 return Input.error();
286 }
287
configurationAsText(const FormatStyle & Style)288 std::string configurationAsText(const FormatStyle &Style) {
289 std::string Text;
290 llvm::raw_string_ostream Stream(Text);
291 llvm::yaml::Output Output(Stream);
292 // We use the same mapping method for input and output, so we need a non-const
293 // reference here.
294 FormatStyle NonConstStyle = Style;
295 Output << NonConstStyle;
296 return Stream.str();
297 }
298
299 // Returns the length of everything up to the first possible line break after
300 // the ), ], } or > matching \c Tok.
getLengthToMatchingParen(const FormatToken & Tok)301 static unsigned getLengthToMatchingParen(const FormatToken &Tok) {
302 if (Tok.MatchingParen == NULL)
303 return 0;
304 FormatToken *End = Tok.MatchingParen;
305 while (End->Next && !End->Next->CanBreakBefore) {
306 End = End->Next;
307 }
308 return End->TotalLength - Tok.TotalLength + 1;
309 }
310
311 namespace {
312
313 class UnwrappedLineFormatter {
314 public:
UnwrappedLineFormatter(const FormatStyle & Style,SourceManager & SourceMgr,const AnnotatedLine & Line,unsigned FirstIndent,const FormatToken * RootToken,WhitespaceManager & Whitespaces,encoding::Encoding Encoding,bool BinPackInconclusiveFunctions)315 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
316 const AnnotatedLine &Line, unsigned FirstIndent,
317 const FormatToken *RootToken,
318 WhitespaceManager &Whitespaces,
319 encoding::Encoding Encoding,
320 bool BinPackInconclusiveFunctions)
321 : Style(Style), SourceMgr(SourceMgr), Line(Line),
322 FirstIndent(FirstIndent), RootToken(RootToken),
323 Whitespaces(Whitespaces), Count(0), Encoding(Encoding),
324 BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {}
325
326 /// \brief Formats an \c UnwrappedLine.
format(const AnnotatedLine * NextLine)327 void format(const AnnotatedLine *NextLine) {
328 // Initialize state dependent on indent.
329 LineState State;
330 State.Column = FirstIndent;
331 State.NextToken = RootToken;
332 State.Stack.push_back(ParenState(FirstIndent, FirstIndent,
333 /*AvoidBinPacking=*/false,
334 /*NoLineBreak=*/false));
335 State.LineContainsContinuedForLoopSection = false;
336 State.ParenLevel = 0;
337 State.StartOfStringLiteral = 0;
338 State.StartOfLineLevel = State.ParenLevel;
339 State.LowestLevelOnLine = State.ParenLevel;
340 State.IgnoreStackForComparison = false;
341
342 // The first token has already been indented and thus consumed.
343 moveStateToNextToken(State, /*DryRun=*/false, /*Newline=*/false);
344
345 if (Style.ColumnLimit == 0) {
346 formatWithoutColumnLimit(State);
347 return;
348 }
349
350 // If everything fits on a single line, just put it there.
351 unsigned ColumnLimit = Style.ColumnLimit;
352 if (NextLine && NextLine->InPPDirective &&
353 !NextLine->First->HasUnescapedNewline)
354 ColumnLimit = getColumnLimit();
355 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
356 while (State.NextToken != NULL) {
357 addTokenToState(false, false, State);
358 }
359 }
360
361 // If the ObjC method declaration does not fit on a line, we should format
362 // it with one arg per line.
363 if (Line.Type == LT_ObjCMethodDecl)
364 State.Stack.back().BreakBeforeParameter = true;
365
366 // Find best solution in solution space.
367 analyzeSolutionSpace(State);
368 }
369
370 private:
DebugTokenState(const FormatToken & FormatTok)371 void DebugTokenState(const FormatToken &FormatTok) {
372 const Token &Tok = FormatTok.Tok;
373 llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
374 Tok.getLength());
375 llvm::dbgs();
376 }
377
378 struct ParenState {
ParenStateclang::format::__anon7e6ce1c70111::UnwrappedLineFormatter::ParenState379 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
380 bool NoLineBreak)
381 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
382 BreakBeforeClosingBrace(false), QuestionColumn(0),
383 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
384 NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0),
385 StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0),
386 CallContinuation(0), VariablePos(0), ContainsLineBreak(false) {}
387
388 /// \brief The position to which a specific parenthesis level needs to be
389 /// indented.
390 unsigned Indent;
391
392 /// \brief The position of the last space on each level.
393 ///
394 /// Used e.g. to break like:
395 /// functionCall(Parameter, otherCall(
396 /// OtherParameter));
397 unsigned LastSpace;
398
399 /// \brief The position the first "<<" operator encountered on each level.
400 ///
401 /// Used to align "<<" operators. 0 if no such operator has been encountered
402 /// on a level.
403 unsigned FirstLessLess;
404
405 /// \brief Whether a newline needs to be inserted before the block's closing
406 /// brace.
407 ///
408 /// We only want to insert a newline before the closing brace if there also
409 /// was a newline after the beginning left brace.
410 bool BreakBeforeClosingBrace;
411
412 /// \brief The column of a \c ? in a conditional expression;
413 unsigned QuestionColumn;
414
415 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
416 /// lines, in this context.
417 bool AvoidBinPacking;
418
419 /// \brief Break after the next comma (or all the commas in this context if
420 /// \c AvoidBinPacking is \c true).
421 bool BreakBeforeParameter;
422
423 /// \brief Line breaking in this context would break a formatting rule.
424 bool NoLineBreak;
425
426 /// \brief The position of the colon in an ObjC method declaration/call.
427 unsigned ColonPos;
428
429 /// \brief The start of the most recent function in a builder-type call.
430 unsigned StartOfFunctionCall;
431
432 /// \brief Contains the start of array subscript expressions, so that they
433 /// can be aligned.
434 unsigned StartOfArraySubscripts;
435
436 /// \brief If a nested name specifier was broken over multiple lines, this
437 /// contains the start column of the second line. Otherwise 0.
438 unsigned NestedNameSpecifierContinuation;
439
440 /// \brief If a call expression was broken over multiple lines, this
441 /// contains the start column of the second line. Otherwise 0.
442 unsigned CallContinuation;
443
444 /// \brief The column of the first variable name in a variable declaration.
445 ///
446 /// Used to align further variables if necessary.
447 unsigned VariablePos;
448
449 /// \brief \c true if this \c ParenState already contains a line-break.
450 ///
451 /// The first line break in a certain \c ParenState causes extra penalty so
452 /// that clang-format prefers similar breaks, i.e. breaks in the same
453 /// parenthesis.
454 bool ContainsLineBreak;
455
operator <clang::format::__anon7e6ce1c70111::UnwrappedLineFormatter::ParenState456 bool operator<(const ParenState &Other) const {
457 if (Indent != Other.Indent)
458 return Indent < Other.Indent;
459 if (LastSpace != Other.LastSpace)
460 return LastSpace < Other.LastSpace;
461 if (FirstLessLess != Other.FirstLessLess)
462 return FirstLessLess < Other.FirstLessLess;
463 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
464 return BreakBeforeClosingBrace;
465 if (QuestionColumn != Other.QuestionColumn)
466 return QuestionColumn < Other.QuestionColumn;
467 if (AvoidBinPacking != Other.AvoidBinPacking)
468 return AvoidBinPacking;
469 if (BreakBeforeParameter != Other.BreakBeforeParameter)
470 return BreakBeforeParameter;
471 if (NoLineBreak != Other.NoLineBreak)
472 return NoLineBreak;
473 if (ColonPos != Other.ColonPos)
474 return ColonPos < Other.ColonPos;
475 if (StartOfFunctionCall != Other.StartOfFunctionCall)
476 return StartOfFunctionCall < Other.StartOfFunctionCall;
477 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
478 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
479 if (CallContinuation != Other.CallContinuation)
480 return CallContinuation < Other.CallContinuation;
481 if (VariablePos != Other.VariablePos)
482 return VariablePos < Other.VariablePos;
483 if (ContainsLineBreak != Other.ContainsLineBreak)
484 return ContainsLineBreak < Other.ContainsLineBreak;
485 return false;
486 }
487 };
488
489 /// \brief The current state when indenting a unwrapped line.
490 ///
491 /// As the indenting tries different combinations this is copied by value.
492 struct LineState {
493 /// \brief The number of used columns in the current line.
494 unsigned Column;
495
496 /// \brief The token that needs to be next formatted.
497 const FormatToken *NextToken;
498
499 /// \brief \c true if this line contains a continued for-loop section.
500 bool LineContainsContinuedForLoopSection;
501
502 /// \brief The level of nesting inside (), [], <> and {}.
503 unsigned ParenLevel;
504
505 /// \brief The \c ParenLevel at the start of this line.
506 unsigned StartOfLineLevel;
507
508 /// \brief The lowest \c ParenLevel on the current line.
509 unsigned LowestLevelOnLine;
510
511 /// \brief The start column of the string literal, if we're in a string
512 /// literal sequence, 0 otherwise.
513 unsigned StartOfStringLiteral;
514
515 /// \brief A stack keeping track of properties applying to parenthesis
516 /// levels.
517 std::vector<ParenState> Stack;
518
519 /// \brief Ignore the stack of \c ParenStates for state comparison.
520 ///
521 /// In long and deeply nested unwrapped lines, the current algorithm can
522 /// be insufficient for finding the best formatting with a reasonable amount
523 /// of time and memory. Setting this flag will effectively lead to the
524 /// algorithm not analyzing some combinations. However, these combinations
525 /// rarely contain the optimal solution: In short, accepting a higher
526 /// penalty early would need to lead to different values in the \c
527 /// ParenState stack (in an otherwise identical state) and these different
528 /// values would need to lead to a significant amount of avoided penalty
529 /// later.
530 ///
531 /// FIXME: Come up with a better algorithm instead.
532 bool IgnoreStackForComparison;
533
534 /// \brief Comparison operator to be able to used \c LineState in \c map.
operator <clang::format::__anon7e6ce1c70111::UnwrappedLineFormatter::LineState535 bool operator<(const LineState &Other) const {
536 if (NextToken != Other.NextToken)
537 return NextToken < Other.NextToken;
538 if (Column != Other.Column)
539 return Column < Other.Column;
540 if (LineContainsContinuedForLoopSection !=
541 Other.LineContainsContinuedForLoopSection)
542 return LineContainsContinuedForLoopSection;
543 if (ParenLevel != Other.ParenLevel)
544 return ParenLevel < Other.ParenLevel;
545 if (StartOfLineLevel != Other.StartOfLineLevel)
546 return StartOfLineLevel < Other.StartOfLineLevel;
547 if (LowestLevelOnLine != Other.LowestLevelOnLine)
548 return LowestLevelOnLine < Other.LowestLevelOnLine;
549 if (StartOfStringLiteral != Other.StartOfStringLiteral)
550 return StartOfStringLiteral < Other.StartOfStringLiteral;
551 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
552 return false;
553 return Stack < Other.Stack;
554 }
555 };
556
557 /// \brief Formats the line starting at \p State, simply keeping all of the
558 /// input's line breaking decisions.
formatWithoutColumnLimit(LineState & State)559 void formatWithoutColumnLimit(LineState &State) {
560 while (State.NextToken != NULL) {
561 bool Newline = mustBreak(State) ||
562 (canBreak(State) && State.NextToken->NewlinesBefore > 0);
563 addTokenToState(Newline, /*DryRun=*/false, State);
564 }
565 }
566
567 /// \brief Appends the next token to \p State and updates information
568 /// necessary for indentation.
569 ///
570 /// Puts the token on the current line if \p Newline is \c false and adds a
571 /// line break and necessary indentation otherwise.
572 ///
573 /// If \p DryRun is \c false, also creates and stores the required
574 /// \c Replacement.
addTokenToState(bool Newline,bool DryRun,LineState & State)575 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
576 const FormatToken &Current = *State.NextToken;
577 const FormatToken &Previous = *State.NextToken->Previous;
578
579 // Extra penalty that needs to be added because of the way certain line
580 // breaks are chosen.
581 unsigned ExtraPenalty = 0;
582
583 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
584 // FIXME: Is this correct?
585 int WhitespaceLength = SourceMgr.getSpellingColumnNumber(
586 State.NextToken->WhitespaceRange.getEnd()) -
587 SourceMgr.getSpellingColumnNumber(
588 State.NextToken->WhitespaceRange.getBegin());
589 State.Column += WhitespaceLength + State.NextToken->CodePointCount;
590 State.NextToken = State.NextToken->Next;
591 return 0;
592 }
593
594 // If we are continuing an expression, we want to indent an extra 4 spaces.
595 unsigned ContinuationIndent =
596 std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
597 if (Newline) {
598 State.Stack.back().ContainsLineBreak = true;
599 if (Current.is(tok::r_brace)) {
600 if (Current.BlockKind == BK_BracedInit)
601 State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
602 else
603 State.Column = FirstIndent;
604 } else if (Current.is(tok::string_literal) &&
605 State.StartOfStringLiteral != 0) {
606 State.Column = State.StartOfStringLiteral;
607 State.Stack.back().BreakBeforeParameter = true;
608 } else if (Current.is(tok::lessless) &&
609 State.Stack.back().FirstLessLess != 0) {
610 State.Column = State.Stack.back().FirstLessLess;
611 } else if (Current.isOneOf(tok::period, tok::arrow) &&
612 Current.Type != TT_DesignatedInitializerPeriod) {
613 if (State.Stack.back().CallContinuation == 0) {
614 State.Column = ContinuationIndent;
615 State.Stack.back().CallContinuation = State.Column;
616 } else {
617 State.Column = State.Stack.back().CallContinuation;
618 }
619 } else if (Current.Type == TT_ConditionalExpr) {
620 State.Column = State.Stack.back().QuestionColumn;
621 } else if (Previous.is(tok::comma) &&
622 State.Stack.back().VariablePos != 0) {
623 State.Column = State.Stack.back().VariablePos;
624 } else if (Previous.ClosesTemplateDeclaration ||
625 ((Current.Type == TT_StartOfName ||
626 Current.is(tok::kw_operator)) &&
627 State.ParenLevel == 0 &&
628 (!Style.IndentFunctionDeclarationAfterType ||
629 Line.StartsDefinition))) {
630 State.Column = State.Stack.back().Indent;
631 } else if (Current.Type == TT_ObjCSelectorName) {
632 if (State.Stack.back().ColonPos > Current.CodePointCount) {
633 State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
634 } else {
635 State.Column = State.Stack.back().Indent;
636 State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
637 }
638 } else if (Current.is(tok::l_square) &&
639 Current.Type != TT_ObjCMethodExpr) {
640 if (State.Stack.back().StartOfArraySubscripts != 0)
641 State.Column = State.Stack.back().StartOfArraySubscripts;
642 else
643 State.Column = ContinuationIndent;
644 } else if (Current.Type == TT_StartOfName ||
645 Previous.isOneOf(tok::coloncolon, tok::equal) ||
646 Previous.Type == TT_ObjCMethodExpr) {
647 State.Column = ContinuationIndent;
648 } else {
649 State.Column = State.Stack.back().Indent;
650 // Ensure that we fall back to indenting 4 spaces instead of just
651 // flushing continuations left.
652 if (State.Column == FirstIndent)
653 State.Column += 4;
654 }
655
656 if (Current.is(tok::question))
657 State.Stack.back().BreakBeforeParameter = true;
658 if ((Previous.isOneOf(tok::comma, tok::semi) &&
659 !State.Stack.back().AvoidBinPacking) ||
660 Previous.Type == TT_BinaryOperator)
661 State.Stack.back().BreakBeforeParameter = false;
662 if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0)
663 State.Stack.back().BreakBeforeParameter = false;
664
665 if (!DryRun) {
666 unsigned NewLines = 1;
667 if (Current.is(tok::comment))
668 NewLines = std::max(
669 NewLines,
670 std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
671 Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
672 State.Column, Line.InPPDirective);
673 }
674
675 if (!Current.isTrailingComment())
676 State.Stack.back().LastSpace = State.Column;
677 if (Current.isOneOf(tok::arrow, tok::period) &&
678 Current.Type != TT_DesignatedInitializerPeriod)
679 State.Stack.back().LastSpace += Current.CodePointCount;
680 State.StartOfLineLevel = State.ParenLevel;
681 State.LowestLevelOnLine = State.ParenLevel;
682
683 // Any break on this level means that the parent level has been broken
684 // and we need to avoid bin packing there.
685 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
686 State.Stack[i].BreakBeforeParameter = true;
687 }
688 const FormatToken *TokenBefore = Current.getPreviousNonComment();
689 if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) &&
690 TokenBefore->Type != TT_TemplateCloser &&
691 TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope())
692 State.Stack.back().BreakBeforeParameter = true;
693
694 // If we break after {, we should also break before the corresponding }.
695 if (Previous.is(tok::l_brace))
696 State.Stack.back().BreakBeforeClosingBrace = true;
697
698 if (State.Stack.back().AvoidBinPacking) {
699 // If we are breaking after '(', '{', '<', this is not bin packing
700 // unless AllowAllParametersOfDeclarationOnNextLine is false.
701 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) ||
702 Previous.Type == TT_BinaryOperator) ||
703 (!Style.AllowAllParametersOfDeclarationOnNextLine &&
704 Line.MustBeDeclaration))
705 State.Stack.back().BreakBeforeParameter = true;
706 }
707
708 // Breaking before the first "<<" is generally not desirable.
709 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
710 ExtraPenalty += Style.PenaltyBreakFirstLessLess;
711
712 } else {
713 if (Current.is(tok::equal) &&
714 (RootToken->is(tok::kw_for) || State.ParenLevel == 0) &&
715 State.Stack.back().VariablePos == 0) {
716 State.Stack.back().VariablePos = State.Column;
717 // Move over * and & if they are bound to the variable name.
718 const FormatToken *Tok = &Previous;
719 while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
720 State.Stack.back().VariablePos -= Tok->CodePointCount;
721 if (Tok->SpacesRequiredBefore != 0)
722 break;
723 Tok = Tok->Previous;
724 }
725 if (Previous.PartOfMultiVariableDeclStmt)
726 State.Stack.back().LastSpace = State.Stack.back().VariablePos;
727 }
728
729 unsigned Spaces = State.NextToken->SpacesRequiredBefore;
730
731 if (!DryRun)
732 Whitespaces.replaceWhitespace(Current, 0, Spaces,
733 State.Column + Spaces);
734
735 if (Current.Type == TT_ObjCSelectorName &&
736 State.Stack.back().ColonPos == 0) {
737 if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
738 State.Column + Spaces + Current.CodePointCount)
739 State.Stack.back().ColonPos =
740 State.Stack.back().Indent + Current.LongestObjCSelectorName;
741 else
742 State.Stack.back().ColonPos =
743 State.Column + Spaces + Current.CodePointCount;
744 }
745
746 if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
747 Current.Type != TT_LineComment)
748 State.Stack.back().Indent = State.Column + Spaces;
749 if (Previous.is(tok::comma) && !Current.isTrailingComment() &&
750 State.Stack.back().AvoidBinPacking)
751 State.Stack.back().NoLineBreak = true;
752
753 State.Column += Spaces;
754 if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for))
755 // Treat the condition inside an if as if it was a second function
756 // parameter, i.e. let nested calls have an indent of 4.
757 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
758 else if (Previous.is(tok::comma))
759 State.Stack.back().LastSpace = State.Column;
760 else if ((Previous.Type == TT_BinaryOperator ||
761 Previous.Type == TT_ConditionalExpr ||
762 Previous.Type == TT_CtorInitializerColon) &&
763 !(Previous.getPrecedence() == prec::Assignment &&
764 Current.FakeLParens.empty()))
765 // Always indent relative to the RHS of the expression unless this is a
766 // simple assignment without binary expression on the RHS.
767 State.Stack.back().LastSpace = State.Column;
768 else if (Previous.Type == TT_InheritanceColon)
769 State.Stack.back().Indent = State.Column;
770 else if (Previous.opensScope()) {
771 // If a function has multiple parameters (including a single parameter
772 // that is a binary expression) or a trailing call, indent all
773 // parameters from the opening parenthesis. This avoids confusing
774 // indents like:
775 // OuterFunction(InnerFunctionCall(
776 // ParameterToInnerFunction),
777 // SecondParameterToOuterFunction);
778 bool HasMultipleParameters = !Current.FakeLParens.empty();
779 bool HasTrailingCall = false;
780 if (Previous.MatchingParen) {
781 const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
782 if (Next && Next->isOneOf(tok::period, tok::arrow))
783 HasTrailingCall = true;
784 }
785 if (HasMultipleParameters || HasTrailingCall)
786 State.Stack.back().LastSpace = State.Column;
787 }
788 }
789
790 return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty;
791 }
792
793 /// \brief Mark the next token as consumed in \p State and modify its stacks
794 /// accordingly.
moveStateToNextToken(LineState & State,bool DryRun,bool Newline)795 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline) {
796 const FormatToken &Current = *State.NextToken;
797 assert(State.Stack.size());
798
799 if (Current.Type == TT_InheritanceColon)
800 State.Stack.back().AvoidBinPacking = true;
801 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
802 State.Stack.back().FirstLessLess = State.Column;
803 if (Current.is(tok::l_square) &&
804 State.Stack.back().StartOfArraySubscripts == 0)
805 State.Stack.back().StartOfArraySubscripts = State.Column;
806 if (Current.is(tok::question))
807 State.Stack.back().QuestionColumn = State.Column;
808 if (!Current.opensScope() && !Current.closesScope())
809 State.LowestLevelOnLine =
810 std::min(State.LowestLevelOnLine, State.ParenLevel);
811 if (Current.isOneOf(tok::period, tok::arrow) &&
812 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
813 State.Stack.back().StartOfFunctionCall =
814 Current.LastInChainOfCalls ? 0
815 : State.Column + Current.CodePointCount;
816 if (Current.Type == TT_CtorInitializerColon) {
817 // Indent 2 from the column, so:
818 // SomeClass::SomeClass()
819 // : First(...), ...
820 // Next(...)
821 // ^ line up here.
822 if (!Style.BreakConstructorInitializersBeforeComma)
823 State.Stack.back().Indent = State.Column + 2;
824 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
825 State.Stack.back().AvoidBinPacking = true;
826 State.Stack.back().BreakBeforeParameter = false;
827 }
828
829 // If return returns a binary expression, align after it.
830 if (Current.is(tok::kw_return) && !Current.FakeLParens.empty())
831 State.Stack.back().LastSpace = State.Column + 7;
832
833 // In ObjC method declaration we align on the ":" of parameters, but we need
834 // to ensure that we indent parameters on subsequent lines by at least 4.
835 if (Current.Type == TT_ObjCMethodSpecifier)
836 State.Stack.back().Indent += 4;
837
838 // Insert scopes created by fake parenthesis.
839 const FormatToken *Previous = Current.getPreviousNonComment();
840 // Don't add extra indentation for the first fake parenthesis after
841 // 'return', assignements or opening <({[. The indentation for these cases
842 // is special cased.
843 bool SkipFirstExtraIndent =
844 Current.is(tok::kw_return) ||
845 (Previous && (Previous->opensScope() ||
846 Previous->getPrecedence() == prec::Assignment));
847 for (SmallVectorImpl<prec::Level>::const_reverse_iterator
848 I = Current.FakeLParens.rbegin(),
849 E = Current.FakeLParens.rend();
850 I != E; ++I) {
851 ParenState NewParenState = State.Stack.back();
852 NewParenState.ContainsLineBreak = false;
853 NewParenState.Indent =
854 std::max(std::max(State.Column, NewParenState.Indent),
855 State.Stack.back().LastSpace);
856
857 // Always indent conditional expressions. Never indent expression where
858 // the 'operator' is ',', ';' or an assignment (i.e. *I <=
859 // prec::Assignment) as those have different indentation rules. Indent
860 // other expression, unless the indentation needs to be skipped.
861 if (*I == prec::Conditional ||
862 (!SkipFirstExtraIndent && *I > prec::Assignment &&
863 !Style.BreakBeforeBinaryOperators))
864 NewParenState.Indent += 4;
865 if (Previous && !Previous->opensScope())
866 NewParenState.BreakBeforeParameter = false;
867 State.Stack.push_back(NewParenState);
868 SkipFirstExtraIndent = false;
869 }
870
871 // If we encounter an opening (, [, { or <, we add a level to our stacks to
872 // prepare for the following tokens.
873 if (Current.opensScope()) {
874 unsigned NewIndent;
875 unsigned LastSpace = State.Stack.back().LastSpace;
876 bool AvoidBinPacking;
877 if (Current.is(tok::l_brace)) {
878 NewIndent =
879 LastSpace + (Style.Cpp11BracedListStyle ? 4 : Style.IndentWidth);
880 const FormatToken *NextNoComment = Current.getNextNonComment();
881 AvoidBinPacking = NextNoComment &&
882 NextNoComment->Type == TT_DesignatedInitializerPeriod;
883 } else {
884 NewIndent =
885 4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall);
886 AvoidBinPacking = !Style.BinPackParameters ||
887 (Style.ExperimentalAutoDetectBinPacking &&
888 (Current.PackingKind == PPK_OnePerLine ||
889 (!BinPackInconclusiveFunctions &&
890 Current.PackingKind == PPK_Inconclusive)));
891 }
892
893 State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking,
894 State.Stack.back().NoLineBreak));
895 ++State.ParenLevel;
896 }
897
898 // If this '[' opens an ObjC call, determine whether all parameters fit into
899 // one line and put one per line if they don't.
900 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
901 Current.MatchingParen != NULL) {
902 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
903 State.Stack.back().BreakBeforeParameter = true;
904 }
905
906 // If we encounter a closing ), ], } or >, we can remove a level from our
907 // stacks.
908 if (Current.isOneOf(tok::r_paren, tok::r_square) ||
909 (Current.is(tok::r_brace) && State.NextToken != RootToken) ||
910 State.NextToken->Type == TT_TemplateCloser) {
911 State.Stack.pop_back();
912 --State.ParenLevel;
913 }
914 if (Current.is(tok::r_square)) {
915 // If this ends the array subscript expr, reset the corresponding value.
916 const FormatToken *NextNonComment = Current.getNextNonComment();
917 if (NextNonComment && NextNonComment->isNot(tok::l_square))
918 State.Stack.back().StartOfArraySubscripts = 0;
919 }
920
921 // Remove scopes created by fake parenthesis.
922 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
923 unsigned VariablePos = State.Stack.back().VariablePos;
924 State.Stack.pop_back();
925 State.Stack.back().VariablePos = VariablePos;
926 }
927
928 if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) {
929 State.StartOfStringLiteral = State.Column;
930 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash,
931 tok::string_literal)) {
932 State.StartOfStringLiteral = 0;
933 }
934
935 State.Column += Current.CodePointCount;
936
937 State.NextToken = State.NextToken->Next;
938
939 if (!Newline && Style.AlwaysBreakBeforeMultilineStrings &&
940 Current.is(tok::string_literal) && Current.CanBreakBefore)
941 return 0;
942
943 return breakProtrudingToken(Current, State, DryRun);
944 }
945
946 /// \brief If the current token sticks out over the end of the line, break
947 /// it if possible.
948 ///
949 /// \returns An extra penalty if a token was broken, otherwise 0.
950 ///
951 /// The returned penalty will cover the cost of the additional line breaks and
952 /// column limit violation in all lines except for the last one. The penalty
953 /// for the column limit violation in the last line (and in single line
954 /// tokens) is handled in \c addNextStateToQueue.
breakProtrudingToken(const FormatToken & Current,LineState & State,bool DryRun)955 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
956 bool DryRun) {
957 llvm::OwningPtr<BreakableToken> Token;
958 unsigned StartColumn = State.Column - Current.CodePointCount;
959 unsigned OriginalStartColumn =
960 SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
961 1;
962
963 if (Current.is(tok::string_literal) &&
964 Current.Type != TT_ImplicitStringLiteral) {
965 // Only break up default narrow strings.
966 if (!Current.TokenText.startswith("\""))
967 return 0;
968 // Don't break string literals with escaped newlines. As clang-format must
969 // not change the string's content, it is unlikely that we'll end up with
970 // a better format.
971 if (Current.TokenText.find("\\\n") != StringRef::npos)
972 return 0;
973 // Exempts unterminated string literals from line breaking. The user will
974 // likely want to terminate the string before any line breaking is done.
975 if (Current.IsUnterminatedLiteral)
976 return 0;
977
978 Token.reset(new BreakableStringLiteral(Current, StartColumn,
979 Line.InPPDirective, Encoding));
980 } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
981 Token.reset(new BreakableBlockComment(
982 Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
983 Line.InPPDirective, Encoding));
984 } else if (Current.Type == TT_LineComment &&
985 (Current.Previous == NULL ||
986 Current.Previous->Type != TT_ImplicitStringLiteral)) {
987 // Don't break line comments with escaped newlines. These look like
988 // separate line comments, but in fact contain a single line comment with
989 // multiple lines including leading whitespace and the '//' markers.
990 //
991 // FIXME: If we want to handle them correctly, we'll need to adjust
992 // leading whitespace in consecutive lines when changing indentation of
993 // the first line similar to what we do with block comments.
994 StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
995 if (EscapedNewlinePos != StringRef::npos) {
996 State.Column =
997 StartColumn +
998 encoding::getCodePointCount(
999 Current.TokenText.substr(0, EscapedNewlinePos), Encoding) +
1000 1;
1001 return 0;
1002 }
1003
1004 Token.reset(new BreakableLineComment(Current, StartColumn,
1005 Line.InPPDirective, Encoding));
1006 } else {
1007 return 0;
1008 }
1009 if (Current.UnbreakableTailLength >= getColumnLimit())
1010 return 0;
1011
1012 unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength;
1013 bool BreakInserted = false;
1014 unsigned Penalty = 0;
1015 unsigned RemainingTokenColumns = 0;
1016 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
1017 LineIndex != EndIndex; ++LineIndex) {
1018 if (!DryRun)
1019 Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
1020 unsigned TailOffset = 0;
1021 RemainingTokenColumns = Token->getLineLengthAfterSplit(
1022 LineIndex, TailOffset, StringRef::npos);
1023 while (RemainingTokenColumns > RemainingSpace) {
1024 BreakableToken::Split Split =
1025 Token->getSplit(LineIndex, TailOffset, getColumnLimit());
1026 if (Split.first == StringRef::npos) {
1027 // The last line's penalty is handled in addNextStateToQueue().
1028 if (LineIndex < EndIndex - 1)
1029 Penalty += Style.PenaltyExcessCharacter *
1030 (RemainingTokenColumns - RemainingSpace);
1031 break;
1032 }
1033 assert(Split.first != 0);
1034 unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
1035 LineIndex, TailOffset + Split.first + Split.second,
1036 StringRef::npos);
1037 assert(NewRemainingTokenColumns < RemainingTokenColumns);
1038 if (!DryRun)
1039 Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
1040 Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString
1041 : Style.PenaltyBreakComment;
1042 unsigned ColumnsUsed =
1043 Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
1044 if (ColumnsUsed > getColumnLimit()) {
1045 Penalty +=
1046 Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
1047 }
1048 TailOffset += Split.first + Split.second;
1049 RemainingTokenColumns = NewRemainingTokenColumns;
1050 BreakInserted = true;
1051 }
1052 }
1053
1054 State.Column = RemainingTokenColumns;
1055
1056 if (BreakInserted) {
1057 // If we break the token inside a parameter list, we need to break before
1058 // the next parameter on all levels, so that the next parameter is clearly
1059 // visible. Line comments already introduce a break.
1060 if (Current.Type != TT_LineComment) {
1061 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
1062 State.Stack[i].BreakBeforeParameter = true;
1063 }
1064
1065 State.Stack.back().LastSpace = StartColumn;
1066 }
1067 return Penalty;
1068 }
1069
getColumnLimit()1070 unsigned getColumnLimit() {
1071 // In preprocessor directives reserve two chars for trailing " \"
1072 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
1073 }
1074
1075 /// \brief An edge in the solution space from \c Previous->State to \c State,
1076 /// inserting a newline dependent on the \c NewLine.
1077 struct StateNode {
StateNodeclang::format::__anon7e6ce1c70111::UnwrappedLineFormatter::StateNode1078 StateNode(const LineState &State, bool NewLine, StateNode *Previous)
1079 : State(State), NewLine(NewLine), Previous(Previous) {}
1080 LineState State;
1081 bool NewLine;
1082 StateNode *Previous;
1083 };
1084
1085 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
1086 ///
1087 /// In case of equal penalties, we want to prefer states that were inserted
1088 /// first. During state generation we make sure that we insert states first
1089 /// that break the line as late as possible.
1090 typedef std::pair<unsigned, unsigned> OrderedPenalty;
1091
1092 /// \brief An item in the prioritized BFS search queue. The \c StateNode's
1093 /// \c State has the given \c OrderedPenalty.
1094 typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
1095
1096 /// \brief The BFS queue type.
1097 typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
1098 std::greater<QueueItem> > QueueType;
1099
1100 /// \brief Analyze the entire solution space starting from \p InitialState.
1101 ///
1102 /// This implements a variant of Dijkstra's algorithm on the graph that spans
1103 /// the solution space (\c LineStates are the nodes). The algorithm tries to
1104 /// find the shortest path (the one with lowest penalty) from \p InitialState
1105 /// to a state where all tokens are placed.
analyzeSolutionSpace(LineState & InitialState)1106 void analyzeSolutionSpace(LineState &InitialState) {
1107 std::set<LineState> Seen;
1108
1109 // Insert start element into queue.
1110 StateNode *Node =
1111 new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
1112 Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
1113 ++Count;
1114
1115 // While not empty, take first element and follow edges.
1116 while (!Queue.empty()) {
1117 unsigned Penalty = Queue.top().first.first;
1118 StateNode *Node = Queue.top().second;
1119 if (Node->State.NextToken == NULL) {
1120 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
1121 break;
1122 }
1123 Queue.pop();
1124
1125 // Cut off the analysis of certain solutions if the analysis gets too
1126 // complex. See description of IgnoreStackForComparison.
1127 if (Count > 10000)
1128 Node->State.IgnoreStackForComparison = true;
1129
1130 if (!Seen.insert(Node->State).second)
1131 // State already examined with lower penalty.
1132 continue;
1133
1134 addNextStateToQueue(Penalty, Node, /*NewLine=*/false);
1135 addNextStateToQueue(Penalty, Node, /*NewLine=*/true);
1136 }
1137
1138 if (Queue.empty())
1139 // We were unable to find a solution, do nothing.
1140 // FIXME: Add diagnostic?
1141 return;
1142
1143 // Reconstruct the solution.
1144 reconstructPath(InitialState, Queue.top().second);
1145 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
1146 DEBUG(llvm::dbgs() << "---\n");
1147 }
1148
reconstructPath(LineState & State,StateNode * Current)1149 void reconstructPath(LineState &State, StateNode *Current) {
1150 std::deque<StateNode *> Path;
1151 // We do not need a break before the initial token.
1152 while (Current->Previous) {
1153 Path.push_front(Current);
1154 Current = Current->Previous;
1155 }
1156 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1157 I != E; ++I) {
1158 DEBUG({
1159 if ((*I)->NewLine) {
1160 llvm::dbgs() << "Penalty for splitting before "
1161 << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1162 << (*I)->Previous->State.NextToken->SplitPenalty << "\n";
1163 }
1164 });
1165 addTokenToState((*I)->NewLine, false, State);
1166 }
1167 }
1168
1169 /// \brief Add the following state to the analysis queue \c Queue.
1170 ///
1171 /// Assume the current state is \p PreviousNode and has been reached with a
1172 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
addNextStateToQueue(unsigned Penalty,StateNode * PreviousNode,bool NewLine)1173 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1174 bool NewLine) {
1175 if (NewLine && !canBreak(PreviousNode->State))
1176 return;
1177 if (!NewLine && mustBreak(PreviousNode->State))
1178 return;
1179 if (NewLine) {
1180 if (!PreviousNode->State.Stack.back().ContainsLineBreak)
1181 Penalty += 15;
1182 Penalty += PreviousNode->State.NextToken->SplitPenalty;
1183 }
1184
1185 StateNode *Node = new (Allocator.Allocate())
1186 StateNode(PreviousNode->State, NewLine, PreviousNode);
1187 Penalty += addTokenToState(NewLine, true, Node->State);
1188 if (Node->State.Column > getColumnLimit()) {
1189 unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
1190 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
1191 }
1192
1193 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
1194 ++Count;
1195 }
1196
1197 /// \brief Returns \c true, if a line break after \p State is allowed.
canBreak(const LineState & State)1198 bool canBreak(const LineState &State) {
1199 const FormatToken &Current = *State.NextToken;
1200 const FormatToken &Previous = *Current.Previous;
1201 assert(&Previous == Current.Previous);
1202 if (!Current.CanBreakBefore &&
1203 !(Current.is(tok::r_brace) &&
1204 State.Stack.back().BreakBeforeClosingBrace))
1205 return false;
1206 // The opening "{" of a braced list has to be on the same line as the first
1207 // element if it is nested in another braced init list or function call.
1208 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) &&
1209 Previous.Previous &&
1210 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma))
1211 return false;
1212 // This prevents breaks like:
1213 // ...
1214 // SomeParameter, OtherParameter).DoSomething(
1215 // ...
1216 // As they hide "DoSomething" and are generally bad for readability.
1217 if (Previous.opensScope() &&
1218 State.LowestLevelOnLine < State.StartOfLineLevel)
1219 return false;
1220 return !State.Stack.back().NoLineBreak;
1221 }
1222
1223 /// \brief Returns \c true, if a line break after \p State is mandatory.
mustBreak(const LineState & State)1224 bool mustBreak(const LineState &State) {
1225 const FormatToken &Current = *State.NextToken;
1226 const FormatToken &Previous = *Current.Previous;
1227 if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon)
1228 return true;
1229 if (!Style.Cpp11BracedListStyle && Current.is(tok::r_brace) &&
1230 State.Stack.back().BreakBeforeClosingBrace)
1231 return true;
1232 if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
1233 return true;
1234 if (Style.BreakConstructorInitializersBeforeComma) {
1235 if (Previous.Type == TT_CtorInitializerComma)
1236 return false;
1237 if (Current.Type == TT_CtorInitializerComma)
1238 return true;
1239 }
1240 if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) ||
1241 (Current.Type == TT_ConditionalExpr &&
1242 !(Current.is(tok::colon) && Previous.is(tok::question)))) &&
1243 State.Stack.back().BreakBeforeParameter &&
1244 !Current.isTrailingComment() &&
1245 !Current.isOneOf(tok::r_paren, tok::r_brace))
1246 return true;
1247 if (Style.AlwaysBreakBeforeMultilineStrings &&
1248 State.Column > State.Stack.back().Indent &&
1249 Current.is(tok::string_literal) && Previous.isNot(tok::lessless) &&
1250 Previous.Type != TT_InlineASMColon &&
1251 ((Current.getNextNonComment() &&
1252 Current.getNextNonComment()->is(tok::string_literal)) ||
1253 (Current.TokenText.find("\\\n") != StringRef::npos)))
1254 return true;
1255
1256 if (!Style.BreakBeforeBinaryOperators) {
1257 // If we need to break somewhere inside the LHS of a binary expression, we
1258 // should also break after the operator. Otherwise, the formatting would
1259 // hide the operator precedence, e.g. in:
1260 // if (aaaaaaaaaaaaaa ==
1261 // bbbbbbbbbbbbbb && c) {..
1262 // For comparisons, we only apply this rule, if the LHS is a binary
1263 // expression itself as otherwise, the line breaks seem superfluous.
1264 // We need special cases for ">>" which we have split into two ">" while
1265 // lexing in order to make template parsing easier.
1266 //
1267 // FIXME: We'll need something similar for styles that break before binary
1268 // operators.
1269 bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
1270 Previous.getPrecedence() == prec::Equality) &&
1271 Previous.Previous && Previous.Previous->Type !=
1272 TT_BinaryOperator; // For >>.
1273 bool LHSIsBinaryExpr =
1274 Previous.Previous && Previous.Previous->FakeRParens > 0;
1275 if (Previous.Type == TT_BinaryOperator &&
1276 (!IsComparison || LHSIsBinaryExpr) &&
1277 Current.Type != TT_BinaryOperator && // For >>.
1278 !Current.isTrailingComment() &&
1279 !Previous.isOneOf(tok::lessless, tok::question) &&
1280 Previous.getPrecedence() != prec::Assignment &&
1281 State.Stack.back().BreakBeforeParameter)
1282 return true;
1283 }
1284
1285 // Same as above, but for the first "<<" operator.
1286 if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter &&
1287 State.Stack.back().FirstLessLess == 0)
1288 return true;
1289
1290 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
1291 // out whether it is the first parameter. Clean this up.
1292 if (Current.Type == TT_ObjCSelectorName &&
1293 Current.LongestObjCSelectorName == 0 &&
1294 State.Stack.back().BreakBeforeParameter)
1295 return true;
1296 if ((Current.Type == TT_CtorInitializerColon ||
1297 (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0)))
1298 return true;
1299
1300 if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) &&
1301 Line.MightBeFunctionDecl && State.Stack.back().BreakBeforeParameter &&
1302 State.ParenLevel == 0)
1303 return true;
1304 return false;
1305 }
1306
1307 // Returns the total number of columns required for the remaining tokens.
getRemainingLength(const LineState & State)1308 unsigned getRemainingLength(const LineState &State) {
1309 if (State.NextToken && State.NextToken->Previous)
1310 return Line.Last->TotalLength - State.NextToken->Previous->TotalLength;
1311 return 0;
1312 }
1313
1314 FormatStyle Style;
1315 SourceManager &SourceMgr;
1316 const AnnotatedLine &Line;
1317 const unsigned FirstIndent;
1318 const FormatToken *RootToken;
1319 WhitespaceManager &Whitespaces;
1320
1321 llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1322 QueueType Queue;
1323 // Increasing count of \c StateNode items we have created. This is used
1324 // to create a deterministic order independent of the container.
1325 unsigned Count;
1326 encoding::Encoding Encoding;
1327 bool BinPackInconclusiveFunctions;
1328 };
1329
1330 class FormatTokenLexer {
1331 public:
FormatTokenLexer(Lexer & Lex,SourceManager & SourceMgr,encoding::Encoding Encoding)1332 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr,
1333 encoding::Encoding Encoding)
1334 : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
1335 SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()),
1336 Encoding(Encoding) {
1337 Lex.SetKeepWhitespaceMode(true);
1338 }
1339
lex()1340 ArrayRef<FormatToken *> lex() {
1341 assert(Tokens.empty());
1342 do {
1343 Tokens.push_back(getNextToken());
1344 } while (Tokens.back()->Tok.isNot(tok::eof));
1345 return Tokens;
1346 }
1347
getIdentTable()1348 IdentifierTable &getIdentTable() { return IdentTable; }
1349
1350 private:
getNextToken()1351 FormatToken *getNextToken() {
1352 if (GreaterStashed) {
1353 // Create a synthesized second '>' token.
1354 Token Greater = FormatTok->Tok;
1355 FormatTok = new (Allocator.Allocate()) FormatToken;
1356 FormatTok->Tok = Greater;
1357 SourceLocation GreaterLocation =
1358 FormatTok->Tok.getLocation().getLocWithOffset(1);
1359 FormatTok->WhitespaceRange =
1360 SourceRange(GreaterLocation, GreaterLocation);
1361 FormatTok->TokenText = ">";
1362 FormatTok->CodePointCount = 1;
1363 GreaterStashed = false;
1364 return FormatTok;
1365 }
1366
1367 FormatTok = new (Allocator.Allocate()) FormatToken;
1368 readRawToken(*FormatTok);
1369 SourceLocation WhitespaceStart =
1370 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1371 if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
1372 FormatTok->IsFirst = true;
1373
1374 // Consume and record whitespace until we find a significant token.
1375 unsigned WhitespaceLength = TrailingWhitespace;
1376 while (FormatTok->Tok.is(tok::unknown)) {
1377 unsigned Newlines = FormatTok->TokenText.count('\n');
1378 if (Newlines > 0)
1379 FormatTok->LastNewlineOffset =
1380 WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
1381 FormatTok->NewlinesBefore += Newlines;
1382 unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
1383 FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
1384 WhitespaceLength += FormatTok->Tok.getLength();
1385
1386 readRawToken(*FormatTok);
1387 }
1388
1389 // In case the token starts with escaped newlines, we want to
1390 // take them into account as whitespace - this pattern is quite frequent
1391 // in macro definitions.
1392 // FIXME: What do we want to do with other escaped spaces, and escaped
1393 // spaces or newlines in the middle of tokens?
1394 // FIXME: Add a more explicit test.
1395 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1396 FormatTok->TokenText[1] == '\n') {
1397 // FIXME: ++FormatTok->NewlinesBefore is missing...
1398 WhitespaceLength += 2;
1399 FormatTok->TokenText = FormatTok->TokenText.substr(2);
1400 }
1401
1402 TrailingWhitespace = 0;
1403 if (FormatTok->Tok.is(tok::comment)) {
1404 StringRef UntrimmedText = FormatTok->TokenText;
1405 FormatTok->TokenText = FormatTok->TokenText.rtrim();
1406 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1407 } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1408 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1409 FormatTok->Tok.setIdentifierInfo(&Info);
1410 FormatTok->Tok.setKind(Info.getTokenID());
1411 } else if (FormatTok->Tok.is(tok::greatergreater)) {
1412 FormatTok->Tok.setKind(tok::greater);
1413 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1414 GreaterStashed = true;
1415 }
1416
1417 // Now FormatTok is the next non-whitespace token.
1418 FormatTok->CodePointCount =
1419 encoding::getCodePointCount(FormatTok->TokenText, Encoding);
1420
1421 FormatTok->WhitespaceRange = SourceRange(
1422 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1423 return FormatTok;
1424 }
1425
1426 FormatToken *FormatTok;
1427 bool GreaterStashed;
1428 unsigned TrailingWhitespace;
1429 Lexer &Lex;
1430 SourceManager &SourceMgr;
1431 IdentifierTable IdentTable;
1432 encoding::Encoding Encoding;
1433 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1434 SmallVector<FormatToken *, 16> Tokens;
1435
readRawToken(FormatToken & Tok)1436 void readRawToken(FormatToken &Tok) {
1437 Lex.LexFromRawLexer(Tok.Tok);
1438 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1439 Tok.Tok.getLength());
1440
1441 // For formatting, treat unterminated string literals like normal string
1442 // literals.
1443 if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
1444 Tok.TokenText[0] == '"') {
1445 Tok.Tok.setKind(tok::string_literal);
1446 Tok.IsUnterminatedLiteral = true;
1447 }
1448 }
1449 };
1450
1451 class Formatter : public UnwrappedLineConsumer {
1452 public:
Formatter(const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,const std::vector<CharSourceRange> & Ranges)1453 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1454 const std::vector<CharSourceRange> &Ranges)
1455 : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1456 Whitespaces(SourceMgr, Style), Ranges(Ranges),
1457 Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1458 DEBUG(llvm::dbgs() << "File encoding: "
1459 << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1460 : "unknown")
1461 << "\n");
1462 }
1463
~Formatter()1464 virtual ~Formatter() {}
1465
format()1466 tooling::Replacements format() {
1467 FormatTokenLexer Tokens(Lex, SourceMgr, Encoding);
1468
1469 UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1470 bool StructuralError = Parser.parse();
1471 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1472 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1473 Annotator.annotate(AnnotatedLines[i]);
1474 }
1475 deriveLocalStyle();
1476 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1477 Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1478 }
1479
1480 // Adapt level to the next line if this is a comment.
1481 // FIXME: Can/should this be done in the UnwrappedLineParser?
1482 const AnnotatedLine *NextNonCommentLine = NULL;
1483 for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) {
1484 if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) &&
1485 !AnnotatedLines[i].First->Next)
1486 AnnotatedLines[i].Level = NextNonCommentLine->Level;
1487 else
1488 NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace)
1489 ? &AnnotatedLines[i]
1490 : NULL;
1491 }
1492
1493 std::vector<int> IndentForLevel;
1494 bool PreviousLineWasTouched = false;
1495 const FormatToken *PreviousLineLastToken = 0;
1496 bool FormatPPDirective = false;
1497 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1498 E = AnnotatedLines.end();
1499 I != E; ++I) {
1500 const AnnotatedLine &TheLine = *I;
1501 const FormatToken *FirstTok = TheLine.First;
1502 int Offset = getIndentOffset(*TheLine.First);
1503
1504 // Check whether this line is part of a formatted preprocessor directive.
1505 if (FirstTok->HasUnescapedNewline)
1506 FormatPPDirective = false;
1507 if (!FormatPPDirective && TheLine.InPPDirective &&
1508 (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
1509 FormatPPDirective = true;
1510
1511 // Determine indent and try to merge multiple unwrapped lines.
1512 while (IndentForLevel.size() <= TheLine.Level)
1513 IndentForLevel.push_back(-1);
1514 IndentForLevel.resize(TheLine.Level + 1);
1515 unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
1516 if (static_cast<int>(Indent) + Offset >= 0)
1517 Indent += Offset;
1518 tryFitMultipleLinesInOne(Indent, I, E);
1519
1520 bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
1521 if (TheLine.First->is(tok::eof)) {
1522 if (PreviousLineWasTouched) {
1523 unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u);
1524 Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0,
1525 /*TargetColumn*/ 0);
1526 }
1527 } else if (TheLine.Type != LT_Invalid &&
1528 (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
1529 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1530 if (FirstTok->WhitespaceRange.isValid() &&
1531 // Insert a break even if there is a structural error in case where
1532 // we break apart a line consisting of multiple unwrapped lines.
1533 (FirstTok->NewlinesBefore == 0 || !StructuralError)) {
1534 formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent,
1535 TheLine.InPPDirective);
1536 } else {
1537 Indent = LevelIndent =
1538 SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) -
1539 1;
1540 }
1541 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1542 TheLine.First, Whitespaces, Encoding,
1543 BinPackInconclusiveFunctions);
1544 Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
1545 IndentForLevel[TheLine.Level] = LevelIndent;
1546 PreviousLineWasTouched = true;
1547 } else {
1548 // Format the first token if necessary, and notify the WhitespaceManager
1549 // about the unchanged whitespace.
1550 for (const FormatToken *Tok = TheLine.First; Tok != NULL;
1551 Tok = Tok->Next) {
1552 if (Tok == TheLine.First &&
1553 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
1554 unsigned LevelIndent =
1555 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1;
1556 // Remove trailing whitespace of the previous line if it was
1557 // touched.
1558 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
1559 formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent,
1560 TheLine.InPPDirective);
1561 } else {
1562 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
1563 }
1564
1565 if (static_cast<int>(LevelIndent) - Offset >= 0)
1566 LevelIndent -= Offset;
1567 if (Tok->isNot(tok::comment))
1568 IndentForLevel[TheLine.Level] = LevelIndent;
1569 } else {
1570 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
1571 }
1572 }
1573 // If we did not reformat this unwrapped line, the column at the end of
1574 // the last token is unchanged - thus, we can calculate the end of the
1575 // last token.
1576 PreviousLineWasTouched = false;
1577 }
1578 PreviousLineLastToken = I->Last;
1579 }
1580 return Whitespaces.generateReplacements();
1581 }
1582
1583 private:
deriveLocalStyle()1584 void deriveLocalStyle() {
1585 unsigned CountBoundToVariable = 0;
1586 unsigned CountBoundToType = 0;
1587 bool HasCpp03IncompatibleFormat = false;
1588 bool HasBinPackedFunction = false;
1589 bool HasOnePerLineFunction = false;
1590 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1591 if (!AnnotatedLines[i].First->Next)
1592 continue;
1593 FormatToken *Tok = AnnotatedLines[i].First->Next;
1594 while (Tok->Next) {
1595 if (Tok->Type == TT_PointerOrReference) {
1596 bool SpacesBefore =
1597 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1598 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1599 Tok->Next->WhitespaceRange.getEnd();
1600 if (SpacesBefore && !SpacesAfter)
1601 ++CountBoundToVariable;
1602 else if (!SpacesBefore && SpacesAfter)
1603 ++CountBoundToType;
1604 }
1605
1606 if (Tok->Type == TT_TemplateCloser &&
1607 Tok->Previous->Type == TT_TemplateCloser &&
1608 Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd())
1609 HasCpp03IncompatibleFormat = true;
1610
1611 if (Tok->PackingKind == PPK_BinPacked)
1612 HasBinPackedFunction = true;
1613 if (Tok->PackingKind == PPK_OnePerLine)
1614 HasOnePerLineFunction = true;
1615
1616 Tok = Tok->Next;
1617 }
1618 }
1619 if (Style.DerivePointerBinding) {
1620 if (CountBoundToType > CountBoundToVariable)
1621 Style.PointerBindsToType = true;
1622 else if (CountBoundToType < CountBoundToVariable)
1623 Style.PointerBindsToType = false;
1624 }
1625 if (Style.Standard == FormatStyle::LS_Auto) {
1626 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1627 : FormatStyle::LS_Cpp03;
1628 }
1629 BinPackInconclusiveFunctions =
1630 HasBinPackedFunction || !HasOnePerLineFunction;
1631 }
1632
1633 /// \brief Get the indent of \p Level from \p IndentForLevel.
1634 ///
1635 /// \p IndentForLevel must contain the indent for the level \c l
1636 /// at \p IndentForLevel[l], or a value < 0 if the indent for
1637 /// that level is unknown.
getIndent(const std::vector<int> IndentForLevel,unsigned Level)1638 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1639 if (IndentForLevel[Level] != -1)
1640 return IndentForLevel[Level];
1641 if (Level == 0)
1642 return 0;
1643 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
1644 }
1645
1646 /// \brief Get the offset of the line relatively to the level.
1647 ///
1648 /// For example, 'public:' labels in classes are offset by 1 or 2
1649 /// characters to the left from their level.
getIndentOffset(const FormatToken & RootToken)1650 int getIndentOffset(const FormatToken &RootToken) {
1651 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
1652 return Style.AccessModifierOffset;
1653 return 0;
1654 }
1655
1656 /// \brief Tries to merge lines into one.
1657 ///
1658 /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1659 /// if possible; note that \c I will be incremented when lines are merged.
tryFitMultipleLinesInOne(unsigned Indent,std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E)1660 void tryFitMultipleLinesInOne(unsigned Indent,
1661 std::vector<AnnotatedLine>::iterator &I,
1662 std::vector<AnnotatedLine>::iterator E) {
1663 // We can never merge stuff if there are trailing line comments.
1664 if (I->Last->Type == TT_LineComment)
1665 return;
1666
1667 if (Indent > Style.ColumnLimit)
1668 return;
1669
1670 unsigned Limit = Style.ColumnLimit - Indent;
1671 // If we already exceed the column limit, we set 'Limit' to 0. The different
1672 // tryMerge..() functions can then decide whether to still do merging.
1673 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1674
1675 if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1676 return;
1677
1678 if (I->Last->is(tok::l_brace)) {
1679 tryMergeSimpleBlock(I, E, Limit);
1680 } else if (Style.AllowShortIfStatementsOnASingleLine &&
1681 I->First->is(tok::kw_if)) {
1682 tryMergeSimpleControlStatement(I, E, Limit);
1683 } else if (Style.AllowShortLoopsOnASingleLine &&
1684 I->First->isOneOf(tok::kw_for, tok::kw_while)) {
1685 tryMergeSimpleControlStatement(I, E, Limit);
1686 } else if (I->InPPDirective &&
1687 (I->First->HasUnescapedNewline || I->First->IsFirst)) {
1688 tryMergeSimplePPDirective(I, E, Limit);
1689 }
1690 }
1691
tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1692 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1693 std::vector<AnnotatedLine>::iterator E,
1694 unsigned Limit) {
1695 if (Limit == 0)
1696 return;
1697 AnnotatedLine &Line = *I;
1698 if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline)
1699 return;
1700 if (I + 2 != E && (I + 2)->InPPDirective &&
1701 !(I + 2)->First->HasUnescapedNewline)
1702 return;
1703 if (1 + (I + 1)->Last->TotalLength > Limit)
1704 return;
1705 join(Line, *(++I));
1706 }
1707
tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1708 void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I,
1709 std::vector<AnnotatedLine>::iterator E,
1710 unsigned Limit) {
1711 if (Limit == 0)
1712 return;
1713 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
1714 (I + 1)->First->is(tok::l_brace))
1715 return;
1716 if ((I + 1)->InPPDirective != I->InPPDirective ||
1717 ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline))
1718 return;
1719 AnnotatedLine &Line = *I;
1720 if (Line.Last->isNot(tok::r_paren))
1721 return;
1722 if (1 + (I + 1)->Last->TotalLength > Limit)
1723 return;
1724 if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
1725 tok::kw_while) ||
1726 (I + 1)->First->Type == TT_LineComment)
1727 return;
1728 // Only inline simple if's (no nested if or else).
1729 if (I + 2 != E && Line.First->is(tok::kw_if) &&
1730 (I + 2)->First->is(tok::kw_else))
1731 return;
1732 join(Line, *(++I));
1733 }
1734
tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator & I,std::vector<AnnotatedLine>::iterator E,unsigned Limit)1735 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1736 std::vector<AnnotatedLine>::iterator E,
1737 unsigned Limit) {
1738 // No merging if the brace already is on the next line.
1739 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
1740 return;
1741
1742 // First, check that the current line allows merging. This is the case if
1743 // we're not in a control flow statement and the last token is an opening
1744 // brace.
1745 AnnotatedLine &Line = *I;
1746 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1747 tok::kw_else, tok::kw_try, tok::kw_catch,
1748 tok::kw_for,
1749 // This gets rid of all ObjC @ keywords and methods.
1750 tok::at, tok::minus, tok::plus))
1751 return;
1752
1753 FormatToken *Tok = (I + 1)->First;
1754 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
1755 (Tok->getNextNonComment() == NULL ||
1756 Tok->getNextNonComment()->is(tok::semi))) {
1757 // We merge empty blocks even if the line exceeds the column limit.
1758 Tok->SpacesRequiredBefore = 0;
1759 Tok->CanBreakBefore = true;
1760 join(Line, *(I + 1));
1761 I += 1;
1762 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
1763 // Check that we still have three lines and they fit into the limit.
1764 if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1765 !nextTwoLinesFitInto(I, Limit))
1766 return;
1767
1768 // Second, check that the next line does not contain any braces - if it
1769 // does, readability declines when putting it into a single line.
1770 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1771 return;
1772 do {
1773 if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1774 return;
1775 Tok = Tok->Next;
1776 } while (Tok != NULL);
1777
1778 // Last, check that the third line contains a single closing brace.
1779 Tok = (I + 2)->First;
1780 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
1781 Tok->MustBreakBefore)
1782 return;
1783
1784 join(Line, *(I + 1));
1785 join(Line, *(I + 2));
1786 I += 2;
1787 }
1788 }
1789
nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,unsigned Limit)1790 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1791 unsigned Limit) {
1792 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1793 Limit;
1794 }
1795
join(AnnotatedLine & A,const AnnotatedLine & B)1796 void join(AnnotatedLine &A, const AnnotatedLine &B) {
1797 assert(!A.Last->Next);
1798 assert(!B.First->Previous);
1799 A.Last->Next = B.First;
1800 B.First->Previous = A.Last;
1801 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
1802 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
1803 Tok->TotalLength += LengthA;
1804 A.Last = Tok;
1805 }
1806 }
1807
touchesRanges(const CharSourceRange & Range)1808 bool touchesRanges(const CharSourceRange &Range) {
1809 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1810 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1811 Ranges[i].getBegin()) &&
1812 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1813 Range.getBegin()))
1814 return true;
1815 }
1816 return false;
1817 }
1818
touchesLine(const AnnotatedLine & TheLine)1819 bool touchesLine(const AnnotatedLine &TheLine) {
1820 const FormatToken *First = TheLine.First;
1821 const FormatToken *Last = TheLine.Last;
1822 CharSourceRange LineRange = CharSourceRange::getCharRange(
1823 First->WhitespaceRange.getBegin().getLocWithOffset(
1824 First->LastNewlineOffset),
1825 Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1));
1826 return touchesRanges(LineRange);
1827 }
1828
touchesPPDirective(std::vector<AnnotatedLine>::iterator I,std::vector<AnnotatedLine>::iterator E)1829 bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I,
1830 std::vector<AnnotatedLine>::iterator E) {
1831 for (; I != E; ++I) {
1832 if (I->First->HasUnescapedNewline)
1833 return false;
1834 if (touchesLine(*I))
1835 return true;
1836 }
1837 return false;
1838 }
1839
touchesEmptyLineBefore(const AnnotatedLine & TheLine)1840 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1841 const FormatToken *First = TheLine.First;
1842 CharSourceRange LineRange = CharSourceRange::getCharRange(
1843 First->WhitespaceRange.getBegin(),
1844 First->WhitespaceRange.getBegin().getLocWithOffset(
1845 First->LastNewlineOffset));
1846 return touchesRanges(LineRange);
1847 }
1848
consumeUnwrappedLine(const UnwrappedLine & TheLine)1849 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1850 AnnotatedLines.push_back(AnnotatedLine(TheLine));
1851 }
1852
1853 /// \brief Add a new line and the required indent before the first Token
1854 /// of the \c UnwrappedLine if there was no structural parsing error.
1855 /// Returns the indent level of the \c UnwrappedLine.
formatFirstToken(const FormatToken & RootToken,const FormatToken * PreviousToken,unsigned Indent,bool InPPDirective)1856 void formatFirstToken(const FormatToken &RootToken,
1857 const FormatToken *PreviousToken, unsigned Indent,
1858 bool InPPDirective) {
1859 unsigned Newlines =
1860 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1861 // Remove empty lines before "}" where applicable.
1862 if (RootToken.is(tok::r_brace) &&
1863 (!RootToken.Next ||
1864 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
1865 Newlines = std::min(Newlines, 1u);
1866 if (Newlines == 0 && !RootToken.IsFirst)
1867 Newlines = 1;
1868
1869 // Insert extra new line before access specifiers.
1870 if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
1871 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
1872 ++Newlines;
1873
1874 Whitespaces.replaceWhitespace(
1875 RootToken, Newlines, Indent, Indent,
1876 InPPDirective && !RootToken.HasUnescapedNewline);
1877 }
1878
1879 FormatStyle Style;
1880 Lexer &Lex;
1881 SourceManager &SourceMgr;
1882 WhitespaceManager Whitespaces;
1883 std::vector<CharSourceRange> Ranges;
1884 std::vector<AnnotatedLine> AnnotatedLines;
1885
1886 encoding::Encoding Encoding;
1887 bool BinPackInconclusiveFunctions;
1888 };
1889
1890 } // end anonymous namespace
1891
reformat(const FormatStyle & Style,Lexer & Lex,SourceManager & SourceMgr,std::vector<CharSourceRange> Ranges)1892 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1893 SourceManager &SourceMgr,
1894 std::vector<CharSourceRange> Ranges) {
1895 Formatter formatter(Style, Lex, SourceMgr, Ranges);
1896 return formatter.format();
1897 }
1898
reformat(const FormatStyle & Style,StringRef Code,std::vector<tooling::Range> Ranges,StringRef FileName)1899 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1900 std::vector<tooling::Range> Ranges,
1901 StringRef FileName) {
1902 FileManager Files((FileSystemOptions()));
1903 DiagnosticsEngine Diagnostics(
1904 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1905 new DiagnosticOptions);
1906 SourceManager SourceMgr(Diagnostics, Files);
1907 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1908 const clang::FileEntry *Entry =
1909 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1910 SourceMgr.overrideFileContents(Entry, Buf);
1911 FileID ID =
1912 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1913 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1914 getFormattingLangOpts(Style.Standard));
1915 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1916 std::vector<CharSourceRange> CharRanges;
1917 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1918 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1919 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1920 CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1921 }
1922 return reformat(Style, Lex, SourceMgr, CharRanges);
1923 }
1924
getFormattingLangOpts(FormatStyle::LanguageStandard Standard)1925 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1926 LangOptions LangOpts;
1927 LangOpts.CPlusPlus = 1;
1928 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1929 LangOpts.LineComment = 1;
1930 LangOpts.Bool = 1;
1931 LangOpts.ObjC1 = 1;
1932 LangOpts.ObjC2 = 1;
1933 return LangOpts;
1934 }
1935
1936 } // namespace format
1937 } // namespace clang
1938