//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
17 #define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
18 
#include "Encoding.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <vector>
22 
23 namespace clang {
24 class SourceManager;
25 
26 namespace format {
27 
28 class AnnotatedLine;
29 struct FormatToken;
30 struct LineState;
31 struct ParenState;
32 class WhitespaceManager;
33 
34 class ContinuationIndenter {
35 public:
36   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
37   /// column \p FirstIndent.
38   ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
39                        WhitespaceManager &Whitespaces,
40                        encoding::Encoding Encoding,
41                        bool BinPackInconclusiveFunctions);
42 
43   /// \brief Get the initial state, i.e. the state after placing \p Line's
44   /// first token at \p FirstIndent.
45   LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
46                             bool DryRun);
47 
48   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
49   // better home.
50   /// \brief Returns \c true, if a line break after \p State is allowed.
51   bool canBreak(const LineState &State);
52 
53   /// \brief Returns \c true, if a line break after \p State is mandatory.
54   bool mustBreak(const LineState &State);
55 
56   /// \brief Appends the next token to \p State and updates information
57   /// necessary for indentation.
58   ///
59   /// Puts the token on the current line if \p Newline is \c false and adds a
60   /// line break and necessary indentation otherwise.
61   ///
62   /// If \p DryRun is \c false, also creates and stores the required
63   /// \c Replacement.
64   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
65                            unsigned ExtraSpaces = 0);
66 
67   /// \brief Get the column limit for this line. This is the style's column
68   /// limit, potentially reduced for preprocessor definitions.
69   unsigned getColumnLimit(const LineState &State) const;
70 
71 private:
72   /// \brief Mark the next token as consumed in \p State and modify its stacks
73   /// accordingly.
74   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
75 
76   /// \brief Update 'State' according to the next token's fake left parentheses.
77   void moveStatePastFakeLParens(LineState &State, bool Newline);
78   /// \brief Update 'State' according to the next token's fake r_parens.
79   void moveStatePastFakeRParens(LineState &State);
80 
81   /// \brief Update 'State' according to the next token being one of "(<{[".
82   void moveStatePastScopeOpener(LineState &State, bool Newline);
83   /// \brief Update 'State' according to the next token being one of ")>}]".
84   void moveStatePastScopeCloser(LineState &State);
85   /// \brief Update 'State' with the next token opening a nested block.
86   void moveStateToNewBlock(LineState &State);
87 
88   /// \brief If the current token sticks out over the end of the line, break
89   /// it if possible.
90   ///
91   /// \returns An extra penalty if a token was broken, otherwise 0.
92   ///
93   /// The returned penalty will cover the cost of the additional line breaks and
94   /// column limit violation in all lines except for the last one. The penalty
95   /// for the column limit violation in the last line (and in single line
96   /// tokens) is handled in \c addNextStateToQueue.
97   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
98                                 bool DryRun);
99 
100   /// \brief Appends the next token to \p State and updates information
101   /// necessary for indentation.
102   ///
103   /// Puts the token on the current line.
104   ///
105   /// If \p DryRun is \c false, also creates and stores the required
106   /// \c Replacement.
107   void addTokenOnCurrentLine(LineState &State, bool DryRun,
108                              unsigned ExtraSpaces);
109 
110   /// \brief Appends the next token to \p State and updates information
111   /// necessary for indentation.
112   ///
113   /// Adds a line break and necessary indentation.
114   ///
115   /// If \p DryRun is \c false, also creates and stores the required
116   /// \c Replacement.
117   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
118 
119   /// \brief Calculate the new column for a line wrap before the next token.
120   unsigned getNewLineColumn(const LineState &State);
121 
122   /// \brief Adds a multiline token to the \p State.
123   ///
124   /// \returns Extra penalty for the first line of the literal: last line is
125   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
126   /// matter, as we don't change them.
127   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
128 
129   /// \brief Returns \c true if the next token starts a multiline string
130   /// literal.
131   ///
132   /// This includes implicitly concatenated strings, strings that will be broken
133   /// by clang-format and string literals with escaped newlines.
134   bool nextIsMultilineString(const LineState &State);
135 
136   FormatStyle Style;
137   SourceManager &SourceMgr;
138   WhitespaceManager &Whitespaces;
139   encoding::Encoding Encoding;
140   bool BinPackInconclusiveFunctions;
141   llvm::Regex CommentPragmasRegex;
142 };
143 
/// \brief Formatting properties tracked for a single parenthesis level.
///
/// Instances are ordered by \c operator< so that states containing them can be
/// used as keys in ordered containers.
struct ParenState {
  /// \brief Creates the state for a freshly opened level.
  ///
  /// The five arguments initialize the members of the same name. All other
  /// column/position members start at 0, \c LastOperatorWrapped and
  /// \c AlignColons start as \c true, and every other flag starts as
  /// \c false.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
        StartOfFunctionCall(0), StartOfArraySubscripts(0),
        NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), JSFunctionInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder;

  /// \brief \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks;

  /// \brief The previous JavaScript 'function' keyword is not wrapped to a new
  /// line.
  bool JSFunctionInlined;

  /// \brief Strict ordering over the members that take part in memoization.
  ///
  /// Members are compared one after another in the order written below; the
  /// first member that differs decides the result. Two states that agree on
  /// all compared members are equivalent (neither is less than the other).
  ///
  /// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks
  /// are skipped as documented on those members.
  /// NOTE(review): \c IndentLevel and \c NestedNameSpecifierContinuation are
  /// not compared either; confirm that this is intentional.
  ///
  /// Flags decided with `return Flag;` sort \c true before \c false, whereas
  /// flags decided with `Flag < Other.Flag` sort \c false before \c true. Each
  /// is a valid ordering on its own; the directions are kept as they are so
  /// the resulting order does not change.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak < Other.ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    if (JSFunctionInlined != Other.JSFunctionInlined)
      return JSFunctionInlined < Other.JSFunctionInlined;
    return false;
  }
};
298 
299 /// \brief The current state when indenting a unwrapped line.
300 ///
301 /// As the indenting tries different combinations this is copied by value.
302 struct LineState {
303   /// \brief The number of used columns in the current line.
304   unsigned Column;
305 
306   /// \brief The token that needs to be next formatted.
307   FormatToken *NextToken;
308 
309   /// \brief \c true if this line contains a continued for-loop section.
310   bool LineContainsContinuedForLoopSection;
311 
312   /// \brief The \c NestingLevel at the start of this line.
313   unsigned StartOfLineLevel;
314 
315   /// \brief The lowest \c NestingLevel on the current line.
316   unsigned LowestLevelOnLine;
317 
318   /// \brief The start column of the string literal, if we're in a string
319   /// literal sequence, 0 otherwise.
320   unsigned StartOfStringLiteral;
321 
322   /// \brief A stack keeping track of properties applying to parenthesis
323   /// levels.
324   std::vector<ParenState> Stack;
325 
326   /// \brief Ignore the stack of \c ParenStates for state comparison.
327   ///
328   /// In long and deeply nested unwrapped lines, the current algorithm can
329   /// be insufficient for finding the best formatting with a reasonable amount
330   /// of time and memory. Setting this flag will effectively lead to the
331   /// algorithm not analyzing some combinations. However, these combinations
332   /// rarely contain the optimal solution: In short, accepting a higher
333   /// penalty early would need to lead to different values in the \c
334   /// ParenState stack (in an otherwise identical state) and these different
335   /// values would need to lead to a significant amount of avoided penalty
336   /// later.
337   ///
338   /// FIXME: Come up with a better algorithm instead.
339   bool IgnoreStackForComparison;
340 
341   /// \brief The indent of the first token.
342   unsigned FirstIndent;
343 
344   /// \brief The line that is being formatted.
345   ///
346   /// Does not need to be considered for memoization because it doesn't change.
347   const AnnotatedLine *Line;
348 
349   /// \brief Comparison operator to be able to used \c LineState in \c map.
350   bool operator<(const LineState &Other) const {
351     if (NextToken != Other.NextToken)
352       return NextToken < Other.NextToken;
353     if (Column != Other.Column)
354       return Column < Other.Column;
355     if (LineContainsContinuedForLoopSection !=
356         Other.LineContainsContinuedForLoopSection)
357       return LineContainsContinuedForLoopSection;
358     if (StartOfLineLevel != Other.StartOfLineLevel)
359       return StartOfLineLevel < Other.StartOfLineLevel;
360     if (LowestLevelOnLine != Other.LowestLevelOnLine)
361       return LowestLevelOnLine < Other.LowestLevelOnLine;
362     if (StartOfStringLiteral != Other.StartOfStringLiteral)
363       return StartOfStringLiteral < Other.StartOfStringLiteral;
364     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
365       return false;
366     return Stack < Other.Stack;
367   }
368 };
369 
370 } // end namespace format
371 } // end namespace clang
372 
373 #endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
374