1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements an indenter that manages the indentation of 12 /// continuations. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 18 19 #include "Encoding.h" 20 #include "FormatToken.h" 21 #include "clang/Format/Format.h" 22 #include "llvm/Support/Regex.h" 23 24 namespace clang { 25 class SourceManager; 26 27 namespace format { 28 29 class AnnotatedLine; 30 struct FormatToken; 31 struct LineState; 32 struct ParenState; 33 class WhitespaceManager; 34 35 class ContinuationIndenter { 36 public: 37 /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in 38 /// column \p FirstIndent. 39 ContinuationIndenter(const FormatStyle &Style, 40 const AdditionalKeywords &Keywords, 41 SourceManager &SourceMgr, WhitespaceManager &Whitespaces, 42 encoding::Encoding Encoding, 43 bool BinPackInconclusiveFunctions); 44 45 /// \brief Get the initial state, i.e. the state after placing \p Line's 46 /// first token at \p FirstIndent. 47 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, 48 bool DryRun); 49 50 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 51 // better home. 52 /// \brief Returns \c true, if a line break after \p State is allowed. 53 bool canBreak(const LineState &State); 54 55 /// \brief Returns \c true, if a line break after \p State is mandatory. 56 bool mustBreak(const LineState &State); 57 58 /// \brief Appends the next token to \p State and updates information 59 /// necessary for indentation. 60 /// 61 /// Puts the token on the current line if \p Newline is \c false and adds a 62 /// line break and necessary indentation otherwise. 63 /// 64 /// If \p DryRun is \c false, also creates and stores the required 65 /// \c Replacement. 66 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 67 unsigned ExtraSpaces = 0); 68 69 /// \brief Get the column limit for this line. This is the style's column 70 /// limit, potentially reduced for preprocessor definitions. 71 unsigned getColumnLimit(const LineState &State) const; 72 73 private: 74 /// \brief Mark the next token as consumed in \p State and modify its stacks 75 /// accordingly. 76 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 77 78 /// \brief Update 'State' according to the next token's fake left parentheses. 79 void moveStatePastFakeLParens(LineState &State, bool Newline); 80 /// \brief Update 'State' according to the next token's fake r_parens. 81 void moveStatePastFakeRParens(LineState &State); 82 83 /// \brief Update 'State' according to the next token being one of "(<{[". 84 void moveStatePastScopeOpener(LineState &State, bool Newline); 85 /// \brief Update 'State' according to the next token being one of ")>}]". 86 void moveStatePastScopeCloser(LineState &State); 87 /// \brief Update 'State' with the next token opening a nested block. 88 void moveStateToNewBlock(LineState &State); 89 90 /// \brief If the current token sticks out over the end of the line, break 91 /// it if possible. 92 /// 93 /// \returns An extra penalty if a token was broken, otherwise 0. 94 /// 95 /// The returned penalty will cover the cost of the additional line breaks and 96 /// column limit violation in all lines except for the last one. The penalty 97 /// for the column limit violation in the last line (and in single line 98 /// tokens) is handled in \c addNextStateToQueue. 99 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 100 bool DryRun); 101 102 /// \brief Appends the next token to \p State and updates information 103 /// necessary for indentation. 104 /// 105 /// Puts the token on the current line. 106 /// 107 /// If \p DryRun is \c false, also creates and stores the required 108 /// \c Replacement. 109 void addTokenOnCurrentLine(LineState &State, bool DryRun, 110 unsigned ExtraSpaces); 111 112 /// \brief Appends the next token to \p State and updates information 113 /// necessary for indentation. 114 /// 115 /// Adds a line break and necessary indentation. 116 /// 117 /// If \p DryRun is \c false, also creates and stores the required 118 /// \c Replacement. 119 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 120 121 /// \brief Calculate the new column for a line wrap before the next token. 122 unsigned getNewLineColumn(const LineState &State); 123 124 /// \brief Adds a multiline token to the \p State. 125 /// 126 /// \returns Extra penalty for the first line of the literal: last line is 127 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 128 /// matter, as we don't change them. 129 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 130 131 /// \brief Returns \c true if the next token starts a multiline string 132 /// literal. 133 /// 134 /// This includes implicitly concatenated strings, strings that will be broken 135 /// by clang-format and string literals with escaped newlines. 136 bool nextIsMultilineString(const LineState &State); 137 138 FormatStyle Style; 139 const AdditionalKeywords &Keywords; 140 SourceManager &SourceMgr; 141 WhitespaceManager &Whitespaces; 142 encoding::Encoding Encoding; 143 bool BinPackInconclusiveFunctions; 144 llvm::Regex CommentPragmasRegex; 145 }; 146 147 struct ParenState { ParenStateParenState148 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, 149 bool AvoidBinPacking, bool NoLineBreak) 150 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), 151 NestedBlockIndent(Indent), FirstLessLess(0), 152 BreakBeforeClosingBrace(false), QuestionColumn(0), 153 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 154 NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0), 155 StartOfFunctionCall(0), StartOfArraySubscripts(0), 156 NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0), 157 ContainsLineBreak(false), ContainsUnwrappedBuilder(0), 158 AlignColons(true), ObjCSelectorNameFound(false), 159 HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} 160 161 /// \brief The position to which a specific parenthesis level needs to be 162 /// indented. 163 unsigned Indent; 164 165 /// \brief The number of indentation levels of the block. 166 unsigned IndentLevel; 167 168 /// \brief The position of the last space on each level. 169 /// 170 /// Used e.g. to break like: 171 /// functionCall(Parameter, otherCall( 172 /// OtherParameter)); 173 unsigned LastSpace; 174 175 /// \brief If a block relative to this parenthesis level gets wrapped, indent 176 /// it this much. 177 unsigned NestedBlockIndent; 178 179 /// \brief The position the first "<<" operator encountered on each level. 180 /// 181 /// Used to align "<<" operators. 0 if no such operator has been encountered 182 /// on a level. 183 unsigned FirstLessLess; 184 185 /// \brief Whether a newline needs to be inserted before the block's closing 186 /// brace. 187 /// 188 /// We only want to insert a newline before the closing brace if there also 189 /// was a newline after the beginning left brace. 190 bool BreakBeforeClosingBrace; 191 192 /// \brief The column of a \c ? in a conditional expression; 193 unsigned QuestionColumn; 194 195 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 196 /// lines, in this context. 197 bool AvoidBinPacking; 198 199 /// \brief Break after the next comma (or all the commas in this context if 200 /// \c AvoidBinPacking is \c true). 201 bool BreakBeforeParameter; 202 203 /// \brief Line breaking in this context would break a formatting rule. 204 bool NoLineBreak; 205 206 /// \brief True if the last binary operator on this level was wrapped to the 207 /// next line. 208 bool LastOperatorWrapped; 209 210 /// \brief The position of the colon in an ObjC method declaration/call. 211 unsigned ColonPos; 212 213 /// \brief The start of the most recent function in a builder-type call. 214 unsigned StartOfFunctionCall; 215 216 /// \brief Contains the start of array subscript expressions, so that they 217 /// can be aligned. 218 unsigned StartOfArraySubscripts; 219 220 /// \brief If a nested name specifier was broken over multiple lines, this 221 /// contains the start column of the second line. Otherwise 0. 222 unsigned NestedNameSpecifierContinuation; 223 224 /// \brief If a call expression was broken over multiple lines, this 225 /// contains the start column of the second line. Otherwise 0. 226 unsigned CallContinuation; 227 228 /// \brief The column of the first variable name in a variable declaration. 229 /// 230 /// Used to align further variables if necessary. 231 unsigned VariablePos; 232 233 /// \brief \c true if this \c ParenState already contains a line-break. 234 /// 235 /// The first line break in a certain \c ParenState causes extra penalty so 236 /// that clang-format prefers similar breaks, i.e. breaks in the same 237 /// parenthesis. 238 bool ContainsLineBreak; 239 240 /// \brief \c true if this \c ParenState contains multiple segments of a 241 /// builder-type call on one line. 242 bool ContainsUnwrappedBuilder; 243 244 /// \brief \c true if the colons of the curren ObjC method expression should 245 /// be aligned. 246 /// 247 /// Not considered for memoization as it will always have the same value at 248 /// the same token. 249 bool AlignColons; 250 251 /// \brief \c true if at least one selector name was found in the current 252 /// ObjC method expression. 253 /// 254 /// Not considered for memoization as it will always have the same value at 255 /// the same token. 256 bool ObjCSelectorNameFound; 257 258 /// \brief \c true if there are multiple nested blocks inside these parens. 259 /// 260 /// Not considered for memoization as it will always have the same value at 261 /// the same token. 262 bool HasMultipleNestedBlocks; 263 264 // \brief The start of a nested block (e.g. lambda introducer in C++ or 265 // "function" in JavaScript) is not wrapped to a new line. 266 bool NestedBlockInlined; 267 268 bool operator<(const ParenState &Other) const { 269 if (Indent != Other.Indent) 270 return Indent < Other.Indent; 271 if (LastSpace != Other.LastSpace) 272 return LastSpace < Other.LastSpace; 273 if (NestedBlockIndent != Other.NestedBlockIndent) 274 return NestedBlockIndent < Other.NestedBlockIndent; 275 if (FirstLessLess != Other.FirstLessLess) 276 return FirstLessLess < Other.FirstLessLess; 277 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 278 return BreakBeforeClosingBrace; 279 if (QuestionColumn != Other.QuestionColumn) 280 return QuestionColumn < Other.QuestionColumn; 281 if (AvoidBinPacking != Other.AvoidBinPacking) 282 return AvoidBinPacking; 283 if (BreakBeforeParameter != Other.BreakBeforeParameter) 284 return BreakBeforeParameter; 285 if (NoLineBreak != Other.NoLineBreak) 286 return NoLineBreak; 287 if (LastOperatorWrapped != Other.LastOperatorWrapped) 288 return LastOperatorWrapped; 289 if (ColonPos != Other.ColonPos) 290 return ColonPos < Other.ColonPos; 291 if (StartOfFunctionCall != Other.StartOfFunctionCall) 292 return StartOfFunctionCall < Other.StartOfFunctionCall; 293 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 294 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 295 if (CallContinuation != Other.CallContinuation) 296 return CallContinuation < Other.CallContinuation; 297 if (VariablePos != Other.VariablePos) 298 return VariablePos < Other.VariablePos; 299 if (ContainsLineBreak != Other.ContainsLineBreak) 300 return ContainsLineBreak < Other.ContainsLineBreak; 301 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 302 return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; 303 if (NestedBlockInlined != Other.NestedBlockInlined) 304 return NestedBlockInlined < Other.NestedBlockInlined; 305 return false; 306 } 307 }; 308 309 /// \brief The current state when indenting a unwrapped line. 310 /// 311 /// As the indenting tries different combinations this is copied by value. 312 struct LineState { 313 /// \brief The number of used columns in the current line. 314 unsigned Column; 315 316 /// \brief The token that needs to be next formatted. 317 FormatToken *NextToken; 318 319 /// \brief \c true if this line contains a continued for-loop section. 320 bool LineContainsContinuedForLoopSection; 321 322 /// \brief The \c NestingLevel at the start of this line. 323 unsigned StartOfLineLevel; 324 325 /// \brief The lowest \c NestingLevel on the current line. 326 unsigned LowestLevelOnLine; 327 328 /// \brief The start column of the string literal, if we're in a string 329 /// literal sequence, 0 otherwise. 330 unsigned StartOfStringLiteral; 331 332 /// \brief A stack keeping track of properties applying to parenthesis 333 /// levels. 334 std::vector<ParenState> Stack; 335 336 /// \brief Ignore the stack of \c ParenStates for state comparison. 337 /// 338 /// In long and deeply nested unwrapped lines, the current algorithm can 339 /// be insufficient for finding the best formatting with a reasonable amount 340 /// of time and memory. Setting this flag will effectively lead to the 341 /// algorithm not analyzing some combinations. However, these combinations 342 /// rarely contain the optimal solution: In short, accepting a higher 343 /// penalty early would need to lead to different values in the \c 344 /// ParenState stack (in an otherwise identical state) and these different 345 /// values would need to lead to a significant amount of avoided penalty 346 /// later. 347 /// 348 /// FIXME: Come up with a better algorithm instead. 349 bool IgnoreStackForComparison; 350 351 /// \brief The indent of the first token. 352 unsigned FirstIndent; 353 354 /// \brief The line that is being formatted. 355 /// 356 /// Does not need to be considered for memoization because it doesn't change. 357 const AnnotatedLine *Line; 358 359 /// \brief Comparison operator to be able to used \c LineState in \c map. 360 bool operator<(const LineState &Other) const { 361 if (NextToken != Other.NextToken) 362 return NextToken < Other.NextToken; 363 if (Column != Other.Column) 364 return Column < Other.Column; 365 if (LineContainsContinuedForLoopSection != 366 Other.LineContainsContinuedForLoopSection) 367 return LineContainsContinuedForLoopSection; 368 if (StartOfLineLevel != Other.StartOfLineLevel) 369 return StartOfLineLevel < Other.StartOfLineLevel; 370 if (LowestLevelOnLine != Other.LowestLevelOnLine) 371 return LowestLevelOnLine < Other.LowestLevelOnLine; 372 if (StartOfStringLiteral != Other.StartOfStringLiteral) 373 return StartOfStringLiteral < Other.StartOfStringLiteral; 374 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 375 return false; 376 return Stack < Other.Stack; 377 } 378 }; 379 380 } // end namespace format 381 } // end namespace clang 382 383 #endif 384