• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the declaration of the FormatToken, a wrapper
12 /// around Token with additional information related to formatting.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
17 #define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
18 
19 #include "clang/Basic/OperatorPrecedence.h"
20 #include "clang/Lex/Lexer.h"
21 
22 namespace clang {
23 namespace format {
24 
/// \brief Formatting-specific classification of a token.
///
/// A value of this enum is stored in \c FormatToken::Type, which starts out as
/// \c TT_Unknown. \c TT_TemplateOpener and \c TT_TemplateCloser are what
/// \c FormatToken::opensScope() and \c FormatToken::closesScope() test for in
/// order to treat template angle brackets as scope delimiters.
///
/// The enumerators carry no explicit values; their numeric values follow from
/// the declaration order below, so do not reorder them casually.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_CtorInitializerComma,
  TT_DesignatedInitializerPeriod,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_FunctionTypeLParen,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCDictLiteral,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperator,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser, // Template closing '>' (see FormatToken::closesScope()).
  TT_TemplateOpener, // Template opening '<' (see FormatToken::opensScope()).
  TT_TrailingReturnArrow,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown // Initial classification of every FormatToken.
};
60 
/// \brief Represents what type of block a set of braces opens.
///
/// Stored in \c FormatToken::BlockKind, which is only meaningful when the
/// token is a brace and starts out as \c BK_Unknown.
enum BraceBlockKind {
  BK_Unknown,   // Not (yet) determined; the initial value.
  BK_Block,     // NOTE(review): presumably a statement/declaration block.
  BK_BracedInit // NOTE(review): presumably a braced initializer list.
};
67 
/// \brief The packing kind of a function's parameters.
///
/// Stored in \c FormatToken::PackingKind, which applies to opening
/// parentheses and starts out as \c PPK_Inconclusive.
enum ParameterPackingKind {
  PPK_BinPacked,   // NOTE(review): presumably several parameters per line.
  PPK_OnePerLine,  // NOTE(review): presumably each parameter on its own line.
  PPK_Inconclusive // Not (yet) determined; the initial value.
};
74 
75 /// \brief A wrapper around a \c Token storing information about the
76 /// whitespace characters preceeding it.
77 struct FormatToken {
FormatTokenFormatToken78   FormatToken()
79       : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
80         CodePointCount(0), IsFirst(false), MustBreakBefore(false),
81         IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
82         SpacesRequiredBefore(0), CanBreakBefore(false),
83         ClosesTemplateDeclaration(false), ParameterCount(0),
84         PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
85         BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
86         FakeRParens(0), LastInChainOfCalls(false),
87         PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
88         Next(NULL) {}
89 
90   /// \brief The \c Token.
91   Token Tok;
92 
93   /// \brief The number of newlines immediately before the \c Token.
94   ///
95   /// This can be used to determine what the user wrote in the original code
96   /// and thereby e.g. leave an empty line between two function definitions.
97   unsigned NewlinesBefore;
98 
99   /// \brief Whether there is at least one unescaped newline before the \c
100   /// Token.
101   bool HasUnescapedNewline;
102 
103   /// \brief The range of the whitespace immediately preceeding the \c Token.
104   SourceRange WhitespaceRange;
105 
106   /// \brief The offset just past the last '\n' in this token's leading
107   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
108   unsigned LastNewlineOffset;
109 
110   /// \brief The length of the non-whitespace parts of the token in CodePoints.
111   /// We need this to correctly measure number of columns a token spans.
112   unsigned CodePointCount;
113 
114   /// \brief Indicates that this is the first token.
115   bool IsFirst;
116 
117   /// \brief Whether there must be a line break before this token.
118   ///
119   /// This happens for example when a preprocessor directive ended directly
120   /// before the token.
121   bool MustBreakBefore;
122 
123   /// \brief Returns actual token start location without leading escaped
124   /// newlines and whitespace.
125   ///
126   /// This can be different to Tok.getLocation(), which includes leading escaped
127   /// newlines.
getStartOfNonWhitespaceFormatToken128   SourceLocation getStartOfNonWhitespace() const {
129     return WhitespaceRange.getEnd();
130   }
131 
132   /// \brief The raw text of the token.
133   ///
134   /// Contains the raw token text without leading whitespace and without leading
135   /// escaped newlines.
136   StringRef TokenText;
137 
138   /// \brief Set to \c true if this token is an unterminated literal.
139   bool IsUnterminatedLiteral;
140 
141   /// \brief Contains the kind of block if this token is a brace.
142   BraceBlockKind BlockKind;
143 
144   TokenType Type;
145 
146   unsigned SpacesRequiredBefore;
147   bool CanBreakBefore;
148 
149   bool ClosesTemplateDeclaration;
150 
151   /// \brief Number of parameters, if this is "(", "[" or "<".
152   ///
153   /// This is initialized to 1 as we don't need to distinguish functions with
154   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
155   /// the number of commas.
156   unsigned ParameterCount;
157 
158   /// \brief If this is an opening parenthesis, how are the parameters packed?
159   ParameterPackingKind PackingKind;
160 
161   /// \brief The total length of the line up to and including this token.
162   unsigned TotalLength;
163 
164   /// \brief The length of following tokens until the next natural split point,
165   /// or the next token that can be broken.
166   unsigned UnbreakableTailLength;
167 
168   // FIXME: Come up with a 'cleaner' concept.
169   /// \brief The binding strength of a token. This is a combined value of
170   /// operator precedence, parenthesis nesting, etc.
171   unsigned BindingStrength;
172 
173   /// \brief Penalty for inserting a line break before this token.
174   unsigned SplitPenalty;
175 
176   /// \brief If this is the first ObjC selector name in an ObjC method
177   /// definition or call, this contains the length of the longest name.
178   unsigned LongestObjCSelectorName;
179 
180   /// \brief Stores the number of required fake parentheses and the
181   /// corresponding operator precedence.
182   ///
183   /// If multiple fake parentheses start at a token, this vector stores them in
184   /// reverse order, i.e. inner fake parenthesis first.
185   SmallVector<prec::Level, 4> FakeLParens;
186   /// \brief Insert this many fake ) after this token for correct indentation.
187   unsigned FakeRParens;
188 
189   /// \brief Is this the last "." or "->" in a builder-type call?
190   bool LastInChainOfCalls;
191 
192   /// \brief Is this token part of a \c DeclStmt defining multiple variables?
193   ///
194   /// Only set if \c Type == \c TT_StartOfName.
195   bool PartOfMultiVariableDeclStmt;
196 
isFormatToken197   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
198 
isOneOfFormatToken199   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
200     return is(K1) || is(K2);
201   }
202 
isOneOfFormatToken203   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
204     return is(K1) || is(K2) || is(K3);
205   }
206 
207   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
208                tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
209                tok::TokenKind K6 = tok::NUM_TOKENS,
210                tok::TokenKind K7 = tok::NUM_TOKENS,
211                tok::TokenKind K8 = tok::NUM_TOKENS,
212                tok::TokenKind K9 = tok::NUM_TOKENS,
213                tok::TokenKind K10 = tok::NUM_TOKENS,
214                tok::TokenKind K11 = tok::NUM_TOKENS,
215                tok::TokenKind K12 = tok::NUM_TOKENS) const {
216     return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
217            is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
218   }
219 
isNotFormatToken220   bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); }
221 
isObjCAtKeywordFormatToken222   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
223     return Tok.isObjCAtKeyword(Kind);
224   }
225 
226   bool isAccessSpecifier(bool ColonRequired = true) const {
227     return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
228            (!ColonRequired || (Next && Next->is(tok::colon)));
229   }
230 
isObjCAccessSpecifierFormatToken231   bool isObjCAccessSpecifier() const {
232     return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
233                                    Next->isObjCAtKeyword(tok::objc_protected) ||
234                                    Next->isObjCAtKeyword(tok::objc_package) ||
235                                    Next->isObjCAtKeyword(tok::objc_private));
236   }
237 
238   /// \brief Returns whether \p Tok is ([{ or a template opening <.
opensScopeFormatToken239   bool opensScope() const {
240     return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
241            Type == TT_TemplateOpener;
242   }
243   /// \brief Returns whether \p Tok is )]} or a template closing >.
closesScopeFormatToken244   bool closesScope() const {
245     return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
246            Type == TT_TemplateCloser;
247   }
248 
isUnaryOperatorFormatToken249   bool isUnaryOperator() const {
250     switch (Tok.getKind()) {
251     case tok::plus:
252     case tok::plusplus:
253     case tok::minus:
254     case tok::minusminus:
255     case tok::exclaim:
256     case tok::tilde:
257     case tok::kw_sizeof:
258     case tok::kw_alignof:
259       return true;
260     default:
261       return false;
262     }
263   }
isBinaryOperatorFormatToken264   bool isBinaryOperator() const {
265     // Comma is a binary operator, but does not behave as such wrt. formatting.
266     return getPrecedence() > prec::Comma;
267   }
isTrailingCommentFormatToken268   bool isTrailingComment() const {
269     return is(tok::comment) && (!Next || Next->NewlinesBefore > 0);
270   }
271 
getPrecedenceFormatToken272   prec::Level getPrecedence() const {
273     return getBinOpPrecedence(Tok.getKind(), true, true);
274   }
275 
276   /// \brief Returns the previous token ignoring comments.
getPreviousNonCommentFormatToken277   FormatToken *getPreviousNonComment() const {
278     FormatToken *Tok = Previous;
279     while (Tok != NULL && Tok->is(tok::comment))
280       Tok = Tok->Previous;
281     return Tok;
282   }
283 
284   /// \brief Returns the next token ignoring comments.
getNextNonCommentFormatToken285   const FormatToken *getNextNonComment() const {
286     const FormatToken *Tok = Next;
287     while (Tok != NULL && Tok->is(tok::comment))
288       Tok = Tok->Next;
289     return Tok;
290   }
291 
292   FormatToken *MatchingParen;
293 
294   FormatToken *Previous;
295   FormatToken *Next;
296 
297 private:
298   // Disallow copying.
299   FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
300   void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
301 };
302 
303 } // namespace format
304 } // namespace clang
305 
306 #endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
307