//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17 #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
18 
19 #include "UnwrappedLineParser.h"
20 #include "clang/Basic/OperatorPrecedence.h"
21 #include "clang/Format/Format.h"
22 #include <string>
23 
24 namespace clang {
25 class Lexer;
26 class SourceManager;
27 
28 namespace format {
29 
/// \brief Classification stored in \c AnnotatedToken::Type.
///
/// A freshly constructed \c AnnotatedToken carries \c TT_Unknown. The
/// enumerators are unscoped and rely on their implicit values (starting at 0,
/// in declaration order), so new entries must not reorder existing ones if
/// any code depends on the numeric values.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown // Default for tokens that have not been classified.
};
59 
/// \brief Classification of a whole line, stored in \c AnnotatedLine::Type.
///
/// The enumerators are unscoped and use their implicit values (starting at 0,
/// in declaration order).
enum LineType {
  LT_Invalid,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty // An @property line.
};
70 
71 class AnnotatedToken {
72 public:
AnnotatedToken(const FormatToken & FormatTok)73   explicit AnnotatedToken(const FormatToken &FormatTok)
74       : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
75         CanBreakBefore(false), MustBreakBefore(false),
76         ClosesTemplateDeclaration(false), MatchingParen(NULL),
77         ParameterCount(0), BindingStrength(0), SplitPenalty(0),
78         LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
79         FakeRParens(0), LastInChainOfCalls(false) {
80   }
81 
is(tok::TokenKind Kind)82   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
83 
isOneOf(tok::TokenKind K1,tok::TokenKind K2)84   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
85     return is(K1) || is(K2);
86   }
87 
isOneOf(tok::TokenKind K1,tok::TokenKind K2,tok::TokenKind K3)88   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
89     return is(K1) || is(K2) || is(K3);
90   }
91 
92   bool isOneOf(
93       tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
94       tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
95       tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
96       tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
97       tok::TokenKind K10 = tok::NUM_TOKENS,
98       tok::TokenKind K11 = tok::NUM_TOKENS,
99       tok::TokenKind K12 = tok::NUM_TOKENS) const {
100     return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
101            is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
102   }
103 
isNot(tok::TokenKind Kind)104   bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
105 
isObjCAtKeyword(tok::ObjCKeywordKind Kind)106   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
107     return FormatTok.Tok.isObjCAtKeyword(Kind);
108   }
109 
110   FormatToken FormatTok;
111 
112   TokenType Type;
113 
114   unsigned SpacesRequiredBefore;
115   bool CanBreakBefore;
116   bool MustBreakBefore;
117 
118   bool ClosesTemplateDeclaration;
119 
120   AnnotatedToken *MatchingParen;
121 
122   /// \brief Number of parameters, if this is "(", "[" or "<".
123   ///
124   /// This is initialized to 1 as we don't need to distinguish functions with
125   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
126   /// the number of commas.
127   unsigned ParameterCount;
128 
129   /// \brief The total length of the line up to and including this token.
130   unsigned TotalLength;
131 
132   // FIXME: Come up with a 'cleaner' concept.
133   /// \brief The binding strength of a token. This is a combined value of
134   /// operator precedence, parenthesis nesting, etc.
135   unsigned BindingStrength;
136 
137   /// \brief Penalty for inserting a line break before this token.
138   unsigned SplitPenalty;
139 
140   /// \brief If this is the first ObjC selector name in an ObjC method
141   /// definition or call, this contains the length of the longest name.
142   unsigned LongestObjCSelectorName;
143 
144   std::vector<AnnotatedToken> Children;
145   AnnotatedToken *Parent;
146 
147   /// \brief Insert this many fake ( before this token for correct indentation.
148   unsigned FakeLParens;
149   /// \brief Insert this many fake ) after this token for correct indentation.
150   unsigned FakeRParens;
151 
152   /// \brief Is this the last "." or "->" in a builder-type call?
153   bool LastInChainOfCalls;
154 
getPreviousNoneComment()155   const AnnotatedToken *getPreviousNoneComment() const {
156     AnnotatedToken *Tok = Parent;
157     while (Tok != NULL && Tok->is(tok::comment))
158       Tok = Tok->Parent;
159     return Tok;
160   }
161 };
162 
163 class AnnotatedLine {
164 public:
AnnotatedLine(const UnwrappedLine & Line)165   AnnotatedLine(const UnwrappedLine &Line)
166       : First(Line.Tokens.front()), Level(Line.Level),
167         InPPDirective(Line.InPPDirective),
168         MustBeDeclaration(Line.MustBeDeclaration),
169         MightBeFunctionDecl(false) {
170     assert(!Line.Tokens.empty());
171     AnnotatedToken *Current = &First;
172     for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
173                                                 E = Line.Tokens.end();
174          I != E; ++I) {
175       Current->Children.push_back(AnnotatedToken(*I));
176       Current->Children[0].Parent = Current;
177       Current = &Current->Children[0];
178     }
179     Last = Current;
180   }
AnnotatedLine(const AnnotatedLine & Other)181   AnnotatedLine(const AnnotatedLine &Other)
182       : First(Other.First), Type(Other.Type), Level(Other.Level),
183         InPPDirective(Other.InPPDirective),
184         MustBeDeclaration(Other.MustBeDeclaration),
185         MightBeFunctionDecl(Other.MightBeFunctionDecl) {
186     Last = &First;
187     while (!Last->Children.empty()) {
188       Last->Children[0].Parent = Last;
189       Last = &Last->Children[0];
190     }
191   }
192 
193   AnnotatedToken First;
194   AnnotatedToken *Last;
195 
196   LineType Type;
197   unsigned Level;
198   bool InPPDirective;
199   bool MustBeDeclaration;
200   bool MightBeFunctionDecl;
201 };
202 
getPrecedence(const AnnotatedToken & Tok)203 inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
204   return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
205 }
206 
207 /// \brief Determines extra information about the tokens comprising an
208 /// \c UnwrappedLine.
209 class TokenAnnotator {
210 public:
TokenAnnotator(const FormatStyle & Style,SourceManager & SourceMgr,Lexer & Lex,IdentifierInfo & Ident_in)211   TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
212                  IdentifierInfo &Ident_in)
213       : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
214   }
215 
216   void annotate(AnnotatedLine &Line);
217   void calculateFormattingInformation(AnnotatedLine &Line);
218 
219 private:
220   /// \brief Calculate the penalty for splitting before \c Tok.
221   unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
222 
223   bool spaceRequiredBetween(const AnnotatedLine &Line,
224                             const AnnotatedToken &Left,
225                             const AnnotatedToken &Right);
226 
227   bool spaceRequiredBefore(const AnnotatedLine &Line,
228                            const AnnotatedToken &Tok);
229 
230   bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
231 
232   const FormatStyle &Style;
233   SourceManager &SourceMgr;
234   Lexer &Lex;
235 
236   // Contextual keywords:
237   IdentifierInfo &Ident_in;
238 };
239 
240 } // end namespace format
241 } // end namespace clang
242 
243 #endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
244