1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17 #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
18
19 #include "UnwrappedLineParser.h"
20 #include "clang/Basic/OperatorPrecedence.h"
21 #include "clang/Format/Format.h"
22 #include <string>
23
24 namespace clang {
25 class Lexer;
26 class SourceManager;
27
28 namespace format {
29
/// \brief The kinds of annotation stored in \c AnnotatedToken::Type.
///
/// Every \c AnnotatedToken is constructed with \c TT_Unknown. The enumerators
/// rely on their implicit, consecutive values, so new entries must not be
/// given explicit values that collide with existing ones.
enum TokenType {
  TT_BinaryOperator = 0,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown // Initial value of AnnotatedToken::Type.
};
59
/// \brief The kinds of line stored in \c AnnotatedLine::Type.
///
/// The enumerators keep their implicit, consecutive values starting at 0.
enum LineType {
  LT_Invalid = 0,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty
};
70
71 class AnnotatedToken {
72 public:
AnnotatedToken(const FormatToken & FormatTok)73 explicit AnnotatedToken(const FormatToken &FormatTok)
74 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
75 CanBreakBefore(false), MustBreakBefore(false),
76 ClosesTemplateDeclaration(false), MatchingParen(NULL),
77 ParameterCount(0), BindingStrength(0), SplitPenalty(0),
78 LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
79 FakeRParens(0), LastInChainOfCalls(false) {
80 }
81
is(tok::TokenKind Kind)82 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
83
isOneOf(tok::TokenKind K1,tok::TokenKind K2)84 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
85 return is(K1) || is(K2);
86 }
87
isOneOf(tok::TokenKind K1,tok::TokenKind K2,tok::TokenKind K3)88 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
89 return is(K1) || is(K2) || is(K3);
90 }
91
92 bool isOneOf(
93 tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
94 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
95 tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
96 tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
97 tok::TokenKind K10 = tok::NUM_TOKENS,
98 tok::TokenKind K11 = tok::NUM_TOKENS,
99 tok::TokenKind K12 = tok::NUM_TOKENS) const {
100 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
101 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
102 }
103
isNot(tok::TokenKind Kind)104 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
105
isObjCAtKeyword(tok::ObjCKeywordKind Kind)106 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
107 return FormatTok.Tok.isObjCAtKeyword(Kind);
108 }
109
110 FormatToken FormatTok;
111
112 TokenType Type;
113
114 unsigned SpacesRequiredBefore;
115 bool CanBreakBefore;
116 bool MustBreakBefore;
117
118 bool ClosesTemplateDeclaration;
119
120 AnnotatedToken *MatchingParen;
121
122 /// \brief Number of parameters, if this is "(", "[" or "<".
123 ///
124 /// This is initialized to 1 as we don't need to distinguish functions with
125 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
126 /// the number of commas.
127 unsigned ParameterCount;
128
129 /// \brief The total length of the line up to and including this token.
130 unsigned TotalLength;
131
132 // FIXME: Come up with a 'cleaner' concept.
133 /// \brief The binding strength of a token. This is a combined value of
134 /// operator precedence, parenthesis nesting, etc.
135 unsigned BindingStrength;
136
137 /// \brief Penalty for inserting a line break before this token.
138 unsigned SplitPenalty;
139
140 /// \brief If this is the first ObjC selector name in an ObjC method
141 /// definition or call, this contains the length of the longest name.
142 unsigned LongestObjCSelectorName;
143
144 std::vector<AnnotatedToken> Children;
145 AnnotatedToken *Parent;
146
147 /// \brief Insert this many fake ( before this token for correct indentation.
148 unsigned FakeLParens;
149 /// \brief Insert this many fake ) after this token for correct indentation.
150 unsigned FakeRParens;
151
152 /// \brief Is this the last "." or "->" in a builder-type call?
153 bool LastInChainOfCalls;
154
getPreviousNoneComment()155 const AnnotatedToken *getPreviousNoneComment() const {
156 AnnotatedToken *Tok = Parent;
157 while (Tok != NULL && Tok->is(tok::comment))
158 Tok = Tok->Parent;
159 return Tok;
160 }
161 };
162
163 class AnnotatedLine {
164 public:
AnnotatedLine(const UnwrappedLine & Line)165 AnnotatedLine(const UnwrappedLine &Line)
166 : First(Line.Tokens.front()), Level(Line.Level),
167 InPPDirective(Line.InPPDirective),
168 MustBeDeclaration(Line.MustBeDeclaration),
169 MightBeFunctionDecl(false) {
170 assert(!Line.Tokens.empty());
171 AnnotatedToken *Current = &First;
172 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
173 E = Line.Tokens.end();
174 I != E; ++I) {
175 Current->Children.push_back(AnnotatedToken(*I));
176 Current->Children[0].Parent = Current;
177 Current = &Current->Children[0];
178 }
179 Last = Current;
180 }
AnnotatedLine(const AnnotatedLine & Other)181 AnnotatedLine(const AnnotatedLine &Other)
182 : First(Other.First), Type(Other.Type), Level(Other.Level),
183 InPPDirective(Other.InPPDirective),
184 MustBeDeclaration(Other.MustBeDeclaration),
185 MightBeFunctionDecl(Other.MightBeFunctionDecl) {
186 Last = &First;
187 while (!Last->Children.empty()) {
188 Last->Children[0].Parent = Last;
189 Last = &Last->Children[0];
190 }
191 }
192
193 AnnotatedToken First;
194 AnnotatedToken *Last;
195
196 LineType Type;
197 unsigned Level;
198 bool InPPDirective;
199 bool MustBeDeclaration;
200 bool MightBeFunctionDecl;
201 };
202
getPrecedence(const AnnotatedToken & Tok)203 inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
204 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
205 }
206
207 /// \brief Determines extra information about the tokens comprising an
208 /// \c UnwrappedLine.
209 class TokenAnnotator {
210 public:
TokenAnnotator(const FormatStyle & Style,SourceManager & SourceMgr,Lexer & Lex,IdentifierInfo & Ident_in)211 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
212 IdentifierInfo &Ident_in)
213 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
214 }
215
216 void annotate(AnnotatedLine &Line);
217 void calculateFormattingInformation(AnnotatedLine &Line);
218
219 private:
220 /// \brief Calculate the penalty for splitting before \c Tok.
221 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
222
223 bool spaceRequiredBetween(const AnnotatedLine &Line,
224 const AnnotatedToken &Left,
225 const AnnotatedToken &Right);
226
227 bool spaceRequiredBefore(const AnnotatedLine &Line,
228 const AnnotatedToken &Tok);
229
230 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
231
232 const FormatStyle &Style;
233 SourceManager &SourceMgr;
234 Lexer &Lex;
235
236 // Contextual keywords:
237 IdentifierInfo &Ident_in;
238 };
239
240 } // end namespace format
241 } // end namespace clang
242
243 #endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
244