• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- BreakableToken.h - Format C++ code -------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
/// \brief Declares BreakableToken and its subclasses
/// (BreakableSingleLineToken, BreakableStringLiteral, BreakableLineComment,
/// and BreakableBlockComment), which contain token type-specific logic to
/// break long lines in tokens.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
18 #define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
19 
20 #include "Encoding.h"
21 #include "TokenAnnotator.h"
22 #include "WhitespaceManager.h"
23 #include <utility>
24 
25 namespace clang {
26 namespace format {
27 
28 struct FormatStyle;
29 
30 /// \brief Base class for strategies on how to break tokens.
31 ///
32 /// FIXME: The interface seems set in stone, so we might want to just pull the
33 /// strategy into the class, instead of controlling it from the outside.
34 class BreakableToken {
35 public:
36   /// \brief Contains starting character index and length of split.
37   typedef std::pair<StringRef::size_type, unsigned> Split;
38 
~BreakableToken()39   virtual ~BreakableToken() {}
40 
41   /// \brief Returns the number of lines in this token in the original code.
42   virtual unsigned getLineCount() const = 0;
43 
44   /// \brief Returns the number of columns required to format the piece of line
45   /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46   ///
47   /// Note that previous breaks are not taken into account. \p Offset is always
48   /// specified from the start of the (original) line.
49   /// \p Length can be set to StringRef::npos, which means "to the end of line".
50   virtual unsigned
51   getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52                           StringRef::size_type Length) const = 0;
53 
54   /// \brief Returns a range (offset, length) at which to break the line at
55   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56   /// violate \p ColumnLimit.
57   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58                          unsigned ColumnLimit) const = 0;
59 
60   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
61   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
62                            WhitespaceManager &Whitespaces) = 0;
63 
64   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
replaceWhitespaceBefore(unsigned LineIndex,WhitespaceManager & Whitespaces)65   virtual void replaceWhitespaceBefore(unsigned LineIndex,
66                                        WhitespaceManager &Whitespaces) {}
67 
68 protected:
BreakableToken(const FormatToken & Tok,bool InPPDirective,encoding::Encoding Encoding)69   BreakableToken(const FormatToken &Tok, bool InPPDirective,
70                  encoding::Encoding Encoding)
71       : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
72 
73   const FormatToken &Tok;
74   const bool InPPDirective;
75   const encoding::Encoding Encoding;
76 };
77 
78 /// \brief Base class for single line tokens that can be broken.
79 ///
80 /// \c getSplit() needs to be implemented by child classes.
81 class BreakableSingleLineToken : public BreakableToken {
82 public:
83   virtual unsigned getLineCount() const;
84   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
85                                            unsigned TailOffset,
86                                            StringRef::size_type Length) const;
87 
88 protected:
89   BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
90                            StringRef Prefix, StringRef Postfix,
91                            bool InPPDirective, encoding::Encoding Encoding);
92 
93   // The column in which the token starts.
94   unsigned StartColumn;
95   // The prefix a line needs after a break in the token.
96   StringRef Prefix;
97   // The postfix a line needs before introducing a break.
98   StringRef Postfix;
99   // The token text excluding the prefix and postfix.
100   StringRef Line;
101 };
102 
103 class BreakableStringLiteral : public BreakableSingleLineToken {
104 public:
105   /// \brief Creates a breakable token for a single line string literal.
106   ///
107   /// \p StartColumn specifies the column in which the token will start
108   /// after formatting.
109   BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
110                          bool InPPDirective, encoding::Encoding Encoding);
111 
112   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
113                          unsigned ColumnLimit) const;
114   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
115                            WhitespaceManager &Whitespaces);
116 };
117 
118 class BreakableLineComment : public BreakableSingleLineToken {
119 public:
120   /// \brief Creates a breakable token for a line comment.
121   ///
122   /// \p StartColumn specifies the column in which the comment will start
123   /// after formatting.
124   BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
125                        bool InPPDirective, encoding::Encoding Encoding);
126 
127   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
128                          unsigned ColumnLimit) const;
129   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
130                            WhitespaceManager &Whitespaces);
131   virtual void replaceWhitespaceBefore(unsigned LineIndex,
132                                        WhitespaceManager &Whitespaces);
133 
134 private:
135   // The prefix without an additional space if one was added.
136   StringRef OriginalPrefix;
137 };
138 
139 class BreakableBlockComment : public BreakableToken {
140 public:
141   /// \brief Creates a breakable token for a block comment.
142   ///
143   /// \p StartColumn specifies the column in which the comment will start
144   /// after formatting, while \p OriginalStartColumn specifies in which
145   /// column the comment started before formatting.
146   /// If the comment starts a line after formatting, set \p FirstInLine to true.
147   BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
148                         unsigned StartColumn, unsigned OriginaStartColumn,
149                         bool FirstInLine, bool InPPDirective,
150                         encoding::Encoding Encoding);
151 
152   virtual unsigned getLineCount() const;
153   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
154                                            unsigned TailOffset,
155                                            StringRef::size_type Length) const;
156   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
157                          unsigned ColumnLimit) const;
158   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
159                            WhitespaceManager &Whitespaces);
160   virtual void replaceWhitespaceBefore(unsigned LineIndex,
161                                        WhitespaceManager &Whitespaces);
162 
163 private:
164   // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
165   // so that all whitespace between the lines is accounted to Lines[LineIndex]
166   // as leading whitespace:
167   // - Lines[LineIndex] points to the text after that whitespace
168   // - Lines[LineIndex-1] shrinks by its trailing whitespace
169   // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
170   //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
171   //
172   // Sets StartOfLineColumn to the intended column in which the text at
173   // Lines[LineIndex] starts (note that the decoration, if present, is not
174   // considered part of the text).
175   void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
176                         int IndentDelta);
177 
178   // Returns the column at which the text in line LineIndex starts, when broken
179   // at TailOffset. Note that the decoration (if present) is not considered part
180   // of the text.
181   unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
182 
183   // Contains the text of the lines of the block comment, excluding the leading
184   // /* in the first line and trailing */ in the last line, and excluding all
185   // trailing whitespace between the lines. Note that the decoration (if
186   // present) is also not considered part of the text.
187   SmallVector<StringRef, 16> Lines;
188 
189   // LeadingWhitespace[i] is the number of characters regarded as whitespace in
190   // front of Lines[i]. Note that this can include "* " sequences, which we
191   // regard as whitespace when all lines have a "*" prefix.
192   SmallVector<unsigned, 16> LeadingWhitespace;
193 
194   // StartOfLineColumn[i] is the target column at which Line[i] should be.
195   // Note that this excludes a leading "* " or "*" in case all lines have
196   // a "*" prefix.
197   SmallVector<unsigned, 16> StartOfLineColumn;
198 
199   // The column at which the text of a broken line should start.
200   // Note that an optional decoration would go before that column.
201   // IndentAtLineBreak is a uniform position for all lines in a block comment,
202   // regardless of their relative position.
203   // FIXME: Revisit the decision to do this; the main reason was to support
204   // patterns like
205   // /**************//**
206   //  * Comment
207   // We could also support such patterns by special casing the first line
208   // instead.
209   unsigned IndentAtLineBreak;
210 
211   // This is to distinguish between the case when the last line was empty and
212   // the case when it started with a decoration ("*" or "* ").
213   bool LastLineNeedsDecoration;
214 
215   // Either "* " if all lines begin with a "*", or empty.
216   StringRef Decoration;
217 };
218 
219 } // namespace format
220 } // namespace clang
221 
222 #endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
223