• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- Replacement.h - Framework for clang refactoring tools ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  Classes supporting refactorings that span multiple translation units.
10 //  While single translation unit refactorings are supported via the Rewriter,
11 //  when refactoring multiple translation units changes must be stored in a
12 //  SourceManager independent form, duplicate changes need to be removed, and
13 //  all changes must be applied at once at the end of the refactoring so that
14 //  the code is always parseable.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H
19 #define LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H
20 
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29 #include <set>
30 #include <string>
31 #include <system_error>
32 #include <utility>
33 #include <vector>
34 
35 namespace clang {
36 
37 class FileManager;
38 class Rewriter;
39 class SourceManager;
40 
41 namespace tooling {
42 
/// A half-open byte range [Offset, Offset + Length) that is independent of
/// any \c SourceManager.
class Range {
public:
  /// Creates the empty range located at offset 0.
  Range() = default;

  /// Creates the range [\p Offset, \p Offset + \p Length).
  Range(unsigned Offset, unsigned Length) : Offset(Offset), Length(Length) {}

  /// \name Accessors
  /// @{
  unsigned getOffset() const { return Offset; }
  unsigned getLength() const { return Length; }
  /// @}

  /// \name Range Predicates
  /// @{
  /// Whether this range overlaps with \p RHS or not.
  bool overlapsWith(Range RHS) const {
    return getEnd() > RHS.Offset && Offset < RHS.getEnd();
  }

  /// Whether this range contains \p RHS or not.
  bool contains(Range RHS) const {
    return RHS.Offset >= Offset && RHS.getEnd() <= getEnd();
  }

  /// Whether this range equals to \p RHS or not.
  bool operator==(const Range &RHS) const {
    return Offset == RHS.getOffset() && Length == RHS.getLength();
  }
  /// @}

private:
  /// One-past-the-end offset of the range.
  ///
  /// Computed in a wider type on purpose: `Offset + Length` evaluated in
  /// `unsigned` wraps around for ranges that end at or beyond UINT_MAX, which
  /// made the predicates above report wrong results for such ranges.
  unsigned long long getEnd() const {
    return static_cast<unsigned long long>(Offset) + Length;
  }

  unsigned Offset = 0;
  unsigned Length = 0;
};
78 
79 /// A text replacement.
80 ///
81 /// Represents a SourceManager independent replacement of a range of text in a
82 /// specific file.
83 class Replacement {
84 public:
85   /// Creates an invalid (not applicable) replacement.
86   Replacement();
87 
88   /// Creates a replacement of the range [Offset, Offset+Length) in
89   /// FilePath with ReplacementText.
90   ///
91   /// \param FilePath A source file accessible via a SourceManager.
92   /// \param Offset The byte offset of the start of the range in the file.
93   /// \param Length The length of the range in bytes.
94   Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
95               StringRef ReplacementText);
96 
97   /// Creates a Replacement of the range [Start, Start+Length) with
98   /// ReplacementText.
99   Replacement(const SourceManager &Sources, SourceLocation Start,
100               unsigned Length, StringRef ReplacementText);
101 
102   /// Creates a Replacement of the given range with ReplacementText.
103   Replacement(const SourceManager &Sources, const CharSourceRange &Range,
104               StringRef ReplacementText,
105               const LangOptions &LangOpts = LangOptions());
106 
107   /// Creates a Replacement of the node with ReplacementText.
108   template <typename Node>
109   Replacement(const SourceManager &Sources, const Node &NodeToReplace,
110               StringRef ReplacementText,
111               const LangOptions &LangOpts = LangOptions());
112 
113   /// Returns whether this replacement can be applied to a file.
114   ///
115   /// Only replacements that are in a valid file can be applied.
116   bool isApplicable() const;
117 
118   /// Accessors.
119   /// @{
getFilePath()120   StringRef getFilePath() const { return FilePath; }
getOffset()121   unsigned getOffset() const { return ReplacementRange.getOffset(); }
getLength()122   unsigned getLength() const { return ReplacementRange.getLength(); }
getReplacementText()123   StringRef getReplacementText() const { return ReplacementText; }
124   /// @}
125 
126   /// Applies the replacement on the Rewriter.
127   bool apply(Rewriter &Rewrite) const;
128 
129   /// Returns a human readable string representation.
130   std::string toString() const;
131 
132 private:
133   void setFromSourceLocation(const SourceManager &Sources, SourceLocation Start,
134                              unsigned Length, StringRef ReplacementText);
135   void setFromSourceRange(const SourceManager &Sources,
136                           const CharSourceRange &Range,
137                           StringRef ReplacementText,
138                           const LangOptions &LangOpts);
139 
140   std::string FilePath;
141   Range ReplacementRange;
142   std::string ReplacementText;
143 };
144 
/// Kinds of failure reported through \c ReplacementError.
enum class replacement_error {
  /// Applying replacements failed.
  fail_to_apply = 0,
  /// A replacement's file path does not match the one expected, e.g. it
  /// differs from the file path of the existing replacements in a set.
  wrong_file_path,
  /// Two replacements conflict; presumably because their ranges overlap -
  /// confirm against the implementation.
  overlap_conflict,
  /// Two replacements conflict; presumably order-dependent insertions at the
  /// same offset - confirm against the implementation.
  insert_conflict,
};
151 
152 /// Carries extra error information in replacement-related llvm::Error,
153 /// e.g. fail applying replacements and replacements conflict.
154 class ReplacementError : public llvm::ErrorInfo<ReplacementError> {
155 public:
ReplacementError(replacement_error Err)156   ReplacementError(replacement_error Err) : Err(Err) {}
157 
158   /// Constructs an error related to an existing replacement.
ReplacementError(replacement_error Err,Replacement Existing)159   ReplacementError(replacement_error Err, Replacement Existing)
160       : Err(Err), ExistingReplacement(std::move(Existing)) {}
161 
162   /// Constructs an error related to a new replacement and an existing
163   /// replacement in a set of replacements.
ReplacementError(replacement_error Err,Replacement New,Replacement Existing)164   ReplacementError(replacement_error Err, Replacement New, Replacement Existing)
165       : Err(Err), NewReplacement(std::move(New)),
166         ExistingReplacement(std::move(Existing)) {}
167 
168   std::string message() const override;
169 
log(raw_ostream & OS)170   void log(raw_ostream &OS) const override { OS << message(); }
171 
get()172   replacement_error get() const { return Err; }
173 
174   static char ID;
175 
getNewReplacement()176   const llvm::Optional<Replacement> &getNewReplacement() const {
177     return NewReplacement;
178   }
179 
getExistingReplacement()180   const llvm::Optional<Replacement> &getExistingReplacement() const {
181     return ExistingReplacement;
182   }
183 
184 private:
185   // Users are not expected to use error_code.
convertToErrorCode()186   std::error_code convertToErrorCode() const override {
187     return llvm::inconvertibleErrorCode();
188   }
189 
190   replacement_error Err;
191 
192   // A new replacement, which is to expected be added into a set of
193   // replacements, that is causing problem.
194   llvm::Optional<Replacement> NewReplacement;
195 
196   // An existing replacement in a replacements set that is causing problem.
197   llvm::Optional<Replacement> ExistingReplacement;
198 };
199 
200 /// Less-than operator between two Replacements.
201 bool operator<(const Replacement &LHS, const Replacement &RHS);
202 
203 /// Equal-to operator between two Replacements.
204 bool operator==(const Replacement &LHS, const Replacement &RHS);
205 
206 /// Maintains a set of replacements that are conflict-free.
207 /// Two replacements are considered conflicts if they overlap or have the same
208 /// offset (i.e. order-dependent).
209 class Replacements {
210 private:
211   using ReplacementsImpl = std::set<Replacement>;
212 
213 public:
214   using const_iterator = ReplacementsImpl::const_iterator;
215   using const_reverse_iterator = ReplacementsImpl::const_reverse_iterator;
216 
217   Replacements() = default;
218 
Replacements(const Replacement & R)219   explicit Replacements(const Replacement &R) { Replaces.insert(R); }
220 
221   /// Adds a new replacement \p R to the current set of replacements.
222   /// \p R must have the same file path as all existing replacements.
223   /// Returns `success` if the replacement is successfully inserted; otherwise,
224   /// it returns an llvm::Error, i.e. there is a conflict between R and the
225   /// existing replacements (i.e. they are order-dependent) or R's file path is
226   /// different from the filepath of existing replacements. Callers must
227   /// explicitly check the Error returned, and the returned error can be
228   /// converted to a string message with `llvm::toString()`. This prevents users
229   /// from adding order-dependent replacements. To control the order in which
230   /// order-dependent replacements are applied, use merge({R}) with R referring
231   /// to the changed code after applying all existing replacements.
232   /// Two replacements A and B are considered order-independent if applying them
233   /// in either order produces the same result. Note that the range of the
234   /// replacement that is applied later still refers to the original code.
235   /// These include (but not restricted to) replacements that:
236   ///   - don't overlap (being directly adjacent is fine) and
237   ///   - are overlapping deletions.
238   ///   - are insertions at the same offset and applying them in either order
239   ///     has the same effect, i.e. X + Y = Y + X when inserting X and Y
240   ///     respectively.
241   ///   - are identical replacements, i.e. applying the same replacement twice
242   ///     is equivalent to applying it once.
243   /// Examples:
244   /// 1. Replacement A(0, 0, "a") and B(0, 0, "aa") are order-independent since
245   ///    applying them in either order gives replacement (0, 0, "aaa").
246   ///    However, A(0, 0, "a") and B(0, 0, "b") are order-dependent since
247   ///    applying A first gives (0, 0, "ab") while applying B first gives (B, A,
248   ///    "ba").
249   /// 2. Replacement A(0, 2, "123") and B(0, 2, "123") are order-independent
250   ///    since applying them in either order gives (0, 2, "123").
251   /// 3. Replacement A(0, 3, "123") and B(2, 3, "321") are order-independent
252   ///    since either order gives (0, 5, "12321").
253   /// 4. Replacement A(0, 3, "ab") and B(0, 3, "ab") are order-independent since
254   ///    applying the same replacement twice is equivalent to applying it once.
255   /// Replacements with offset UINT_MAX are special - we do not detect conflicts
256   /// for such replacements since users may add them intentionally as a special
257   /// category of replacements.
258   llvm::Error add(const Replacement &R);
259 
260   /// Merges \p Replaces into the current replacements. \p Replaces
261   /// refers to code after applying the current replacements.
262   LLVM_NODISCARD Replacements merge(const Replacements &Replaces) const;
263 
264   // Returns the affected ranges in the changed code.
265   std::vector<Range> getAffectedRanges() const;
266 
267   // Returns the new offset in the code after replacements being applied.
268   // Note that if there is an insertion at Offset in the current replacements,
269   // \p Offset will be shifted to Offset + Length in inserted text.
270   unsigned getShiftedCodePosition(unsigned Position) const;
271 
size()272   unsigned size() const { return Replaces.size(); }
273 
clear()274   void clear() { Replaces.clear(); }
275 
empty()276   bool empty() const { return Replaces.empty(); }
277 
begin()278   const_iterator begin() const { return Replaces.begin(); }
279 
end()280   const_iterator end() const { return Replaces.end(); }
281 
rbegin()282   const_reverse_iterator rbegin() const  { return Replaces.rbegin(); }
283 
rend()284   const_reverse_iterator rend() const { return Replaces.rend(); }
285 
286   bool operator==(const Replacements &RHS) const {
287     return Replaces == RHS.Replaces;
288   }
289 
290 private:
Replacements(const_iterator Begin,const_iterator End)291   Replacements(const_iterator Begin, const_iterator End)
292       : Replaces(Begin, End) {}
293 
294   // Returns `R` with new range that refers to code after `Replaces` being
295   // applied.
296   Replacement getReplacementInChangedCode(const Replacement &R) const;
297 
298   // Returns a set of replacements that is equivalent to the current
299   // replacements by merging all adjacent replacements. Two sets of replacements
300   // are considered equivalent if they have the same effect when they are
301   // applied.
302   Replacements getCanonicalReplacements() const;
303 
304   // If `R` and all existing replacements are order-indepedent, then merge it
305   // with `Replaces` and returns the merged replacements; otherwise, returns an
306   // error.
307   llvm::Expected<Replacements>
308   mergeIfOrderIndependent(const Replacement &R) const;
309 
310   ReplacementsImpl Replaces;
311 };
312 
313 /// Apply all replacements in \p Replaces to the Rewriter \p Rewrite.
314 ///
315 /// Replacement applications happen independently of the success of
316 /// other applications.
317 ///
318 /// \returns true if all replacements apply. false otherwise.
319 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite);
320 
321 /// Applies all replacements in \p Replaces to \p Code.
322 ///
323 /// This completely ignores the path stored in each replacement. If all
324 /// replacements are applied successfully, this returns the code with
325 /// replacements applied; otherwise, an llvm::Error carrying llvm::StringError
326 /// is returned (the Error message can be converted to string using
327 /// `llvm::toString()` and 'std::error_code` in the `Error` should be ignored).
328 llvm::Expected<std::string> applyAllReplacements(StringRef Code,
329                                                  const Replacements &Replaces);
330 
331 /// Collection of Replacements generated from a single translation unit.
332 struct TranslationUnitReplacements {
333   /// Name of the main source for the translation unit.
334   std::string MainSourceFile;
335 
336   std::vector<Replacement> Replacements;
337 };
338 
339 /// Calculates the new ranges after \p Replaces are applied. These
340 /// include both the original \p Ranges and the affected ranges of \p Replaces
341 /// in the new code.
342 ///
343 /// \pre Replacements must be for the same file.
344 ///
345 /// \return The new ranges after \p Replaces are applied. The new ranges will be
346 /// sorted and non-overlapping.
347 std::vector<Range>
348 calculateRangesAfterReplacements(const Replacements &Replaces,
349                                  const std::vector<Range> &Ranges);
350 
351 /// If there are multiple <File, Replacements> pairs with the same file
352 /// entry, we only keep one pair and discard the rest.
353 /// If a file does not exist, its corresponding replacements will be ignored.
354 std::map<std::string, Replacements> groupReplacementsByFile(
355     FileManager &FileMgr,
356     const std::map<std::string, Replacements> &FileToReplaces);
357 
358 template <typename Node>
Replacement(const SourceManager & Sources,const Node & NodeToReplace,StringRef ReplacementText,const LangOptions & LangOpts)359 Replacement::Replacement(const SourceManager &Sources,
360                          const Node &NodeToReplace, StringRef ReplacementText,
361                          const LangOptions &LangOpts) {
362   const CharSourceRange Range =
363       CharSourceRange::getTokenRange(NodeToReplace->getSourceRange());
364   setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
365 }
366 
367 } // namespace tooling
368 
369 } // namespace clang
370 
371 #endif // LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H
372