• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 //   - leaf nodes correspond to a token in the expanded token stream,
10 //   - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continous subrange of expanded tokens (i.e. tokens after
14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding the original order of expanded tokens.
17 //
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
23 
24 #include "clang/Basic/LangOptions.h"
25 #include "clang/Basic/SourceLocation.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Basic/TokenKinds.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/iterator.h"
32 #include "llvm/Support/Allocator.h"
33 #include <cstdint>
34 #include <iterator>
35 
36 namespace clang {
37 namespace syntax {
38 
39 /// A memory arena for syntax trees. Also tracks the underlying token buffers,
40 /// source manager, etc.
41 class Arena {
42 public:
43   Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
44         const TokenBuffer &Tokens);
45 
getSourceManager()46   const SourceManager &getSourceManager() const { return SourceMgr; }
getLangOptions()47   const LangOptions &getLangOptions() const { return LangOpts; }
48 
49   const TokenBuffer &getTokenBuffer() const;
getAllocator()50   llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
51 
52 private:
53   /// Add \p Buffer to the underlying source manager, tokenize it and store the
54   /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
55   /// that were not written in user code.
56   std::pair<FileID, ArrayRef<Token>>
57   lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
58   friend class FactoryImpl;
59 
60 private:
61   SourceManager &SourceMgr;
62   const LangOptions &LangOpts;
63   const TokenBuffer &Tokens;
64   /// IDs and storage for additional tokenized files.
65   llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
66   /// Keeps all the allocated nodes and their intermediate data structures.
67   llvm::BumpPtrAllocator Allocator;
68 };
69 
70 class Tree;
71 class TreeBuilder;
72 class FactoryImpl;
73 class MutationsImpl;
74 
75 enum class NodeKind : uint16_t;
76 enum class NodeRole : uint8_t;
77 
78 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
79 /// a Tree (representing language constructrs).
80 class Node {
81 protected:
82   /// Newly created nodes are detached from a tree, parent and sibling links are
83   /// set when the node is added as a child to another one.
84   Node(NodeKind Kind);
85   /// Nodes are allocated on Arenas; the destructor is never called.
86   ~Node() = default;
87 
88 public:
89   /// Nodes cannot simply be copied without violating tree invariants.
90   Node(const Node &) = delete;
91   Node &operator=(const Node &) = delete;
92   /// Idiomatically, nodes are allocated on an Arena and never moved.
93   Node(Node &&) = delete;
94   Node &operator=(Node &&) = delete;
95 
getKind()96   NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
getRole()97   NodeRole getRole() const { return static_cast<NodeRole>(Role); }
98 
99   /// Whether the node is detached from a tree, i.e. does not have a parent.
100   bool isDetached() const;
101   /// Whether the node was created from the AST backed by the source code
102   /// rather than added later through mutation APIs or created with factory
103   /// functions.
104   /// When this flag is true, all subtrees are also original.
105   /// This flag is set to false on any modifications to the node or any of its
106   /// subtrees, even if this simply involves swapping existing subtrees.
isOriginal()107   bool isOriginal() const { return Original; }
108   /// If this function return false, the tree cannot be modified because there
109   /// is no reasonable way to produce the corresponding textual replacements.
110   /// This can happen when the node crosses macro expansion boundaries.
111   ///
112   /// Note that even if the node is not modifiable, its child nodes can be
113   /// modifiable.
canModify()114   bool canModify() const { return CanModify; }
115 
getParent()116   const Tree *getParent() const { return Parent; }
getParent()117   Tree *getParent() { return Parent; }
118 
getNextSibling()119   const Node *getNextSibling() const { return NextSibling; }
getNextSibling()120   Node *getNextSibling() { return NextSibling; }
getPreviousSibling()121   const Node *getPreviousSibling() const { return PreviousSibling; }
getPreviousSibling()122   Node *getPreviousSibling() { return PreviousSibling; }
123 
124   /// Dumps the structure of a subtree. For debugging and testing purposes.
125   std::string dump(const SourceManager &SM) const;
126   /// Dumps the tokens forming this subtree.
127   std::string dumpTokens(const SourceManager &SM) const;
128 
129   /// Asserts invariants on this node of the tree and its immediate children.
130   /// Will not recurse into the subtree. No-op if NDEBUG is set.
131   void assertInvariants() const;
132   /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
133   void assertInvariantsRecursive() const;
134 
135 private:
136   // Tree is allowed to change the Parent link and Role.
137   friend class Tree;
138   // TreeBuilder is allowed to set the Original and CanModify flags.
139   friend class TreeBuilder;
140   // MutationsImpl sets roles and CanModify flag.
141   friend class MutationsImpl;
142   // FactoryImpl sets CanModify flag.
143   friend class FactoryImpl;
144 
145   void setRole(NodeRole NR);
146 
147   Tree *Parent;
148   Node *NextSibling;
149   Node *PreviousSibling;
150   unsigned Kind : 16;
151   unsigned Role : 8;
152   unsigned Original : 1;
153   unsigned CanModify : 1;
154 };
155 
156 /// A leaf node points to a single token inside the expanded token stream.
157 class Leaf final : public Node {
158 public:
159   Leaf(const Token *T);
160   static bool classof(const Node *N);
161 
getToken()162   const Token *getToken() const { return Tok; }
163 
164 private:
165   const Token *Tok;
166 };
167 
168 /// A node that has children and represents a syntactic language construct.
169 class Tree : public Node {
170   /// Iterator over children (common base for const/non-const).
171   /// Not invalidated by tree mutations (holds a stable node pointer).
172   template <typename DerivedT, typename NodeT>
173   class ChildIteratorBase
174       : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
175                                           NodeT> {
176   protected:
177     NodeT *N = nullptr;
178     using Base = ChildIteratorBase;
179 
180   public:
181     ChildIteratorBase() = default;
ChildIteratorBase(NodeT * N)182     explicit ChildIteratorBase(NodeT *N) : N(N) {}
183 
184     bool operator==(const DerivedT &O) const { return O.N == N; }
185     NodeT &operator*() const { return *N; }
186     DerivedT &operator++() {
187       N = N->getNextSibling();
188       return *static_cast<DerivedT *>(this);
189     }
190 
191     /// Truthy if valid (not past-the-end).
192     /// This allows: if (auto It = find_if(N.children(), ...) )
193     explicit operator bool() const { return N != nullptr; }
194     /// The element, or nullptr if past-the-end.
asPointer()195     NodeT *asPointer() const { return N; }
196   };
197 
198 public:
199   static bool classof(const Node *N);
200 
getFirstChild()201   Node *getFirstChild() { return FirstChild; }
getFirstChild()202   const Node *getFirstChild() const { return FirstChild; }
getLastChild()203   Node *getLastChild() { return LastChild; }
getLastChild()204   const Node *getLastChild() const { return LastChild; }
205 
206   const Leaf *findFirstLeaf() const;
findFirstLeaf()207   Leaf *findFirstLeaf() {
208     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
209   }
210 
211   const Leaf *findLastLeaf() const;
findLastLeaf()212   Leaf *findLastLeaf() {
213     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
214   }
215 
216   /// child_iterator is not invalidated by mutations.
217   struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
218     using Base::ChildIteratorBase;
219   };
220   struct ConstChildIterator
221       : ChildIteratorBase<ConstChildIterator, const Node> {
222     using Base::ChildIteratorBase;
223     ConstChildIterator() = default;
ConstChildIteratorConstChildIterator224     ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
225   };
226 
getChildren()227   llvm::iterator_range<ChildIterator> getChildren() {
228     return {ChildIterator(getFirstChild()), ChildIterator()};
229   }
getChildren()230   llvm::iterator_range<ConstChildIterator> getChildren() const {
231     return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
232   }
233 
234   /// Find the first node with a corresponding role.
235   const Node *findChild(NodeRole R) const;
findChild(NodeRole R)236   Node *findChild(NodeRole R) {
237     return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
238   }
239 
240 protected:
241   using Node::Node;
242 
243 private:
244   /// Append \p Child to the list of children and sets the parent pointer.
245   /// A very low-level operation that does not check any invariants, only used
246   /// by TreeBuilder and FactoryImpl.
247   /// EXPECTS: Role != Detached.
248   void appendChildLowLevel(Node *Child, NodeRole Role);
249   /// Similar but prepends.
250   void prependChildLowLevel(Node *Child, NodeRole Role);
251 
252   /// Like the previous overloads, but does not set role for \p Child.
253   /// EXPECTS: Child->Role != Detached
254   void appendChildLowLevel(Node *Child);
255   void prependChildLowLevel(Node *Child);
256   friend class TreeBuilder;
257   friend class FactoryImpl;
258 
259   /// Replace a range of children [Begin, End) with a list of
260   /// new nodes starting at \p New.
261   /// Only used by MutationsImpl to implement higher-level mutation operations.
262   /// (!) \p New can be null to model removal of the child range.
263   /// (!) \p End can be null to model one past the end.
264   /// (!) \p Begin can be null to model an append.
265   void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
266   friend class MutationsImpl;
267 
268   Node *FirstChild = nullptr;
269   Node *LastChild = nullptr;
270 };
271 
272 // Provide missing non_const == const overload.
273 // iterator_facade_base requires == to be a member, but implicit conversions
274 // don't work on the LHS of a member operator.
275 inline bool operator==(const Tree::ConstChildIterator &A,
276                        const Tree::ConstChildIterator &B) {
277   return A.operator==(B);
278 }
279 
280 /// A list of Elements separated or terminated by a fixed token.
281 ///
282 /// This type models the following grammar construct:
283 /// delimited-list(element, delimiter, termination, canBeEmpty)
284 class List : public Tree {
285 public:
286   template <typename Element> struct ElementAndDelimiter {
287     Element *element;
288     Leaf *delimiter;
289   };
290 
291   enum class TerminationKind {
292     Terminated,
293     MaybeTerminated,
294     Separated,
295   };
296 
297   using Tree::Tree;
298   static bool classof(const Node *N);
299   /// Returns the elements and corresponding delimiters. Missing elements
300   /// and delimiters are represented as null pointers.
301   ///
302   /// For example, in a separated list:
303   /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
304   /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
305   /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
306   /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
307   ///
308   /// In a terminated or maybe-terminated list:
309   /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
310   /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
311   /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
312   /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
313   std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
314 
315   /// Returns the elements of the list. Missing elements are represented
316   /// as null pointers in the same way as in the return value of
317   /// `getElementsAsNodesAndDelimiters()`.
318   std::vector<Node *> getElementsAsNodes();
319 
320   // These can't be implemented with the information we have!
321 
322   /// Returns the appropriate delimiter for this list.
323   ///
324   /// Useful for discovering the correct delimiter to use when adding
325   /// elements to empty or one-element lists.
326   clang::tok::TokenKind getDelimiterTokenKind() const;
327 
328   TerminationKind getTerminationKind() const;
329 
330   /// Whether this list can be empty in syntactically and semantically correct
331   /// code.
332   ///
333   /// This list may be empty when the source code has errors even if
334   /// canBeEmpty() returns false.
335   bool canBeEmpty() const;
336 };
337 
338 } // namespace syntax
339 } // namespace clang
340 
341 #endif
342