1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15
16 using namespace clang;
17
18 namespace {
traverse(const syntax::Node * N,llvm::function_ref<void (const syntax::Node *)> Visit)19 static void traverse(const syntax::Node *N,
20 llvm::function_ref<void(const syntax::Node *)> Visit) {
21 if (auto *T = dyn_cast<syntax::Tree>(N)) {
22 for (const syntax::Node &C : T->getChildren())
23 traverse(&C, Visit);
24 }
25 Visit(N);
26 }
traverse(syntax::Node * N,llvm::function_ref<void (syntax::Node *)> Visit)27 static void traverse(syntax::Node *N,
28 llvm::function_ref<void(syntax::Node *)> Visit) {
29 traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30 Visit(const_cast<syntax::Node *>(N));
31 });
32 }
33 } // namespace
34
Arena(SourceManager & SourceMgr,const LangOptions & LangOpts,const TokenBuffer & Tokens)35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36 const TokenBuffer &Tokens)
37 : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38
getTokenBuffer() const39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40 return Tokens;
41 }
42
43 std::pair<FileID, ArrayRef<syntax::Token>>
lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input)44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45 auto FID = SourceMgr.createFileID(std::move(Input));
46 auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47 assert(It.second && "duplicate FileID");
48 return {FID, It.first->second};
49 }
50
Leaf(const syntax::Token * Tok)51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52 assert(Tok != nullptr);
53 }
54
Node(NodeKind Kind)55 syntax::Node::Node(NodeKind Kind)
56 : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
57 Kind(static_cast<unsigned>(Kind)), Role(0), Original(false),
58 CanModify(false) {
59 this->setRole(NodeRole::Detached);
60 }
61
isDetached() const62 bool syntax::Node::isDetached() const {
63 return getRole() == NodeRole::Detached;
64 }
65
setRole(NodeRole NR)66 void syntax::Node::setRole(NodeRole NR) {
67 this->Role = static_cast<unsigned>(NR);
68 }
69
appendChildLowLevel(Node * Child,NodeRole Role)70 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) {
71 assert(Child->getRole() == NodeRole::Detached);
72 assert(Role != NodeRole::Detached);
73
74 Child->setRole(Role);
75 appendChildLowLevel(Child);
76 }
77
appendChildLowLevel(Node * Child)78 void syntax::Tree::appendChildLowLevel(Node *Child) {
79 assert(Child->Parent == nullptr);
80 assert(Child->NextSibling == nullptr);
81 assert(Child->PreviousSibling == nullptr);
82 assert(Child->getRole() != NodeRole::Detached);
83
84 Child->Parent = this;
85 if (this->LastChild) {
86 Child->PreviousSibling = this->LastChild;
87 this->LastChild->NextSibling = Child;
88 } else
89 this->FirstChild = Child;
90
91 this->LastChild = Child;
92 }
93
prependChildLowLevel(Node * Child,NodeRole Role)94 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
95 assert(Child->getRole() == NodeRole::Detached);
96 assert(Role != NodeRole::Detached);
97
98 Child->setRole(Role);
99 prependChildLowLevel(Child);
100 }
101
prependChildLowLevel(Node * Child)102 void syntax::Tree::prependChildLowLevel(Node *Child) {
103 assert(Child->Parent == nullptr);
104 assert(Child->NextSibling == nullptr);
105 assert(Child->PreviousSibling == nullptr);
106 assert(Child->getRole() != NodeRole::Detached);
107
108 Child->Parent = this;
109 if (this->FirstChild) {
110 Child->NextSibling = this->FirstChild;
111 this->FirstChild->PreviousSibling = Child;
112 } else
113 this->LastChild = Child;
114
115 this->FirstChild = Child;
116 }
117
replaceChildRangeLowLevel(Node * Begin,Node * End,Node * New)118 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
119 Node *New) {
120 assert((!Begin || Begin->Parent == this) &&
121 "`Begin` is not a child of `this`.");
122 assert((!End || End->Parent == this) && "`End` is not a child of `this`.");
123 assert(canModify() && "Cannot modify `this`.");
124
125 #ifndef NDEBUG
126 for (auto *N = New; N; N = N->NextSibling) {
127 assert(N->Parent == nullptr);
128 assert(N->getRole() != NodeRole::Detached && "Roles must be set");
129 // FIXME: sanity-check the role.
130 }
131
132 auto Reachable = [](Node *From, Node *N) {
133 if (!N)
134 return true;
135 for (auto *It = From; It; It = It->NextSibling)
136 if (It == N)
137 return true;
138 return false;
139 };
140 assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable.");
141 assert(Reachable(Begin, End) && "`End` is not after `Begin`.");
142 #endif
143
144 if (!New && Begin == End)
145 return;
146
147 // Mark modification.
148 for (auto *T = this; T && T->Original; T = T->Parent)
149 T->Original = false;
150
151 // Save the node before the range to be removed. Later we insert the `New`
152 // range after this node.
153 auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild;
154
155 // Detach old nodes.
156 for (auto *N = Begin; N != End;) {
157 auto *Next = N->NextSibling;
158
159 N->setRole(NodeRole::Detached);
160 N->Parent = nullptr;
161 N->NextSibling = nullptr;
162 N->PreviousSibling = nullptr;
163 if (N->Original)
164 traverse(N, [](Node *C) { C->Original = false; });
165
166 N = Next;
167 }
168
169 // Attach new range.
170 auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild;
171 auto *&NewLast = End ? End->PreviousSibling : LastChild;
172
173 if (!New) {
174 NewFirst = End;
175 NewLast = BeforeBegin;
176 return;
177 }
178
179 New->PreviousSibling = BeforeBegin;
180 NewFirst = New;
181
182 Node *LastInNew;
183 for (auto *N = New; N != nullptr; N = N->NextSibling) {
184 LastInNew = N;
185 N->Parent = this;
186 }
187 LastInNew->NextSibling = End;
188 NewLast = LastInNew;
189 }
190
191 namespace {
dumpLeaf(raw_ostream & OS,const syntax::Leaf * L,const SourceManager & SM)192 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
193 const SourceManager &SM) {
194 assert(L);
195 const auto *Token = L->getToken();
196 assert(Token);
197 // Handle 'eof' separately, calling text() on it produces an empty string.
198 if (Token->kind() == tok::eof)
199 OS << "<eof>";
200 else
201 OS << Token->text(SM);
202 }
203
dumpNode(raw_ostream & OS,const syntax::Node * N,const SourceManager & SM,std::vector<bool> IndentMask)204 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
205 const SourceManager &SM, std::vector<bool> IndentMask) {
206 auto DumpExtraInfo = [&OS](const syntax::Node *N) {
207 if (N->getRole() != syntax::NodeRole::Unknown)
208 OS << " " << N->getRole();
209 if (!N->isOriginal())
210 OS << " synthesized";
211 if (!N->canModify())
212 OS << " unmodifiable";
213 };
214
215 assert(N);
216 if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
217 OS << "'";
218 dumpLeaf(OS, L, SM);
219 OS << "'";
220 DumpExtraInfo(N);
221 OS << "\n";
222 return;
223 }
224
225 const auto *T = cast<syntax::Tree>(N);
226 OS << T->getKind();
227 DumpExtraInfo(N);
228 OS << "\n";
229
230 for (const syntax::Node &It : T->getChildren()) {
231 for (bool Filled : IndentMask) {
232 if (Filled)
233 OS << "| ";
234 else
235 OS << " ";
236 }
237 if (!It.getNextSibling()) {
238 OS << "`-";
239 IndentMask.push_back(false);
240 } else {
241 OS << "|-";
242 IndentMask.push_back(true);
243 }
244 dumpNode(OS, &It, SM, IndentMask);
245 IndentMask.pop_back();
246 }
247 }
248 } // namespace
249
dump(const SourceManager & SM) const250 std::string syntax::Node::dump(const SourceManager &SM) const {
251 std::string Str;
252 llvm::raw_string_ostream OS(Str);
253 dumpNode(OS, this, SM, /*IndentMask=*/{});
254 return std::move(OS.str());
255 }
256
dumpTokens(const SourceManager & SM) const257 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
258 std::string Storage;
259 llvm::raw_string_ostream OS(Storage);
260 traverse(this, [&](const syntax::Node *N) {
261 if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
262 dumpLeaf(OS, L, SM);
263 OS << " ";
264 }
265 });
266 return OS.str();
267 }
268
assertInvariants() const269 void syntax::Node::assertInvariants() const {
270 #ifndef NDEBUG
271 if (isDetached())
272 assert(getParent() == nullptr);
273 else
274 assert(getParent() != nullptr);
275
276 const auto *T = dyn_cast<Tree>(this);
277 if (!T)
278 return;
279 for (const Node &C : T->getChildren()) {
280 if (T->isOriginal())
281 assert(C.isOriginal());
282 assert(!C.isDetached());
283 assert(C.getParent() == T);
284 const auto *Next = C.getNextSibling();
285 assert(!Next || &C == Next->getPreviousSibling());
286 if (!C.getNextSibling())
287 assert(&C == T->getLastChild() &&
288 "Last child is reachable by advancing from the first child.");
289 }
290
291 const auto *L = dyn_cast<List>(T);
292 if (!L)
293 return;
294 for (const Node &C : T->getChildren()) {
295 assert(C.getRole() == NodeRole::ListElement ||
296 C.getRole() == NodeRole::ListDelimiter);
297 if (C.getRole() == NodeRole::ListDelimiter) {
298 assert(isa<Leaf>(C));
299 assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
300 }
301 }
302
303 #endif
304 }
305
assertInvariantsRecursive() const306 void syntax::Node::assertInvariantsRecursive() const {
307 #ifndef NDEBUG
308 traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
309 #endif
310 }
311
findFirstLeaf() const312 const syntax::Leaf *syntax::Tree::findFirstLeaf() const {
313 for (const Node &C : getChildren()) {
314 if (const auto *L = dyn_cast<syntax::Leaf>(&C))
315 return L;
316 if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf())
317 return L;
318 }
319 return nullptr;
320 }
321
findLastLeaf() const322 const syntax::Leaf *syntax::Tree::findLastLeaf() const {
323 for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) {
324 if (const auto *L = dyn_cast<syntax::Leaf>(C))
325 return L;
326 if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf())
327 return L;
328 }
329 return nullptr;
330 }
331
findChild(NodeRole R) const332 const syntax::Node *syntax::Tree::findChild(NodeRole R) const {
333 for (const Node &C : getChildren()) {
334 if (C.getRole() == R)
335 return &C;
336 }
337 return nullptr;
338 }
339
340 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
getElementsAsNodesAndDelimiters()341 syntax::List::getElementsAsNodesAndDelimiters() {
342 if (!getFirstChild())
343 return {};
344
345 std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
346 syntax::Node *ElementWithoutDelimiter = nullptr;
347 for (Node &C : getChildren()) {
348 switch (C.getRole()) {
349 case syntax::NodeRole::ListElement: {
350 if (ElementWithoutDelimiter) {
351 Children.push_back({ElementWithoutDelimiter, nullptr});
352 }
353 ElementWithoutDelimiter = &C;
354 break;
355 }
356 case syntax::NodeRole::ListDelimiter: {
357 Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)});
358 ElementWithoutDelimiter = nullptr;
359 break;
360 }
361 default:
362 llvm_unreachable(
363 "A list can have only elements and delimiters as children.");
364 }
365 }
366
367 switch (getTerminationKind()) {
368 case syntax::List::TerminationKind::Separated: {
369 Children.push_back({ElementWithoutDelimiter, nullptr});
370 break;
371 }
372 case syntax::List::TerminationKind::Terminated:
373 case syntax::List::TerminationKind::MaybeTerminated: {
374 if (ElementWithoutDelimiter) {
375 Children.push_back({ElementWithoutDelimiter, nullptr});
376 }
377 break;
378 }
379 }
380
381 return Children;
382 }
383
384 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
385 // ignoring delimiters
getElementsAsNodes()386 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
387 if (!getFirstChild())
388 return {};
389
390 std::vector<syntax::Node *> Children;
391 syntax::Node *ElementWithoutDelimiter = nullptr;
392 for (Node &C : getChildren()) {
393 switch (C.getRole()) {
394 case syntax::NodeRole::ListElement: {
395 if (ElementWithoutDelimiter) {
396 Children.push_back(ElementWithoutDelimiter);
397 }
398 ElementWithoutDelimiter = &C;
399 break;
400 }
401 case syntax::NodeRole::ListDelimiter: {
402 Children.push_back(ElementWithoutDelimiter);
403 ElementWithoutDelimiter = nullptr;
404 break;
405 }
406 default:
407 llvm_unreachable("A list has only elements or delimiters.");
408 }
409 }
410
411 switch (getTerminationKind()) {
412 case syntax::List::TerminationKind::Separated: {
413 Children.push_back(ElementWithoutDelimiter);
414 break;
415 }
416 case syntax::List::TerminationKind::Terminated:
417 case syntax::List::TerminationKind::MaybeTerminated: {
418 if (ElementWithoutDelimiter) {
419 Children.push_back(ElementWithoutDelimiter);
420 }
421 break;
422 }
423 }
424
425 return Children;
426 }
427
getDelimiterTokenKind() const428 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
429 switch (this->getKind()) {
430 case NodeKind::NestedNameSpecifier:
431 return clang::tok::coloncolon;
432 case NodeKind::CallArguments:
433 case NodeKind::ParameterDeclarationList:
434 case NodeKind::DeclaratorList:
435 return clang::tok::comma;
436 default:
437 llvm_unreachable("This is not a subclass of List, thus "
438 "getDelimiterTokenKind() cannot be called");
439 }
440 }
441
getTerminationKind() const442 syntax::List::TerminationKind syntax::List::getTerminationKind() const {
443 switch (this->getKind()) {
444 case NodeKind::NestedNameSpecifier:
445 return TerminationKind::Terminated;
446 case NodeKind::CallArguments:
447 case NodeKind::ParameterDeclarationList:
448 case NodeKind::DeclaratorList:
449 return TerminationKind::Separated;
450 default:
451 llvm_unreachable("This is not a subclass of List, thus "
452 "getTerminationKind() cannot be called");
453 }
454 }
455
canBeEmpty() const456 bool syntax::List::canBeEmpty() const {
457 switch (this->getKind()) {
458 case NodeKind::NestedNameSpecifier:
459 return false;
460 case NodeKind::CallArguments:
461 return true;
462 case NodeKind::ParameterDeclarationList:
463 return true;
464 case NodeKind::DeclaratorList:
465 return true;
466 default:
467 llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
468 "cannot be called");
469 }
470 }
471