//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ]     [...] [1] [...]
///   h -> [ ... ]     [...] [1] [...]
///   j -> [ ... ]     [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f  -> [ g, h, j ] [...] [1]  [...]
///   g  -> [ j' ]      [...] [1]  [...]
///   h  -> [ ... ]     [...] [1]  [...]
///   j  -> [ ... ]     [...] [N]  [...]
///   j' -> [ ... ]     [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
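///
/// A minimal usage sketch (illustrative only, not prescriptive; it assumes
/// the profiling flags have already been initialized, and the TSC/CPU values
/// are placeholders for readings obtained elsewhere):
///
///   auto Allocators = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(Allocators);
///
///   Trie.enterFunction(/*FId=*/1, TSC0, CPU); // push f onto the shadow stack
///   Trie.enterFunction(/*FId=*/2, TSC1, CPU); // push g as a callee of f
///   Trie.exitFunction(/*FId=*/2, TSC2, CPU);  // credit g with TSC2 - TSC1
///   Trie.exitFunction(/*FId=*/1, TSC3, CPU);  // credit f with TSC3 - TSC0
///
///   for (const auto Root : Trie.getRoots()) {
///     // Walk Root->Callees recursively to consume the recorded data.
///   }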
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of time
  // for this particular node including its children call times, and just the
  // local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated to.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };
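
  // Relating this to the example in the class comment: the node for g on the
  // path `f -> g -> j'` has Parent pointing at f's node, a single NodeIdPair
  // in Callees holding j's function ID and a pointer to the j' node, a
  // CallCount of 1, and FId set to g's function ID. (Illustrative note only.)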

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    typename std::aligned_storage<sizeof(NodeAllocatorType),
                                  alignof(NodeAllocatorType)>::type
        NodeAllocatorStorage;
    typename std::aligned_storage<sizeof(RootAllocatorType),
                                  alignof(RootAllocatorType)>::type
        RootAllocatorStorage;
    typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
                                  alignof(ShadowStackAllocatorType)>::type
        ShadowStackAllocatorStorage;
    typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
                                  alignof(NodeIdPairAllocatorType)>::type
        NodeIdPairAllocatorStorage;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
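
  // A brief sketch of the two initialization paths above (illustrative; the
  // byte limit and the origin of the buffers are assumptions -- in practice
  // the limit comes from the profiling flags, and buffers typically come from
  // a BufferQueue owned by the profiling runtime):
  //
  //   // Size-bounded allocators, e.g. for a thread-local trie:
  //   auto A = FunctionCallTrie::InitAllocatorsCustom(1 << 20);
  //   FunctionCallTrie T1(A);
  //
  //   // Allocators backed by pre-acquired buffers; each Buffer's Data/Size
  //   // must describe memory that outlives the trie:
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   // ... fill in Bufs.NodeBuffer, Bufs.RootsBuffer, etc. ...
  //   auto B = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);
  //   FunctionCallTrie T2(B);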

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we access that node and place it
    // on the top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }
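
  // A sketch of the overflow bookkeeping above (illustrative; the function
  // names and the failing allocation are hypothetical): if the shadow stack
  // cannot grow when entering a function, the entry is counted rather than
  // recorded, and the matching exit is swallowed by the counter so the rest
  // of the stack stays balanced.
  //
  //   enterFunction(f, ...); // recorded normally
  //   enterFunction(h, ...); // shadow stack append fails -> ++OverflowedFunctions
  //   exitFunction(h, ...);  // --OverflowedFunctions, nothing else recorded
  //   exitFunction(f, ...);  // processed normally again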

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed", i.e. did not fit into
    // the stack due to allocator constraints, we decrement that count first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get the accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC) and then account the difference
      // between the entry TSC and the max for the TSC counter (max of
      // uint64_t), then add the value of TSC. We can prove that the maximum
      // delta we will get is at most the 64-bit unsigned value, since the
      // difference between a TSC of 0 and a Top.EntryTSC of 1 is
      // (numeric_limits<uint64_t>::max() - 1) + 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }
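
  // A worked example of the accounting above (illustrative; function IDs and
  // TSC values are made up):
  //
  //   enterFunction(/*f=*/1, /*TSC=*/100, CPU);
  //   enterFunction(/*g=*/2, /*TSC=*/120, CPU);
  //   exitFunction(/*g=*/2, /*TSC=*/180, CPU); // g credited 180 - 120 = 60
  //   exitFunction(/*f=*/1, /*TSC=*/200, CPU); // f credited 200 - 100 = 100
  //
  // If the exit for g never arrives (say, due to a missed exit event), calling
  // exitFunction for f at TSC 200 pops both frames in one go: g is credited
  // 200 - 120 = 80, while f is credited only the remainder,
  // (200 - 100) - 80 = 20, because CumulativeTreeTime subtracts time already
  // attributed to deeper frames popped by the same exit event.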

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a Depth First Traversal from the
  // roots, and while doing so recreating the traversal in the provided
  // FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We then push the root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // Because we cannot allocate more memory, we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If a node doesn't exist
  // in the provided trie, we add a new one in the right position, and inherit
  // the data from the original (current) trie, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
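
  // An illustrative sketch of combining tries (the surrounding setup and the
  // names are hypothetical): per-thread tries are typically folded into an
  // aggregate trie that owns its own allocators.
  //
  //   auto AggregateAllocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Aggregate(AggregateAllocators);
  //   ThreadTrie.deepCopyInto(Aggregate);   // Aggregate must still be empty.
  //   OtherThreadTrie.mergeInto(Aggregate); // accumulates counts and times.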
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H