1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ 17 #define TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ 18 19 #include <deque> 20 #include <functional> 21 #include <memory> 22 #include <string> 23 #include <vector> 24 25 #include "absl/container/flat_hash_map.h" 26 #include "absl/container/flat_hash_set.h" 27 #include "absl/strings/string_view.h" 28 #include "absl/types/optional.h" 29 #include "tensorflow/core/platform/logging.h" 30 #include "tensorflow/core/platform/types.h" 31 #include "tensorflow/core/profiler/protobuf/xplane.pb.h" 32 #include "tensorflow/core/profiler/utils/xplane_visitor.h" 33 34 namespace tensorflow { 35 namespace profiler { 36 37 // Information required to connect events across threads. The first two fields 38 // specify the event types of parent and child events. In addition to matching 39 // the event types, both events should have stats of the stat types specified 40 // in stat_types and their values should be the same. 41 struct InterThreadConnectInfo { 42 int64 parent_event_type; 43 int64 child_event_type; 44 std::vector<int64> parent_stat_types; 45 std::vector<int64> child_stat_types; 46 }; 47 48 struct ContextInfo { ContextInfoContextInfo49 ContextInfo(int type, uint64 id) : type(type), id(id) {} 50 int type; 51 uint64 id; 52 }; 53 54 struct GroupMetadata { 55 std::string name; 56 std::string model_id; // inference only. 57 absl::flat_hash_set<int64> parents; 58 absl::flat_hash_set<int64> children; 59 }; 60 61 using GroupMetadataMap = absl::flat_hash_map<int64 /*group_id*/, GroupMetadata>; 62 63 // A wrapper for XEvent with parent and children pointers. Through these 64 // pointers, a tree of EventNode is formed. 65 class EventNode { 66 public: 67 // REQUIRED: all inputs should not be nullptr. 68 EventNode(const XPlaneVisitor* plane, XLine* raw_line, XEvent* raw_event); 69 70 EventNode(const EventNode& event_node); 71 GetParents()72 const std::vector<EventNode*>& GetParents() const { return parents_; } 73 GetChildren()74 const std::vector<EventNode*>& GetChildren() const { return children_; } 75 AddChild(EventNode * child)76 void AddChild(EventNode* child) { 77 children_.push_back(child); 78 child->parents_.push_back(this); 79 } 80 GetGroupId()81 absl::optional<int64> GetGroupId() const { return group_id_; } 82 83 std::string GetGroupName() const; 84 85 void SetGroupId(int64 group_id); 86 87 // Sets group_id for this node and its descendants. 88 void PropagateGroupId(int64 group_id, GroupMetadataMap* group_metadata_map); 89 GetPlaneVisitor()90 const XPlaneVisitor& GetPlaneVisitor() const { return *plane_; } 91 GetEventVisitor()92 const XEventVisitor& GetEventVisitor() const { return visitor_; } 93 94 absl::optional<XStatVisitor> GetContextStat(int64 stat_type) const; 95 96 void AddStepName(absl::string_view step_name); 97 98 // Add a helper stat, "selected_group_ids", with group_ids of the groups 99 // connected to this event's group. 100 void AddSelectedGroupIds(const GroupMetadataMap& group_metadata_map); 101 102 void SetIsEager(bool is_eager); 103 104 // Returns true if this event is part of eagerly executed op. 105 bool IsEager(); 106 107 bool IsNestedIn(EventNode* parent); 108 109 // Returns the closest parent (including itself) of the given event type. 110 const EventNode* FindParent(int64 event_type) const; 111 GetProducerContext()112 absl::optional<ContextInfo> GetProducerContext() const { 113 return producer_context_; 114 } 115 GetConsumerContext()116 absl::optional<ContextInfo> GetConsumerContext() const { 117 return consumer_context_; 118 } 119 SetIsRoot(bool is_root)120 void SetIsRoot(bool is_root) { is_root_ = is_root; } 121 IsRoot()122 bool IsRoot() const { return is_root_; } 123 IsAsync()124 bool IsAsync() const { return is_async_; } 125 126 bool StartsBefore(const EventNode& other) const; 127 128 private: 129 XStat* FindOrAddStatByType(int64 stat_type); 130 131 const XPlaneVisitor* plane_; 132 XEventVisitor visitor_; 133 XLine* raw_line_; 134 XEvent* raw_event_; 135 std::vector<EventNode*> parents_; 136 std::vector<EventNode*> children_; 137 absl::optional<int64> group_id_; 138 absl::optional<ContextInfo> producer_context_; 139 absl::optional<ContextInfo> consumer_context_; 140 bool is_root_ = false; 141 bool is_async_ = false; 142 }; 143 144 using EventNodeMap = 145 absl::flat_hash_map<int64 /*event_type*/, 146 std::vector<std::unique_ptr<EventNode>>>; 147 148 using EventList = std::vector<EventNode*>; 149 150 struct ContextGroup { 151 std::vector<EventNode*> producers; 152 std::vector<EventNode*> consumers; 153 }; 154 155 using ContextGroupMap = absl::flat_hash_map< 156 int /*context_type*/, 157 absl::flat_hash_map<uint64 /*context_id*/, ContextGroup>>; 158 159 // EventForest augments the input XSpace with the trace context. The trace 160 // context is created by stitching XEvents (1) using the nesting relationship 161 // within the same thread and (2) comparing the semantic arguments or using 162 // connect_info_list across threads. It also groups the events by the root 163 // events specified in root_event_types or marked by the semantic argument. 164 class EventForest { 165 public: 166 void AddSpace( 167 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 168 XSpace* space); 169 170 void AddPlanes( 171 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 172 const std::vector<XPlane*>& planes); 173 174 void ConnectEvents( 175 const std::vector<InterThreadConnectInfo>& connect_info_list = {}); 176 177 void ConnectTfDataEvents(); 178 179 void GroupEvents( 180 const std::vector<int64>& user_defined_root_event_types = {}); 181 GetEventNodeMap()182 const EventNodeMap& GetEventNodeMap() const { return event_node_map_; } 183 GetGroupMetadataMap()184 const GroupMetadataMap& GetGroupMetadataMap() const { 185 return group_metadata_map_; 186 } 187 188 private: 189 void AddPlane( 190 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 191 XPlane* plane); 192 193 // Creates an EventNode for each event in event_node_map and connect events 194 // according to the nesting relationship within the thread. 195 void ConnectIntraThread(XPlane* plane, XPlaneVisitor* visitor, 196 ContextGroupMap* context_groups); 197 198 // Connects events across threads according to connect_info_list. 199 void ConnectInterThread( 200 const std::vector<InterThreadConnectInfo>& connect_info_list); 201 202 void ProcessUserDefinedRootEvents( 203 const std::vector<int64 /*EventType*/>& user_defined_root_event_types); 204 205 // Creates event groups and populates group_metadata_map. If a TF loop is 206 // used, each TF loop iteration becomes a root. Otherwise, top root events 207 // (i.e., none of their ancestors is a root event) are used as roots. A new 208 // group is created with all events reachable from a root. 209 void CreateEventGroups(); 210 211 // Sets the is_eager stat to true for the eagerly executed GPU kernel events. 212 void MarkEagerlyExecutedGpuKernels(); 213 214 // Sets the is_eager stat to true for the eagerly executed CPU TF op events. 215 void MarkEagerlyExecutedCpuTfOps(); 216 217 // Processes the TF loops and registers the first TF executor event of each 218 // iteraton to `tf_loop_root_events_`. 219 void ProcessTensorFlowLoop(); 220 221 // Processes the worker thread by connecting a FunctionRun with the following 222 // eager ops (e.g., for Keras callback). 223 void ProcessWorker(); 224 225 // Adds model ids to group_metadata_map for inference profiles. 226 void ProcessModelIds(); 227 228 EventNodeMap event_node_map_; 229 std::vector<XPlaneVisitor> visitors_; 230 // std::deque for pointer stability. 231 std::deque<std::pair<XPlane*, XPlaneVisitor>> planes_; 232 EventList root_events_; 233 EventList tf_loop_root_events_; 234 GroupMetadataMap group_metadata_map_; 235 int64 next_group_id_ = 0; 236 }; 237 238 std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList(); 239 240 // Calls GroupEvents with connect_info_list and root_event_types specific to 241 // TensorFlow. 242 void GroupTfEvents(XSpace* space, EventForest* event_forest); 243 void GroupTfEvents(XSpace* space); 244 245 // Returns true if the given space has TF's loop ops. 246 bool CheckLoopOp(const XSpace& space); 247 248 } // namespace profiler 249 } // namespace tensorflow 250 251 #endif // TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ 252