/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_
#define TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_

#include <deque>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"

namespace tensorflow {
namespace profiler {

// Information required to connect events across threads. The first two fields
// specify the event types of parent and child events. In addition to matching
// the event types, both events should have stats of the stat types specified
// in parent_stat_types and child_stat_types, respectively, and their values
// should be the same.
41 struct InterThreadConnectInfo { 42 int64 parent_event_type; 43 int64 child_event_type; 44 std::vector<int64> parent_stat_types; 45 std::vector<int64> child_stat_types; 46 }; 47 48 struct ContextInfo { ContextInfoContextInfo49 ContextInfo(int type, uint64 id) : type(type), id(id) {} 50 int type; 51 uint64 id; 52 }; 53 54 struct GroupMetadata { 55 std::string name; 56 std::string model_id; // inference only. 57 absl::flat_hash_set<int64> parents; 58 absl::flat_hash_set<int64> children; 59 }; 60 61 using GroupMetadataMap = absl::flat_hash_map<int64 /*group_id*/, GroupMetadata>; 62 63 // A wrapper for XEvent with parent and children pointers. Through these 64 // pointers, a tree of EventNode is formed. 65 class EventNode { 66 public: 67 // REQUIRED: all inputs should not be nullptr. 68 EventNode(const XPlaneVisitor* plane, XLine* raw_line, XEvent* raw_event); 69 70 EventNode(const EventNode& event_node); 71 GetParents()72 const std::vector<EventNode*>& GetParents() const { return parents_; } 73 GetChildren()74 const std::vector<EventNode*>& GetChildren() const { return children_; } 75 AddChild(EventNode * child)76 void AddChild(EventNode* child) { 77 children_.push_back(child); 78 child->parents_.push_back(this); 79 } 80 GetGroupId()81 absl::optional<int64> GetGroupId() const { return group_id_; } 82 83 std::string GetGroupName() const; 84 85 void SetGroupId(int64_t group_id); 86 87 // Sets group_id for this node and its descendants. 88 void PropagateGroupId(int64_t group_id, GroupMetadataMap* group_metadata_map); 89 GetPlaneVisitor()90 const XPlaneVisitor& GetPlaneVisitor() const { return *plane_; } 91 GetEventVisitor()92 const XEventVisitor& GetEventVisitor() const { return visitor_; } 93 94 absl::optional<XStatVisitor> GetContextStat(int64_t stat_type) const; 95 96 void AddStepName(absl::string_view step_name); 97 98 // Add a helper stat, "selected_group_ids", with group_ids of the groups 99 // connected to this event's group. 
100 void AddSelectedGroupIds(const GroupMetadataMap& group_metadata_map); 101 102 void SetIsEager(bool is_eager); 103 104 // Returns true if this event is part of eagerly executed op. 105 bool IsEager(); 106 107 bool IsNestedIn(EventNode* parent); 108 109 // Returns the closest parent (including itself) of the given event type. 110 const EventNode* FindParent(int64_t event_type) const; 111 GetProducerContext()112 absl::optional<ContextInfo> GetProducerContext() const { 113 return producer_context_; 114 } 115 GetConsumerContext()116 absl::optional<ContextInfo> GetConsumerContext() const { 117 return consumer_context_; 118 } 119 SetRootLevel(int root_level)120 void SetRootLevel(int root_level) { root_level_ = root_level; } 121 RootLevel()122 int RootLevel() const { return root_level_; } 123 IsAsync()124 bool IsAsync() const { return is_async_; } 125 126 // Compare two EventNodes based on start timestamp. 127 bool operator<(const EventNode& other) const { 128 return GetEventVisitor().TimestampPs() < 129 other.GetEventVisitor().TimestampPs(); 130 } 131 132 private: 133 XStat* FindOrAddStatByType(int64_t stat_type); 134 135 const XPlaneVisitor* plane_; 136 XEventVisitor visitor_; 137 XLine* raw_line_; 138 XEvent* raw_event_; 139 std::vector<EventNode*> parents_; 140 std::vector<EventNode*> children_; 141 absl::optional<int64> group_id_; 142 absl::optional<ContextInfo> producer_context_; 143 absl::optional<ContextInfo> consumer_context_; 144 // Root event level. 145 // By default root_level_ is set to 0, which means it is not a root event. 146 // Events with root_level_ greater than 0 are considered as root events. 
147 int root_level_ = 0; 148 bool is_async_ = false; 149 }; 150 151 using EventNodeMap = 152 absl::flat_hash_map<int64 /*event_type*/, 153 std::vector<std::unique_ptr<EventNode>>>; 154 155 using EventList = std::vector<EventNode*>; 156 157 struct ContextGroup { 158 std::vector<EventNode*> producers; 159 std::vector<EventNode*> consumers; 160 }; 161 162 using ContextGroupMap = absl::flat_hash_map< 163 int /*context_type*/, 164 absl::flat_hash_map<uint64 /*context_id*/, ContextGroup>>; 165 166 // EventForest augments the input XSpace with the trace context. The trace 167 // context is created by stitching XEvents (1) using the nesting relationship 168 // within the same thread and (2) comparing the semantic arguments or using 169 // connect_info_list across threads. It also groups the events by the root 170 // events specified in root_event_types or marked by the semantic argument. 171 class EventForest { 172 public: 173 void AddSpace( 174 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 175 XSpace* space); 176 177 void AddPlanes( 178 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 179 const std::vector<XPlane*>& planes); 180 181 void ConnectEvents( 182 const std::vector<InterThreadConnectInfo>& connect_info_list = {}); 183 184 void ConnectTfDataEvents(); 185 186 void GroupEvents(); 187 GetEventNodeMap()188 const EventNodeMap& GetEventNodeMap() const { return event_node_map_; } 189 GetGroupMetadataMap()190 const GroupMetadataMap& GetGroupMetadataMap() const { 191 return group_metadata_map_; 192 } 193 194 private: 195 void AddPlane( 196 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 197 XPlane* plane); 198 199 // Creates an EventNode for each event in event_node_map and connect events 200 // according to the nesting relationship within the thread. 
201 void ConnectIntraThread(XPlane* plane, XPlaneVisitor* visitor, 202 ContextGroupMap* context_groups); 203 204 // Connects events across threads according to connect_info_list. 205 void ConnectInterThread( 206 const std::vector<InterThreadConnectInfo>& connect_info_list); 207 208 // Creates event groups and populates group_metadata_map. If a TF loop is 209 // used, each TF loop iteration becomes a root. Otherwise, top root events 210 // (i.e., none of their ancestors is a root event) are used as roots. A new 211 // group is created with all events reachable from a root. 212 void CreateEventGroups(); 213 214 // Sets the is_eager stat to true for the eagerly executed GPU kernel events. 215 void MarkEagerlyExecutedGpuKernels(); 216 217 // Sets the is_eager stat to true for the eagerly executed CPU TF op events. 218 void MarkEagerlyExecutedCpuTfOps(); 219 220 // Populate all the step ids that associated with tf.data pipeline. 221 // Because FunctionRun is considered as root, but we want to exclude those 222 // FunctionRuns from tf.data. 223 void ProcessTfDataSteps(); 224 225 // Processes the TF loops and registers the first TF executor event of each 226 // iteraton to `tf_loop_root_events_`. 227 void ProcessTensorFlowLoop(); 228 229 // Processes the worker thread by connecting a FunctionRun with the following 230 // eager ops (e.g., for Keras callback). 231 void ProcessWorker(); 232 233 // Adds model ids to group_metadata_map for inference profiles. 234 void ProcessModelIds(); 235 236 EventNodeMap event_node_map_; 237 std::vector<XPlaneVisitor> visitors_; 238 // std::deque for pointer stability. 239 std::deque<std::pair<XPlane*, XPlaneVisitor>> planes_; 240 // The "step" id (actually it is "function" id that are associated with 241 // the tf.data pipeline. 
242 absl::flat_hash_set<int64> tf_data_step_ids_; 243 EventList tf_loop_root_events_; 244 GroupMetadataMap group_metadata_map_; 245 }; 246 247 std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList(); 248 249 // Calls GroupEvents with connect_info_list and root_event_types specific to 250 // TensorFlow. 251 void GroupTfEvents(XSpace* space, EventForest* event_forest); 252 void GroupTfEvents(XSpace* space); 253 254 // Returns true if the given space has TF's loop ops. 255 bool CheckLoopOp(const XSpace& space); 256 257 } // namespace profiler 258 } // namespace tensorflow 259 260 #endif // TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ 261