1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/profiler/utils/step_intersection.h"
16
17 #include "tensorflow/core/lib/gtl/map_util.h"
18 #include "tensorflow/core/platform/logging.h"
19 #include "tensorflow/core/profiler/utils/timespan.h"
20
21 namespace tensorflow {
22 namespace profiler {
23
24 namespace {
25
26 // Returns the timespan in this step (across all cores).
StepTimespan(const PerCoreStepInfo & percore_stepinfo)27 Timespan StepTimespan(const PerCoreStepInfo& percore_stepinfo) {
28 uint64 min_ps = kuint64max;
29 uint64 max_ps = 0;
30 for (const auto& core_stepinfo : percore_stepinfo.step_info_per_core()) {
31 const auto& stepinfo = core_stepinfo.second;
32 uint64 begin_ps = stepinfo.begin_ps();
33 uint64 end_ps = begin_ps + stepinfo.duration_ps();
34 min_ps = std::min(min_ps, begin_ps);
35 max_ps = std::max(max_ps, end_ps);
36 }
37 return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
38 : Timespan();
39 }
40
41 // Returns the timespan across all steps in the given step_db.
AllStepsTimespan(const StepDatabaseResult & step_db)42 Timespan AllStepsTimespan(const StepDatabaseResult& step_db) {
43 uint64 min_ps = kuint64max;
44 uint64 max_ps = 0;
45 for (const auto& step : step_db.step_sequence()) {
46 Timespan timespan = StepTimespan(step);
47 uint64 begin_ps = timespan.begin_ps();
48 uint64 end_ps = timespan.end_ps();
49 min_ps = std::min(min_ps, begin_ps);
50 max_ps = std::max(max_ps, end_ps);
51 }
52 return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
53 : Timespan();
54 }
55
56 struct AlignmentInfo {
57 StepsAlignment alignment;
58 double similarity;
59 };
60
61 // Computes the similarity between the given two steps. The closer their
62 // timespans are, the larger is the similarity.
StepSimilarity(const PerCoreStepInfo & subordinate_step,const PerCoreStepInfo & chief_step)63 double StepSimilarity(const PerCoreStepInfo& subordinate_step,
64 const PerCoreStepInfo& chief_step) {
65 Timespan subordinate_timespan = StepTimespan(subordinate_step);
66 Timespan chief_timespan = StepTimespan(chief_step);
67 return chief_timespan.OverlappedDurationPs(subordinate_timespan);
68 }
69
70 // If the subordinate steps and the chief steps are aligned at the given anchor
71 // points (i.e. at the subordinate_anchor step on the subordinate sequence, at
72 // the chief_anchor step on the chief sequence), returns the corresponding
73 // AlignmentInfo.
ComputeAlignmentInfo(const StepDatabaseResult & subordinate,uint32 subordinate_anchor,const StepDatabaseResult & chief,uint32 chief_anchor)74 AlignmentInfo ComputeAlignmentInfo(const StepDatabaseResult& subordinate,
75 uint32 subordinate_anchor,
76 const StepDatabaseResult& chief,
77 uint32 chief_anchor) {
78 // Assumes that the step at subordinate_anchor on the subordinate sequence is
79 // aligned with the step at the chief_anchor on the chief sequence. Then the
80 // number of steps before the anchor is the minimum of the number of steps
81 // before the anchor in the subordinate and that before the anchor in the
82 // chief. Similarly, the number of steps after the anchor is the minimum of
83 // the number of steps after the anchor in the subordinate and that after the
84 // anchor in the chief.
85 uint32 pre_anchor_steps = std::min(subordinate_anchor, chief_anchor);
86 uint32 post_anchor_steps =
87 std::min(subordinate.step_sequence_size() - subordinate_anchor,
88 chief.step_sequence_size() - chief_anchor);
89 // total number of steps aligned = pre_anchor_steps + post_anchor_steps.
90 uint32 alignment_steps = pre_anchor_steps + post_anchor_steps;
91
92 double similarity = 0;
93 // Where the aligned steps begin on the subordinate sequence.
94 uint32 begin_subordinate_idx = subordinate_anchor - pre_anchor_steps;
95 // Where the aligned steps begin on the chief sequence.
96 uint32 begin_chief_idx = chief_anchor - pre_anchor_steps;
97
98 for (uint32 i = 0; i < alignment_steps; i++) {
99 // Accumulates the similarity at each step.
100 similarity +=
101 StepSimilarity(subordinate.step_sequence(begin_subordinate_idx + i),
102 chief.step_sequence(begin_chief_idx + i));
103 }
104 StepsAlignment alignment = {begin_subordinate_idx, begin_chief_idx,
105 alignment_steps};
106 return {alignment, similarity};
107 }
108
109 // Returns the best alignment for aligning subordinate against chief.
FindStepsAlignment(const StepDatabaseResult & subordinate,const StepDatabaseResult & chief)110 StepsAlignment FindStepsAlignment(const StepDatabaseResult& subordinate,
111 const StepDatabaseResult& chief) {
112 double max_similarity = -1;
113 StepsAlignment alignment = {0, 0, 0};
114 if (subordinate.step_sequence_size() == 0 || chief.step_sequence_size() == 0)
115 return alignment;
116 for (auto c = 0; c < chief.step_sequence_size(); c++) {
117 AlignmentInfo info =
118 ComputeAlignmentInfo(subordinate, /*subordinate_anchor=*/0, chief, c);
119 if (info.similarity <= max_similarity) continue;
120 max_similarity = info.similarity;
121 alignment = info.alignment;
122 }
123 for (auto s = 1; s < subordinate.step_sequence_size(); s++) {
124 // s starts at 1 instead of 0, because the loop above already considers
125 // (s=0, c=0).
126 AlignmentInfo info =
127 ComputeAlignmentInfo(subordinate, s, chief, /*chief_anchor=*/0);
128 if (info.similarity <= max_similarity) continue;
129 max_similarity = info.similarity;
130 alignment = info.alignment;
131 }
132 return alignment;
133 }
134
StringStepsAlignment(const StepsAlignment & alignment)135 std::string StringStepsAlignment(const StepsAlignment& alignment) {
136 return absl::StrCat(
137 "[begin_subordinate_idx: ", alignment.begin_subordinate_idx,
138 ", begin_chief_idx: ", alignment.begin_chief_idx,
139 ", num_steps: ", alignment.num_steps, "]");
140 }
141
StringDstStepNumbers(const std::vector<uint32> & step_numbers)142 std::string StringDstStepNumbers(const std::vector<uint32>& step_numbers) {
143 std::string str;
144 absl::StrAppend(&str, "[");
145 for (auto i = 0; i < step_numbers.size(); i++) {
146 if (i > 0) absl::StrAppend(&str, ", ");
147 absl::StrAppend(&str, step_numbers[i]);
148 }
149 absl::StrAppend(&str, "]");
150 return str;
151 }
152
StringSrcToDstIndexMap(uint32 src_first_step_idx,uint32 num_steps)153 std::string StringSrcToDstIndexMap(uint32 src_first_step_idx,
154 uint32 num_steps) {
155 std::string str;
156 absl::StrAppend(&str, "[");
157 for (auto i = 0; i < num_steps; i++) {
158 if (i > 0) absl::StrAppend(&str, ", ");
159 absl::StrAppend(&str, src_first_step_idx + i, ":", i);
160 }
161 absl::StrAppend(&str, "]");
162 return str;
163 }
164
165 } // namespace
166
StepIntersection(uint32 max_steps,const absl::flat_hash_map<uint32,const StepDatabaseResult * > & perhost_stepdb)167 StepIntersection::StepIntersection(
168 uint32 max_steps,
169 const absl::flat_hash_map<uint32, const StepDatabaseResult*>&
170 perhost_stepdb) {
171 // Figures out the host with the shortest timespan among their steps (called
172 // this host the "chief").
173 chief_host_id_ = kuint32max;
174 uint64 min_duration_ps = kuint64max;
175 const StepDatabaseResult* chief_step_db = nullptr;
176 for (const auto& hostid_stepdb : perhost_stepdb) {
177 auto host_id = hostid_stepdb.first;
178 const auto& step_db = hostid_stepdb.second;
179 Timespan timespan = AllStepsTimespan(*step_db);
180 if (timespan.duration_ps() < min_duration_ps) {
181 chief_host_id_ = host_id;
182 chief_step_db = step_db;
183 min_duration_ps = timespan.duration_ps();
184 }
185 }
186 if (chief_host_id_ == kuint32max) {
187 // There is no step at all on any host.
188 steps_dropped_ = 0;
189 begin_chief_idx_ = 0;
190 end_chief_idx_ = 0;
191 return;
192 }
193
194 uint32 max_begin_chief_idx = 0;
195 uint32 min_end_chief_idx = kuint32max;
196 // Aligns the steps in all hosts with those in the chief.
197 for (const auto& hostid_stepdb : perhost_stepdb) {
198 auto host_id = hostid_stepdb.first;
199 const auto& step_db = hostid_stepdb.second;
200 if (host_id == chief_host_id_) {
201 // Simply aligns with itself.
202 perhost_alignment_[host_id] = {
203 /*begin_subordinate_idx=*/0, /*begin_chief_idx=*/0,
204 static_cast<uint32>(step_db->step_sequence_size())};
205 } else {
206 perhost_alignment_[host_id] =
207 FindStepsAlignment(*step_db, *chief_step_db);
208 }
209 // Intersects this host's alignment with other hosts' alignments.
210 uint32 host_begin_chief_idx = perhost_alignment_[host_id].begin_chief_idx;
211 max_begin_chief_idx = std::max(max_begin_chief_idx, host_begin_chief_idx);
212 uint32 host_end_chief_idx = perhost_alignment_[host_id].begin_chief_idx +
213 perhost_alignment_[host_id].num_steps;
214 min_end_chief_idx = std::min(min_end_chief_idx, host_end_chief_idx);
215 }
216 DCHECK(max_begin_chief_idx <= min_end_chief_idx);
217
218 begin_chief_idx_ = max_begin_chief_idx;
219
220 // Takes max_steps into account.
221 uint32 num_steps = min_end_chief_idx - max_begin_chief_idx;
222 if (num_steps > max_steps) {
223 steps_dropped_ = num_steps - max_steps;
224 // TODO(ckluk): Drops from both ends to avoid incomplete steps at the
225 // beginning and end of the profile.
226 end_chief_idx_ = max_begin_chief_idx + max_steps;
227 } else {
228 steps_dropped_ = 0;
229 end_chief_idx_ = min_end_chief_idx;
230 }
231 }
232
DstStepNumbers() const233 std::vector<uint32> StepIntersection::DstStepNumbers() const {
234 // TODO(ckluk): Honors training-loop boundaries (if more than one loop
235 // sampled).
236 std::vector<uint32> result;
237 result.reserve(NumSteps());
238 for (uint32 i = 0; i < NumSteps(); i++) {
239 result.push_back(i);
240 }
241 return result;
242 }
243
FirstStepIndex(uint32 host_id) const244 uint32 StepIntersection::FirstStepIndex(uint32 host_id) const {
245 const auto* alignment = gtl::FindOrNull(perhost_alignment_, host_id);
246 if (alignment == nullptr) return 0;
247 DCHECK(alignment->begin_chief_idx <= begin_chief_idx_);
248 uint32 shift = begin_chief_idx_ - alignment->begin_chief_idx;
249 uint32 begin_subordinate_idx = alignment->begin_subordinate_idx + shift;
250 return begin_subordinate_idx;
251 }
252
DebugString() const253 std::string StepIntersection::DebugString() const {
254 std::string str;
255 absl::StrAppend(&str, "chief host id_: ", chief_host_id_, "\n");
256 absl::StrAppend(&str, "begin_chief_idx_: ", begin_chief_idx_,
257 ", num_steps: ", NumSteps(), "\n");
258 absl::StrAppend(
259 &str, "DstStepNumbers(): ", StringDstStepNumbers(DstStepNumbers()), "\n");
260
261 std::vector<uint32> host_ids;
262 host_ids.reserve(perhost_alignment_.size());
263 for (const auto& hostid_alignment : perhost_alignment_) {
264 auto host_id = hostid_alignment.first;
265 host_ids.push_back(host_id);
266 }
267 absl::c_sort(host_ids);
268
269 absl::StrAppend(&str, "perhost_alignment:\n");
270 for (const auto host_id : host_ids) {
271 const auto* ptr = gtl::FindOrNull(perhost_alignment_, host_id);
272 if (ptr == nullptr) continue;
273 absl::StrAppend(&str, "host: ", host_id,
274 ", step-alignment: ", StringStepsAlignment(*ptr), "\n");
275 }
276 absl::StrAppend(&str, "SrcToDstIndexMap():\n");
277 for (const auto host_id : host_ids) {
278 absl::StrAppend(&str, "host: ", host_id, ", src-to-dst-index-map: ",
279 StringSrcToDstIndexMap(FirstStepIndex(host_id), NumSteps()),
280 "\n");
281 }
282 return str;
283 }
284
285 } // namespace profiler
286 } // namespace tensorflow
287