1 /**
2 * Copyright 2020-2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "debug/debugger/tensor_summary.h"
17 #include <cmath>
18 #include <algorithm>
19 #include <future>
20 #include <limits>
21 #include <memory>
22 #include <bitset>
23 #include <tuple>
24 #include <type_traits>
25
26 #ifdef OFFLINE_DBG_MODE
27 #include "base/float16.h"
28 #endif
29
30 namespace mindspore {
31 using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
32
// Starts with an unbounded range (-inf, +inf) and empty tallies; the real
// bounds are injected later via set_range_start_inclusive/set_range_end_inclusive
// (see InitCalculators).
RangeCountCalculator::RangeCountCalculator()
    : range_start_inclusive(-std::numeric_limits<double>::infinity()),
      range_end_inclusive(std::numeric_limits<double>::infinity()),
      count(0),
      total(0) {}
38
ProcessElement(double element)39 void RangeCountCalculator::ProcessElement(double element) {
40 if (element >= range_start_inclusive && element <= range_end_inclusive) {
41 count += 1;
42 }
43 total += 1;
44 }
45
GetPercentInRange() const46 double RangeCountCalculator::GetPercentInRange() const {
47 if (total == 0) {
48 return 0.0;
49 }
50 const double factor = 100.0;
51 return factor * count / total;
52 }
53
// Default tolerances match NumPy's allclose defaults (atol=1e-8, rtol=1e-5);
// both can be overridden per watchpoint via set_atol/set_rtol.
AllCloseCalculator::AllCloseCalculator() : atol(1.0e-8), rtol(1.0e-5), result(true) {}
55
ProcessElement(double current,double previous)56 void AllCloseCalculator::ProcessElement(double current, double previous) {
57 result = result && (std::abs(current - previous) <= (atol + rtol * std::abs(previous)));
58 }
59
// True iff every element pair seen so far was within tolerance.
bool AllCloseCalculator::IsAllClose() const { return result; }
61
// Running (incremental) mean, starting empty.
MeanCalculator::MeanCalculator() : mean(0.0), count(0) {}
63
ProcessElement(double value)64 void MeanCalculator::ProcessElement(double value) {
65 count += 1;
66 double delta = value - mean;
67 mean += delta / count;
68 }
69
// Mean of all processed elements (0.0 if none were processed).
double MeanCalculator::GetMean() const { return mean; }
71
// Welford accumulator state: running mean, element count, and m2 (sum of
// squared deviations from the current mean).
VarianceAndMeanCalculator::VarianceAndMeanCalculator() : mean(0.0), count(0), m2(0.0) {}
73
// One step of Welford's online algorithm for mean and variance.
// NOTE: the statement order is essential — `delta` uses the OLD mean while
// the m2 update uses (value - NEW mean); do not reorder.
void VarianceAndMeanCalculator::ProcessElement(double value) {
  count += 1;
  double delta = value - mean;  // deviation from the previous mean
  mean += delta / count;
  m2 += delta * (value - mean);  // delta_old * delta_new
}
80
// Mean of all processed elements (0.0 if none were processed).
double VarianceAndMeanCalculator::GetMean() const { return mean; }
82
GetVariance() const83 double VarianceAndMeanCalculator::GetVariance() const {
84 if (count > 1) {
85 return m2 / (count - 1);
86 }
87 return 0.0;
88 }
89
GetStandardDeviation() const90 double VarianceAndMeanCalculator::GetStandardDeviation() const { return sqrt(GetVariance()); }
91
// Accumulate one element's square for the L2 norm.
// (Member name `squre_sum` [sic] is declared in tensor_summary.h.)
void L2Calculator::ProcessElement(double value) { squre_sum += value * value; }
93
ProcessElement(const L2Calculator & other)94 void L2Calculator::ProcessElement(const L2Calculator &other) { this->squre_sum += other.squre_sum; }
95
// L2 (Euclidean) norm of all processed elements: sqrt(sum of squares).
double L2Calculator::GetL2Value() const { return std::sqrt(squre_sum); }
97
// Constructs a summary over a raw tensor buffer of `num_elements` values of
// type T, optionally paired with a previous snapshot of the same tensor
// (`previous_tensor_ptr`, `prev_num_elements`) for change-detection watchpoints.
// Does not take ownership of either buffer; both must outlive this object.
// min_/max_ start at the extreme sentinels so the first element always wins;
// the init-list order must match the member declaration order in the header.
template <typename T>
TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
                                uint64_t num_elements, uint64_t prev_num_elements)
    : current_tensor_ptr_(static_cast<const T *>(current_tensor_ptr)),
      prev_tensor_ptr_(static_cast<const T *>(previous_tensor_ptr)),
      num_elements_(num_elements),
      prev_num_elements_(prev_num_elements),
      min_(std::numeric_limits<double>::max()),
      max_(std::numeric_limits<double>::lowest()),
      avg_(0.0),
      is_bool_(false),
      neg_zero_count_(0),
      pos_zero_count_(0),
      pos_inf_count_(0),
      neg_inf_count_(0),
      inf_count_(0),
      nan_count_(0),
      zero_count_(0),
      epsilon_(1.0e-9),  // guards against division by zero in update-ratio stats
      mean_sd_cal_enabled_(false) {}
118
119 /*
120 * Feature group: Online debugger, Offline debugger.
121 * Target device group: Ascend, GPU.
122 * Runtime category: Old runtime, MindRT.
123 * Description: Initialize watchpoints calculators based on the watchpoint category. Process all the elements within the
124 * current tensor.
125 */
126 template <typename T>
SummarizeTensor(const std::vector<DebugServices::watchpoint_t> & wps)127 void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
128 InitCalculators(wps);
129 for (size_t i = 0; i < num_elements_; ++i) {
130 auto current_value = static_cast<double>(current_tensor_ptr_[i]);
131 double previous_value = std::numeric_limits<double>::quiet_NaN();
132 if (prev_tensor_ptr_) {
133 if (num_elements_ == prev_num_elements_) {
134 previous_value = static_cast<double>(prev_tensor_ptr_[i]);
135 } else {
136 MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
137 }
138 }
139 if (std::isinf(current_value)) {
140 inf_count_ += 1;
141 }
142 if (std::isnan(current_value)) {
143 nan_count_ += 1;
144 }
145 if (current_value == 0.0) {
146 zero_count_ += 1;
147 }
148 max_ = std::max(max_, current_value);
149 min_ = std::min(min_, current_value);
150 if (mean_sd_cal_enabled_) {
151 current_mean_variance_.ProcessElement(current_value);
152 }
153 for (auto &it : all_close_) {
154 it.second->ProcessElement(current_value, previous_value);
155 }
156 for (auto &range_count : range_counts_) {
157 range_count.second->ProcessElement(current_value);
158 }
159 for (auto &mean : means_) {
160 if (mean.first.compare("curr_prev_diff_mean") == 0) {
161 mean.second->ProcessElement(std::abs(current_value - previous_value));
162 } else if (mean.first.compare("abs_prev_mean") == 0) {
163 mean.second->ProcessElement(std::abs(previous_value));
164 } else if (mean.first.compare("abs_current_mean") == 0) {
165 mean.second->ProcessElement(std::abs(current_value));
166 }
167 }
168 }
169 }
170
171 /*
172 * Feature group: Online debugger, Offline debugger.
173 * Target device group: Ascend, GPU.
174 * Runtime category: Old runtime, MindRT.
175 * Description: Calculates statistics on chunks of data.
176 */
177 template <typename T>
TensorStatistics(DbgDataType dtype_value)178 void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
179 if (dtype_value == DT_BOOL) {
180 is_bool_ = true;
181 }
182 const uint64_t default_threads = 32;
183 const uint64_t default_elements_per_thread = 10000;
184
185 if (num_elements_ <= default_elements_per_thread) {
186 return TensorStatisticsSingleThread();
187 }
188 uint64_t desired_threads = num_elements_ / default_elements_per_thread;
189 uint64_t actual_threads = std::min(desired_threads, default_threads);
190 uint64_t actual_elements_per_thread = num_elements_ / actual_threads;
191
192 // Use multithread to calculate statistic on chunks of data
193 void *previous_tensor_ptr = nullptr;
194 size_t offset = 0;
195 std::vector<std::unique_ptr<TensorSummary<T>>> summary_vec;
196 std::vector<std::future<void>> summary_future_vec;
197 for (uint64_t i = 0; i < actual_threads; i++) {
198 uint64_t num_elements_for_thread;
199 if (i == actual_threads - 1) {
200 num_elements_for_thread = num_elements_ - offset;
201 } else {
202 num_elements_for_thread = actual_elements_per_thread;
203 }
204 (void)summary_vec.emplace_back(std::make_unique<TensorSummary<T>>(current_tensor_ptr_ + offset, previous_tensor_ptr,
205 num_elements_for_thread, 0));
206 (void)summary_future_vec.emplace_back(
207 std::async(std::launch::async, &TensorSummary<T>::TensorStatisticsSingleThread, summary_vec[i].get()));
208 offset += num_elements_for_thread;
209 }
210
211 // Aggregate results of all chunks
212 num_elements_ = 0; // Let current tensor weight 0 in the aggregation
213 for (unsigned int i = 0; i < summary_future_vec.size(); i++) {
214 summary_future_vec[i].wait();
215 summary_future_vec[i].get();
216 auto &cur_summary = *(summary_vec[i]);
217 num_elements_ += cur_summary.num_elements_;
218 min_ = std::min(min_, cur_summary.min_);
219 max_ = std::max(max_, cur_summary.max_);
220 double avg_delta = cur_summary.avg_ - avg_;
221 avg_ += avg_delta * (cur_summary.num_elements_ / num_elements_);
222 neg_zero_count_ += cur_summary.neg_zero_count_;
223 pos_zero_count_ += cur_summary.pos_zero_count_;
224 neg_inf_count_ += cur_summary.neg_inf_count_;
225 pos_inf_count_ += cur_summary.pos_inf_count_;
226 inf_count_ += cur_summary.inf_count_;
227 nan_count_ += cur_summary.nan_count_;
228 zero_count_ += cur_summary.zero_count_;
229 l2_calc_.ProcessElement(cur_summary.l2_calc_);
230 }
231 }
232
/*
 * Feature group: Online debugger, Offline debugger.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Process all the elements of the chunked data and calculates the statistics.
 */
template <typename T>
void TensorSummary<T>::TensorStatisticsSingleThread() {
  MeanCalculator mean_calc = MeanCalculator();
  for (size_t i = 0; i < num_elements_; ++i) {
    auto current_value = static_cast<double>(current_tensor_ptr_[i]);
    l2_calc_.ProcessElement(current_value);
    if (std::isnan(current_value)) {
      nan_count_ += 1;
      // Deliberately poison max_/min_ with NaN: every later std::max/std::min
      // comparison against NaN is false and returns the first argument, so
      // once a NaN is seen, max_ and min_ stay NaN for this chunk.
      max_ = current_value;
      min_ = current_value;
      mean_calc.ProcessElement(current_value);  // mean also becomes NaN
      continue;
    }
    if (std::isinf(current_value)) {
      if (current_value > 0) {
        pos_inf_count_ += 1;
      } else {
        neg_inf_count_ += 1;
      }
    }
    if (current_value == 0.0) {
      zero_count_ += 1;
    }
    // only considering tensor elements with value
    // NOTE: despite their names, these count strictly-negative and
    // strictly-positive values (zero, including -0.0, is excluded by the
    // equality check; -inf/+inf are included; NaN was skipped above).
    if (std::signbit(current_value) && !(current_value == 0.0)) {
      neg_zero_count_ += 1;
    } else if (!(current_value == 0.0)) {
      pos_zero_count_ += 1;
    }
    max_ = std::max(max_, current_value);
    min_ = std::min(min_, current_value);
    mean_calc.ProcessElement(current_value);
  }
  avg_ = mean_calc.GetMean();
}
274
/*
 * Feature group: Online debugger, Offline debugger.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Returns a tuple with three elements, the first element is a bool and it is true if the watchpoint is
 * hit. The second element is the error_code which is set in this function and the third element is the parameter_list
 * for the watchpoint.
 */
template <typename T>
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
  DebugServices::watchpoint_t wp) {
  auto parameter_list = wp.parameter_list;
  bool hit = false;
  const uint8_t bit_size = 32;
  std::bitset<bit_size> error_code;
  CONDITION_TYPE type = wp.condition.type;
  // bit 0 denotes presence of nan
  (void)error_code.set(0, nan_count_ > 0);
  // bit 1 denotes presence of inf
  (void)error_code.set(1, inf_count_ > 0);

  // NaN/Inf conditions are decided directly from the counters; for these the
  // error code is cleared since NaN/Inf presence IS the condition, not an error.
  if (type == CONDITION_TYPE::HAS_NAN) {
    error_code.reset();
    hit = nan_count_ > 0;
  } else if (type == CONDITION_TYPE::HAS_INF) {
    error_code.reset();
    hit = inf_count_ > 0;
  } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
    error_code.reset();
    hit = (nan_count_ + inf_count_) > 0;
  } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
    // "Not changed" is answered by the allclose comparison accumulated in
    // SummarizeTensor for this watchpoint id.
    hit = all_close_[wp.id]->IsAllClose();
  } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
              type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
             !prev_tensor_ptr_) {
    // bit 2 denotes absence of previous tensor
    error_code.set(2, true);
  }

  // Parameter-based evaluation only runs on clean data (no NaN/Inf/missing-prev).
  if (error_code.none()) {
    for (auto &parameter : parameter_list) {
      // NOTE(review): error_code cannot change inside this loop, so the
      // error_code.any() term is always false here; kept for safety.
      if (parameter.disabled || error_code.any()) {
        continue;
      }
      // extract inequality type from watchpoint for backward compatibility
      std::string inequality_type;
      if (wp.is_gt_wp()) {
        inequality_type = "gt";
      } else if (wp.is_lt_wp()) {
        inequality_type = "lt";
      }
      parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type);
      hit = hit || parameter.hit;
    }
  }
  return std::make_tuple(hit, static_cast<int32_t>(error_code.to_ulong()), parameter_list);
}
332
333 template <typename T>
StatLookup(const std::string & parameter_name,const DebugServices::watchpoint_t & wp)334 double_t TensorSummary<T>::StatLookup(const std::string ¶meter_name, const DebugServices::watchpoint_t &wp) {
335 if (parameter_name == "param") {
336 return StatLookup(wp);
337 }
338 std::string param_type;
339 auto pos = parameter_name.find_last_of('_');
340 if (pos != std::string::npos) {
341 param_type = parameter_name.substr(0, pos);
342 }
343
344 if (param_type == "max") {
345 return max_;
346 }
347 if (param_type == "min") {
348 return min_;
349 }
350 if (param_type == "max_min") {
351 return max_ - min_;
352 }
353 if (param_type == "mean") {
354 return current_mean_variance_.GetMean();
355 }
356 if (param_type == "sd") {
357 return current_mean_variance_.GetStandardDeviation();
358 }
359 if (param_type == "abs_mean") {
360 if (means_.find("abs_current_mean") != means_.end()) {
361 return means_["abs_current_mean"]->GetMean();
362 }
363 }
364 if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
365 if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
366 return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
367 }
368 }
369 if (param_type == "range_percentage") {
370 if (range_counts_.find(wp.id) != range_counts_.end()) {
371 return range_counts_[wp.id]->GetPercentInRange();
372 }
373 }
374 if (param_type == "zero_percentage") {
375 return GetZeroValPercent();
376 }
377 return std::numeric_limits<double_t>::quiet_NaN();
378 }
379
380 template <typename T>
StatLookup(const DebugServices::watchpoint_t & wp) const381 double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) const {
382 CONDITION_TYPE type = wp.condition.type;
383 if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
384 return max_;
385 }
386 if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
387 return min_;
388 }
389 if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
390 return current_mean_variance_.GetMean();
391 }
392 if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
393 return current_mean_variance_.GetStandardDeviation();
394 }
395 if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
396 return max_ - min_;
397 }
398 return std::numeric_limits<double_t>::quiet_NaN();
399 }
400
401 template <typename T>
GetZeroValPercent() const402 double_t TensorSummary<T>::GetZeroValPercent() const {
403 if (num_elements_ == 0) {
404 return 0.0;
405 }
406
407 return (zero_count_ * 100.0) / num_elements_;
408 }
409
// Registers the per-watchpoint calculators that SummarizeTensor will feed:
// allclose comparators (keyed by watchpoint id), range counters (keyed by id),
// and named mean calculators. Mean/SD tracking is enabled if any watchpoint
// asks for it. Change-based calculators are only created when a previous
// tensor snapshot exists.
// NOTE(review): assumes wp.parameter_list has at least two entries for
// allclose/range watchpoints — confirm the watchpoint schema guarantees this.
template <typename T>
void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
  for (auto &wp : wps) {
    auto wp_id = wp.id;
    mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
    if (wp.allclose_enabled() && prev_tensor_ptr_) {
      all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
      // parameter_list[0] carries rtol, [1] carries atol; disabled entries keep defaults.
      if (!wp.parameter_list[0].disabled) {
        all_close_[wp_id]->set_rtol(wp.parameter_list[0].value);
      }
      if (!wp.parameter_list[1].disabled) {
        all_close_[wp_id]->set_atol(wp.parameter_list[1].value);
      }
    } else if (wp.range_enabled()) {
      range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
      // parameter_list[0] is the range start, [1] the range end (both inclusive).
      if (!wp.parameter_list[0].disabled) {
        range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
      }
      if (!wp.parameter_list[1].disabled) {
        range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
      }
    } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
      // Two means feed the update-ratio statistic: mean |curr - prev| and mean |prev|.
      (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
      (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
    } else if (wp.abs_mean_enabled()) {
      (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
    }
  }
}
// Explicit instantiations for every element dtype the debugger can summarize.
template class TensorSummary<uint8_t>;
template class TensorSummary<int8_t>;
template class TensorSummary<uint16_t>;
template class TensorSummary<int16_t>;
template class TensorSummary<uint32_t>;
template class TensorSummary<int32_t>;
template class TensorSummary<uint64_t>;
template class TensorSummary<int64_t>;
template class TensorSummary<float16>;
template class TensorSummary<bfloat16>;
template class TensorSummary<float>;
template class TensorSummary<double>;
template class TensorSummary<bool>;
452 } // namespace mindspore
453