• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "debug/debugger/tensor_summary.h"
17 #include <cmath>
18 #include <algorithm>
19 #include <future>
20 #include <limits>
21 #include <memory>
22 #include <bitset>
23 #include <tuple>
24 #include <type_traits>
25 
26 #ifdef OFFLINE_DBG_MODE
27 #include "base/float16.h"
28 #endif
29 
30 namespace mindspore {
31 using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
32 
RangeCountCalculator()33 RangeCountCalculator::RangeCountCalculator()
34     : range_start_inclusive(-std::numeric_limits<double>::infinity()),
35       range_end_inclusive(std::numeric_limits<double>::infinity()),
36       count(0),
37       total(0) {}
38 
ProcessElement(double element)39 void RangeCountCalculator::ProcessElement(double element) {
40   if (element >= range_start_inclusive && element <= range_end_inclusive) {
41     count += 1;
42   }
43   total += 1;
44 }
45 
GetPercentInRange() const46 double RangeCountCalculator::GetPercentInRange() const {
47   if (total == 0) {
48     return 0.0;
49   }
50   const double factor = 100.0;
51   return factor * count / total;
52 }
53 
AllCloseCalculator()54 AllCloseCalculator::AllCloseCalculator() : atol(1.0e-8), rtol(1.0e-5), result(true) {}
55 
ProcessElement(double current,double previous)56 void AllCloseCalculator::ProcessElement(double current, double previous) {
57   result = result && (std::abs(current - previous) <= (atol + rtol * std::abs(previous)));
58 }
59 
IsAllClose() const60 bool AllCloseCalculator::IsAllClose() const { return result; }
61 
MeanCalculator()62 MeanCalculator::MeanCalculator() : mean(0.0), count(0) {}
63 
ProcessElement(double value)64 void MeanCalculator::ProcessElement(double value) {
65   count += 1;
66   double delta = value - mean;
67   mean += delta / count;
68 }
69 
GetMean() const70 double MeanCalculator::GetMean() const { return mean; }
71 
VarianceAndMeanCalculator()72 VarianceAndMeanCalculator::VarianceAndMeanCalculator() : mean(0.0), count(0), m2(0.0) {}
73 
ProcessElement(double value)74 void VarianceAndMeanCalculator::ProcessElement(double value) {
75   count += 1;
76   double delta = value - mean;
77   mean += delta / count;
78   m2 += delta * (value - mean);
79 }
80 
GetMean() const81 double VarianceAndMeanCalculator::GetMean() const { return mean; }
82 
GetVariance() const83 double VarianceAndMeanCalculator::GetVariance() const {
84   if (count > 1) {
85     return m2 / (count - 1);
86   }
87   return 0.0;
88 }
89 
GetStandardDeviation() const90 double VarianceAndMeanCalculator::GetStandardDeviation() const { return sqrt(GetVariance()); }
91 
ProcessElement(double value)92 void L2Calculator::ProcessElement(double value) { squre_sum += value * value; }
93 
ProcessElement(const L2Calculator & other)94 void L2Calculator::ProcessElement(const L2Calculator &other) { this->squre_sum += other.squre_sum; }
95 
GetL2Value() const96 double L2Calculator::GetL2Value() const { return std::sqrt(squre_sum); }
97 
// Summarizes one tensor (and optionally its previous value) for watchpoint
// evaluation. Both buffers are borrowed, not owned; `previous_tensor_ptr` may
// be null when there is no previous tensor. min_/max_ start at the extreme
// sentinels so the first processed element always replaces them.
template <typename T>
TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
                                uint64_t num_elements, uint64_t prev_num_elements)
    : current_tensor_ptr_(static_cast<const T *>(current_tensor_ptr)),
      prev_tensor_ptr_(static_cast<const T *>(previous_tensor_ptr)),
      num_elements_(num_elements),
      prev_num_elements_(prev_num_elements),
      min_(std::numeric_limits<double>::max()),
      max_(std::numeric_limits<double>::lowest()),
      avg_(0.0),
      is_bool_(false),
      neg_zero_count_(0),
      pos_zero_count_(0),
      pos_inf_count_(0),
      neg_inf_count_(0),
      inf_count_(0),
      nan_count_(0),
      zero_count_(0),
      epsilon_(1.0e-9),  // guards division by zero in update-ratio statistics
      mean_sd_cal_enabled_(false) {}
118 
119 /*
120  * Feature group: Online debugger, Offline debugger.
121  * Target device group: Ascend, GPU.
122  * Runtime category: Old runtime, MindRT.
123  * Description: Initialize watchpoints calculators based on the watchpoint category. Process all the elements within the
124  * current tensor.
125  */
template <typename T>
void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
  // Register the per-watchpoint calculators, then stream every element of the
  // current tensor through them in a single pass.
  InitCalculators(wps);
  for (size_t i = 0; i < num_elements_; ++i) {
    auto current_value = static_cast<double>(current_tensor_ptr_[i]);
    // Previous value defaults to NaN so previous-based statistics degrade
    // gracefully when no comparable previous tensor exists.
    double previous_value = std::numeric_limits<double>::quiet_NaN();
    if (prev_tensor_ptr_) {
      if (num_elements_ == prev_num_elements_) {
        previous_value = static_cast<double>(prev_tensor_ptr_[i]);
      } else {
        MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
      }
    }
    // Special-value counters consumed by IsWatchpointHit.
    if (std::isinf(current_value)) {
      inf_count_ += 1;
    }
    if (std::isnan(current_value)) {
      nan_count_ += 1;
    }
    if (current_value == 0.0) {
      zero_count_ += 1;
    }
    max_ = std::max(max_, current_value);
    min_ = std::min(min_, current_value);
    if (mean_sd_cal_enabled_) {
      current_mean_variance_.ProcessElement(current_value);
    }
    // Feed the element into every calculator InitCalculators registered.
    for (auto &it : all_close_) {
      it.second->ProcessElement(current_value, previous_value);
    }
    for (auto &range_count : range_counts_) {
      range_count.second->ProcessElement(current_value);
    }
    // The means_ map is keyed by statistic name; each key receives a
    // different derived quantity.
    for (auto &mean : means_) {
      if (mean.first.compare("curr_prev_diff_mean") == 0) {
        mean.second->ProcessElement(std::abs(current_value - previous_value));
      } else if (mean.first.compare("abs_prev_mean") == 0) {
        mean.second->ProcessElement(std::abs(previous_value));
      } else if (mean.first.compare("abs_current_mean") == 0) {
        mean.second->ProcessElement(std::abs(current_value));
      }
    }
  }
}
170 
171 /*
172  * Feature group: Online debugger, Offline debugger.
173  * Target device group: Ascend, GPU.
174  * Runtime category: Old runtime, MindRT.
175  * Description: Calculates statistics on chunks of data.
176  */
177 template <typename T>
TensorStatistics(DbgDataType dtype_value)178 void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
179   if (dtype_value == DT_BOOL) {
180     is_bool_ = true;
181   }
182   const uint64_t default_threads = 32;
183   const uint64_t default_elements_per_thread = 10000;
184 
185   if (num_elements_ <= default_elements_per_thread) {
186     return TensorStatisticsSingleThread();
187   }
188   uint64_t desired_threads = num_elements_ / default_elements_per_thread;
189   uint64_t actual_threads = std::min(desired_threads, default_threads);
190   uint64_t actual_elements_per_thread = num_elements_ / actual_threads;
191 
192   // Use multithread to calculate statistic on chunks of data
193   void *previous_tensor_ptr = nullptr;
194   size_t offset = 0;
195   std::vector<std::unique_ptr<TensorSummary<T>>> summary_vec;
196   std::vector<std::future<void>> summary_future_vec;
197   for (uint64_t i = 0; i < actual_threads; i++) {
198     uint64_t num_elements_for_thread;
199     if (i == actual_threads - 1) {
200       num_elements_for_thread = num_elements_ - offset;
201     } else {
202       num_elements_for_thread = actual_elements_per_thread;
203     }
204     (void)summary_vec.emplace_back(std::make_unique<TensorSummary<T>>(current_tensor_ptr_ + offset, previous_tensor_ptr,
205                                                                       num_elements_for_thread, 0));
206     (void)summary_future_vec.emplace_back(
207       std::async(std::launch::async, &TensorSummary<T>::TensorStatisticsSingleThread, summary_vec[i].get()));
208     offset += num_elements_for_thread;
209   }
210 
211   // Aggregate results of all chunks
212   num_elements_ = 0;  // Let current tensor weight 0 in the aggregation
213   for (unsigned int i = 0; i < summary_future_vec.size(); i++) {
214     summary_future_vec[i].wait();
215     summary_future_vec[i].get();
216     auto &cur_summary = *(summary_vec[i]);
217     num_elements_ += cur_summary.num_elements_;
218     min_ = std::min(min_, cur_summary.min_);
219     max_ = std::max(max_, cur_summary.max_);
220     double avg_delta = cur_summary.avg_ - avg_;
221     avg_ += avg_delta * (cur_summary.num_elements_ / num_elements_);
222     neg_zero_count_ += cur_summary.neg_zero_count_;
223     pos_zero_count_ += cur_summary.pos_zero_count_;
224     neg_inf_count_ += cur_summary.neg_inf_count_;
225     pos_inf_count_ += cur_summary.pos_inf_count_;
226     inf_count_ += cur_summary.inf_count_;
227     nan_count_ += cur_summary.nan_count_;
228     zero_count_ += cur_summary.zero_count_;
229     l2_calc_.ProcessElement(cur_summary.l2_calc_);
230   }
231 }
232 
233 /*
234  * Feature group: Online debugger, Offline debugger.
235  * Target device group: Ascend, GPU.
236  * Runtime category: Old runtime, MindRT.
237  * Description: Process all the elements of the chunked data and calculates the statistics.
238  */
template <typename T>
void TensorSummary<T>::TensorStatisticsSingleThread() {
  // Single-pass statistics over this summary's element range: min, max, mean,
  // L2 norm, and counts of NaN / +-inf / zero / negative / positive elements.
  MeanCalculator mean_calc = MeanCalculator();
  for (size_t i = 0; i < num_elements_; ++i) {
    auto current_value = static_cast<double>(current_tensor_ptr_[i]);
    l2_calc_.ProcessElement(current_value);
    if (std::isnan(current_value)) {
      nan_count_ += 1;
      // Deliberately overwrite max_/min_ and feed the mean with NaN so the
      // reported statistics surface that the tensor contained NaNs (std::max
      // with a NaN first argument keeps the NaN on later iterations).
      max_ = current_value;
      min_ = current_value;
      mean_calc.ProcessElement(current_value);
      continue;
    }
    if (std::isinf(current_value)) {
      if (current_value > 0) {
        pos_inf_count_ += 1;
      } else {
        neg_inf_count_ += 1;
      }
    }
    if (current_value == 0.0) {
      zero_count_ += 1;
    }
    // only considering tensor elements with value
    // (despite the names, neg_zero_count_/pos_zero_count_ tally negative vs.
    // positive NON-zero elements; signbit also classifies -inf as negative)
    if (std::signbit(current_value) && !(current_value == 0.0)) {
      neg_zero_count_ += 1;
    } else if (!(current_value == 0.0)) {
      pos_zero_count_ += 1;
    }
    max_ = std::max(max_, current_value);
    min_ = std::min(min_, current_value);
    mean_calc.ProcessElement(current_value);
  }
  avg_ = mean_calc.GetMean();
}
274 
275 /*
276  * Feature group: Online debugger, Offline debugger.
277  * Target device group: Ascend, GPU.
278  * Runtime category: Old runtime, MindRT.
279  * Description: Returns a tuple with three elements, the first element is a bool and it is true if the watchpoint is
280  * hit. The second element is the error_code which is set in this function and the third element is the parameter_list
281  * for the watchpoint.
282  */
template <typename T>
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
  DebugServices::watchpoint_t wp) {
  // Evaluates one watchpoint against the statistics collected by
  // SummarizeTensor. Returns {hit, error_code bitmask, evaluated parameters}.
  auto parameter_list = wp.parameter_list;
  bool hit = false;
  const uint8_t bit_size = 32;
  std::bitset<bit_size> error_code;
  CONDITION_TYPE type = wp.condition.type;
  // bit 0 denotes presence of nan
  (void)error_code.set(0, nan_count_ > 0);
  // bit 1 denotes presence of inf
  (void)error_code.set(1, inf_count_ > 0);

  if (type == CONDITION_TYPE::HAS_NAN) {
    // For these conditions NaN/inf are the thing being watched, not an
    // error, so the error bits are cleared before reporting.
    error_code.reset();
    hit = nan_count_ > 0;
  } else if (type == CONDITION_TYPE::HAS_INF) {
    error_code.reset();
    hit = inf_count_ > 0;
  } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
    error_code.reset();
    hit = (nan_count_ + inf_count_) > 0;
  } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
    hit = all_close_[wp.id]->IsAllClose();
  } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
              type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
             !prev_tensor_ptr_) {
    // bit 2 denotes absence of previous tensor
    error_code.set(2, true);
  }

  // Parameters are only evaluated when no error bit is set.
  if (error_code.none()) {
    for (auto &parameter : parameter_list) {
      // NOTE(review): error_code is never modified inside this loop, so the
      // error_code.any() term is always false here; the check effectively
      // reduces to parameter.disabled.
      if (parameter.disabled || error_code.any()) {
        continue;
      }
      // extract inequality type from watchpoint for backward compatibility
      std::string inequality_type;
      if (wp.is_gt_wp()) {
        inequality_type = "gt";
      } else if (wp.is_lt_wp()) {
        inequality_type = "lt";
      }
      parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type);
      hit = hit || parameter.hit;
    }
  }
  return std::make_tuple(hit, static_cast<int32_t>(error_code.to_ulong()), parameter_list);
}
332 
template <typename T>
double_t TensorSummary<T>::StatLookup(const std::string &parameter_name, const DebugServices::watchpoint_t &wp) {
  // Maps a watchpoint parameter name to the matching statistic. "param"
  // delegates to the condition-type based overload; otherwise the trailing
  // "_<suffix>" after the LAST underscore is stripped, e.g. "max_gt" -> "max",
  // "abs_mean_update_ratio_lt" -> "abs_mean_update_ratio".
  if (parameter_name == "param") {
    return StatLookup(wp);
  }
  std::string param_type;
  auto pos = parameter_name.find_last_of('_');
  if (pos != std::string::npos) {
    param_type = parameter_name.substr(0, pos);
  }

  if (param_type == "max") {
    return max_;
  }
  if (param_type == "min") {
    return min_;
  }
  if (param_type == "max_min") {
    return max_ - min_;
  }
  if (param_type == "mean") {
    return current_mean_variance_.GetMean();
  }
  if (param_type == "sd") {
    return current_mean_variance_.GetStandardDeviation();
  }
  // Mean-based statistics are only present when InitCalculators registered
  // the corresponding calculator; otherwise fall through to NaN.
  if (param_type == "abs_mean") {
    if (means_.find("abs_current_mean") != means_.end()) {
      return means_["abs_current_mean"]->GetMean();
    }
  }
  if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
    if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
      // epsilon_ avoids division by zero when the previous mean is 0.
      return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
    }
  }
  if (param_type == "range_percentage") {
    if (range_counts_.find(wp.id) != range_counts_.end()) {
      return range_counts_[wp.id]->GetPercentInRange();
    }
  }
  if (param_type == "zero_percentage") {
    return GetZeroValPercent();
  }
  // Unknown or unavailable statistic: NaN signals "no value".
  return std::numeric_limits<double_t>::quiet_NaN();
}
379 
380 template <typename T>
StatLookup(const DebugServices::watchpoint_t & wp) const381 double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) const {
382   CONDITION_TYPE type = wp.condition.type;
383   if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
384     return max_;
385   }
386   if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
387     return min_;
388   }
389   if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
390     return current_mean_variance_.GetMean();
391   }
392   if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
393     return current_mean_variance_.GetStandardDeviation();
394   }
395   if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
396     return max_ - min_;
397   }
398   return std::numeric_limits<double_t>::quiet_NaN();
399 }
400 
401 template <typename T>
GetZeroValPercent() const402 double_t TensorSummary<T>::GetZeroValPercent() const {
403   if (num_elements_ == 0) {
404     return 0.0;
405   }
406 
407   return (zero_count_ * 100.0) / num_elements_;
408 }
409 
template <typename T>
void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
  // Creates the per-watchpoint calculators that SummarizeTensor will feed.
  // NOTE(review): the branches below index parameter_list[0]/[1] without a
  // size check — assumes these watchpoint kinds always carry at least two
  // parameters; confirm against the watchpoint schema.
  for (auto &wp : wps) {
    auto wp_id = wp.id;
    // Mean/SD tracking is enabled if ANY watchpoint needs it.
    mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
    if (wp.allclose_enabled() && prev_tensor_ptr_) {
      // parameter_list[0]/[1] override rtol/atol when enabled.
      all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
      if (!wp.parameter_list[0].disabled) {
        all_close_[wp_id]->set_rtol(wp.parameter_list[0].value);
      }
      if (!wp.parameter_list[1].disabled) {
        all_close_[wp_id]->set_atol(wp.parameter_list[1].value);
      }
    } else if (wp.range_enabled()) {
      // parameter_list[0]/[1] override the inclusive range bounds.
      range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
      if (!wp.parameter_list[0].disabled) {
        range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
      }
      if (!wp.parameter_list[1].disabled) {
        range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
      }
    } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
      // Update-ratio stats need both the diff mean and the previous mean.
      (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
      (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
    } else if (wp.abs_mean_enabled()) {
      (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
    }
  }
}
// Explicit instantiations for every debugger tensor element type, so the
// template definitions above can live in this translation unit.
template class TensorSummary<uint8_t>;
template class TensorSummary<int8_t>;
template class TensorSummary<uint16_t>;
template class TensorSummary<int16_t>;
template class TensorSummary<uint32_t>;
template class TensorSummary<int32_t>;
template class TensorSummary<uint64_t>;
template class TensorSummary<int64_t>;
template class TensorSummary<float16>;
template class TensorSummary<bfloat16>;
template class TensorSummary<float>;
template class TensorSummary<double>;
template class TensorSummary<bool>;
452 }  // namespace mindspore
453