• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cmath>
18 #include <algorithm>
19 #include <limits>
20 #include <memory>
21 #include <bitset>
22 #include <tuple>
23 #include <type_traits>
24 #include "debug/debugger/tensor_summary.h"
25 
26 #ifdef OFFLINE_DBG_MODE
27 #include "base/float16.h"
28 #endif
29 
30 #ifdef ONLINE_DBG_MODE
31 namespace mindspore {
32 #endif
33 using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
34 
RangeCountCalculator()35 RangeCountCalculator::RangeCountCalculator()
36     : range_start_inclusive(-std::numeric_limits<double>::infinity()),
37       range_end_inclusive(std::numeric_limits<double>::infinity()),
38       count(0),
39       total(0) {}
40 
ProcessElement(double element)41 void RangeCountCalculator::ProcessElement(double element) {
42   count += (element >= range_start_inclusive && element <= range_end_inclusive);
43   total += 1;
44 }
45 
GetPercentInRange() const46 double RangeCountCalculator::GetPercentInRange() const {
47   if (total == 0) {
48     return 0.0;
49   }
50   const double factor = 100.0;
51   return factor * count / total;
52 }
53 
AllCloseCalculator()54 AllCloseCalculator::AllCloseCalculator() : atol(1.0e-8), rtol(1.0e-5), result(true) {}
55 
ProcessElement(double current,double previous)56 void AllCloseCalculator::ProcessElement(double current, double previous) {
57   result = result && (std::abs(current - previous) <= (atol + rtol * std::abs(previous)));
58 }
59 
IsAllClose() const60 bool AllCloseCalculator::IsAllClose() const { return result; }
61 
MeanCalculator()62 MeanCalculator::MeanCalculator() : mean(0.0), count(0) {}
63 
ProcessElement(double value)64 void MeanCalculator::ProcessElement(double value) {
65   count += 1;
66   double delta = value - mean;
67   mean += delta / count;
68 }
69 
GetMean() const70 double MeanCalculator::GetMean() const { return mean; }
71 
VarianceAndMeanCalculator()72 VarianceAndMeanCalculator::VarianceAndMeanCalculator() : mean(0.0), count(0), m2(0.0) {}
73 
ProcessElement(double value)74 void VarianceAndMeanCalculator::ProcessElement(double value) {
75   count += 1;
76   double delta = value - mean;
77   mean += delta / count;
78   m2 += delta * (value - mean);
79 }
80 
GetMean() const81 double VarianceAndMeanCalculator::GetMean() const { return mean; }
82 
GetVariance() const83 double VarianceAndMeanCalculator::GetVariance() const {
84   if (count > 1) {
85     return m2 / (count - 1);
86   }
87   return 0.0;
88 }
89 
GetStandardDeviation()90 double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVariance()); }
91 
92 template <typename T>
TensorSummary(const void * current_tensor_ptr,const void * const previous_tensor_ptr,uint32_t num_elements,uint32_t prev_num_elements)93 TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
94                                 uint32_t num_elements, uint32_t prev_num_elements)
95     : current_tensor_ptr_(reinterpret_cast<const T *>(current_tensor_ptr)),
96       prev_tensor_ptr_(reinterpret_cast<const T *>(previous_tensor_ptr)),
97       num_elements_(num_elements),
98       prev_num_elements_(prev_num_elements),
99       min_(std::numeric_limits<double>::max()),
100       max_(std::numeric_limits<double>::lowest()),
101       avg_(0.0),
102       is_bool_(false),
103       neg_zero_count_(0),
104       pos_zero_count_(0),
105       pos_inf_count_(0),
106       neg_inf_count_(0),
107       inf_count_(0),
108       nan_count_(0),
109       zero_count_(0),
110       epsilon_(1.0e-9),
111       mean_sd_cal_enabled_(false) {}
112 
113 template <typename T>
SummarizeTensor(const std::vector<DebugServices::watchpoint_t> & wps)114 void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
115   InitCalculators(wps);
116   for (size_t i = 0; i < num_elements_; ++i) {
117     auto current_value = static_cast<double>(current_tensor_ptr_[i]);
118     double previous_value = std::numeric_limits<double>::quiet_NaN();
119     if (prev_tensor_ptr_) {
120       if (num_elements_ == prev_num_elements_) {
121         previous_value = static_cast<double>(prev_tensor_ptr_[i]);
122       } else {
123         MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
124       }
125     }
126     if (std::isinf(current_value)) {
127       inf_count_ += 1;
128     }
129     if (std::isnan(current_value)) {
130       nan_count_ += 1;
131     }
132     if (current_value == 0) {
133       zero_count_ += 1;
134     }
135     max_ = std::max(max_, current_value);
136     min_ = std::min(min_, current_value);
137     if (mean_sd_cal_enabled_) {
138       current_mean_variance_.ProcessElement(current_value);
139     }
140     for (auto &it : all_close_) {
141       it.second->ProcessElement(current_value, previous_value);
142     }
143     for (auto &range_count : range_counts_) {
144       range_count.second->ProcessElement(current_value);
145     }
146     for (auto &mean : means_) {
147       if (mean.first.compare("curr_prev_diff_mean") == 0) {
148         mean.second->ProcessElement(std::abs(current_value - previous_value));
149       } else if (mean.first.compare("abs_prev_mean") == 0) {
150         mean.second->ProcessElement(std::abs(previous_value));
151       } else if (mean.first.compare("abs_current_mean") == 0) {
152         mean.second->ProcessElement(std::abs(current_value));
153       }
154     }
155   }
156 }
157 
158 template <typename T>
TensorStatistics(DbgDataType dtype_value)159 void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
160   if (dtype_value == DT_BOOL) {
161     is_bool_ = true;
162   }
163   double sum_elements = 0.0;
164   for (size_t i = 0; i < num_elements_; ++i) {
165     auto current_value = static_cast<double>(current_tensor_ptr_[i]);
166     if (std::isinf(current_value)) {
167       if (current_value > 0) {
168         pos_inf_count_ += 1;
169       } else {
170         neg_inf_count_ += 1;
171       }
172     }
173     if (current_value == 0) {
174       zero_count_ += 1;
175     }
176     if (std::isnan(current_value)) {
177       nan_count_ += 1;
178     }
179     if (!(std::isnan(current_value) || std::isinf(current_value))) {
180       // only considering tensor elements with value
181       if (std::signbit(current_value) && !(current_value == 0)) {
182         neg_zero_count_ += 1;
183       } else if (!(current_value == 0)) {
184         pos_zero_count_ += 1;
185       }
186       max_ = std::max(max_, current_value);
187       min_ = std::min(min_, current_value);
188       sum_elements += current_value;
189     }
190   }
191   unsigned int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
192   avg_ = sum_elements / value_count;
193 }
194 
195 template <typename T>
IsWatchpointHit(DebugServices::watchpoint_t wp)196 std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
197   DebugServices::watchpoint_t wp) {
198   auto parameter_list = wp.parameter_list;
199   bool hit = false;
200   const uint8_t bit_size = 32;
201   std::bitset<bit_size> error_code;
202   CONDITION_TYPE type = wp.condition.type;
203   // bit 0 denotes presence of nan
204   (void)error_code.set(0, nan_count_ > 0);
205   // bit 1 denotes presence of inf
206   (void)error_code.set(1, inf_count_ > 0);
207 
208   if (type == CONDITION_TYPE::HAS_NAN) {
209     error_code.reset();
210     hit = nan_count_ > 0;
211   } else if (type == CONDITION_TYPE::HAS_INF) {
212     error_code.reset();
213     hit = inf_count_ > 0;
214   } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
215     error_code.reset();
216     hit = (nan_count_ + inf_count_) > 0;
217   } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
218     hit = all_close_[wp.id]->IsAllClose();
219   } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
220               type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
221              !prev_tensor_ptr_) {
222     // bit 2 denotes absence of previous tensor
223     error_code.set(2, true);
224   }
225 
226   if (error_code.none()) {
227     for (auto &parameter : parameter_list) {
228       if (parameter.disabled || error_code.any()) {
229         continue;
230       }
231       // extract inequality type from watchpoint for backward compatibility
232       std::string inequality_type;
233       if (wp.is_gt_wp()) {
234         inequality_type = "gt";
235       } else if (wp.is_lt_wp()) {
236         inequality_type = "lt";
237       }
238       parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type);
239       hit = hit || parameter.hit;
240     }
241   }
242   return std::make_tuple(hit, static_cast<int32_t>(error_code.to_ulong()), parameter_list);
243 }
244 
245 template <typename T>
StatLookup(const std::string & parameter_name,const DebugServices::watchpoint_t & wp)246 double_t TensorSummary<T>::StatLookup(const std::string &parameter_name, const DebugServices::watchpoint_t &wp) {
247   if (parameter_name == "param") return StatLookup(wp);
248   std::string param_type;
249   auto pos = parameter_name.find_last_of('_');
250   if (pos != std::string::npos) {
251     param_type = parameter_name.substr(0, pos);
252   }
253 
254   if (param_type == "max") {
255     return max_;
256   }
257   if (param_type == "min") {
258     return min_;
259   }
260   if (param_type == "max_min") {
261     return max_ - min_;
262   }
263   if (param_type == "mean") {
264     return current_mean_variance_.GetMean();
265   }
266   if (param_type == "sd") {
267     return current_mean_variance_.GetStandardDeviation();
268   }
269   if (param_type == "abs_mean") {
270     if (means_.find("abs_current_mean") != means_.end()) {
271       return means_["abs_current_mean"]->GetMean();
272     }
273   }
274   if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
275     if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
276       return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
277     }
278   }
279   if (param_type == "range_percentage") {
280     if (range_counts_.find(wp.id) != range_counts_.end()) {
281       return range_counts_[wp.id]->GetPercentInRange();
282     }
283   }
284   if (param_type == "zero_percentage") {
285     return GetZeroValPercent();
286   }
287   return std::numeric_limits<double_t>::quiet_NaN();
288 }
289 
290 template <typename T>
StatLookup(const DebugServices::watchpoint_t & wp)291 double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) {
292   CONDITION_TYPE type = wp.condition.type;
293   if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
294     return max_;
295   }
296   if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
297     return min_;
298   }
299   if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
300     return current_mean_variance_.GetMean();
301   }
302   if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
303     return current_mean_variance_.GetStandardDeviation();
304   }
305   if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
306     return max_ - min_;
307   }
308   return std::numeric_limits<double_t>::quiet_NaN();
309 }
310 
311 template <typename T>
GetZeroValPercent()312 double_t TensorSummary<T>::GetZeroValPercent() {
313   if (num_elements_ == 0) {
314     return 0;
315   }
316 
317   return (zero_count_ * 100.0) / num_elements_;
318 }
319 
320 template <typename T>
InitCalculators(const std::vector<DebugServices::watchpoint_t> & wps)321 void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
322   for (auto &wp : wps) {
323     auto wp_id = wp.id;
324     mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
325     if (wp.allclose_enabled() && prev_tensor_ptr_) {
326       all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
327       if (!wp.parameter_list[0].disabled) {
328         all_close_[wp_id]->set_atol(wp.parameter_list[0].value);
329       }
330       if (!wp.parameter_list[1].disabled) {
331         all_close_[wp_id]->set_rtol(wp.parameter_list[1].value);
332       }
333     } else if (wp.range_enabled()) {
334       range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
335       if (!wp.parameter_list[0].disabled) {
336         range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
337       }
338       if (!wp.parameter_list[1].disabled) {
339         range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
340       }
341     } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
342       (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
343       (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
344     } else if (wp.abs_mean_enabled()) {
345       (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
346     }
347   }
348 }
349 template class TensorSummary<uint8_t>;
350 template class TensorSummary<int8_t>;
351 template class TensorSummary<uint16_t>;
352 template class TensorSummary<int16_t>;
353 template class TensorSummary<uint32_t>;
354 template class TensorSummary<int32_t>;
355 template class TensorSummary<uint64_t>;
356 template class TensorSummary<int64_t>;
357 template class TensorSummary<float16>;
358 template class TensorSummary<float>;
359 template class TensorSummary<double>;
360 template class TensorSummary<bool>;
361 #ifdef ONLINE_DBG_MODE
362 }  // namespace mindspore
363 #endif
364