• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_TOOLS_COMMON_STATISTIC_UTILS_H_
18 #define MINDSPORE_LITE_TOOLS_COMMON_STATISTIC_UTILS_H_
19 
20 #include <vector>
21 #include <algorithm>
22 #include <cmath>
23 #include <numeric>
24 #include <cfloat>
25 #include <utility>
26 #include "include/errorcode.h"
27 #include "src/common/log_adapter.h"
28 #include "nnacl/op_base.h"
29 #include "mindapi/base/type_id.h"
30 
31 namespace mindspore::lite {
32 std::pair<float, float> GetFloatMinMaxValue(const float *data, int size);
33 
// Walks a raw buffer once and reports its smallest and largest element.
//
// data:      pointer to the first element; asserted to be non-null.
// data_size: number of elements readable through `data`.
// Returns {minimum, maximum} of the scanned elements.
//
// NOTE(review): the assertion demands more than one element although the scan below also works for a
// single element - confirm whether `data_size > 0` was intended.
template <typename T>
std::pair<T, T> GetMinMaxValue(const T *data, size_t data_size) {
  MS_ASSERT(data != nullptr);
  MS_ASSERT(data_size > 1);
  T smallest = data[0];
  T largest = data[0];
  const T *const last = data + data_size;
  // Element 0 already seeded both extremes, so the walk starts at the second element.
  for (const T *cur = data + 1; cur < last; ++cur) {
    if (*cur < smallest) {
      smallest = *cur;
    }
    if (largest < *cur) {
      largest = *cur;
    }
  }
  return std::make_pair(smallest, largest);
}
46 
// Returns the smallest element of `data_vector`.
// The vector must not be empty (asserted): for an empty vector the end iterator would be dereferenced.
template <typename T>
T GetMinValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  // Qualified on purpose: an unqualified call is only found through argument-dependent lookup on the
  // iterator type, which is not guaranteed to live in namespace std.
  return *std::min_element(data_vector.cbegin(), data_vector.cend());
}
52 
// Returns the largest element of `data_vector`.
// The vector must not be empty (asserted): for an empty vector the end iterator would be dereferenced.
template <typename T>
T GetMaxValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  // Qualified on purpose: an unqualified call is only found through argument-dependent lookup on the
  // iterator type, which is not guaranteed to live in namespace std.
  return *std::max_element(data_vector.cbegin(), data_vector.cend());
}
58 
// Returns the q-quantile of `data_vector` using linear interpolation between the two closest ranks.
//
// data_vector: samples in any order; a sorted copy is made, the argument itself is not modified.
// q:           requested quantile, asserted to lie in [0, 1] (0 -> minimum, 1 -> maximum).
//
// The fractional rank (n - 1) * q is evaluated in double precision. A float cannot represent every
// index once the vector holds more than 2^24 elements, so a float rank may round past the last valid
// index and make the lookup below fail for a perfectly valid q.
//
// Element access goes through vector::at, so an index outside the data (empty input, or q outside
// [0, 1] in builds where the assertions are compiled out) raises std::out_of_range.
template <typename T>
float Quantile(const std::vector<T> &data_vector, float q) {
  MS_ASSERT(!data_vector.empty());
  MS_ASSERT(q >= 0.0f && q <= 1.0f);
  using Index = typename std::vector<T>::difference_type;
  std::vector<T> sorted(data_vector);
  std::sort(sorted.begin(), sorted.end());
  const double position = (static_cast<double>(sorted.size()) - 1.0) * static_cast<double>(q);
  const Index lo = static_cast<Index>(std::floor(position));
  const Index hi = static_cast<Index>(std::ceil(position));
  // Weight of the upper neighbour; 0 when `position` is a whole number (lo == hi).
  const double weight = position - static_cast<double>(lo);
  // A negative index wraps to a huge size_t, which vector::at rejects.
  const double lower = static_cast<double>(sorted.at(static_cast<size_t>(lo)));
  const double upper = static_cast<double>(sorted.at(static_cast<size_t>(hi)));
  return static_cast<float>((1.0 - weight) * lower + weight * upper);
}
72 
// Arithmetic mean of `data_vector`.
// The elements are summed in double precision; the total is then narrowed to float before it is
// divided by the element count. The vector is asserted to be non-empty.
template <typename T>
float GetMeanValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  double running_total = 0.0;
  for (const T &item : data_vector) {
    running_total = running_total + item;
  }
  const float total = static_cast<float>(running_total);
  return total / data_vector.size();
}
80 
// Returns {mean, spread} of `data_vector`.
//
// The mean comes from GetMeanValue. Despite the function name, the second value is the square root of
// the mean squared deviation from that mean (dividing by the element count, not count - 1), i.e. a
// standard deviation rather than a variance. Each element is converted to float before use.
// The vector is asserted to be non-empty.
template <typename T>
std::pair<float, float> GetMeanVar(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const float mean = GetMeanValue(data_vector);
  float squared_dev_sum = 0.0f;
  for (const float value : data_vector) {
    const float diff = value - mean;
    squared_dev_sum += diff * diff;
  }
  // std::sqrt is spelled out: <cmath> only guarantees the declarations inside namespace std, and the
  // float overload avoids a detour through double.
  const float spread = std::sqrt(squared_dev_sum / data_vector.size());
  return {mean, spread};
}
91 
// Returns the spread of `data_vector` around its mean.
// Despite the name this is the square root of the mean squared deviation (a standard deviation), the
// same quantity GetMeanVar reports as its second value, so the computation is delegated to it.
// The vector is asserted to be non-empty.
template <typename T>
float GetVarValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const std::pair<float, float> mean_and_spread = GetMeanVar(data_vector);
  return mean_and_spread.second;
}
102 
// Fraction of the elements of `data_vector` that compare equal to zero, in the range [0, 1].
// The vector is asserted to be non-empty.
template <typename T>
float GetSparsity(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const auto zero_count =
    std::count_if(data_vector.begin(), data_vector.end(), [](const T &item) { return item == 0; });
  // The ratio is formed in double precision and narrowed to float on return.
  const double ratio = 1.0 * zero_count / data_vector.size();
  return ratio;
}
109 
// Fraction of the elements of `origin` that fall outside the value range spanned by `compared`.
//
// origin:   buffer whose elements are tested; asserted non-null.
// compared: buffer whose minimum and maximum define the allowed range; asserted non-null.
// size:     element count of both buffers; asserted to be positive.
// Returns (number of origin[i] below min(compared) or above max(compared)) / size.
template <typename T>
float GetClipRate(const T *origin, const T *compared, size_t size) {
  MS_ASSERT(origin != nullptr);
  MS_ASSERT(compared != nullptr);
  MS_ASSERT(size > 0);
  const T *const compared_end = compared + size;
  const auto lower = *std::min_element(compared, compared_end);
  const auto upper = *std::max_element(compared, compared_end);
  const size_t clipped =
    std::count_if(origin, origin + size, [&](const T &value) { return value > upper || value < lower; });
  return 1.0f * clipped / size;
}
125 
GetClipRate(const void * vector_a,const void * vector_b,size_t size,mindspore::TypeId type_id)126 inline float GetClipRate(const void *vector_a, const void *vector_b, size_t size, mindspore::TypeId type_id) {
127   MS_ASSERT(vector_a != nullptr);
128   MS_ASSERT(vector_b != nullptr);
129   if (type_id == mindspore::kNumberTypeFloat32) {
130     return mindspore::lite::GetClipRate<float>(static_cast<const float *>(vector_a),
131                                                static_cast<const float *>(vector_b), size);
132   } else if (type_id == mindspore::kNumberTypeInt32) {
133     return mindspore::lite::GetClipRate(static_cast<const int *>(vector_a), static_cast<const int *>(vector_b), size);
134   } else {
135     MS_LOG(ERROR) << "Unsupported data type:" << type_id;
136     return 0;
137   }
138 }
139 
// Cosine similarity between two equally sized buffers, accumulated in double precision.
//
// vector_a, vector_b: input buffers, asserted non-null, each holding `size` elements.
// Returns dot(a, b) / (|a| * |b|), with these special cases:
//   - any NaN or infinite element: an error is logged and 0 is returned;
//   - both vectors effectively zero (squared norm below DBL_EPSILON): 1, they are treated as equal;
//   - exactly one vector effectively zero: 0.
//
// Zero-ness is decided per vector. Testing the product of the squared norms against DBL_EPSILON is
// scale dependent: two identical vectors of small magnitude (e.g. four elements of 1e-5) have a
// product below the threshold and would be scored 0 instead of 1.
template <typename T>
float GetCosSimilarity(const T *vector_a, const T *vector_b, size_t size) {
  MS_ASSERT(vector_a != nullptr);
  MS_ASSERT(vector_b != nullptr);
  double dot_sum = 0;
  double sum_a = 0;
  double sum_b = 0;
  for (size_t i = 0; i < size; i++) {
    const double a = static_cast<double>(vector_a[i]);
    const double b = static_cast<double>(vector_b[i]);
    // Integers always convert to finite doubles, so this can only trigger for floating-point input.
    if (!std::isfinite(a) || !std::isfinite(b)) {
      MS_LOG(ERROR) << "tensor has nan or inf data, compare fail";
      return 0;
    }
    dot_sum += a * b;
    sum_a += a * a;
    sum_b += b * b;
  }
  const bool a_is_zero = sum_a < DBL_EPSILON;
  const bool b_is_zero = sum_b < DBL_EPSILON;
  if (a_is_zero && b_is_zero) {
    return 1;
  }
  if (a_is_zero || b_is_zero) {
    return 0;
  }
  return static_cast<float>(dot_sum / (std::sqrt(sum_a) * std::sqrt(sum_b)));
}
164 
GetCosSimilarity(const void * vector_a,const void * vector_b,size_t size,mindspore::TypeId type_id)165 inline float GetCosSimilarity(const void *vector_a, const void *vector_b, size_t size, mindspore::TypeId type_id) {
166   MS_ASSERT(vector_a != nullptr);
167   MS_ASSERT(vector_b != nullptr);
168   if (type_id == mindspore::kNumberTypeFloat32) {
169     return mindspore::lite::GetCosSimilarity<float>(static_cast<const float *>(vector_a),
170                                                     static_cast<const float *>(vector_b), size);
171   } else if (type_id == mindspore::kNumberTypeInt32) {
172     return mindspore::lite::GetCosSimilarity(static_cast<const int *>(vector_a), static_cast<const int *>(vector_b),
173                                              size);
174   } else {
175     MS_LOG(ERROR) << "Unsupported data type:" << type_id;
176     return 0;
177   }
178 }
179 
// Kullback-Leibler style divergence of histogram `p` from histogram `q`.
//
// Both arguments are taken by value because they are normalised in place: every entry is divided by
// the sum of its own histogram (each sum is asserted to be positive). The result then accumulates, over
// the bins where p[i] is non-zero:
//   - p[i] * log(p[i] / q[i]) when q[i] is non-zero;
//   - a fixed penalty of 1 when q[i] is zero (the logarithm would be unbounded there).
//
// The histograms are expected to have the same number of bins (asserted). When assertions are compiled
// out and the sizes differ, only the bins present in both are compared instead of reading past the end
// of the shorter histogram.
template <typename T>
float KLDivergence(std::vector<T> p, std::vector<T> q) {
  MS_ASSERT(p.size() == q.size());
  // Scales a histogram in place so that its entries sum to one.
  const auto normalize = [](std::vector<T> *hist) {
    float total = 0.0f;
    for (const T &item : *hist) {
      total += item;
    }
    MS_ASSERT(total > DBL_EPSILON);
    for (T &item : *hist) {
      item /= total;
    }
  };
  normalize(&p);
  normalize(&q);

  const size_t bins = std::min(p.size(), q.size());
  float result = 0.0f;
  for (size_t i = 0; i < bins; ++i) {
    if (p[i] == 0) {
      continue;  // empty bins of p contribute nothing
    }
    if (q[i] == 0) {
      result += 1.0f;
    } else {
      result += p[i] * std::log(p[i] / q[i]);
    }
  }
  return result;
}
204 }  // namespace mindspore::lite
205 #endif  // MINDSPORE_LITE_TOOLS_COMMON_STATISTIC_UTILS_H_
206