1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINDSPORE_LITE_TOOLS_COMMON_STATISTIC_UTILS_H_
18 #define MINDSPORE_LITE_TOOLS_COMMON_STATISTIC_UTILS_H_
19
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "nnacl/op_base.h"
#include "mindapi/base/type_id.h"
30
31 namespace mindspore::lite {
32 std::pair<float, float> GetFloatMinMaxValue(const float *data, int size);
33
34 template <typename T>
GetMinMaxValue(const T * data,size_t data_size)35 std::pair<T, T> GetMinMaxValue(const T *data, size_t data_size) {
36 MS_ASSERT(data != nullptr);
37 MS_ASSERT(data_size > 1);
38 T min = data[0];
39 T max = data[0];
40 for (size_t i = 1; i < data_size; i++) {
41 min = std::min(min, data[i]);
42 max = std::max(max, data[i]);
43 }
44 return {min, max};
45 }
46
// Returns a copy of the smallest element stored in data_vector.
//
// The vector is expected to be non-empty; this is only checked with MS_ASSERT,
// and the iterator returned by the search is dereferenced unconditionally.
template <typename T>
T GetMinValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const auto smallest_it = std::min_element(data_vector.begin(), data_vector.end());
  return *smallest_it;
}
52
// Returns a copy of the largest element stored in data_vector.
//
// The vector is expected to be non-empty; this is only checked with MS_ASSERT,
// and the iterator returned by the search is dereferenced unconditionally.
template <typename T>
T GetMaxValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const auto largest_it = std::max_element(data_vector.begin(), data_vector.end());
  return *largest_it;
}
58
// Computes the q-quantile of data_vector using linear interpolation between
// the two closest order statistics.
//
// data_vector  samples in any order; the input is not modified (a sorted copy is made).
// q            requested quantile; values outside [0, 1] (and NaN) are clamped into
//              that range instead of producing an out-of-range index.
//
// Returns the interpolated value at position (n - 1) * q of the sorted samples,
// or 0 when data_vector is empty.
template <typename T>
float Quantile(const std::vector<T> &data_vector, float q) {
  if (data_vector.empty()) {
    // Nothing to interpolate; avoids indexing with (0 - 1).
    return 0.0f;
  }
  // std::max(0.0, NaN) yields 0.0, so a NaN q degrades to the minimum.
  const double fraction = std::min(1.0, std::max(0.0, static_cast<double>(q)));

  std::vector<T> sorted(data_vector);
  std::sort(sorted.begin(), sorted.end());

  // The position is computed in double: a float cannot represent every index
  // above 2^24, which could round the position past the last element.
  const size_t last = sorted.size() - 1;
  const double position = static_cast<double>(last) * fraction;
  const size_t lo = static_cast<size_t>(std::floor(position));
  const size_t hi = std::min(static_cast<size_t>(std::ceil(position)), last);
  const double weight = position - static_cast<double>(lo);

  const double lower_value = static_cast<double>(sorted[lo]);
  const double upper_value = static_cast<double>(sorted[hi]);
  return static_cast<float>((1.0 - weight) * lower_value + weight * upper_value);
}
72
// Returns the arithmetic mean of data_vector as a float.
//
// The elements are summed in double precision; the total is then narrowed to
// float before it is divided by the element count. The vector is expected to
// be non-empty (checked with MS_ASSERT only).
template <typename T>
float GetMeanValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  double running_total = 0.0;
  for (const T &sample : data_vector) {
    running_total = running_total + sample;
  }
  const float total = static_cast<float>(running_total);
  return total / data_vector.size();
}
80
// Returns {mean, spread} for data_vector.
//
// The mean comes from GetMeanValue. The second member is the square root of
// the average squared deviation from that mean (divided by the element count,
// not count - 1), i.e. it is already square-rooted despite the "Var" name.
// Each element is converted to float before the deviation is computed.
// The vector is expected to be non-empty (checked with MS_ASSERT only).
template <typename T>
std::pair<float, float> GetMeanVar(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const float mean = GetMeanValue(data_vector);
  float squared_deviation_sum = 0.0;
  for (const float sample : data_vector) {
    squared_deviation_sum += (sample - mean) * (sample - mean);
  }
  const float spread = std::sqrt(squared_deviation_sum / data_vector.size());
  return std::make_pair(mean, spread);
}
91
// Returns the square root of the average squared deviation of data_vector
// from its mean (the mean is obtained from GetMeanValue).
//
// The divisor is the element count, not count - 1, and every element is
// converted to float before the deviation is computed. The vector is expected
// to be non-empty (checked with MS_ASSERT only).
template <typename T>
float GetVarValue(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const float mean = GetMeanValue(data_vector);
  const float squared_deviation_sum =
    std::accumulate(data_vector.begin(), data_vector.end(), 0.0f, [mean](float partial, const float sample) {
      return partial + (sample - mean) * (sample - mean);
    });
  return std::sqrt(squared_deviation_sum / data_vector.size());
}
102
// Returns the fraction of elements in data_vector that compare equal to 0.
//
// The ratio is formed in double precision and narrowed to float on return.
// The vector is expected to be non-empty (checked with MS_ASSERT only).
template <typename T>
float GetSparsity(const std::vector<T> &data_vector) {
  MS_ASSERT(!data_vector.empty());
  const auto zero_count =
    std::count_if(data_vector.begin(), data_vector.end(), [](const T &value) { return value == 0; });
  const double ratio = 1.0 * zero_count / data_vector.size();
  return ratio;
}
109
110 template <typename T>
GetClipRate(const T * origin,const T * compared,size_t size)111 float GetClipRate(const T *origin, const T *compared, size_t size) {
112 MS_ASSERT(origin != nullptr);
113 MS_ASSERT(compared != nullptr);
114 MS_ASSERT(size > 0);
115 auto min = *std::min_element(compared, compared + size);
116 auto max = *std::max_element(compared, compared + size);
117 size_t total = 0;
118 for (size_t i = 0; i < size; ++i) {
119 if (origin[i] > max || origin[i] < min) {
120 total++;
121 }
122 }
123 return 1.0f * total / size;
124 }
125
GetClipRate(const void * vector_a,const void * vector_b,size_t size,mindspore::TypeId type_id)126 inline float GetClipRate(const void *vector_a, const void *vector_b, size_t size, mindspore::TypeId type_id) {
127 MS_ASSERT(vector_a != nullptr);
128 MS_ASSERT(vector_b != nullptr);
129 if (type_id == mindspore::kNumberTypeFloat32) {
130 return mindspore::lite::GetClipRate<float>(static_cast<const float *>(vector_a),
131 static_cast<const float *>(vector_b), size);
132 } else if (type_id == mindspore::kNumberTypeInt32) {
133 return mindspore::lite::GetClipRate(static_cast<const int *>(vector_a), static_cast<const int *>(vector_b), size);
134 } else {
135 MS_LOG(ERROR) << "Unsupported data type:" << type_id;
136 return 0;
137 }
138 }
139
140 template <typename T>
GetCosSimilarity(const T * vector_a,const T * vector_b,size_t size)141 float GetCosSimilarity(const T *vector_a, const T *vector_b, size_t size) {
142 MS_ASSERT(vector_a != nullptr);
143 MS_ASSERT(vector_b != nullptr);
144 double dot_sum = 0;
145 double sum_a = 0;
146 double sum_b = 0;
147 for (size_t i = 0; i < size; i++) {
148 if (std::is_same<T, float>::value && ((std::isnan(vector_a[i]) || std::isinf(vector_a[i])) ||
149 (std::isnan(vector_b[i]) || std::isinf(vector_b[i])))) {
150 MS_LOG(ERROR) << "tensor has nan or inf data, compare fail";
151 return 0;
152 }
153 dot_sum += static_cast<double>(vector_a[i]) * static_cast<double>(vector_b[i]);
154 sum_a += static_cast<double>(vector_a[i]) * static_cast<double>(vector_a[i]);
155 sum_b += static_cast<double>(vector_b[i]) * static_cast<double>(vector_b[i]);
156 }
157 if (sum_a < DBL_EPSILON && sum_b < DBL_EPSILON) {
158 return 1;
159 } else if (sum_a * sum_b < DBL_EPSILON) {
160 return 0;
161 }
162 return dot_sum / (std::sqrt(sum_a) * std::sqrt(sum_b));
163 }
164
GetCosSimilarity(const void * vector_a,const void * vector_b,size_t size,mindspore::TypeId type_id)165 inline float GetCosSimilarity(const void *vector_a, const void *vector_b, size_t size, mindspore::TypeId type_id) {
166 MS_ASSERT(vector_a != nullptr);
167 MS_ASSERT(vector_b != nullptr);
168 if (type_id == mindspore::kNumberTypeFloat32) {
169 return mindspore::lite::GetCosSimilarity<float>(static_cast<const float *>(vector_a),
170 static_cast<const float *>(vector_b), size);
171 } else if (type_id == mindspore::kNumberTypeInt32) {
172 return mindspore::lite::GetCosSimilarity(static_cast<const int *>(vector_a), static_cast<const int *>(vector_b),
173 size);
174 } else {
175 MS_LOG(ERROR) << "Unsupported data type:" << type_id;
176 return 0;
177 }
178 }
179
180 template <typename T>
KLDivergence(std::vector<T> p,std::vector<T> q)181 float KLDivergence(std::vector<T> p, std::vector<T> q) {
182 auto sum = 0.0f;
183 std::for_each(p.begin(), p.end(), [&sum](T item) { sum += item; });
184 MS_ASSERT(sum > DBL_EPSILON);
185 std::for_each(p.begin(), p.end(), [sum](T &item) { item /= sum; });
186 sum = 0.0f;
187 std::for_each(q.begin(), q.end(), [&sum](T item) { sum += item; });
188 MS_ASSERT(sum > DBL_EPSILON);
189 std::for_each(q.begin(), q.end(), [sum](T &item) { item /= sum; });
190
191 float result = 0.0f;
192 const size_t size = p.size();
193 for (size_t i = 0; i < size; ++i) {
194 if (p[i] != 0) {
195 if (q[i] == 0) {
196 result += 1.0f;
197 } else {
198 result += (p[i] * std::log((p[i]) / (q[i])));
199 }
200 }
201 }
202 return result;
203 }
204 } // namespace mindspore::lite
205 #endif // MINDSPORE_LITE_TOOLS_COMMON_STATISTIC_UTILS_H_
206