/**
 * Copyright 2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "debug/data_dump/statistic_kernel.h"
#include <chrono>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "debug/debugger/debugger_utils.h"
#include "include/common/debug/common.h"

namespace mindspore {

namespace {
using TensorPtr = tensor::TensorPtr;
const std::set<TypeId> max_supported_dtype{
  kNumberTypeBFloat16, kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeFloat64, kNumberTypeFloat,
  kNumberTypeDouble,   kNumberTypeInt,     kNumberTypeInt8,    kNumberTypeUInt8,   kNumberTypeInt16,
  kNumberTypeInt32,    kNumberTypeInt64,   kNumberTypeBool};
const std::set<TypeId> &min_supported_dtype = max_supported_dtype;
const std::set<TypeId> mean_supported_dtype = {
  kNumberTypeBFloat16, kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeFloat64, kNumberTypeFloat, kNumberTypeDouble,
  kNumberTypeInt,      kNumberTypeInt8,    kNumberTypeUInt8,   kNumberTypeInt16,   kNumberTypeInt32, kNumberTypeInt64};
const std::set<TypeId> norm_supported_dtype = {kNumberTypeBFloat16, kNumberTypeFloat16, kNumberTypeFloat32};

const char KStatMax[] = "max";
const char KStatMin[] = "min";
const char KStatMean[] = "avg";
const char KStatL2Norm[] = "l2norm";

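// Logs a "dtype not supported for this statistic" warning at most once per
// (device, dtype, statistic) combination, so repeated dumps do not flood the log.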
void WarningOnce(const string &device_name, const string &type_name, const string &statistic_name) {
  static std::set<string> warning_once;
  string name = device_name + type_name + statistic_name;
  if (warning_once.find(name) != warning_once.end()) {
    return;
  }
  warning_once.insert(name);
  MS_LOG(WARNING) << "In the '" << device_name << "' platform, '" << type_name << "' is not supported for '"
                  << statistic_name << "' statistic dump.";
}

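// Logs an "unknown statistic category" warning at most once per category name.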
void WarningOnceCategory(const string &name) {
  static std::set<string> warning_once;
  if (warning_once.find(name) != warning_once.end()) {
    return;
  }
  warning_once.insert(name);
  MS_LOG(WARNING) << "'" << name << "' category is not supported!";
}

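// Invokes func(args...) and logs its wall-clock duration at DEBUG level,
// returning the wrapped function's result unchanged.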
template <typename Func, typename... Args>
auto TimeWrapper(Func &&func, const std::string &funcName, Args &&... args) -> decltype(auto) {
  auto start = std::chrono::high_resolution_clock::now();
  auto result = std::forward<Func>(func)(std::forward<Args>(args)...);
  auto end = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
  auto microseconds = duration.count() % 1000;
  MS_LOG(DEBUG) << funcName << " took " << milliseconds.count() << " ms " << microseconds << " us";
  return result;
}

} // namespace

namespace datadump {

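// Allocates mem_size bytes of device memory on the given stream and wraps it in a
// KernelTensor/DeviceAddress with the requested dtype and shape; if value is given,
// it is set on the tensor (used for host-side constants such as reduction axes).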
DeviceAddressPtr StatisticKernel::GenerateDeviceAddress(const uint32_t &stream_id, const size_t &mem_size,
                                                        const TypeId &dtype_id, const ShapeVector &shape,
                                                        const ValuePtr &value) {
  auto addr = device_context_->device_res_manager_->AllocateMemory(mem_size, stream_id);
  MS_EXCEPTION_IF_NULL(addr);

  auto tensor = std::make_shared<kernel::KernelTensor>(addr, mem_size, Format::DEFAULT_FORMAT, dtype_id, shape,
                                                       device_context_->device_context_key().device_name_,
                                                       device_context_->device_context_key().device_id_);
  tensor->set_stream_id(stream_id);
  tensor->SetType(std::make_shared<TensorType>(TypeIdToType(dtype_id)));
  tensor->SetShape(std::make_shared<abstract::TensorShape>(shape));
  if (value) {
    tensor->SetValue(value);
  }
  return device_context_->device_res_manager_->CreateDeviceAddress(tensor);
}

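// Copies a statistic result from device memory into a freshly allocated host tensor.
// Note that only one element's worth of bytes (UnitSizeInBytes of the dtype) is
// synced; this is sufficient because all statistic outputs produced here are scalars.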
TensorPtr StatisticKernel::SyncDeviceToHostTensor(DeviceAddressPtr device_addr) {
  MS_EXCEPTION_IF_NULL(device_addr);
  auto kernel_tensor = device_addr->kernel_tensor();
  MS_EXCEPTION_IF_NULL(kernel_tensor);
  auto dtype_id = kernel_tensor->dtype_id();
  const auto &shape_vec = kernel_tensor->GetShapeVector();

  mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(dtype_id, shape_vec);
  auto ret_sync = device_addr->SyncDeviceToHost(UnitSizeInBytes(dtype_id), out_tensor->data_c());
  if (!ret_sync) {
    MS_LOG(EXCEPTION) << "Convert format or copy device memory to host failed";
  }
  return out_tensor;
}

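// Resizes the kernel for the given inputs/outputs and, if the kernel reports a
// non-zero workspace requirement, allocates a workspace address for it;
// returns nullptr when no workspace is needed.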
DeviceAddressPtr StatisticKernel::GetWorkSpaceDeviceAddress(const uint32_t stream_id,
                                                            const vector<KernelTensor *> &inputs,
                                                            const vector<KernelTensor *> &outputs) {
  auto ret = kernel_mod_->Resize(inputs, outputs);
  if (ret) {
    MS_LOG(EXCEPTION) << "Call Resize failed, error code is " << ret;
  }
  auto work_space = kernel_mod_->GetWorkspaceSizeList();
  if (!work_space.empty() && work_space[0] != 0) {
    return runtime::DeviceAddressUtils::CreateWorkspaceAddress(device_context_, stream_id, work_space[0]);
  }
  return nullptr;
}

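// Allocates a scalar (0-dimensional) output buffer of the given dtype.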
DeviceAddressPtr StatisticKernel::GetOutputDeviceAddress(const uint32_t stream_id, TypeId dtype_id) {
  ShapeVector shape_vec = {};
  return GenerateDeviceAddress(stream_id, UnitSizeInBytes(dtype_id), dtype_id, shape_vec);
}

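// Computes the statistic for one tensor. Scalar (0-dimensional) inputs are returned
// as-is, since the statistic of a scalar is the scalar itself; otherwise the kernel
// is resized and launched on the input's stream and the result is synced to host.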
TensorPtr StatisticKernel::LaunchKernel(KernelTensor *input) {
  MS_EXCEPTION_IF_NULL(input);
  if (input->GetShapeVector().empty()) {
    return std::make_shared<tensor::Tensor>(input->dtype_id(), input->GetShapeVector(),
                                            const_cast<void *>(input->GetValuePtr()),
                                            UnitSizeInBytes(input->dtype_id()));
  }
  vector<KernelTensor *> inputs{input};
  const auto stream_id = input->stream_id();
  auto output_addr = GetOutputDeviceAddress(stream_id, input->dtype_id());
  MS_EXCEPTION_IF_NULL(output_addr);
  MS_EXCEPTION_IF_NULL(kernel_mod_);

  void *stream_ptr = device_context_->device_res_manager_->GetStream(stream_id);
  MS_EXCEPTION_IF_NULL(stream_ptr);
  auto workspace_addr = GetWorkSpaceDeviceAddress(stream_id, {input}, {output_addr->kernel_tensor().get()});
  bool ret = false;
  if (workspace_addr) {
    ret = kernel_mod_->Launch(inputs, {workspace_addr->kernel_tensor().get()}, {output_addr->kernel_tensor().get()},
                              stream_ptr);
  } else {
    ret = kernel_mod_->Launch(inputs, {}, {output_addr->kernel_tensor().get()}, stream_ptr);
  }
  if (!ret) {
    MS_LOG(EXCEPTION) << "Kernel launch failed";
  }
  return SyncDeviceToHostTensor(output_addr);
}

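// Builds the reduction-axis input [0, 1, ..., dim - 1], i.e. reduce over every
// dimension so the statistic collapses the tensor to a scalar.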
DeviceAddressPtr DimStatisticKernel::GetAxisDeviceAddress(const uint32_t stream_id, size_t dim) {
  vector<int64_t> axes(dim);
  for (size_t i = 0; i < dim; i++) {
    axes[i] = static_cast<int64_t>(i);
  }
  ShapeVector axes_shape{static_cast<int64_t>(dim)};
  size_t axisbytes = UnitSizeInBytes(kNumberTypeInt64) * dim;
  return GenerateDeviceAddress(stream_id, axisbytes, kNumberTypeInt64, axes_shape, MakeValue(axes));
}

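// Builds the keep_dims=false scalar input for the reduction kernel.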
DeviceAddressPtr DimStatisticKernel::GetKeepDimsDeviceAddress(const uint32_t stream_id) {
  ShapeVector keepdims_shape = {};
  return GenerateDeviceAddress(stream_id, UnitSizeInBytes(kNumberTypeBool), kNumberTypeBool, keepdims_shape,
                               MakeValue(false));
}

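// Builds the output-dtype input for the reduction kernel.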
DeviceAddressPtr DimStatisticKernel::GetDtypeDeviceAddress(const uint32_t stream_id, const TypeId &dtype_id) {
  ShapeVector dtype_shape_vec = {1};
  return GenerateDeviceAddress(stream_id, UnitSizeInBytes(dtype_id), dtype_id, dtype_shape_vec);
}

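// Launches the reduction kernel (with an optional workspace) on the given stream
// and syncs the scalar result back to host.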
TensorPtr DimStatisticKernel::Launch(vector<KernelTensor *> inputs, DeviceAddressPtr output_addr, uint32_t stream_id) {
  void *stream_ptr = device_context_->device_res_manager_->GetStream(stream_id);
  MS_EXCEPTION_IF_NULL(stream_ptr);
  auto workspace_addr = GetWorkSpaceDeviceAddress(stream_id, inputs, {output_addr->kernel_tensor().get()});
  bool ret = false;
  if (workspace_addr) {
    ret = kernel_mod_->Launch(inputs, {workspace_addr->kernel_tensor().get()}, {output_addr->kernel_tensor().get()},
                              stream_ptr);
  } else {
    ret = kernel_mod_->Launch(inputs, {}, {output_addr->kernel_tensor().get()}, stream_ptr);
  }
  if (!ret) {
    MS_LOG(EXCEPTION) << kernel_name_ << " kernel launch failed";
  }
  return SyncDeviceToHostTensor(output_addr);
}

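// Assembles the reduction inputs (tensor, all axes, keep_dims=false, float32 output
// dtype) and reduces the whole tensor to a float32 scalar.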
TensorPtr DimStatisticKernel::LaunchKernel(KernelTensor *input) {
  MS_EXCEPTION_IF_NULL(input);
  const auto stream_id = input->stream_id();
  vector<KernelTensor *> inputs{input};
  auto output_addr = GetOutputDeviceAddress(stream_id, kNumberTypeFloat32);
  MS_EXCEPTION_IF_NULL(output_addr);
  MS_EXCEPTION_IF_NULL(kernel_mod_);

  auto axis = GetAxisDeviceAddress(stream_id, input->GetShapeVector().size());
  MS_EXCEPTION_IF_NULL(axis);
  inputs.emplace_back(axis->kernel_tensor().get());

  auto keepdims = GetKeepDimsDeviceAddress(stream_id);
  MS_EXCEPTION_IF_NULL(keepdims);
  inputs.emplace_back(keepdims->kernel_tensor().get());

  auto dtype = GetDtypeDeviceAddress(stream_id, kNumberTypeFloat32);
  MS_EXCEPTION_IF_NULL(dtype);
  inputs.emplace_back(dtype->kernel_tensor().get());

  return Launch(inputs, output_addr, stream_id);
}

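// Wraps a host float into a scalar device tensor; used for the norm's p value.
// The call site below passes no argument, so p defaults in the header declaration
// (presumably to 2.0 for the L2 norm; the default is not visible in this file).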
DeviceAddressPtr NormStatisticKernel::GetScalar(const uint32_t stream_id, float scalar) {
  ShapeVector scalar_shape{};
  size_t scalar_bytes = UnitSizeInBytes(kNumberTypeFloat32);
  return GenerateDeviceAddress(stream_id, scalar_bytes, kNumberTypeFloat32, scalar_shape, MakeValue(scalar));
}

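// Computes the norm of the tensor as a float32 scalar. Scalar inputs are returned
// unchanged; otherwise the kernel receives (input, p, axes, keep_dims, dtype).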
TensorPtr NormStatisticKernel::LaunchKernel(KernelTensor *input) {
  MS_EXCEPTION_IF_NULL(input);
  if (input->GetShapeVector().empty()) {
    return std::make_shared<tensor::Tensor>(input->dtype_id(), input->GetShapeVector(),
                                            const_cast<void *>(input->GetValuePtr()),
                                            UnitSizeInBytes(input->dtype_id()));
  }
  const auto stream_id = input->stream_id();
  vector<KernelTensor *> inputs{input};
  auto output_addr = GetOutputDeviceAddress(stream_id, kNumberTypeFloat32);
  MS_EXCEPTION_IF_NULL(output_addr);
  MS_EXCEPTION_IF_NULL(kernel_mod_);

  auto scalar = GetScalar(stream_id);
  MS_EXCEPTION_IF_NULL(scalar);
  inputs.emplace_back(scalar->kernel_tensor().get());

  auto axis = GetAxisDeviceAddress(stream_id, input->GetShapeVector().size());
  MS_EXCEPTION_IF_NULL(axis);
  inputs.emplace_back(axis->kernel_tensor().get());

  auto keepdims = GetKeepDimsDeviceAddress(stream_id);
  MS_EXCEPTION_IF_NULL(keepdims);
  inputs.emplace_back(keepdims->kernel_tensor().get());

  auto dtype = GetDtypeDeviceAddress(stream_id, kNumberTypeFloat32);
  MS_EXCEPTION_IF_NULL(dtype);
  inputs.emplace_back(dtype->kernel_tensor().get());

  return Launch(inputs, output_addr, stream_id);
}

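// CalMax/CalMin/CalMean/CalL2Norm share one pattern: a per-DeviceContext kernel is
// cached in a function-local static map, the input dtype is validated against the
// statistic's supported set, and unsupported dtypes produce a one-shot warning and
// a nullptr result. Note that try_emplace evaluates its value argument even when
// the key already exists, so a throwaway kernel object is constructed on every call.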
TensorPtr CalMax(const DeviceContext *device_context, KernelTensor *input) {
  static std::map<const DeviceContext *, std::unique_ptr<StatisticKernel>> max_kernel;
  auto result = max_kernel.try_emplace(
    device_context, std::make_unique<StatisticKernel>(device_context, ops::kNameMax, max_supported_dtype));
  auto &kernel = result.first->second;
  auto dtype = input->dtype_id();
  if (kernel->CheckDataType(dtype)) {
    return kernel->LaunchKernel(input);
  }
  const auto &device_name = device_context->device_context_key_.device_name_;
  const auto &type_name = TypeIdToString(dtype);
  WarningOnce(device_name, type_name, "max");
  return nullptr;
}

TensorPtr CalMin(const DeviceContext *device_context, KernelTensor *input) {
  static std::map<const DeviceContext *, std::unique_ptr<StatisticKernel>> min_kernel;
  auto result = min_kernel.try_emplace(
    device_context, std::make_unique<StatisticKernel>(device_context, ops::kNameMin, min_supported_dtype));
  auto &kernel = result.first->second;
  auto dtype = input->dtype_id();
  if (kernel->CheckDataType(dtype)) {
    return kernel->LaunchKernel(input);
  }
  const auto &device_name = device_context->device_context_key_.device_name_;
  const auto &type_name = TypeIdToString(dtype);
  WarningOnce(device_name, type_name, "min");
  return nullptr;
}

TensorPtr CalMean(const DeviceContext *device_context, KernelTensor *input) {
  static std::map<const DeviceContext *, std::unique_ptr<MeanStatisticKernel>> mean_kernel;
  auto result = mean_kernel.try_emplace(device_context,
                                        std::make_unique<MeanStatisticKernel>(device_context, mean_supported_dtype));
  auto &kernel = result.first->second;
  auto dtype = input->dtype_id();
  if (kernel->CheckDataType(dtype)) {
    return kernel->LaunchKernel(input);
  }
  const auto &device_name = device_context->device_context_key_.device_name_;
  const auto &type_name = TypeIdToString(dtype);
  WarningOnce(device_name, type_name, "mean");
  return nullptr;
}

TensorPtr CalL2Norm(const DeviceContext *device_context, KernelTensor *input) {
  static std::map<const DeviceContext *, std::unique_ptr<NormStatisticKernel>> norm_kernel;
  auto result = norm_kernel.try_emplace(device_context,
                                        std::make_unique<NormStatisticKernel>(device_context, norm_supported_dtype));
  auto &kernel = result.first->second;
  auto dtype = input->dtype_id();
  if (kernel->CheckDataType(dtype)) {
    return kernel->LaunchKernel(input);
  }
  const auto &device_name = device_context->device_context_key_.device_name_;
  const auto &type_name = TypeIdToString(dtype);
  WarningOnce(device_name, type_name, "norm");
  return nullptr;
}

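// Dispatches a statistic by name ("max", "min", "avg", "l2norm") to the matching
// Cal* routine, timing the call at DEBUG level; unknown names warn once and yield
// nullptr. A minimal illustrative call site (hypothetical, not from this file):
//
//   TensorPtr stat = datadump::CalStatistic(KStatL2Norm, device_context, input_tensor);
//   if (stat != nullptr) {
//     // stat now holds the scalar statistic on the host side.
//   }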
TensorPtr CalStatistic(const std::string &stat_name, const DeviceContext *device_context, KernelTensor *input) {
  static const std::map<std::string, std::function<TensorPtr(const DeviceContext *, KernelTensor *)>> func_map = {
    {KStatMax, CalMax}, {KStatMin, CalMin}, {KStatL2Norm, CalL2Norm}, {KStatMean, CalMean}};
  auto it = func_map.find(stat_name);
  if (it == func_map.end()) {
    WarningOnceCategory(stat_name);
    return nullptr;
  }
  return TimeWrapper(it->second, stat_name, device_context, input);
}

} // namespace datadump
} // namespace mindspore