• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2024 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
#include "debug/data_dump/statistic_kernel.h"
#include <chrono>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "debug/debugger/debugger_utils.h"
#include "include/common/debug/common.h"
23 
24 namespace mindspore {
25 
26 namespace {
27 using TensorPtr = tensor::TensorPtr;
28 const std::set<TypeId> max_supported_dtype{
29   kNumberTypeBFloat16, kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeFloat64, kNumberTypeFloat,
30   kNumberTypeDouble,   kNumberTypeInt,     kNumberTypeInt8,    kNumberTypeUInt8,   kNumberTypeInt16,
31   kNumberTypeInt32,    kNumberTypeInt64,   kNumberTypeBool};
32 const std::set<TypeId> &min_supported_dtype = max_supported_dtype;
33 const std::set<TypeId> mean_supported_dtype = {
34   kNumberTypeBFloat16, kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeFloat64, kNumberTypeFloat, kNumberTypeDouble,
35   kNumberTypeInt,      kNumberTypeInt8,    kNumberTypeUInt8,   kNumberTypeInt16,   kNumberTypeInt32, kNumberTypeInt64};
36 const std::set<TypeId> &norm_supported_dtype = {kNumberTypeBFloat16, kNumberTypeFloat16, kNumberTypeFloat32};
37 
38 const char KStatMax[] = "max";
39 const char KStatMin[] = "min";
40 const char KStatMean[] = "avg";
41 const char KStatL2Norm[] = "l2norm";
42 
WarningOnce(const string & device_name,const string & type_name,const string & statistic_name)43 void WarningOnce(const string &device_name, const string &type_name, const string &statistic_name) {
44   static std::set<string> warning_once;
45   string name = device_name + type_name + statistic_name;
46   if (warning_once.find(name) != warning_once.end()) {
47     return;
48   } else {
49     warning_once.insert(name);
50     MS_LOG(WARNING) << "In the '" << device_name << "' platform, '" << type_name << "' is not supported for '"
51                     << statistic_name << "' statistic dump.";
52   }
53 }
54 
WarningOnceCategory(const string & name)55 void WarningOnceCategory(const string &name) {
56   static std::set<string> warning_once;
57   if (warning_once.find(name) != warning_once.end()) {
58     return;
59   } else {
60     warning_once.insert(name);
61     MS_LOG(WARNING) << name << " category is not support!";
62   }
63 }
64 
65 template <typename Func, typename... Args>
TimeWrapper(Func && func,const std::string & funcName,Args &&...args)66 auto TimeWrapper(Func &&func, const std::string &funcName, Args &&... args) -> decltype(auto) {
67   auto start = std::chrono::high_resolution_clock::now();
68   auto result = std::forward<Func>(func)(std::forward<Args>(args)...);
69   auto end = std::chrono::high_resolution_clock::now();
70   auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
71   auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
72   auto microseconds = duration.count() % 1000;
73   MS_LOG(DEBUG) << funcName << " took " << milliseconds.count() << " ms " << microseconds << " us";
74   return result;
75 }
76 
77 }  // namespace
78 
79 namespace datadump {
80 
GenerateDeviceAddress(const uint32_t & stream_id,const size_t & mem_size,const TypeId & dtype_id,const ShapeVector & shape,const ValuePtr & value)81 DeviceAddressPtr StatisticKernel::GenerateDeviceAddress(const uint32_t &stream_id, const size_t &mem_size,
82                                                         const TypeId &dtype_id, const ShapeVector &shape,
83                                                         const ValuePtr &value) {
84   auto addr = device_context_->device_res_manager_->AllocateMemory(mem_size, stream_id);
85   MS_EXCEPTION_IF_NULL(addr);
86 
87   auto tensor = std::make_shared<kernel::KernelTensor>(addr, mem_size, Format::DEFAULT_FORMAT, dtype_id, shape,
88                                                        device_context_->device_context_key().device_name_,
89                                                        device_context_->device_context_key().device_id_);
90   tensor->set_stream_id(stream_id);
91   tensor->SetType(std::make_shared<TensorType>(TypeIdToType(dtype_id)));
92   tensor->SetShape(std::make_shared<abstract::TensorShape>(shape));
93   if (value) {
94     tensor->SetValue(value);
95   }
96   return device_context_->device_res_manager_->CreateDeviceAddress(tensor);
97 }
98 
SyncDeviceToHostTensor(DeviceAddressPtr device_addr)99 TensorPtr StatisticKernel::SyncDeviceToHostTensor(DeviceAddressPtr device_addr) {
100   MS_EXCEPTION_IF_NULL(device_addr);
101   auto kernel_tensor = device_addr->kernel_tensor();
102   MS_EXCEPTION_IF_NULL(kernel_tensor);
103   auto dtype_id = kernel_tensor->dtype_id();
104   const auto &shape_vec = kernel_tensor->GetShapeVector();
105 
106   mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(dtype_id, shape_vec);
107   auto ret_sync = device_addr->SyncDeviceToHost(UnitSizeInBytes(dtype_id), out_tensor->data_c());
108   if (!ret_sync) {
109     MS_LOG(EXCEPTION) << "Convert format or Copy device mem to host failed";
110   }
111   return out_tensor;
112 }
113 
GetWorkSpaceDeviceAddress(const uint32_t stream_id,const vector<KernelTensor * > & inputs,const vector<KernelTensor * > & outputs)114 DeviceAddressPtr StatisticKernel::GetWorkSpaceDeviceAddress(const uint32_t stream_id,
115                                                             const vector<KernelTensor *> &inputs,
116                                                             const vector<KernelTensor *> &outputs) {
117   auto ret = kernel_mod_->Resize(inputs, outputs);
118   if (ret) {
119     MS_LOG(EXCEPTION) << "Call Resize error, error id is " << ret;
120   }
121   auto work_space = kernel_mod_->GetWorkspaceSizeList();
122   if (!work_space.empty() && work_space[0] != 0) {
123     return runtime::DeviceAddressUtils::CreateWorkspaceAddress(device_context_, stream_id, work_space[0]);
124   }
125   return nullptr;
126 }
127 
GetOutputDeviceAddress(const uint32_t stream_id,TypeId dtype_id)128 DeviceAddressPtr StatisticKernel::GetOutputDeviceAddress(const uint32_t stream_id, TypeId dtype_id) {
129   ShapeVector shape_vec = {};
130   return GenerateDeviceAddress(stream_id, UnitSizeInBytes(dtype_id), dtype_id, shape_vec);
131 }
132 
LaunchKernel(KernelTensor * input)133 TensorPtr StatisticKernel::LaunchKernel(KernelTensor *input) {
134   MS_EXCEPTION_IF_NULL(input);
135   if (input->GetShapeVector().empty()) {
136     return std::make_shared<tensor::Tensor>(input->dtype_id(), input->GetShapeVector(),
137                                             const_cast<void *>(input->GetValuePtr()),
138                                             UnitSizeInBytes(input->dtype_id()));
139   }
140   vector<KernelTensor *> inputs{input};
141   const auto stream_id = input->stream_id();
142   auto output_addr = GetOutputDeviceAddress(stream_id, input->dtype_id());
143   MS_EXCEPTION_IF_NULL(output_addr);
144   MS_EXCEPTION_IF_NULL(kernel_mod_);
145 
146   void *stream_ptr = device_context_->device_res_manager_->GetStream(stream_id);
147   MS_EXCEPTION_IF_NULL(stream_ptr);
148   auto workspace_addr = GetWorkSpaceDeviceAddress(stream_id, {input}, {output_addr->kernel_tensor().get()});
149   bool ret = false;
150   if (workspace_addr) {
151     ret = kernel_mod_->Launch(inputs, {workspace_addr->kernel_tensor().get()}, {output_addr->kernel_tensor().get()},
152                               stream_ptr);
153   } else {
154     ret = kernel_mod_->Launch(inputs, {}, {output_addr->kernel_tensor().get()}, stream_ptr);
155   }
156   if (!ret) {
157     MS_LOG(EXCEPTION) << "Launch error";
158   }
159   return SyncDeviceToHostTensor(output_addr);
160 }
161 
GetAxisDeviceAddress(const uint32_t stream_id,size_t dim)162 DeviceAddressPtr DimStatisticKernel::GetAxisDeviceAddress(const uint32_t stream_id, size_t dim) {
163   vector<int64_t> axes(dim);
164   for (size_t i = 0; i < dim; i++) {
165     axes[i] = static_cast<int64_t>(i);
166   }
167   ShapeVector axes_shape{static_cast<int64_t>(dim)};
168   size_t axisbytes = UnitSizeInBytes(kNumberTypeInt64) * dim;
169   return GenerateDeviceAddress(stream_id, axisbytes, kNumberTypeInt64, axes_shape, MakeValue(axes));
170 }
171 
GetKeepDimsDeviceAddress(const uint32_t stream_id)172 DeviceAddressPtr DimStatisticKernel::GetKeepDimsDeviceAddress(const uint32_t stream_id) {
173   ShapeVector keepdims_shape = {};
174   return GenerateDeviceAddress(stream_id, UnitSizeInBytes(kNumberTypeBool), kNumberTypeBool, keepdims_shape,
175                                MakeValue(false));
176 }
177 
GetDtypeDeviceAddress(const uint32_t stream_id,const TypeId & dtype_id)178 DeviceAddressPtr DimStatisticKernel::GetDtypeDeviceAddress(const uint32_t stream_id, const TypeId &dtype_id) {
179   ShapeVector dtype_shape_vec = {1};
180   return GenerateDeviceAddress(stream_id, UnitSizeInBytes(dtype_id), dtype_id, dtype_shape_vec);
181 }
182 
Launch(vector<KernelTensor * > inputs,DeviceAddressPtr output_addr,uint32_t stream_id)183 TensorPtr DimStatisticKernel::Launch(vector<KernelTensor *> inputs, DeviceAddressPtr output_addr, uint32_t stream_id) {
184   void *stream_ptr = device_context_->device_res_manager_->GetStream(stream_id);
185   MS_EXCEPTION_IF_NULL(stream_ptr);
186   auto workspace_addr = GetWorkSpaceDeviceAddress(stream_id, inputs, {output_addr->kernel_tensor().get()});
187   bool ret = false;
188   if (workspace_addr) {
189     ret = kernel_mod_->Launch(inputs, {workspace_addr->kernel_tensor().get()}, {output_addr->kernel_tensor().get()},
190                               stream_ptr);
191   } else {
192     ret = kernel_mod_->Launch(inputs, {}, {output_addr->kernel_tensor().get()}, stream_ptr);
193   }
194   if (!ret) {
195     MS_LOG(EXCEPTION) << kernel_name_ << " kernel launch failed";
196   }
197   return SyncDeviceToHostTensor(output_addr);
198 }
199 
LaunchKernel(KernelTensor * input)200 TensorPtr DimStatisticKernel::LaunchKernel(KernelTensor *input) {
201   MS_EXCEPTION_IF_NULL(input);
202   const auto stream_id = input->stream_id();
203   vector<KernelTensor *> inputs{input};
204   auto output_addr = GetOutputDeviceAddress(stream_id, kNumberTypeFloat32);
205   MS_EXCEPTION_IF_NULL(output_addr);
206   MS_EXCEPTION_IF_NULL(kernel_mod_);
207 
208   auto axis = GetAxisDeviceAddress(stream_id, input->GetShapeVector().size());
209   MS_EXCEPTION_IF_NULL(axis);
210   inputs.emplace_back(axis->kernel_tensor().get());
211 
212   auto keepdims = GetKeepDimsDeviceAddress(stream_id);
213   inputs.emplace_back(keepdims->kernel_tensor().get());
214 
215   auto dtype = GetDtypeDeviceAddress(stream_id, kNumberTypeFloat32);
216   inputs.emplace_back(dtype->kernel_tensor().get());
217 
218   return Launch(inputs, output_addr, stream_id);
219 }
220 
GetScalar(const uint32_t stream_id,float scalar)221 DeviceAddressPtr NormStatisticKernel::GetScalar(const uint32_t stream_id, float scalar) {
222   ShapeVector axes_shape{};
223   size_t axisbytes = UnitSizeInBytes(kNumberTypeFloat32);
224   return GenerateDeviceAddress(stream_id, axisbytes, kNumberTypeFloat32, axes_shape, MakeValue(scalar));
225 }
226 
LaunchKernel(KernelTensor * input)227 TensorPtr NormStatisticKernel::LaunchKernel(KernelTensor *input) {
228   MS_EXCEPTION_IF_NULL(input);
229   if (input->GetShapeVector().empty()) {
230     return std::make_shared<tensor::Tensor>(input->dtype_id(), input->GetShapeVector(),
231                                             const_cast<void *>(input->GetValuePtr()),
232                                             UnitSizeInBytes(input->dtype_id()));
233   }
234   const auto stream_id = input->stream_id();
235   vector<KernelTensor *> inputs{input};
236   auto output_addr = GetOutputDeviceAddress(stream_id, kNumberTypeFloat32);
237   MS_EXCEPTION_IF_NULL(output_addr);
238   MS_EXCEPTION_IF_NULL(kernel_mod_);
239 
240   auto scalar = GetScalar(stream_id);
241   MS_EXCEPTION_IF_NULL(scalar);
242   inputs.emplace_back(scalar->kernel_tensor().get());
243 
244   auto axis = GetAxisDeviceAddress(stream_id, input->GetShapeVector().size());
245   MS_EXCEPTION_IF_NULL(axis);
246   inputs.emplace_back(axis->kernel_tensor().get());
247 
248   auto keepdims = GetKeepDimsDeviceAddress(stream_id);
249   inputs.emplace_back(keepdims->kernel_tensor().get());
250 
251   auto dtype = GetDtypeDeviceAddress(stream_id, kNumberTypeFloat32);
252   inputs.emplace_back(dtype->kernel_tensor().get());
253 
254   return Launch(inputs, output_addr, stream_id);
255 }
256 
CalMax(const DeviceContext * device_context,KernelTensor * input)257 TensorPtr CalMax(const DeviceContext *device_context, KernelTensor *input) {
258   static std::map<const DeviceContext *, std::unique_ptr<StatisticKernel>> max_kernel;
259   auto result = max_kernel.try_emplace(
260     device_context, std::make_unique<StatisticKernel>(device_context, ops::kNameMax, max_supported_dtype));
261   auto &kernel = result.first->second;
262   auto dtype = input->dtype_id();
263   if (kernel->CheckDataType(dtype)) {
264     return kernel->LaunchKernel(input);
265   } else {
266     const auto &device_name = device_context->device_context_key_.device_name_;
267     const auto &type_name = TypeIdToString(dtype);
268     WarningOnce(device_name, type_name, "max");
269     return nullptr;
270   }
271 }
272 
CalMin(const DeviceContext * device_context,KernelTensor * input)273 TensorPtr CalMin(const DeviceContext *device_context, KernelTensor *input) {
274   static std::map<const DeviceContext *, std::unique_ptr<StatisticKernel>> min_kernel;
275   auto result = min_kernel.try_emplace(
276     device_context, std::make_unique<StatisticKernel>(device_context, ops::kNameMin, min_supported_dtype));
277   auto &kernel = result.first->second;
278   auto dtype = input->dtype_id();
279   if (kernel->CheckDataType(dtype)) {
280     return kernel->LaunchKernel(input);
281   } else {
282     const auto &device_name = device_context->device_context_key_.device_name_;
283     const auto &type_name = TypeIdToString(dtype);
284     WarningOnce(device_name, type_name, "min");
285     return nullptr;
286   }
287 }
288 
CalMean(const DeviceContext * device_context,KernelTensor * input)289 TensorPtr CalMean(const DeviceContext *device_context, KernelTensor *input) {
290   static std::map<const DeviceContext *, std::unique_ptr<MeanStatisticKernel>> mean_kernel;
291   auto result = mean_kernel.try_emplace(device_context,
292                                         std::make_unique<MeanStatisticKernel>(device_context, mean_supported_dtype));
293   auto &kernel = result.first->second;
294   auto dtype = input->dtype_id();
295   if (kernel->CheckDataType(dtype)) {
296     return kernel->LaunchKernel(input);
297   } else {
298     const auto &device_name = device_context->device_context_key_.device_name_;
299     const auto &type_name = TypeIdToString(dtype);
300     WarningOnce(device_name, type_name, "mean");
301     return nullptr;
302   }
303 }
304 
CalL2Norm(const DeviceContext * device_context,KernelTensor * input)305 TensorPtr CalL2Norm(const DeviceContext *device_context, KernelTensor *input) {
306   static std::map<const DeviceContext *, std::unique_ptr<NormStatisticKernel>> norm_kernel;
307   auto result = norm_kernel.try_emplace(device_context,
308                                         std::make_unique<NormStatisticKernel>(device_context, norm_supported_dtype));
309   auto &kernel = result.first->second;
310   auto dtype = input->dtype_id();
311   if (kernel->CheckDataType(dtype)) {
312     return kernel->LaunchKernel(input);
313   } else {
314     const auto &device_name = device_context->device_context_key_.device_name_;
315     const auto &type_name = TypeIdToString(dtype);
316     WarningOnce(device_name, type_name, "norm");
317     return nullptr;
318   }
319 }
320 
CalStatistic(const std::string & stat_name,const DeviceContext * device_context,KernelTensor * input)321 TensorPtr CalStatistic(const std::string &stat_name, const DeviceContext *device_context, KernelTensor *input) {
322   static const std::map<std::string, std::function<TensorPtr(const DeviceContext *, KernelTensor *)>> func_map = {
323     {KStatMax, CalMax}, {KStatMin, CalMin}, {KStatL2Norm, CalL2Norm}, {KStatMean, CalMean}};
324   auto it = func_map.find(stat_name);
325   if (it == func_map.end()) {
326     WarningOnceCategory(stat_name);
327     return nullptr;
328   }
329   return TimeWrapper(it->second, stat_name, device_context, input);
330 }
331 
332 }  // namespace datadump
333 }  // namespace mindspore
334