• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "plugin/device/gpu/kernel/nn/maxpool_grad_with_argmax_v2_gpu_kernel.h"
18 #include <functional>
19 #include <memory>
20 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/maxpool_grad_with_argmax_v2_impl.cuh"
21 
22 namespace mindspore {
23 namespace kernel {
// Operator name handed to the kernel mod constructor by the factory registration in this file.
constexpr const char *kMaxPoolGradWithArgmaxV2 = "MaxPoolGradWithArgmaxV2";
// Rank that Resize() requires of both the x and dy shapes.
constexpr size_t kInputShapeSize{4};
// Number of input tensors Resize() expects (read as x, dy and the argmax index).
constexpr size_t kInputNum{3};
// Number of output tensors Resize() expects (dx).
constexpr size_t kOutputNum{1};
28 
29 template <typename T, typename S>
LaunchKernel(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)30 bool MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel(const std::vector<KernelTensor *> &inputs,
31                                                        const std::vector<KernelTensor *> &outputs) {
32   if (is_null_input_) {
33     return true;
34   }
35   T *dy_addr = GetDeviceAddress<T>(inputs, kIndex1);
36   S *index_addr = GetDeviceAddress<S>(inputs, kIndex2);
37   T *dx_addr = GetDeviceAddress<T>(outputs, kIndex0);
38   CHECK_CUDA_RET_WITH_ERROR_NOTRACE(
39     cudaMemsetAsync(dx_addr, 0, outputs[kIndex0]->size(), reinterpret_cast<cudaStream_t>(cuda_stream_)),
40     "For 'MaxPoolWithArgmaxGradV2' failed to cudaMemsetAsync");
41   auto status = CalMaxPoolGradWithArgmaxV2(dy_addr, index_addr, x_hw_, x_chw_, x_nchw_, dy_hw_, dy_chw_, dy_nchw_,
42                                            dx_addr, device_id_, reinterpret_cast<cudaStream_t>(cuda_stream_));
43   CHECK_CUDA_STATUS(status, kernel_name_);
44   return true;
45 }
46 
Init(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)47 bool MaxPoolGradWithArgmaxV2GpuKernelMod::Init(const std::vector<KernelTensor *> &inputs,
48                                                const std::vector<KernelTensor *> &outputs) {
49   auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
50   auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
51   if (!is_match) {
52     MS_LOG(ERROR) << "For '" << kernel_name_ << "' it does not support this kernel type: " << kernel_attr;
53     return false;
54   }
55   kernel_func_ = func_list_[index].second;
56   return true;
57 }
58 
Resize(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)59 int MaxPoolGradWithArgmaxV2GpuKernelMod::Resize(const std::vector<KernelTensor *> &inputs,
60                                                 const std::vector<KernelTensor *> &outputs) {
61   int ret = KernelMod::Resize(inputs, outputs);
62   if (ret != KRET_OK) {
63     return ret;
64   }
65   if (inputs.size() != kInputNum) {
66     MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of inputs should be " << kInputNum << ", but got "
67                   << inputs.size();
68     return KRET_RESIZE_FAILED;
69   }
70   if (outputs.size() != kOutputNum) {
71     MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of outputs should be " << kOutputNum << ", but got "
72                   << outputs.size();
73     return KRET_RESIZE_FAILED;
74   }
75   auto x_shape = inputs[kIndex0]->GetShapeVector();
76   auto dy_shape = inputs[kIndex1]->GetShapeVector();
77   auto index_shape = inputs[kIndex2]->GetShapeVector();
78   auto dx_shape = outputs[kIndex0]->GetShapeVector();
79 
80   is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") ||
81                    CHECK_SHAPE_NULL(index_shape, kernel_name_, "index") ||
82                    CHECK_SHAPE_NULL(dx_shape, kernel_name_, "dx");
83   if (is_null_input_) {
84     return KRET_RESIZE_FAILED;
85   }
86   if (x_shape.size() != kInputShapeSize || dy_shape.size() != kInputShapeSize) {
87     MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of x and dy should be equal to " << kInputShapeSize
88                   << ", but got the dimension of x: " << x_shape.size() << ", the dimension of dy: " << dy_shape.size();
89     return KRET_RESIZE_FAILED;
90   }
91 
92   const size_t offset_c = 1;
93   const size_t offset_h = 2;
94   x_hw_ = std::accumulate(x_shape.begin() + offset_h, x_shape.end(), 1, std::multiplies<int64_t>());
95   x_chw_ = std::accumulate(x_shape.begin() + offset_c, x_shape.end(), 1, std::multiplies<int64_t>());
96   x_nchw_ = std::accumulate(x_shape.begin(), x_shape.end(), 1, std::multiplies<int64_t>());
97   dy_hw_ = std::accumulate(dy_shape.begin() + offset_h, dy_shape.end(), 1, std::multiplies<int64_t>());
98   dy_chw_ = std::accumulate(dy_shape.begin() + offset_c, dy_shape.end(), 1, std::multiplies<int64_t>());
99   dy_nchw_ = std::accumulate(dy_shape.begin(), dy_shape.end(), 1, std::multiplies<int64_t>());
100   if ((dy_chw_ == 0) || (dy_hw_ == 0)) {
101     MS_LOG(ERROR) << "The shape of input_grads is invalid.";
102     return KRET_RESIZE_FAILED;
103   }
104   return KRET_OK;
105 }
106 
107 std::vector<std::pair<KernelAttr, MaxPoolGradWithArgmaxV2GpuKernelMod::MaxPoolArgmaxV2GradFunc>>
108   MaxPoolGradWithArgmaxV2GpuKernelMod::func_list_ = {
109     {KernelAttr()
110        .AddInputAttr(kNumberTypeFloat16)
111        .AddInputAttr(kNumberTypeFloat16)
112        .AddInputAttr(kNumberTypeInt32)
113        .AddOutputAttr(kNumberTypeFloat16),
114      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<half, int32_t>},
115     {KernelAttr()
116        .AddInputAttr(kNumberTypeFloat32)
117        .AddInputAttr(kNumberTypeFloat32)
118        .AddInputAttr(kNumberTypeInt32)
119        .AddOutputAttr(kNumberTypeFloat32),
120      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<float, int32_t>},
121     {KernelAttr()
122        .AddInputAttr(kNumberTypeFloat64)
123        .AddInputAttr(kNumberTypeFloat64)
124        .AddInputAttr(kNumberTypeInt32)
125        .AddOutputAttr(kNumberTypeFloat64),
126      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<double, int32_t>},
127     {KernelAttr()
128        .AddInputAttr(kNumberTypeInt8)
129        .AddInputAttr(kNumberTypeInt8)
130        .AddInputAttr(kNumberTypeInt32)
131        .AddOutputAttr(kNumberTypeInt8),
132      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int8_t, int32_t>},
133     {KernelAttr()
134        .AddInputAttr(kNumberTypeInt16)
135        .AddInputAttr(kNumberTypeInt16)
136        .AddInputAttr(kNumberTypeInt32)
137        .AddOutputAttr(kNumberTypeInt16),
138      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int16_t, int32_t>},
139     {KernelAttr()
140        .AddInputAttr(kNumberTypeInt32)
141        .AddInputAttr(kNumberTypeInt32)
142        .AddInputAttr(kNumberTypeInt32)
143        .AddOutputAttr(kNumberTypeInt32),
144      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int32_t, int32_t>},
145     {KernelAttr()
146        .AddInputAttr(kNumberTypeInt64)
147        .AddInputAttr(kNumberTypeInt64)
148        .AddInputAttr(kNumberTypeInt32)
149        .AddOutputAttr(kNumberTypeInt64),
150      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int64_t, int32_t>},
151     {KernelAttr()
152        .AddInputAttr(kNumberTypeUInt8)
153        .AddInputAttr(kNumberTypeUInt8)
154        .AddInputAttr(kNumberTypeInt32)
155        .AddOutputAttr(kNumberTypeUInt8),
156      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint8_t, int32_t>},
157     {KernelAttr()
158        .AddInputAttr(kNumberTypeUInt16)
159        .AddInputAttr(kNumberTypeUInt16)
160        .AddInputAttr(kNumberTypeInt32)
161        .AddOutputAttr(kNumberTypeUInt16),
162      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint16_t, int32_t>},
163     {KernelAttr()
164        .AddInputAttr(kNumberTypeUInt32)
165        .AddInputAttr(kNumberTypeUInt32)
166        .AddInputAttr(kNumberTypeInt32)
167        .AddOutputAttr(kNumberTypeUInt32),
168      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint32_t, int32_t>},
169     {KernelAttr()
170        .AddInputAttr(kNumberTypeUInt64)
171        .AddInputAttr(kNumberTypeUInt64)
172        .AddInputAttr(kNumberTypeInt32)
173        .AddOutputAttr(kNumberTypeUInt64),
174      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint64_t, int32_t>},
175 
176     {KernelAttr()
177        .AddInputAttr(kNumberTypeFloat16)
178        .AddInputAttr(kNumberTypeFloat16)
179        .AddInputAttr(kNumberTypeInt64)
180        .AddOutputAttr(kNumberTypeFloat16),
181      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<half, int64_t>},
182     {KernelAttr()
183        .AddInputAttr(kNumberTypeFloat32)
184        .AddInputAttr(kNumberTypeFloat32)
185        .AddInputAttr(kNumberTypeInt64)
186        .AddOutputAttr(kNumberTypeFloat32),
187      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<float, int64_t>},
188     {KernelAttr()
189        .AddInputAttr(kNumberTypeFloat64)
190        .AddInputAttr(kNumberTypeFloat64)
191        .AddInputAttr(kNumberTypeInt64)
192        .AddOutputAttr(kNumberTypeFloat64),
193      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<double, int64_t>},
194     {KernelAttr()
195        .AddInputAttr(kNumberTypeInt8)
196        .AddInputAttr(kNumberTypeInt8)
197        .AddInputAttr(kNumberTypeInt64)
198        .AddOutputAttr(kNumberTypeInt8),
199      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int8_t, int64_t>},
200     {KernelAttr()
201        .AddInputAttr(kNumberTypeInt16)
202        .AddInputAttr(kNumberTypeInt16)
203        .AddInputAttr(kNumberTypeInt64)
204        .AddOutputAttr(kNumberTypeInt16),
205      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int16_t, int64_t>},
206     {KernelAttr()
207        .AddInputAttr(kNumberTypeInt32)
208        .AddInputAttr(kNumberTypeInt32)
209        .AddInputAttr(kNumberTypeInt64)
210        .AddOutputAttr(kNumberTypeInt32),
211      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int32_t, int64_t>},
212     {KernelAttr()
213        .AddInputAttr(kNumberTypeInt64)
214        .AddInputAttr(kNumberTypeInt64)
215        .AddInputAttr(kNumberTypeInt64)
216        .AddOutputAttr(kNumberTypeInt64),
217      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int64_t, int64_t>},
218     {KernelAttr()
219        .AddInputAttr(kNumberTypeUInt8)
220        .AddInputAttr(kNumberTypeUInt8)
221        .AddInputAttr(kNumberTypeInt64)
222        .AddOutputAttr(kNumberTypeUInt8),
223      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint8_t, int64_t>},
224     {KernelAttr()
225        .AddInputAttr(kNumberTypeUInt16)
226        .AddInputAttr(kNumberTypeUInt16)
227        .AddInputAttr(kNumberTypeInt64)
228        .AddOutputAttr(kNumberTypeUInt16),
229      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint16_t, int64_t>},
230     {KernelAttr()
231        .AddInputAttr(kNumberTypeUInt32)
232        .AddInputAttr(kNumberTypeUInt32)
233        .AddInputAttr(kNumberTypeInt64)
234        .AddOutputAttr(kNumberTypeUInt32),
235      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint32_t, int64_t>},
236     {KernelAttr()
237        .AddInputAttr(kNumberTypeUInt64)
238        .AddInputAttr(kNumberTypeUInt64)
239        .AddInputAttr(kNumberTypeInt64)
240        .AddOutputAttr(kNumberTypeUInt64),
241      &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint64_t, int64_t>},
242 };
243 
GetOpSupport()244 std::vector<KernelAttr> MaxPoolGradWithArgmaxV2GpuKernelMod::GetOpSupport() {
245   std::vector<KernelAttr> support_list;
246   (void)std::transform(func_list_.begin(), func_list_.end(), std::back_inserter(support_list),
247                        [](const std::pair<KernelAttr, MaxPoolArgmaxV2GradFunc> &pair) { return pair.first; });
248   return support_list;
249 }
250 
// Factory registration for this kernel: the creator lambda builds a MaxPoolGradWithArgmaxV2GpuKernelMod
// constructed with the operator-name constant kMaxPoolGradWithArgmaxV2.
// NOTE(review): presumably the macro registers the creator under the name MaxPoolGradWithArgmaxV2 in the
// NativeGpuKernelMod factory - confirm against the macro definition.
MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeGpuKernelMod, MaxPoolGradWithArgmaxV2, []() {
  return std::make_shared<MaxPoolGradWithArgmaxV2GpuKernelMod>(kMaxPoolGradWithArgmaxV2);
});
254 }  // namespace kernel
255 }  // namespace mindspore
256