/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/nn/maxpool_grad_with_argmax_v2_gpu_kernel.h"
#include <algorithm>
#include <functional>
#include <memory>
#include <numeric>
#include <utility>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/maxpool_grad_with_argmax_v2_impl.cuh"

namespace mindspore {
namespace kernel {
constexpr auto kMaxPoolGradWithArgmaxV2 = "MaxPoolGradWithArgmaxV2";
constexpr size_t kInputShapeSize = 4;
constexpr size_t kInputNum = 3;
constexpr size_t kOutputNum = 1;

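// Computes the MaxPoolGradWithArgmaxV2 backward pass: dx is zero-filled, then the incoming
// gradient dy is routed back to the positions recorded in the argmax index tensor.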
template <typename T, typename S>
bool MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel(const std::vector<KernelTensor *> &inputs,
                                                       const std::vector<KernelTensor *> &outputs) {
  if (is_null_input_) {
    return true;
  }
  T *dy_addr = GetDeviceAddress<T>(inputs, kIndex1);
  S *index_addr = GetDeviceAddress<S>(inputs, kIndex2);
  T *dx_addr = GetDeviceAddress<T>(outputs, kIndex0);
  // Zero-initialize dx so that positions not referenced by the argmax indices stay zero.
  CHECK_CUDA_RET_WITH_ERROR_NOTRACE(
    cudaMemsetAsync(dx_addr, 0, outputs[kIndex0]->size(), reinterpret_cast<cudaStream_t>(cuda_stream_)),
    "For 'MaxPoolGradWithArgmaxV2', cudaMemsetAsync failed.");
  auto status = CalMaxPoolGradWithArgmaxV2(dy_addr, index_addr, x_hw_, x_chw_, x_nchw_, dy_hw_, dy_chw_, dy_nchw_,
                                           dx_addr, device_id_, reinterpret_cast<cudaStream_t>(cuda_stream_));
  CHECK_CUDA_STATUS(status, kernel_name_);
  return true;
}

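// Verifies that the requested data-type combination is supported and binds the matching
// templated LaunchKernel instantiation.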
bool MaxPoolGradWithArgmaxV2GpuKernelMod::Init(const std::vector<KernelTensor *> &inputs,
                                               const std::vector<KernelTensor *> &outputs) {
  auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
  auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
  if (!is_match) {
    MS_LOG(ERROR) << "For '" << kernel_name_ << "', it does not support this kernel type: " << kernel_attr;
    return false;
  }
  kernel_func_ = func_list_[index].second;
  return true;
}

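// Validates the input/output counts and shapes, then caches the flattened NCHW extents
// consumed by the CUDA kernel.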
int MaxPoolGradWithArgmaxV2GpuKernelMod::Resize(const std::vector<KernelTensor *> &inputs,
                                                const std::vector<KernelTensor *> &outputs) {
  int ret = KernelMod::Resize(inputs, outputs);
  if (ret != KRET_OK) {
    return ret;
  }
  if (inputs.size() != kInputNum) {
    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of inputs should be " << kInputNum << ", but got "
                  << inputs.size();
    return KRET_RESIZE_FAILED;
  }
  if (outputs.size() != kOutputNum) {
    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of outputs should be " << kOutputNum << ", but got "
                  << outputs.size();
    return KRET_RESIZE_FAILED;
  }
  auto x_shape = inputs[kIndex0]->GetShapeVector();
  auto dy_shape = inputs[kIndex1]->GetShapeVector();
  auto index_shape = inputs[kIndex2]->GetShapeVector();
  auto dx_shape = outputs[kIndex0]->GetShapeVector();

  is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") ||
                   CHECK_SHAPE_NULL(index_shape, kernel_name_, "index") ||
                   CHECK_SHAPE_NULL(dx_shape, kernel_name_, "dx");
  if (is_null_input_) {
    return KRET_RESIZE_FAILED;
  }
  if (x_shape.size() != kInputShapeSize || dy_shape.size() != kInputShapeSize) {
    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimensions of x and dy should be equal to " << kInputShapeSize
                  << ", but got the dimension of x: " << x_shape.size()
                  << " and the dimension of dy: " << dy_shape.size();
    return KRET_RESIZE_FAILED;
  }

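  // Flatten the NCHW shapes into per-image (H*W), per-sample (C*H*W) and total (N*C*H*W)
  // element counts, as expected by the CUDA implementation.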
  const size_t offset_c = 1;
  const size_t offset_h = 2;
  x_hw_ = std::accumulate(x_shape.begin() + offset_h, x_shape.end(), int64_t(1), std::multiplies<int64_t>());
  x_chw_ = std::accumulate(x_shape.begin() + offset_c, x_shape.end(), int64_t(1), std::multiplies<int64_t>());
  x_nchw_ = std::accumulate(x_shape.begin(), x_shape.end(), int64_t(1), std::multiplies<int64_t>());
  dy_hw_ = std::accumulate(dy_shape.begin() + offset_h, dy_shape.end(), int64_t(1), std::multiplies<int64_t>());
  dy_chw_ = std::accumulate(dy_shape.begin() + offset_c, dy_shape.end(), int64_t(1), std::multiplies<int64_t>());
  dy_nchw_ = std::accumulate(dy_shape.begin(), dy_shape.end(), int64_t(1), std::multiplies<int64_t>());
  if ((dy_chw_ == 0) || (dy_hw_ == 0)) {
    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of dy is invalid.";
    return KRET_RESIZE_FAILED;
  }
  return KRET_OK;
}

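// Supported (x, dy, argmax) -> dx data-type combinations. Each KernelAttr is paired with the
// LaunchKernel instantiation for that value type and argmax index type (int32 or int64).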
std::vector<std::pair<KernelAttr, MaxPoolGradWithArgmaxV2GpuKernelMod::MaxPoolArgmaxV2GradFunc>>
  MaxPoolGradWithArgmaxV2GpuKernelMod::func_list_ = {
    {KernelAttr()
       .AddInputAttr(kNumberTypeFloat16)
       .AddInputAttr(kNumberTypeFloat16)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeFloat16),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<half, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeFloat32)
       .AddInputAttr(kNumberTypeFloat32)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeFloat32),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<float, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeFloat64)
       .AddInputAttr(kNumberTypeFloat64)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeFloat64),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<double, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt8)
       .AddInputAttr(kNumberTypeInt8)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeInt8),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int8_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt16)
       .AddInputAttr(kNumberTypeInt16)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeInt16),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int16_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt32)
       .AddInputAttr(kNumberTypeInt32)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeInt32),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int32_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt64)
       .AddInputAttr(kNumberTypeInt64)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeInt64),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int64_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt8)
       .AddInputAttr(kNumberTypeUInt8)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeUInt8),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint8_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt16)
       .AddInputAttr(kNumberTypeUInt16)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeUInt16),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint16_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt32)
       .AddInputAttr(kNumberTypeUInt32)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeUInt32),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint32_t, int32_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt64)
       .AddInputAttr(kNumberTypeUInt64)
       .AddInputAttr(kNumberTypeInt32)
       .AddOutputAttr(kNumberTypeUInt64),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint64_t, int32_t>},

    {KernelAttr()
       .AddInputAttr(kNumberTypeFloat16)
       .AddInputAttr(kNumberTypeFloat16)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeFloat16),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<half, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeFloat32)
       .AddInputAttr(kNumberTypeFloat32)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeFloat32),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<float, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeFloat64)
       .AddInputAttr(kNumberTypeFloat64)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeFloat64),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<double, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt8)
       .AddInputAttr(kNumberTypeInt8)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeInt8),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int8_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt16)
       .AddInputAttr(kNumberTypeInt16)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeInt16),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int16_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt32)
       .AddInputAttr(kNumberTypeInt32)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeInt32),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int32_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeInt64)
       .AddInputAttr(kNumberTypeInt64)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeInt64),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<int64_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt8)
       .AddInputAttr(kNumberTypeUInt8)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeUInt8),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint8_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt16)
       .AddInputAttr(kNumberTypeUInt16)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeUInt16),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint16_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt32)
       .AddInputAttr(kNumberTypeUInt32)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeUInt32),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint32_t, int64_t>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeUInt64)
       .AddInputAttr(kNumberTypeUInt64)
       .AddInputAttr(kNumberTypeInt64)
       .AddOutputAttr(kNumberTypeUInt64),
     &MaxPoolGradWithArgmaxV2GpuKernelMod::LaunchKernel<uint64_t, int64_t>},
};

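// Exposes the KernelAttr of every registered type combination for kernel selection.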
std::vector<KernelAttr> MaxPoolGradWithArgmaxV2GpuKernelMod::GetOpSupport() {
  std::vector<KernelAttr> support_list;
  (void)std::transform(func_list_.begin(), func_list_.end(), std::back_inserter(support_list),
                       [](const std::pair<KernelAttr, MaxPoolArgmaxV2GradFunc> &pair) { return pair.first; });
  return support_list;
}

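// Register this kernel with the GPU kernel factory under the MaxPoolGradWithArgmaxV2 operator name.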
MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeGpuKernelMod, MaxPoolGradWithArgmaxV2, []() {
  return std::make_shared<MaxPoolGradWithArgmaxV2GpuKernelMod>(kMaxPoolGradWithArgmaxV2);
});
}  // namespace kernel
}  // namespace mindspore