1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <cmath>
#include "src/runtime/kernel/arm/fp32/pad_fp32.h"
#include "src/kernel_registry.h"
#include "schema/model_generated.h"
20
21 using mindspore::kernel::KERNEL_ARCH;
22 using mindspore::lite::KernelRegistrar;
23 using mindspore::lite::RET_ERROR;
24 using mindspore::lite::RET_NULL_PTR;
25 using mindspore::lite::RET_OK;
26 using mindspore::schema::PrimitiveType_PadFusion;
27
28 namespace mindspore::kernel {
namespace {
// Minimum input count for REFLECT/SYMMETRIC mode: data tensor + paddings tensor.
constexpr size_t kMirrorPadInputSize = 2;
// Minimum input count for CONSTANT mode: data tensor + paddings tensor
// (an optional third input carries the constant fill value).
constexpr size_t kPadCommonInputSize = 2;
}  // namespace
// Validates tensor counts/pointers; defers shape-dependent setup to ReSize().
int PadCPUKernel::Init() {
  // 2 inputs (data + paddings) or 3 (data + paddings + constant value); exactly 1 output.
  MS_CHECK_TRUE_RET(in_tensors_.size() == kPadCommonInputSize || in_tensors_.size() == kInputSize2, RET_ERROR);
  MS_CHECK_TRUE_RET(out_tensors_.size() == 1, RET_ERROR);
  CHECK_NULL_RETURN(in_tensors_[0]);
  CHECK_NULL_RETURN(in_tensors_[1]);
  CHECK_NULL_RETURN(out_tensors_[0]);
  if (!InferShapeDone()) {
    // Shapes unknown yet; ReSize() will run later once inference completes.
    return RET_OK;
  }
  return ReSize();
}
44
// Extends input/output shapes and paddings to the fixed DEFAULT_PAD_NDIMS rank
// so the NNACL Pad kernels can operate on a uniform dimensionality.
int PadCPUKernel::ReSize() {
  auto input = in_tensors_.at(0);
  auto rank = input->shape().size();
  if (rank > DEFAULT_PAD_NDIMS) {
    MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank;
    return RET_ERROR;
  }
  auto output = out_tensors_.at(0);
  if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
    // Left-pad shapes with 1s up to DEFAULT_PAD_NDIMS.
    auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input->shape().data(), rank);
    if (ret != RET_OK) {
      return ret;
    }
    ret = ExtendShape(out_, DEFAULT_PAD_NDIMS, output->shape().data(), rank);
    if (ret != RET_OK) {
      return ret;
    }
    if (pad_param_->padding_length < MAX_PAD_SIZE) {
      // Copy the current paddings aside, then rewrite paddings_ in place,
      // left-padded with zeros to the full MAX_PAD_SIZE length.
      int ori_paddings[MAX_PAD_SIZE];
      for (auto i = 0; i < pad_param_->padding_length; ++i) {
        ori_paddings[i] = pad_param_->paddings_[i];
      }
      ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, ori_paddings, pad_param_->padding_length);
      if (ret != RET_OK) {
        return ret;
      }
      pad_param_->padding_length = MAX_PAD_SIZE;
    }
  }
  return RET_OK;
}
76
// Precomputes the list of output regions ("blocks") that need mirror-padding.
// Each MirrorPadBlock describes one padded region (sizes, output strides and
// base offset) so RunMirrorPadImpl can fill regions in parallel without
// recomputing coordinates per element.
void PadCPUKernel::InitMirrorPadBlock() {
  mirror_pad_block_.clear();
  // Left (leading) pad amount per dimension: paddings_ is laid out as
  // [left_0, right_0, left_1, right_1, ...].
  std::vector<int> left_pads(DEFAULT_PAD_NDIMS);
  for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
    left_pads[i] = pad_param_->paddings_[2 * i];
  }
  std::vector<int> input_separate_dims;
  std::vector<int> output_separate_dims;
  std::vector<int> separate_offset;
  /* init separate dims */
  int cur_input = 1;
  int cur_output = 1;
  for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
    // NOTE(review): cur_input/cur_output are reset to 1 each iteration and never
    // multiplied, so the `cur_input > 1` branch and the post-loop flush below
    // appear unreachable — looks like a remnant of a dim-merging optimization;
    // confirm against history before removing.
    if (cur_input > 1) {
      input_separate_dims.emplace_back(cur_input);
      output_separate_dims.emplace_back(cur_output);
      separate_offset.emplace_back(0);
    }
    input_separate_dims.emplace_back(in_[i]);
    output_separate_dims.emplace_back(out_[i]);
    separate_offset.emplace_back(left_pads[i]);
    cur_input = 1;
    cur_output = 1;
  }
  if (cur_input != 1 || cur_output != 1) {
    input_separate_dims.emplace_back(cur_input);
    output_separate_dims.emplace_back(cur_output);
    separate_offset.emplace_back(0);
  }
  /* init separate stride */
  std::vector<int> output_separate_stride;
  output_separate_stride.resize(output_separate_dims.size());
  GetStride(output_separate_stride.data(), output_separate_dims.data(), output_separate_dims.size());
  /* init separate stride */
  // remain_stride is intentionally empty here, so remain_size collapses to the
  // total element count of output_separate_dims (GetStride's return value).
  std::vector<int> remain_stride(0);
  int remain_size = GetStride(remain_stride.data(), output_separate_dims.data(), remain_stride.size());
  // Right (trailing) pad per separated dim: out = left + in + right.
  std::vector<int> right_pads(separate_offset.size());
  for (size_t i = 0; i < right_pads.size(); ++i) {
    right_pads[i] = output_separate_dims[i] - input_separate_dims[i] - separate_offset[i];
  }
  /* init pad region */
  // Per dim, count how many of {center, left-pad, right-pad} regions exist.
  std::vector<int> pad_region;
  for (size_t i = remain_stride.size(); i < output_separate_stride.size(); ++i) {
    // 0: center, 1: left, 2: right
    int r = 1;
    if (separate_offset[i] > 0) {
      r++;
    }
    if (right_pads[i] > 0) {
      r++;
    }
    pad_region.emplace_back(r);
  }
  std::vector<int> pad_region_stride(pad_region.size());
  // region_size = product of pad_region = number of region combinations.
  int region_size = GetStride(pad_region_stride.data(), pad_region.data(), pad_region.size());
  int remain_dim_offset = static_cast<int>(remain_stride.size());
  std::vector<int> pad_cord(pad_region.size());
  for (int pos = 0; pos < remain_size; ++pos) {
    const int dst_basic_offset = 0;
    // index 0 is the all-center combination (copied by Pad separately), so
    // enumeration starts at 1.
    for (int index = 1; index < region_size; ++index) {
      int dst_offset = dst_basic_offset;
      int value = index;
      // Decompose the flat region index into per-dim region coordinates.
      for (size_t i = 0; i < pad_region.size() && pad_region_stride[i] != 0; ++i) {
        // NOTE(review): the loop condition already excludes zero strides, so
        // this check looks redundantly defensive — confirm before removing.
        NNACL_CHECK_ZERO_RETURN(pad_region_stride[i]);
        pad_cord[i] = value / pad_region_stride[i];
        value = value % pad_region_stride[i];
      }
      MirrorPadBlock block;
      const int size_offset = DEFAULT_PAD_NDIMS - static_cast<int>(pad_region.size());
      for (size_t i = 0; i < pad_region.size(); ++i) {
        int di = size_offset + i;
        int si = remain_dim_offset + i;
        if (di >= DEFAULT_PAD_NDIMS) {
          continue;
        }
        switch (pad_cord[i]) {
          case 0:
            // Center region in this dim: skip the left pad, span the input.
            dst_offset += separate_offset[si] * output_separate_stride[si];
            block.size_[di] = input_separate_dims[si];
            block.out_stride_[di] = output_separate_stride[si];
            break;
          case 2:
            // Right-pad region: starts after left pad + input.
            dst_offset += (separate_offset[si] + input_separate_dims[si]) * output_separate_stride[si];
            block.size_[di] = right_pads[si];
            block.out_stride_[di] = output_separate_stride[si];
            break;
          case 1:
            // Region 1 is the left pad if one exists; otherwise (no left pad)
            // it denotes the right-pad region instead.
            if (separate_offset[si] > 0) {
              block.size_[di] = separate_offset[si];
              block.out_stride_[di] = output_separate_stride[si];
            } else {
              dst_offset += (separate_offset[si] + input_separate_dims[si]) * output_separate_stride[si];
              block.size_[di] = right_pads[si];
              block.out_stride_[di] = output_separate_stride[si];
            }
            break;
          default:
            break;
        }
      }
      block.out_offset_ = dst_offset;
      mirror_pad_block_.push_back(std::move(block));
    }
  }
}
182
ExtendShape(int * shape,int length,const int * ori_shape,int rank) const183 int PadCPUKernel::ExtendShape(int *shape, int length, const int *ori_shape, int rank) const {
184 if (shape == nullptr || ori_shape == nullptr) {
185 return RET_NULL_PTR;
186 }
187 for (auto i = 0; i < length - rank; ++i) {
188 shape[i] = 1;
189 }
190 for (auto i = length - rank; i < length; ++i) {
191 shape[i] = ori_shape[i - (length - rank)];
192 }
193 return RET_OK;
194 }
195
ExtendPaddings(int * paddings,int length,const int * ori_paddings,int ori_length) const196 int PadCPUKernel::ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length) const {
197 if (paddings == nullptr || ori_paddings == nullptr) {
198 return RET_NULL_PTR;
199 }
200 for (auto i = 0; i < length - ori_length; ++i) {
201 paddings[i] = 0;
202 }
203 for (auto i = length - ori_length; i < length; ++i) {
204 paddings[i] = ori_paddings[i - (length - ori_length)];
205 }
206 return RET_OK;
207 }
208
PadImpl(void * cdata,int task_id,float lhs_scale,float rhs_scale)209 int PadImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
210 auto padKernel = reinterpret_cast<PadCPUKernel *>(cdata);
211 int error_code = padKernel->RunImpl(task_id);
212 if (error_code != NNACL_OK) {
213 MS_LOG(ERROR) << "Pad Run error task_id[" << task_id << "] error_code[" << error_code << "]";
214 return RET_ERROR;
215 }
216 return RET_OK;
217 }
218
RunImpl(int task_id)219 int PadCPUKernel::RunImpl(int task_id) {
220 auto input = in_tensors_.at(0);
221 auto output = out_tensors_.at(0);
222 auto input_data = reinterpret_cast<float *>(input->data());
223 auto output_data = reinterpret_cast<float *>(output->data());
224 CHECK_NULL_RETURN(input_data);
225 CHECK_NULL_RETURN(output_data);
226 Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_);
227
228 return RET_OK;
229 }
230
MirrorPadImpl(void * cdata,int task_id,float lhs_scale,float rhs_scale)231 int MirrorPadImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
232 auto padKernel = reinterpret_cast<PadCPUKernel *>(cdata);
233 int error_code = padKernel->RunMirrorPadImpl(task_id);
234 if (error_code != NNACL_OK) {
235 MS_LOG(ERROR) << "Pad Run error task_id[" << task_id << "] error_code[" << error_code << "]";
236 return RET_ERROR;
237 }
238 return RET_OK;
239 }
240
// Executes REFLECT/SYMMETRIC padding for one parallel task slice. Uses the
// precomputed block list (fast path) when InitMirrorPadBlock produced one,
// otherwise falls back to element-range MirrorPad over the whole output.
int PadCPUKernel::RunMirrorPadImpl(int task_id) {
  auto input = in_tensors_.at(0);
  auto output = out_tensors_.at(0);
  auto input_data = reinterpret_cast<float *>(input->data());
  CHECK_NULL_RETURN(input_data);
  auto output_data = reinterpret_cast<float *>(output->data());
  CHECK_NULL_RETURN(output_data);
  /* Fast Mirror pad */
  if (mirror_pad_block_.size() != 0) {
    /* copy center part */
    Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_);

    /* calculate region part */
    // Blocks are distributed round-robin across threads by task_id.
    for (size_t i = task_id; i < mirror_pad_block_.size(); i += static_cast<size_t>(op_parameter_->thread_num_)) {
      auto block = mirror_pad_block_[i];

      // Walk the first 5 dims of the block; the innermost contiguous run of
      // size_[5] elements is handed to MirrorPad as an [index, index+size) range.
      for (int a = 0; a < block.size_[0]; a++) {
        int out_a_index = block.out_offset_ + a * block.out_stride_[0];
        for (int b = 0; b < block.size_[1]; b++) {
          int out_b_index = out_a_index + b * block.out_stride_[1];
          for (int c = 0; c < block.size_[2]; ++c) {
            int out_c_index = out_b_index + c * block.out_stride_[2];
            for (int d = 0; d < block.size_[3]; ++d) {
              int out_d_index = out_c_index + d * block.out_stride_[3];
              for (int e = 0; e < block.size_[4]; ++e) {
                int output_index = out_d_index + e * block.out_stride_[4];
                MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]);
              }
            }
          }
        }
      }
    }
    return RET_OK;
  }

  /* Common Mirror pad */
  // Slow path: split the flat output range evenly across threads.
  MS_CHECK_FALSE_MSG(op_parameter_->thread_num_ == 0, RET_ERROR, "div zero");
  int unit = UP_DIV(output->ElementsNum(), op_parameter_->thread_num_);
  int begin = unit * task_id;
  int end = MSMIN(begin + unit, output->ElementsNum());
  MirrorPad(input_data, output_data, in_, pad_param_, begin, end);
  return RET_OK;
}
285
CheckPaddings(const int * paddings,int length,const int * input_shape,int mode)286 int PadCPUKernel::CheckPaddings(const int *paddings, int length, const int *input_shape, int mode) {
287 if (paddings == nullptr || input_shape == nullptr) {
288 return RET_NULL_PTR;
289 }
290 std::string prefix;
291 int offset;
292 if (mode == static_cast<int>(schema::PaddingMode_SYMMETRIC)) {
293 prefix = "For Pad SYMMETRIC ";
294 offset = 0;
295 } else {
296 prefix = "For Pad REFLECT ";
297 offset = 1;
298 }
299 for (auto i = 0; i < length; ++i) {
300 int max_valid = input_shape[i] - offset;
301 if (paddings[i * 2] > max_valid) {
302 MS_LOG(WARNING) << prefix << "paddings " << paddings[i * 2] << " should be less than " << max_valid + 1;
303 MS_LOG(WARNING) << "Running mirror pad with padding bigger than shape.";
304 }
305 if (paddings[i * 2 + 1] > max_valid) {
306 MS_LOG(WARNING) << prefix << "paddings " << paddings[i * 2 + 1] << " should be less than " << max_valid + 1;
307 MS_LOG(WARNING) << "Running mirror pad with padding bigger than shape.";
308 }
309 }
310 return RET_OK;
311 }
312
CopyPaddingFromInput()313 int PadCPUKernel::CopyPaddingFromInput() {
314 if (in_tensors_.size() < kMirrorPadInputSize) {
315 MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need at least 2 inputs, got " << in_tensors_.size();
316 return RET_ERROR;
317 }
318 auto padding_tensor = in_tensors_.at(1);
319 auto paddings = reinterpret_cast<int *>(padding_tensor->data());
320 CHECK_NULL_RETURN(paddings);
321 auto input_shape = in_tensors_.at(0)->shape();
322 int rank = static_cast<int>(input_shape.size());
323 if (padding_tensor->ElementsNum() != rank * 2) {
324 MS_LOG(ERROR) << "Pad second input elements num" << padding_tensor->ElementsNum() << ", should be " << rank * 2;
325 return RET_ERROR;
326 }
327
328 auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank);
329 if (ret != RET_OK) {
330 return ret;
331 }
332 ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, paddings, padding_tensor->ElementsNum());
333 if (ret != RET_OK) {
334 return ret;
335 }
336 pad_param_->padding_length = MAX_PAD_SIZE;
337 return RET_OK;
338 }
339
CalculateStrides()340 void PadCPUKernel::CalculateStrides() {
341 pad_param_->in_strides[DEFAULT_PAD_NDIMS - 1] = 1;
342 for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) {
343 pad_param_->in_strides[i] = in_[i + 1] * pad_param_->in_strides[i + 1];
344 }
345 for (auto i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
346 out_[i] = in_[i] + pad_param_->paddings_[i * 2] + pad_param_->paddings_[i * 2 + 1];
347 }
348 pad_param_->out_strides[DEFAULT_PAD_NDIMS - 1] = 1;
349 for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) {
350 pad_param_->out_strides[i] = out_[i + 1] * pad_param_->out_strides[i + 1];
351 }
352 }
353
HandleMirrorPad()354 int PadCPUKernel::HandleMirrorPad() {
355 int ret;
356 if (in_tensors_.size() == 1) {
357 auto input_shape = in_tensors_.at(0)->shape();
358 int rank = static_cast<int>(input_shape.size());
359 ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank);
360 if (ret != RET_OK) {
361 return ret;
362 }
363 } else {
364 ret = CopyPaddingFromInput();
365 if (ret != RET_OK) {
366 return ret;
367 }
368 }
369 ret = CheckPaddings(pad_param_->paddings_, DEFAULT_PAD_NDIMS, in_, pad_param_->pad_mode_);
370 if (ret != RET_OK) {
371 return ret;
372 }
373 CalculateStrides();
374 pad_param_->mirror_offset_ = pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_REFLECT) ? 1 : 0;
375
376 InitMirrorPadBlock();
377 return RET_OK;
378 }
379
Run()380 int PadCPUKernel::Run() {
381 if (in_tensors_.size() == kInputSize2) {
382 auto pad_value = in_tensors_.at(2);
383 auto value_num = pad_value->ElementsNum();
384 if (value_num != 1) {
385 MS_LOG(ERROR) << "The number of padding value should be only one, but got " << value_num;
386 return RET_ERROR;
387 }
388 pad_param_->constant_value_ = *(reinterpret_cast<float *>(pad_value->data()));
389 }
390 int error_code = 0;
391 if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
392 if (in_tensors_.size() >= kPadCommonInputSize) {
393 error_code = CopyPaddingFromInput();
394 if (error_code != RET_OK) {
395 MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
396 return RET_ERROR;
397 }
398 }
399 auto output = out_tensors_.at(0);
400 int output_size = output->ElementsNum();
401 auto output_data = reinterpret_cast<float *>(output->data());
402 if (abs(pad_param_->constant_value_ - 0.0f) < 1e-5) {
403 memset(output_data, 0, static_cast<size_t>(output_size) * sizeof(float));
404 } else {
405 for (auto i = 0; i < output_size; ++i) {
406 output_data[i] = pad_param_->constant_value_;
407 }
408 }
409 error_code = ParallelLaunch(this->ms_context_, PadImpl, this, op_parameter_->thread_num_);
410 if (error_code != RET_OK) {
411 MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
412 return RET_ERROR;
413 }
414 } else {
415 // mirror pad case
416 error_code = HandleMirrorPad();
417 if (error_code != RET_OK) {
418 MS_LOG(ERROR) << "Handle mirror pad failed, error_code[" << error_code << "]";
419 return error_code;
420 }
421
422 error_code = ParallelLaunch(this->ms_context_, MirrorPadImpl, this, op_parameter_->thread_num_);
423 if (error_code != RET_OK) {
424 MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
425 return RET_ERROR;
426 }
427 }
428
429 return RET_OK;
430 }
431
432 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_PadFusion, LiteKernelCreator<PadCPUKernel>)
433 } // namespace mindspore::kernel
434