• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "src/runtime/kernel/arm/fp32/pad_fp32.h"
#include <cmath>
#include "src/kernel_registry.h"
#include "schema/model_generated.h"
20 
21 using mindspore::kernel::KERNEL_ARCH;
22 using mindspore::lite::KernelRegistrar;
23 using mindspore::lite::RET_ERROR;
24 using mindspore::lite::RET_NULL_PTR;
25 using mindspore::lite::RET_OK;
26 using mindspore::schema::PrimitiveType_PadFusion;
27 
namespace mindspore::kernel {
namespace {
// Minimum input-tensor counts: both values are 2 (data + paddings); kept as
// two named constants because the two pad modes validate them independently.
constexpr size_t kMirrorPadInputSize = 2;
constexpr size_t kPadCommonInputSize = 2;
}  // namespace
// Validates tensor counts and pointers once at kernel creation.
// Returns RET_ERROR on malformed inputs; defers sizing work to ReSize() and
// returns RET_OK early when shape inference has not run yet.
int PadCPUKernel::Init() {
  // Accept 2 inputs (data, paddings) or 3 (plus scalar constant value).
  MS_CHECK_TRUE_RET(in_tensors_.size() == kPadCommonInputSize || in_tensors_.size() == kInputSize2, RET_ERROR);
  MS_CHECK_TRUE_RET(out_tensors_.size() == 1, RET_ERROR);
  CHECK_NULL_RETURN(in_tensors_[0]);
  CHECK_NULL_RETURN(in_tensors_[1]);
  CHECK_NULL_RETURN(out_tensors_[0]);
  if (!InferShapeDone()) {
    // Shapes unknown yet; the framework calls ReSize() again after inference.
    return RET_OK;
  }
  return ReSize();
}
44 
ReSize()45 int PadCPUKernel::ReSize() {
46   auto input = in_tensors_.at(0);
47   auto rank = input->shape().size();
48   if (rank > DEFAULT_PAD_NDIMS) {
49     MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank;
50     return RET_ERROR;
51   }
52   auto output = out_tensors_.at(0);
53   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
54     auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input->shape().data(), rank);
55     if (ret != RET_OK) {
56       return ret;
57     }
58     ret = ExtendShape(out_, DEFAULT_PAD_NDIMS, output->shape().data(), rank);
59     if (ret != RET_OK) {
60       return ret;
61     }
62     if (pad_param_->padding_length < MAX_PAD_SIZE) {
63       int ori_paddings[MAX_PAD_SIZE];
64       for (auto i = 0; i < pad_param_->padding_length; ++i) {
65         ori_paddings[i] = pad_param_->paddings_[i];
66       }
67       ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, ori_paddings, pad_param_->padding_length);
68       if (ret != RET_OK) {
69         return ret;
70       }
71       pad_param_->padding_length = MAX_PAD_SIZE;
72     }
73   }
74   return RET_OK;
75 }
76 
// Decomposes the mirror-padded output into rectangular border "blocks"
// (everything except the center copy of the input) and stores them in
// mirror_pad_block_ for the fast path of RunMirrorPadImpl().
void PadCPUKernel::InitMirrorPadBlock() {
  mirror_pad_block_.clear();
  // Left (leading) pad per dimension; paddings_ stores (left, right) pairs.
  std::vector<int> left_pads(DEFAULT_PAD_NDIMS);
  for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
    left_pads[i] = pad_param_->paddings_[2 * i];
  }
  std::vector<int> input_separate_dims;
  std::vector<int> output_separate_dims;
  std::vector<int> separate_offset;
  /* init separate dims */
  // NOTE(review): cur_input/cur_output are reset to 1 on every iteration and
  // never multiplied, so both merge branches below are dead as written;
  // upstream variants fold un-padded dimensions into them — verify intent.
  int cur_input = 1;
  int cur_output = 1;
  for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
    if (cur_input > 1) {
      input_separate_dims.emplace_back(cur_input);
      output_separate_dims.emplace_back(cur_output);
      separate_offset.emplace_back(0);
    }
    input_separate_dims.emplace_back(in_[i]);
    output_separate_dims.emplace_back(out_[i]);
    separate_offset.emplace_back(left_pads[i]);
    cur_input = 1;
    cur_output = 1;
  }
  if (cur_input != 1 || cur_output != 1) {
    input_separate_dims.emplace_back(cur_input);
    output_separate_dims.emplace_back(cur_output);
    separate_offset.emplace_back(0);
  }
  /* init separate stride */
  std::vector<int> output_separate_stride;
  output_separate_stride.resize(output_separate_dims.size());
  GetStride(output_separate_stride.data(), output_separate_dims.data(), output_separate_dims.size());
  /* init separate stride */
  // remain_stride is empty, so no leading dimensions are excluded from the
  // block decomposition (remain_size presumably collapses to a single
  // iteration — confirm GetStride's contract for a zero-length request).
  std::vector<int> remain_stride(0);
  int remain_size = GetStride(remain_stride.data(), output_separate_dims.data(), remain_stride.size());
  // Right (trailing) pad = output extent minus left pad minus input extent.
  std::vector<int> right_pads(separate_offset.size());
  for (size_t i = 0; i < right_pads.size(); ++i) {
    right_pads[i] = output_separate_dims[i] - input_separate_dims[i] - separate_offset[i];
  }
  /* init pad region */
  // Per dimension, count the regions present: center, plus an optional
  // left-pad region and an optional right-pad region.
  std::vector<int> pad_region;
  for (size_t i = remain_stride.size(); i < output_separate_stride.size(); ++i) {
    // 0: center, 1: left, 2: right
    int r = 1;
    if (separate_offset[i] > 0) {
      r++;
    }
    if (right_pads[i] > 0) {
      r++;
    }
    pad_region.emplace_back(r);
  }
  std::vector<int> pad_region_stride(pad_region.size());
  // region_size = number of region combinations across all dimensions.
  int region_size = GetStride(pad_region_stride.data(), pad_region.data(), pad_region.size());
  int remain_dim_offset = static_cast<int>(remain_stride.size());
  std::vector<int> pad_cord(pad_region.size());
  for (int pos = 0; pos < remain_size; ++pos) {
    const int dst_basic_offset = 0;
    // index 0 is the all-center combination, already handled by Pad(); every
    // other combination contains at least one pad region and becomes a block.
    for (int index = 1; index < region_size; ++index) {
      int dst_offset = dst_basic_offset;
      int value = index;
      // Decode the linear region index into per-dimension coordinates.
      for (size_t i = 0; i < pad_region.size() && pad_region_stride[i] != 0; ++i) {
        NNACL_CHECK_ZERO_RETURN(pad_region_stride[i]);
        pad_cord[i] = value / pad_region_stride[i];
        value = value % pad_region_stride[i];
      }
      MirrorPadBlock block;
      const int size_offset = DEFAULT_PAD_NDIMS - static_cast<int>(pad_region.size());
      for (size_t i = 0; i < pad_region.size(); ++i) {
        int di = size_offset + i;
        int si = remain_dim_offset + i;
        if (di >= DEFAULT_PAD_NDIMS) {
          continue;
        }
        switch (pad_cord[i]) {
          case 0:
            // Center region: spans the input extent, shifted past the left pad.
            dst_offset += separate_offset[si] * output_separate_stride[si];
            block.size_[di] = input_separate_dims[si];
            block.out_stride_[di] = output_separate_stride[si];
            break;
          case 2:
            // Right-pad region: starts after left pad + input extent.
            dst_offset += (separate_offset[si] + input_separate_dims[si]) * output_separate_stride[si];
            block.size_[di] = right_pads[si];
            block.out_stride_[di] = output_separate_stride[si];
            break;
          case 1:
            // Coordinate 1 means "left pad" when one exists; otherwise the
            // dimension only has a right pad and 1 denotes that instead.
            if (separate_offset[si] > 0) {
              block.size_[di] = separate_offset[si];
              block.out_stride_[di] = output_separate_stride[si];
            } else {
              dst_offset += (separate_offset[si] + input_separate_dims[si]) * output_separate_stride[si];
              block.size_[di] = right_pads[si];
              block.out_stride_[di] = output_separate_stride[si];
            }
            break;
          default:
            break;
        }
      }
      block.out_offset_ = dst_offset;
      mirror_pad_block_.push_back(std::move(block));
    }
  }
}
182 
ExtendShape(int * shape,int length,const int * ori_shape,int rank) const183 int PadCPUKernel::ExtendShape(int *shape, int length, const int *ori_shape, int rank) const {
184   if (shape == nullptr || ori_shape == nullptr) {
185     return RET_NULL_PTR;
186   }
187   for (auto i = 0; i < length - rank; ++i) {
188     shape[i] = 1;
189   }
190   for (auto i = length - rank; i < length; ++i) {
191     shape[i] = ori_shape[i - (length - rank)];
192   }
193   return RET_OK;
194 }
195 
ExtendPaddings(int * paddings,int length,const int * ori_paddings,int ori_length) const196 int PadCPUKernel::ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length) const {
197   if (paddings == nullptr || ori_paddings == nullptr) {
198     return RET_NULL_PTR;
199   }
200   for (auto i = 0; i < length - ori_length; ++i) {
201     paddings[i] = 0;
202   }
203   for (auto i = length - ori_length; i < length; ++i) {
204     paddings[i] = ori_paddings[i - (length - ori_length)];
205   }
206   return RET_OK;
207 }
208 
PadImpl(void * cdata,int task_id,float lhs_scale,float rhs_scale)209 int PadImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
210   auto padKernel = reinterpret_cast<PadCPUKernel *>(cdata);
211   int error_code = padKernel->RunImpl(task_id);
212   if (error_code != NNACL_OK) {
213     MS_LOG(ERROR) << "Pad Run error task_id[" << task_id << "] error_code[" << error_code << "]";
214     return RET_ERROR;
215   }
216   return RET_OK;
217 }
218 
RunImpl(int task_id)219 int PadCPUKernel::RunImpl(int task_id) {
220   auto input = in_tensors_.at(0);
221   auto output = out_tensors_.at(0);
222   auto input_data = reinterpret_cast<float *>(input->data());
223   auto output_data = reinterpret_cast<float *>(output->data());
224   CHECK_NULL_RETURN(input_data);
225   CHECK_NULL_RETURN(output_data);
226   Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_);
227 
228   return RET_OK;
229 }
230 
MirrorPadImpl(void * cdata,int task_id,float lhs_scale,float rhs_scale)231 int MirrorPadImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
232   auto padKernel = reinterpret_cast<PadCPUKernel *>(cdata);
233   int error_code = padKernel->RunMirrorPadImpl(task_id);
234   if (error_code != NNACL_OK) {
235     MS_LOG(ERROR) << "Pad Run error task_id[" << task_id << "] error_code[" << error_code << "]";
236     return RET_ERROR;
237   }
238   return RET_OK;
239 }
240 
RunMirrorPadImpl(int task_id)241 int PadCPUKernel::RunMirrorPadImpl(int task_id) {
242   auto input = in_tensors_.at(0);
243   auto output = out_tensors_.at(0);
244   auto input_data = reinterpret_cast<float *>(input->data());
245   CHECK_NULL_RETURN(input_data);
246   auto output_data = reinterpret_cast<float *>(output->data());
247   CHECK_NULL_RETURN(output_data);
248   /* Fast Mirror pad */
249   if (mirror_pad_block_.size() != 0) {
250     /* copy center part */
251     Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_);
252 
253     /* calculate region part */
254     for (size_t i = task_id; i < mirror_pad_block_.size(); i += static_cast<size_t>(op_parameter_->thread_num_)) {
255       auto block = mirror_pad_block_[i];
256 
257       for (int a = 0; a < block.size_[0]; a++) {
258         int out_a_index = block.out_offset_ + a * block.out_stride_[0];
259         for (int b = 0; b < block.size_[1]; b++) {
260           int out_b_index = out_a_index + b * block.out_stride_[1];
261           for (int c = 0; c < block.size_[2]; ++c) {
262             int out_c_index = out_b_index + c * block.out_stride_[2];
263             for (int d = 0; d < block.size_[3]; ++d) {
264               int out_d_index = out_c_index + d * block.out_stride_[3];
265               for (int e = 0; e < block.size_[4]; ++e) {
266                 int output_index = out_d_index + e * block.out_stride_[4];
267                 MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]);
268               }
269             }
270           }
271         }
272       }
273     }
274     return RET_OK;
275   }
276 
277   /* Common Mirror pad */
278   MS_CHECK_FALSE_MSG(op_parameter_->thread_num_ == 0, RET_ERROR, "div zero");
279   int unit = UP_DIV(output->ElementsNum(), op_parameter_->thread_num_);
280   int begin = unit * task_id;
281   int end = MSMIN(begin + unit, output->ElementsNum());
282   MirrorPad(input_data, output_data, in_, pad_param_, begin, end);
283   return RET_OK;
284 }
285 
CheckPaddings(const int * paddings,int length,const int * input_shape,int mode)286 int PadCPUKernel::CheckPaddings(const int *paddings, int length, const int *input_shape, int mode) {
287   if (paddings == nullptr || input_shape == nullptr) {
288     return RET_NULL_PTR;
289   }
290   std::string prefix;
291   int offset;
292   if (mode == static_cast<int>(schema::PaddingMode_SYMMETRIC)) {
293     prefix = "For Pad SYMMETRIC ";
294     offset = 0;
295   } else {
296     prefix = "For Pad REFLECT ";
297     offset = 1;
298   }
299   for (auto i = 0; i < length; ++i) {
300     int max_valid = input_shape[i] - offset;
301     if (paddings[i * 2] > max_valid) {
302       MS_LOG(WARNING) << prefix << "paddings " << paddings[i * 2] << " should be less than " << max_valid + 1;
303       MS_LOG(WARNING) << "Running mirror pad with padding bigger than shape.";
304     }
305     if (paddings[i * 2 + 1] > max_valid) {
306       MS_LOG(WARNING) << prefix << "paddings " << paddings[i * 2 + 1] << " should be less than " << max_valid + 1;
307       MS_LOG(WARNING) << "Running mirror pad with padding bigger than shape.";
308     }
309   }
310   return RET_OK;
311 }
312 
// Reads the per-dimension paddings from the second input tensor, then
// normalizes both the input shape (to DEFAULT_PAD_NDIMS dimensions) and the
// paddings table (to MAX_PAD_SIZE entries) inside pad_param_.
// Returns RET_ERROR on missing/malformed inputs, RET_NULL_PTR on null
// buffers, otherwise RET_OK.
int PadCPUKernel::CopyPaddingFromInput() {
  if (in_tensors_.size() < kMirrorPadInputSize) {
    MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need at least 2 inputs, got " << in_tensors_.size();
    return RET_ERROR;
  }
  auto padding_tensor = in_tensors_.at(1);
  auto paddings = reinterpret_cast<int *>(padding_tensor->data());
  CHECK_NULL_RETURN(paddings);
  auto input_shape = in_tensors_.at(0)->shape();
  int rank = static_cast<int>(input_shape.size());
  // The paddings tensor must hold one (left, right) pair per input dimension.
  if (padding_tensor->ElementsNum() != rank * 2) {
    MS_LOG(ERROR) << "Pad second input elements num" << padding_tensor->ElementsNum() << ", should be " << rank * 2;
    return RET_ERROR;
  }

  auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank);
  if (ret != RET_OK) {
    return ret;
  }
  // Zero-fill the leading entries so paddings_ always covers MAX_PAD_SIZE.
  ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, paddings, padding_tensor->ElementsNum());
  if (ret != RET_OK) {
    return ret;
  }
  pad_param_->padding_length = MAX_PAD_SIZE;
  return RET_OK;
}
339 
CalculateStrides()340 void PadCPUKernel::CalculateStrides() {
341   pad_param_->in_strides[DEFAULT_PAD_NDIMS - 1] = 1;
342   for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) {
343     pad_param_->in_strides[i] = in_[i + 1] * pad_param_->in_strides[i + 1];
344   }
345   for (auto i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
346     out_[i] = in_[i] + pad_param_->paddings_[i * 2] + pad_param_->paddings_[i * 2 + 1];
347   }
348   pad_param_->out_strides[DEFAULT_PAD_NDIMS - 1] = 1;
349   for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) {
350     pad_param_->out_strides[i] = out_[i + 1] * pad_param_->out_strides[i + 1];
351   }
352 }
353 
// Prepares all REFLECT/SYMMETRIC state: normalizes shapes/paddings, validates
// padding magnitudes, computes strides and the border-block decomposition.
int PadCPUKernel::HandleMirrorPad() {
  int ret;
  if (in_tensors_.size() == 1) {
    // Paddings presumably already live in pad_param_ in this case; only the
    // input shape needs extending to DEFAULT_PAD_NDIMS dimensions.
    auto input_shape = in_tensors_.at(0)->shape();
    int rank = static_cast<int>(input_shape.size());
    ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank);
    if (ret != RET_OK) {
      return ret;
    }
  } else {
    // Paddings are supplied via the second input tensor.
    ret = CopyPaddingFromInput();
    if (ret != RET_OK) {
      return ret;
    }
  }
  // CheckPaddings only warns on oversized paddings; it fails only on nulls.
  ret = CheckPaddings(pad_param_->paddings_, DEFAULT_PAD_NDIMS, in_, pad_param_->pad_mode_);
  if (ret != RET_OK) {
    return ret;
  }
  CalculateStrides();
  // REFLECT mirrors without repeating the border element (offset 1);
  // SYMMETRIC repeats it (offset 0).
  pad_param_->mirror_offset_ = pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_REFLECT) ? 1 : 0;

  InitMirrorPadBlock();
  return RET_OK;
}
379 
Run()380 int PadCPUKernel::Run() {
381   if (in_tensors_.size() == kInputSize2) {
382     auto pad_value = in_tensors_.at(2);
383     auto value_num = pad_value->ElementsNum();
384     if (value_num != 1) {
385       MS_LOG(ERROR) << "The number of padding value should be only one, but got " << value_num;
386       return RET_ERROR;
387     }
388     pad_param_->constant_value_ = *(reinterpret_cast<float *>(pad_value->data()));
389   }
390   int error_code = 0;
391   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
392     if (in_tensors_.size() >= kPadCommonInputSize) {
393       error_code = CopyPaddingFromInput();
394       if (error_code != RET_OK) {
395         MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
396         return RET_ERROR;
397       }
398     }
399     auto output = out_tensors_.at(0);
400     int output_size = output->ElementsNum();
401     auto output_data = reinterpret_cast<float *>(output->data());
402     if (abs(pad_param_->constant_value_ - 0.0f) < 1e-5) {
403       memset(output_data, 0, static_cast<size_t>(output_size) * sizeof(float));
404     } else {
405       for (auto i = 0; i < output_size; ++i) {
406         output_data[i] = pad_param_->constant_value_;
407       }
408     }
409     error_code = ParallelLaunch(this->ms_context_, PadImpl, this, op_parameter_->thread_num_);
410     if (error_code != RET_OK) {
411       MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
412       return RET_ERROR;
413     }
414   } else {
415     // mirror pad case
416     error_code = HandleMirrorPad();
417     if (error_code != RET_OK) {
418       MS_LOG(ERROR) << "Handle mirror pad failed, error_code[" << error_code << "]";
419       return error_code;
420     }
421 
422     error_code = ParallelLaunch(this->ms_context_, MirrorPadImpl, this, op_parameter_->thread_num_);
423     if (error_code != RET_OK) {
424       MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
425       return RET_ERROR;
426     }
427   }
428 
429   return RET_OK;
430 }
431 
// Register this kernel as the fp32 PadFusion implementation for the CPU backend.
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_PadFusion, LiteKernelCreator<PadCPUKernel>)
}  // namespace mindspore::kernel
434