1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/runtime/kernel/arm/int8/concat_int8.h"
18 #include "schema/model_generated.h"
19 #include "src/kernel_registry.h"
20
21 using mindspore::kernel::KERNEL_ARCH;
22 using mindspore::lite::KernelRegistrar;
23 using mindspore::lite::RET_ERROR;
24 using mindspore::lite::RET_OK;
25 using mindspore::schema::PrimitiveType_Concat;
26
27 namespace mindspore::kernel {
Init()28 int ConcatInt8CPUKernel::Init() {
29 CHECK_LESS_RETURN(in_tensors_.size(), 1);
30 CHECK_NULL_RETURN(in_tensors_.front());
31 MS_CHECK_TRUE_RET(out_tensors_.size() == 1, RET_ERROR);
32 CHECK_NULL_RETURN(out_tensors_.front());
33 concat_param_->input_shapes_ = nullptr;
34 auto input_num = in_tensors_.size();
35 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(sizeof(int8_t *), input_num), RET_ERROR, "mul overflow");
36 input_data_ = reinterpret_cast<int8_t **>(malloc(sizeof(int8_t *) * input_num));
37 if (input_data_ == nullptr) {
38 MS_LOG(ERROR) << "Null pointer reference: inputs_array.";
39 return RET_ERROR;
40 }
41
42 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(sizeof(QuantArg), input_num), RET_ERROR, "mul overflow");
43 concat_param_->quant_arg_.in_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg) * input_num));
44 if (concat_param_->quant_arg_.in_args_ == nullptr) {
45 MS_LOG(ERROR) << "Null pointer reference: quant_concat_parm_->in_quant_args_.";
46 return RET_ERROR;
47 }
48 for (size_t i = 0; i < input_num; i++) {
49 auto *input_tensor = in_tensors_.at(i);
50 auto in_quant_args = input_tensor->quant_params();
51 MS_CHECK_TRUE_RET(!in_quant_args.empty(), RET_ERROR);
52 concat_param_->quant_arg_.in_args_[i].scale_ = in_quant_args.front().scale;
53 concat_param_->quant_arg_.in_args_[i].zp_ = in_quant_args.front().zeroPoint;
54 }
55
56 auto output_tensor = out_tensors_.at(kOutputIndex);
57 auto quant_params = output_tensor->quant_params();
58 MS_CHECK_TRUE_RET(!quant_params.empty(), RET_ERROR);
59 concat_param_->quant_arg_.out_args_.scale_ = quant_params.front().scale;
60 concat_param_->quant_arg_.out_args_.zp_ = quant_params.front().zeroPoint;
61
62 concat_param_->quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min();
63 concat_param_->quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max();
64 if (!InferShapeDone()) {
65 return RET_OK;
66 }
67 return ReSize();
68 }
69
ReSize()70 int ConcatInt8CPUKernel::ReSize() {
71 concat_param_->axis_ = concat_param_->axis_ >= 0
72 ? concat_param_->axis_
73 : static_cast<int>(in_tensors_.front()->shape().size()) + concat_param_->axis_;
74
75 auto input_num = in_tensors_.size();
76 concat_param_->input_num_ = static_cast<int>(input_num);
77 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(sizeof(int *), input_num), RET_ERROR, "mul overflow");
78 concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num));
79 if (concat_param_->input_shapes_ == nullptr) {
80 MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed.";
81 return RET_ERROR;
82 }
83 for (size_t i = 0; i < input_num; i++) {
84 auto in_shape = in_tensors_.at(i)->shape();
85 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(in_shape.size(), sizeof(int)), RET_ERROR, "mul overflow");
86 concat_param_->input_shapes_[i] = reinterpret_cast<int *>(malloc(in_shape.size() * sizeof(int)));
87 if (concat_param_->input_shapes_[i] == nullptr) {
88 MS_LOG(ERROR) << "malloc concat_param_->input_shapes_[" << i << "]"
89 << " failed.";
90 return RET_ERROR;
91 }
92 memcpy(reinterpret_cast<void *>(concat_param_->input_shapes_[i]), in_shape.data(), sizeof(int) * in_shape.size());
93 }
94
95 before_axis_size = 1;
96 for (int i = 0; i < concat_param_->axis_; i++) {
97 before_axis_size *= out_tensors_.at(kOutputIndex)->DimensionSize(i);
98 }
99
100 int64_t after_axis_size = 1;
101 auto output_tensor = out_tensors_.at(kOutputIndex);
102 auto out_shape = output_tensor->shape();
103 size_t output_dim = out_shape.size();
104 MS_CHECK_FALSE_MSG(SIZE_MUL_OVERFLOW(output_dim, sizeof(int)), RET_ERROR, "mul overflow");
105 concat_param_->output_shapes_ = reinterpret_cast<int *>(malloc(output_dim * sizeof(int)));
106 if (concat_param_->output_shapes_ == nullptr) {
107 MS_LOG(ERROR) << "malloc concat_param_->output_shapes_ failed.";
108 return RET_ERROR;
109 }
110 memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(),
111 sizeof(int) * output_dim);
112
113 for (size_t i = static_cast<size_t>(concat_param_->axis_ + 1); i < output_dim; i++) {
114 after_axis_size *= concat_param_->output_shapes_[i];
115 }
116 concat_param_->after_axis_size = after_axis_size;
117 return RET_OK;
118 }
119
Run()120 int ConcatInt8CPUKernel::Run() {
121 auto input_num = concat_param_->input_num_;
122 MS_CHECK_FALSE_MSG(op_parameter_->thread_num_ == 0, RET_ERROR, "div zero");
123 count_unit_ =
124 op_parameter_->thread_num_ > 1 ? UP_DIV(before_axis_size, op_parameter_->thread_num_) : before_axis_size;
125 concat_param_->count_unit_ = count_unit_;
126
127 for (int i = 0; i < input_num; i++) {
128 input_data_[i] = static_cast<int8_t *>(in_tensors_.at(i)->MutableData());
129 CHECK_NULL_RETURN(input_data_[i]);
130 }
131 output_data_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());
132 CHECK_NULL_RETURN(output_data_);
133 auto ret = ParallelLaunch(this->ms_context_, ConcatInt8Run, this, op_parameter_->thread_num_);
134
135 return ret;
136 }
137
ConcatInt8Run(void * cdata,int task_id,float lhs_scale,float rhs_scale)138 int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
139 auto concat = reinterpret_cast<ConcatInt8CPUKernel *>(cdata);
140 concat->DoExecute(task_id);
141 return lite::RET_OK;
142 }
143
DoExecute(int task_id)144 void ConcatInt8CPUKernel::DoExecute(int task_id) {
145 int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_);
146 if (real_dst_count <= 0) {
147 return;
148 }
149 Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id);
150 return;
151 }
152
153 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator<ConcatInt8CPUKernel>)
154 } // namespace mindspore::kernel
155