1From a4c343574d6d6998a6f1b95f436401c8eb8a2c90 Mon Sep 17 00:00:00 2001 2From: zhangyanhui <zhangyanhui17@huawei.com> 3Date: Mon, 1 Jul 2024 21:12:15 +0800 4Subject: [PATCH] auto-apply 0015-bugfix-for-cpu-kernel.patch 5 6--- 7 .../cpu/kernel/nnacl/infer/where_infer.c | 66 ++++++------- 8 .../device/cpu/kernel/nnacl/kernel/clip.c | 2 + 9 .../src/litert/kernel/cpu/fp32/prelu_fp32.cc | 12 +-- 10 .../src/litert/kernel/cpu/fp32/where_fp32.cc | 96 ++++++++++++++++--- 11 .../src/litert/kernel/cpu/fp32/where_fp32.h | 2 + 12 .../lite/tools/optimizer/fusion/glu_fusion.h | 4 +- 13 6 files changed, 124 insertions(+), 58 deletions(-) 14 15diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c 16index f6d4e1b2..c714627a 100644 17--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c 18+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c 19@@ -17,18 +17,19 @@ 20 #include "nnacl/infer/where_infer.h" 21 #include "nnacl/infer/infer_register.h" 22 #include "nnacl/tensor_c_utils.h" 23+#include "nnacl/infer/broadcast_to_infer.h" 24 25-static size_t GetAxisout(const TensorC *input0, const TensorC *input1, const TensorC *input2, size_t index) { 26- if (input0->shape_[index] == input1->shape_[index] && input0->shape_[index] != input2->shape_[index]) { 27- return index; 28+int WhereBroadCastInferShape(const int input_shape0_size, const int input_shape1_size, const int *input_shape0, 29+ const int *input_shape1, int *ndim, int *in_shape0, int *in_shape1, int *out_shape, 30+ bool *has_broad_cast) { 31+ if (input_shape0_size > MAX_SHAPE_SIZE || input_shape1_size > MAX_SHAPE_SIZE) { 32+ return NNACL_ERR; 33 } 34- if (input0->shape_[index] == input2->shape_[index] && input0->shape_[index] != input1->shape_[index]) { 35- return index; 36- } 37- if (input1->shape_[index] == input2->shape_[index] && input0->shape_[index] != input1->shape_[index]) { 38- return index; 39+ MakeUpInputShapes(input_shape0_size, input_shape1_size, input_shape0, input_shape1, ndim, in_shape0, in_shape1); 40+ if (*ndim >= MAX_SHAPE_SIZE) { 41+ return NNACL_INFER_INVALID; 42 } 43- return MAX_SHAPE_SIZE + 1; 44+ return BroadCastOutputShape(in_shape0, in_shape1, *ndim, out_shape, has_broad_cast); 45 } 46 47 int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, 48@@ -59,35 +60,28 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** 49 if (!InferFlag(inputs, inputs_size)) { 50 return NNACL_INFER_INVALID; 51 } 52- 53- int num = GetElementNum(input0); 54- int num1 = GetElementNum(input1); 55- int num2 = GetElementNum(input2); 56- int nummax = num > num1 ? num : (num1 > num2 ? num1 : num2); 57- size_t min_input_shape_size = input1->shape_size_ < input2->shape_size_ ? input1->shape_size_ : input2->shape_size_; 58- size_t axisout = MAX_SHAPE_SIZE + 1; 59- size_t temp = 0; 60- for (size_t j = 0; j < input0->shape_size_; j++) { 61- if (j >= MAX_SHAPE_SIZE) { 62- return NNACL_ERR; 63- } 64- if (j < min_input_shape_size) { 65- axisout = GetAxisout(input0, input1, input2, j); 66- if (axisout != MAX_SHAPE_SIZE + 1) { 67- break; 68- } 69- } 70- temp += 1; 71- if (temp == input0->shape_size_) { 72- SetShapeTensor(output, input); 73- return NNACL_OK; 74- } 75+ int in_shape0[MAX_SHAPE_SIZE] = {0}; 76+ int in_shape1[MAX_SHAPE_SIZE] = {0}; 77+ int in_shape2[MAX_SHAPE_SIZE] = {0}; 78+ int output_shape[MAX_SHAPE_SIZE] = {0}; 79+ size_t input_shape0_size = input0->shape_size_; 80+ size_t input_shape1_size = input1->shape_size_; 81+ size_t input_shape2_size = input2->shape_size_; 82+ const int *input_shape0 = input0->shape_; 83+ const int *input_shape1 = input1->shape_; 84+ const int *input_shape2 = input2->shape_; 85+ int ndim = (int)input_shape0_size; 86+ bool has_broad_cast_1 = false; 87+ bool has_broad_cast_2 = false; 88+ if (WhereBroadCastInferShape(input_shape0_size, input_shape1_size, input_shape0, input_shape1, &ndim, in_shape0, 89+ in_shape1, output_shape, &has_broad_cast_1) != NNACL_OK) { 90+ return NNACL_ERR; 91 } 92- 93- ShapeSet(output->shape_, &output->shape_size_, input0->shape_, input0->shape_size_); 94- if (axisout != MAX_SHAPE_SIZE + 1) { 95- output->shape_[axisout] = nummax; 96+ if (WhereBroadCastInferShape(ndim, input_shape2_size, output_shape, input_shape2, &ndim, in_shape0, in_shape2, 97+ output_shape, &has_broad_cast_2) != NNACL_OK) { 98+ return NNACL_ERR; 99 } 100+ ShapeSet(output->shape_, &output->shape_size_, output_shape, ndim); 101 return NNACL_OK; 102 } 103 104diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/kernel/clip.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/kernel/clip.c 105index ece0eff0..ae8ac5d8 100644 106--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/kernel/clip.c 107+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/kernel/clip.c 108@@ -81,6 +81,8 @@ int ClipCompute(struct KernelBase *self) { 109 NNACL_CHECK_NULL_RETURN_ERR(clip); 110 ClipParameter *param = (ClipParameter *)clip->base_.param_; 111 NNACL_CHECK_NULL_RETURN_ERR(param); 112+ clip->min_val_ = param->min_val_; 113+ clip->max_val_ = param->max_val_; 114 115 int ret = NNACL_OK; 116 if (clip->base_.in_size_ > ONE_TENSOR) { 117diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/prelu_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/prelu_fp32.cc 118index cae491f5..74639503 100644 119--- a/mindspore/lite/src/litert/kernel/cpu/fp32/prelu_fp32.cc 120+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/prelu_fp32.cc 121@@ -44,12 +44,6 @@ int PReluCPUKernel::Prepare() { 122 CHECK_NULL_RETURN(in_tensors_[kInputIndex]); 123 CHECK_NULL_RETURN(in_tensors_[kSlopeIndex]); 124 CHECK_NULL_RETURN(out_tensors_[kOutputIndex]); 125- auto slope_shapes = in_tensors_[C1NUM]->ElementsNum(); 126- auto input_channel = in_tensors_[C0NUM]->Channel(); 127- if ((slope_shapes != C1NUM) && (slope_shapes != input_channel)) { 128- MS_LOG(ERROR) << "slope_shapes: " << slope_shapes << " is not equal to 1 or input_channel: " << input_channel; 129- return lite::RET_ERROR; 130- } 131 if (in_tensors_[1]->ElementsNum() == 1) { 132 param_->channelShared = true; 133 } else { 134@@ -83,6 +77,12 @@ int PReluCPUKernel::DoExcute(int task_id) const { 135 } 136 137 int PReluCPUKernel::ReSize() { 138+ auto slope_shapes = in_tensors_[C1NUM]->ElementsNum(); 139+ auto input_channel = in_tensors_[C0NUM]->Channel(); 140+ if ((slope_shapes != C1NUM) && (slope_shapes != input_channel)) { 141+ MS_LOG(ERROR) << "slope_shapes: " << slope_shapes << " is not equal to 1 or input_channel: " << input_channel; 142+ return lite::RET_ERROR; 143+ } 144 auto &input = in_tensors_[kInputIndex]; 145 param_->input_num_ = input->ElementsNum(); 146 CHECK_NOT_EQUAL_RETURN(out_tensors_.front()->ElementsNum(), param_->input_num_); 147diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc 148index d7c987e3..a73fda7c 100644 149--- a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc 150+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc 151@@ -20,6 +20,7 @@ 152 #include "src/litert/kernel_registry.h" 153 #include "include/errorcode.h" 154 #include "nnacl/common_func.h" 155+#include "nnacl/base/broadcast_to.h" 156 157 using mindspore::kernel::KERNEL_ARCH; 158 using mindspore::lite::KernelRegistrar; 159@@ -153,36 +154,58 @@ int WhereCPUKernel::RunWithSingleInput() { 160 } 161 162 int WhereCPUKernel::RunWithTripleInputs() { 163- auto condition = in_tensors_.at(0); 164+ TensorC *condition = in_tensors_.at(0)->ConvertToTensorC(); 165 CHECK_NULL_RETURN(condition); 166- auto x = in_tensors_.at(1); 167+ TensorC *x = in_tensors_.at(1)->ConvertToTensorC(); 168 CHECK_NULL_RETURN(x); 169- auto y = in_tensors_.at(C2NUM); 170+ TensorC *y = in_tensors_.at(C2NUM)->ConvertToTensorC(); 171 CHECK_NULL_RETURN(y); 172- int condition_nums = condition->ElementsNum(); 173- int x_num = x->ElementsNum(); 174- int y_num = y->ElementsNum(); 175- int out_num = out_tensors_.front()->ElementsNum(); 176+ TensorC *output = out_tensors_.at(0)->ConvertToTensorC(); 177+ CHECK_NULL_RETURN(output); 178+ int condition_nums = GetElementNum(condition); 179+ int x_num = GetElementNum(x); 180+ int y_num = GetElementNum(y); 181+ int out_num = GetElementNum(output); 182 183- condition_ = reinterpret_cast<bool *>(condition->data()); 184+ condition_ = reinterpret_cast<bool *>(condition->data_); 185 CHECK_NULL_RETURN(condition_); 186- x_ = x->data(); 187+ x_ = x->data_; 188 CHECK_NULL_RETURN(x_); 189- y_ = y->data(); 190+ y_ = y->data_; 191 CHECK_NULL_RETURN(y_); 192- output_data_ = out_tensors_.at(0)->data(); 193+ output_data_ = output->data_; 194 int num_max = condition_nums > x_num ? condition_nums : (x_num > y_num ? x_num : y_num); 195 where_param_->condition_num_ = condition_nums; 196 where_param_->x_num_ = x_num; 197 where_param_->y_num_ = y_num; 198 where_param_->max_num_ = num_max; 199- 200+ void *condition_broadcast_buf = nullptr; 201+ void *x_broadcast_buf = nullptr; 202+ void *y_broadcast_buf = nullptr; 203 CHECK_LESS_RETURN(out_num, num_max); 204 205 if (((condition_nums != 1) && (condition_nums != num_max)) || ((x_num != 1) && (x_num != num_max)) || 206 ((y_num != 1) && (y_num != num_max))) { 207- MS_LOG(ERROR) << "The length of three inputs are not equal to 1 or length of output, which is unacceptable"; 208- return RET_ERROR; 209+ if (condition_nums != GetElementNum(y)) { 210+ int ret = 211+ BroadcastForInput(condition, x, y, &condition_broadcast_buf, &x_broadcast_buf, &y_broadcast_buf, output); 212+ if (ret != RET_OK) { 213+ MS_LOG(ERROR) << "BroadcastForInput failed."; 214+ return RET_ERROR; 215+ } 216+ int max_num = GetElementNum(output); 217+ condition_ = reinterpret_cast<bool *>(condition_broadcast_buf); 218+ x_ = x_broadcast_buf; 219+ y_ = y_broadcast_buf; 220+ output_data_ = output->data_; 221+ where_param_->condition_num_ = max_num; 222+ where_param_->x_num_ = max_num; 223+ where_param_->y_num_ = max_num; 224+ where_param_->max_num_ = max_num; 225+ } else { 226+ MS_LOG(ERROR) << "The length of three inputs are not equal to 1 or length of output, which is unacceptable"; 227+ return RET_ERROR; 228+ } 229 } 230 if (num_max <= 0) { 231 MS_LOG(ERROR) << "Error, inputs' length are zero !!!"; 232@@ -193,6 +216,9 @@ int WhereCPUKernel::RunWithTripleInputs() { 233 MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]"; 234 return RET_ERROR; 235 } 236+ ms_context_->allocator->Free(condition_broadcast_buf); 237+ ms_context_->allocator->Free(x_broadcast_buf); 238+ ms_context_->allocator->Free(y_broadcast_buf); 239 return RET_OK; 240 } 241 242@@ -214,6 +240,48 @@ int WhereCPUKernel::Run() { 243 return ret; 244 } 245 246+int WhereCPUKernel::BroadcastForInput(TensorC *condition, TensorC *x, TensorC *y, void **condition_broadcast_buf, 247+ void **x_broadcast_buf, void **y_broadcast_buf, TensorC *output) { 248+ size_t broad_cast_buf_size = GetSize(output); 249+ BroadcastShapeInfo condition_info; 250+ condition_info.input_shape_size_ = condition->shape_size_; 251+ condition_info.output_shape_size_ = output->shape_size_; 252+ (void)memcpy(condition_info.input_shape_, condition->shape_, condition->shape_size_ * sizeof(int)); 253+ (void)memcpy(condition_info.output_shape_, output->shape_, output->shape_size_ * sizeof(int)); 254+ BroadcastShapeInfo x_info; 255+ x_info.input_shape_size_ = x->shape_size_; 256+ x_info.output_shape_size_ = output->shape_size_; 257+ (void)memcpy(x_info.input_shape_, x->shape_, x->shape_size_ * sizeof(int)); 258+ (void)memcpy(x_info.output_shape_, output->shape_, output->shape_size_ * sizeof(int)); 259+ BroadcastShapeInfo y_info; 260+ y_info.input_shape_size_ = y->shape_size_; 261+ y_info.output_shape_size_ = output->shape_size_; 262+ (void)memcpy(y_info.input_shape_, y->shape_, y->shape_size_ * sizeof(int)); 263+ (void)memcpy(y_info.output_shape_, output->shape_, output->shape_size_ * sizeof(int)); 264+ 265+ *condition_broadcast_buf = ms_context_->allocator->Malloc(broad_cast_buf_size); 266+ CHECK_NULL_RETURN(*condition_broadcast_buf); 267+ BroadcastToSize8(condition->data_, &condition_info, *condition_broadcast_buf); 268+ 269+ *x_broadcast_buf = ms_context_->allocator->Malloc(broad_cast_buf_size); 270+ if (*x_broadcast_buf == nullptr) { 271+ ms_context_->allocator->Free(*condition_broadcast_buf); 272+ MS_LOG(ERROR) << "malloc x_broadcast_buf error"; 273+ return RET_ERROR; 274+ } 275+ BroadcastToSize32(x->data_, &x_info, *x_broadcast_buf); 276+ 277+ *y_broadcast_buf = ms_context_->allocator->Malloc(broad_cast_buf_size); 278+ if (*y_broadcast_buf == nullptr) { 279+ ms_context_->allocator->Free(*condition_broadcast_buf); 280+ ms_context_->allocator->Free(*x_broadcast_buf); 281+ MS_LOG(ERROR) << "malloc y_broadcast_buf error"; 282+ return RET_ERROR; 283+ } 284+ BroadcastToSize32(y->data_, &y_info, *y_broadcast_buf); 285+ return RET_OK; 286+} 287+ 288 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Where, LiteKernelCreator<WhereCPUKernel>) 289 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Where, LiteKernelCreator<WhereCPUKernel>) 290 REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Where, LiteKernelCreator<WhereCPUKernel>) 291diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h 292index 0d785732..ae6e3eba 100644 293--- a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h 294+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h 295@@ -51,6 +51,8 @@ class WhereCPUKernel : public LiteKernel { 296 private: 297 int RunWithSingleInput(); 298 int RunWithTripleInputs(); 299+ int BroadcastForInput(TensorC *condition, TensorC *x, TensorC *y, void **condition_broadcast_buf, 300+ void **x_broadcast_buf, void **y_broadcast_buf, TensorC *output); 301 }; 302 } // namespace mindspore::kernel 303 #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_WHERE_FP32_H_ 304diff --git a/mindspore/lite/tools/optimizer/fusion/glu_fusion.h b/mindspore/lite/tools/optimizer/fusion/glu_fusion.h 305index 5e6a7e79..513a49d9 100644 306--- a/mindspore/lite/tools/optimizer/fusion/glu_fusion.h 307+++ b/mindspore/lite/tools/optimizer/fusion/glu_fusion.h 308@@ -1,5 +1,5 @@ 309 /** 310- * Copyright 2021 Huawei Technologies Co., Ltd 311+ * Copyright 2021~2024 Huawei Technologies Co., Ltd 312 * 313 * Licensed under the Apache License, Version 2.0 (the "License"); 314 * you may not use this file except in compliance with the License. 315@@ -26,7 +26,7 @@ namespace mindspore { 316 namespace opt { 317 class GLUFusion : public LitePatternProcessPass { 318 public: 319- explicit GLUFusion(const std::string &name = "glu_fusion", bool multigraph = true) 320+ explicit GLUFusion(const std::string &name = "GLUFusion", bool multigraph = true) 321 : LitePatternProcessPass(name, multigraph) {} 322 323 ~GLUFusion() override = default; 324-- 3252.25.1 326 327