From 7aa0a7bde120a88562e851b943d7682e5cf6ebf7 Mon Sep 17 00:00:00 2001 From: zhangyanhui Date: Tue, 9 Apr 2024 19:45:33 +0800 Subject: [PATCH] auto-apply 0015-bugfix-for-cpu-kernel.patch --- .../cpu/kernel/nnacl/infer/where_infer.c | 66 ++++++------- .../src/litert/kernel/cpu/fp32/where_fp32.cc | 96 ++++++++++++++++--- .../src/litert/kernel/cpu/fp32/where_fp32.h | 2 + .../lite/tools/optimizer/fusion/glu_fusion.h | 4 +- 4 files changed, 116 insertions(+), 52 deletions(-) diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c index f6d4e1b2..c714627a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/where_infer.c @@ -17,18 +17,19 @@ #include "nnacl/infer/where_infer.h" #include "nnacl/infer/infer_register.h" #include "nnacl/tensor_c_utils.h" +#include "nnacl/infer/broadcast_to_infer.h" -static size_t GetAxisout(const TensorC *input0, const TensorC *input1, const TensorC *input2, size_t index) { - if (input0->shape_[index] == input1->shape_[index] && input0->shape_[index] != input2->shape_[index]) { - return index; +int WhereBroadCastInferShape(const int input_shape0_size, const int input_shape1_size, const int *input_shape0, + const int *input_shape1, int *ndim, int *in_shape0, int *in_shape1, int *out_shape, + bool *has_broad_cast) { + if (input_shape0_size > MAX_SHAPE_SIZE || input_shape1_size > MAX_SHAPE_SIZE) { + return NNACL_ERR; } - if (input0->shape_[index] == input2->shape_[index] && input0->shape_[index] != input1->shape_[index]) { - return index; - } - if (input1->shape_[index] == input2->shape_[index] && input0->shape_[index] != input1->shape_[index]) { - return index; + MakeUpInputShapes(input_shape0_size, input_shape1_size, input_shape0, input_shape1, ndim, in_shape0, in_shape1); + if (*ndim >= MAX_SHAPE_SIZE) { + return NNACL_INFER_INVALID; } - return MAX_SHAPE_SIZE + 1; + return BroadCastOutputShape(in_shape0, in_shape1, *ndim, out_shape, has_broad_cast); } int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, @@ -59,35 +60,28 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } - - int num = GetElementNum(input0); - int num1 = GetElementNum(input1); - int num2 = GetElementNum(input2); - int nummax = num > num1 ? num : (num1 > num2 ? num1 : num2); - size_t min_input_shape_size = input1->shape_size_ < input2->shape_size_ ? input1->shape_size_ : input2->shape_size_; - size_t axisout = MAX_SHAPE_SIZE + 1; - size_t temp = 0; - for (size_t j = 0; j < input0->shape_size_; j++) { - if (j >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } - if (j < min_input_shape_size) { - axisout = GetAxisout(input0, input1, input2, j); - if (axisout != MAX_SHAPE_SIZE + 1) { - break; - } - } - temp += 1; - if (temp == input0->shape_size_) { - SetShapeTensor(output, input); - return NNACL_OK; - } + int in_shape0[MAX_SHAPE_SIZE] = {0}; + int in_shape1[MAX_SHAPE_SIZE] = {0}; + int in_shape2[MAX_SHAPE_SIZE] = {0}; + int output_shape[MAX_SHAPE_SIZE] = {0}; + size_t input_shape0_size = input0->shape_size_; + size_t input_shape1_size = input1->shape_size_; + size_t input_shape2_size = input2->shape_size_; + const int *input_shape0 = input0->shape_; + const int *input_shape1 = input1->shape_; + const int *input_shape2 = input2->shape_; + int ndim = (int)input_shape0_size; + bool has_broad_cast_1 = false; + bool has_broad_cast_2 = false; + if (WhereBroadCastInferShape(input_shape0_size, input_shape1_size, input_shape0, input_shape1, &ndim, in_shape0, + in_shape1, output_shape, &has_broad_cast_1) != NNACL_OK) { + return NNACL_ERR; } - - ShapeSet(output->shape_, &output->shape_size_, input0->shape_, input0->shape_size_); - if (axisout != MAX_SHAPE_SIZE + 1) { - output->shape_[axisout] = nummax; + if (WhereBroadCastInferShape(ndim, input_shape2_size, output_shape, input_shape2, &ndim, in_shape0, in_shape2, + output_shape, &has_broad_cast_2) != NNACL_OK) { + return NNACL_ERR; } + ShapeSet(output->shape_, &output->shape_size_, output_shape, ndim); return NNACL_OK; } diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc index d7c987e3..a73fda7c 100644 --- a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc +++ b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.cc @@ -20,6 +20,7 @@ #include "src/litert/kernel_registry.h" #include "include/errorcode.h" #include "nnacl/common_func.h" +#include "nnacl/base/broadcast_to.h" using mindspore::kernel::KERNEL_ARCH; using mindspore::lite::KernelRegistrar; @@ -153,36 +154,58 @@ int WhereCPUKernel::RunWithSingleInput() { } int WhereCPUKernel::RunWithTripleInputs() { - auto condition = in_tensors_.at(0); + TensorC *condition = in_tensors_.at(0)->ConvertToTensorC(); CHECK_NULL_RETURN(condition); - auto x = in_tensors_.at(1); + TensorC *x = in_tensors_.at(1)->ConvertToTensorC(); CHECK_NULL_RETURN(x); - auto y = in_tensors_.at(C2NUM); + TensorC *y = in_tensors_.at(C2NUM)->ConvertToTensorC(); CHECK_NULL_RETURN(y); - int condition_nums = condition->ElementsNum(); - int x_num = x->ElementsNum(); - int y_num = y->ElementsNum(); - int out_num = out_tensors_.front()->ElementsNum(); + TensorC *output = out_tensors_.at(0)->ConvertToTensorC(); + CHECK_NULL_RETURN(output); + int condition_nums = GetElementNum(condition); + int x_num = GetElementNum(x); + int y_num = GetElementNum(y); + int out_num = GetElementNum(output); - condition_ = reinterpret_cast(condition->data()); + condition_ = reinterpret_cast(condition->data_); CHECK_NULL_RETURN(condition_); - x_ = x->data(); + x_ = x->data_; CHECK_NULL_RETURN(x_); - y_ = y->data(); + y_ = y->data_; CHECK_NULL_RETURN(y_); - output_data_ = out_tensors_.at(0)->data(); + output_data_ = output->data_; int num_max = condition_nums > x_num ? condition_nums : (x_num > y_num ? x_num : y_num); where_param_->condition_num_ = condition_nums; where_param_->x_num_ = x_num; where_param_->y_num_ = y_num; where_param_->max_num_ = num_max; - + void *condition_broadcast_buf = nullptr; + void *x_broadcast_buf = nullptr; + void *y_broadcast_buf = nullptr; CHECK_LESS_RETURN(out_num, num_max); if (((condition_nums != 1) && (condition_nums != num_max)) || ((x_num != 1) && (x_num != num_max)) || ((y_num != 1) && (y_num != num_max))) { - MS_LOG(ERROR) << "The length of three inputs are not equal to 1 or length of output, which is unacceptable"; - return RET_ERROR; + if (condition_nums != GetElementNum(y)) { + int ret = + BroadcastForInput(condition, x, y, &condition_broadcast_buf, &x_broadcast_buf, &y_broadcast_buf, output); + if (ret != RET_OK) { + MS_LOG(ERROR) << "BroadcastForInput failed."; + return RET_ERROR; + } + int max_num = GetElementNum(output); + condition_ = reinterpret_cast(condition_broadcast_buf); + x_ = x_broadcast_buf; + y_ = y_broadcast_buf; + output_data_ = output->data_; + where_param_->condition_num_ = max_num; + where_param_->x_num_ = max_num; + where_param_->y_num_ = max_num; + where_param_->max_num_ = max_num; + } else { + MS_LOG(ERROR) << "The length of three inputs are not equal to 1 or length of output, which is unacceptable"; + return RET_ERROR; + } } if (num_max <= 0) { MS_LOG(ERROR) << "Error, inputs' length are zero !!!"; @@ -193,6 +216,9 @@ int WhereCPUKernel::RunWithTripleInputs() { MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]"; return RET_ERROR; } + ms_context_->allocator->Free(condition_broadcast_buf); + ms_context_->allocator->Free(x_broadcast_buf); + ms_context_->allocator->Free(y_broadcast_buf); return RET_OK; } @@ -214,6 +240,48 @@ int WhereCPUKernel::Run() { return ret; } +int WhereCPUKernel::BroadcastForInput(TensorC *condition, TensorC *x, TensorC *y, void **condition_broadcast_buf, + void **x_broadcast_buf, void **y_broadcast_buf, TensorC *output) { + size_t broad_cast_buf_size = GetSize(output); + BroadcastShapeInfo condition_info; + condition_info.input_shape_size_ = condition->shape_size_; + condition_info.output_shape_size_ = output->shape_size_; + (void)memcpy(condition_info.input_shape_, condition->shape_, condition->shape_size_ * sizeof(int)); + (void)memcpy(condition_info.output_shape_, output->shape_, output->shape_size_ * sizeof(int)); + BroadcastShapeInfo x_info; + x_info.input_shape_size_ = x->shape_size_; + x_info.output_shape_size_ = output->shape_size_; + (void)memcpy(x_info.input_shape_, x->shape_, x->shape_size_ * sizeof(int)); + (void)memcpy(x_info.output_shape_, output->shape_, output->shape_size_ * sizeof(int)); + BroadcastShapeInfo y_info; + y_info.input_shape_size_ = y->shape_size_; + y_info.output_shape_size_ = output->shape_size_; + (void)memcpy(y_info.input_shape_, y->shape_, y->shape_size_ * sizeof(int)); + (void)memcpy(y_info.output_shape_, output->shape_, output->shape_size_ * sizeof(int)); + + *condition_broadcast_buf = ms_context_->allocator->Malloc(broad_cast_buf_size); + CHECK_NULL_RETURN(*condition_broadcast_buf); + BroadcastToSize8(condition->data_, &condition_info, *condition_broadcast_buf); + + *x_broadcast_buf = ms_context_->allocator->Malloc(broad_cast_buf_size); + if (*x_broadcast_buf == nullptr) { + ms_context_->allocator->Free(*condition_broadcast_buf); + MS_LOG(ERROR) << "malloc x_broadcast_buf error"; + return RET_ERROR; + } + BroadcastToSize32(x->data_, &x_info, *x_broadcast_buf); + + *y_broadcast_buf = ms_context_->allocator->Malloc(broad_cast_buf_size); + if (*y_broadcast_buf == nullptr) { + ms_context_->allocator->Free(*condition_broadcast_buf); + ms_context_->allocator->Free(*x_broadcast_buf); + MS_LOG(ERROR) << "malloc y_broadcast_buf error"; + return RET_ERROR; + } + BroadcastToSize32(y->data_, &y_info, *y_broadcast_buf); + return RET_OK; +} + REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Where, LiteKernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Where, LiteKernelCreator) REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Where, LiteKernelCreator) diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h index 0d785732..ae6e3eba 100644 --- a/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h +++ b/mindspore/lite/src/litert/kernel/cpu/fp32/where_fp32.h @@ -51,6 +51,8 @@ class WhereCPUKernel : public LiteKernel { private: int RunWithSingleInput(); int RunWithTripleInputs(); + int BroadcastForInput(TensorC *condition, TensorC *x, TensorC *y, void **condition_broadcast_buf, + void **x_broadcast_buf, void **y_broadcast_buf, TensorC *output); }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_WHERE_FP32_H_ diff --git a/mindspore/lite/tools/optimizer/fusion/glu_fusion.h b/mindspore/lite/tools/optimizer/fusion/glu_fusion.h index 5e6a7e79..513a49d9 100644 --- a/mindspore/lite/tools/optimizer/fusion/glu_fusion.h +++ b/mindspore/lite/tools/optimizer/fusion/glu_fusion.h @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright 2021~2024 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ namespace mindspore { namespace opt { class GLUFusion : public LitePatternProcessPass { public: - explicit GLUFusion(const std::string &name = "glu_fusion", bool multigraph = true) + explicit GLUFusion(const std::string &name = "GLUFusion", bool multigraph = true) : LitePatternProcessPass(name, multigraph) {} ~GLUFusion() override = default; -- 2.25.1