1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <cmath>
17 #include <cstring>
18 #include <memory>
19 #include "schema/inner/model_generated.h"
20 #include "common/common_test.h"
21 #include "include/api/context.h"
22 #include "include/api/model.h"
23 #include "include/errorcode.h"
24 #include "src/common/log_adapter.h"
25 #include "src/litert/lite_session.h"
26 #include "include/registry/register_kernel_interface.h"
27 #include "include/registry/register_kernel.h"
28 #include "include/registry/opencl_runtime_wrapper.h"
29 #include "include/api/data_type.h"
30
31 using mindspore::kernel::Kernel;
32 using mindspore::kernel::KernelInterface;
33 using mindspore::schema::PrimitiveType_AddFusion;
34 #define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y))
35 #define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
36
37 namespace mindspore {
38 namespace {
39 constexpr int kDimIndex2 = 2;
40 constexpr int kDimIndex3 = 3;
41 constexpr int kDimIndex4 = 4;
42 constexpr size_t kDimSize2 = 2;
43 constexpr auto kFloat32 = DataType::kNumberTypeFloat32;
44 static const char *arithmetic_source =
45 "\n"
46 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
47 "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"
48 "\n"
49 "__kernel void ElementAdd(__read_only image2d_t input_a, __read_only image2d_t input_b, __write_only image2d_t "
50 "output,\n"
51 " const int2 output_shape) {\n"
52 " int X = get_global_id(0);\n"
53 " int Y = get_global_id(1);\n"
54 " if (X >= output_shape.x || Y >= output_shape.y) {\n"
55 " return;\n"
56 " }\n"
57 "\n"
58 " FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));\n"
59 " FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));\n"
60 " FLT4 result = a + b;\n"
61 "\n"
62 " WRITE_IMAGE(output, (int2)(X, Y), result);\n"
63 "}\n";
64
65 template <typename SrcT, typename DstT>
Broadcast2GpuShape(DstT * dst,const SrcT * src,int src_num)66 void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num) {
67 if (src == nullptr || src_num <= 0) {
68 return;
69 }
70 auto *N = dst;
71 auto *H = dst + 1;
72 auto *W = dst + kDimIndex2;
73 auto *C = dst + kDimIndex3;
74 if (src_num == 1) { // 1 1 1 C
75 *C = src[0];
76 } else if (src_num == kDimIndex2) { // N 1 1 C
77 *N = src[0];
78 *C = src[1];
79 } else if (src_num == kDimIndex3) { // N 1 W C
80 *N = src[0];
81 *W = src[1];
82 *C = src[2];
83 } else if (src_num == kDimIndex4) { // N H W C
84 *N = src[0];
85 *H = src[1];
86 *W = src[2];
87 *C = src[3];
88 } else if (src_num > 4) {
89 std::cerr << "GPU doesn't support ndim>=" << src_num;
90 }
91 }
92
93 template <typename SrcT, typename DstT>
Broadcast2GpuShape(DstT * dst,const SrcT * src,int src_num,DstT default_value)94 void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num, DstT default_value) {
95 for (int i = 0; i < 4; ++i) {
96 dst[i] = default_value;
97 }
98 if (src == nullptr || src_num <= 0) {
99 return;
100 }
101 Broadcast2GpuShape(dst, src, src_num);
102 }
103
104 struct GpuTensorInfo {
105 GpuTensorInfo() = default;
GpuTensorInfomindspore::__anon6e20fdab0111::GpuTensorInfo106 explicit GpuTensorInfo(const MSTensor *tensor, registry::opencl::OpenCLRuntimeWrapper *opencl_run) {
107 if (tensor == nullptr) {
108 return;
109 }
110 auto shape_ori = tensor->Shape();
111 int64_t shape[kDimIndex4];
112 Broadcast2GpuShape(shape, shape_ori.data(), shape_ori.size(), 1l);
113 N = shape[0];
114 H = shape[1];
115 W = shape[kDimIndex2];
116 C = shape[kDimIndex3];
117 Slice = UP_DIV(C, C4NUM);
118 if (tensor->DataType() == mindspore::DataType::kNumberTypeFloat16) {
119 FLT_size = sizeof(cl_half);
120 } else {
121 FLT_size = sizeof(cl_float);
122 }
123 FLT4_size = FLT_size * 4;
124 if (W * Slice <= opencl_run->GetMaxImage2DWidth()) {
125 height = N * H;
126 width = W * Slice;
127 } else {
128 height = N * H * W;
129 width = Slice;
130 if (height > opencl_run->GetMaxImage2DHeight()) {
131 height = -1;
132 width = -1;
133 }
134 }
135
136 ElementsNum = N * H * W * C;
137 Image2DSize = height * width * FLT4_size;
138 }
139 size_t N{1};
140 size_t H{1};
141 size_t W{1};
142 size_t C{1};
143 size_t Slice{};
144 size_t width{};
145 size_t height{};
146 size_t FLT_size{4};
147 size_t FLT4_size{16};
148 size_t ElementsNum{};
149 size_t Image2DSize{};
150 };
151 } // namespace
152
153 class CustomAddKernel : public kernel::Kernel {
154 public:
CustomAddKernel(const std::vector<MSTensor> & inputs,const std::vector<MSTensor> & outputs,const schema::Primitive * primitive,const mindspore::Context * ctx,bool fp16_enable)155 CustomAddKernel(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
156 const schema::Primitive *primitive, const mindspore::Context *ctx, bool fp16_enable)
157 : Kernel(inputs, outputs, primitive, ctx), fp16_enable_(fp16_enable) {}
~CustomAddKernel()158 ~CustomAddKernel() override { FreeWeight(); }
159
CheckInputsDataTypes()160 int CheckInputsDataTypes() { return kSuccess; }
161
162 // Prepare will be called during graph compilation
Prepare()163 int Prepare() override {
164 auto ret = CheckSpecs();
165 if (ret != kSuccess) {
166 std::cerr << "Prepare failed for check kernel specs!";
167 return ret;
168 }
169 const std::string kernel_name_ = "ElementAdd";
170 const std::string program_name = "Arithmetic";
171 std::string source = arithmetic_source;
172 if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
173 std::cerr << "Load source failed.";
174 return kLiteError;
175 }
176 std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};
177 if (fp16_enable_) {
178 build_options_ext.push_back(" -DFLT4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh");
179 } else {
180 build_options_ext.push_back(" -DFLT4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef");
181 }
182
183 if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
184 std::cerr << "Build kernel failed.";
185 return kLiteError;
186 }
187
188 auto out_shape = GpuTensorInfo(&outputs_[0], &opencl_runtime_);
189 local_range_ = cl::NullRange;
190 global_range_ = cl::NDRange(out_shape.width, out_shape.height);
191 for (int i = 0; i < inputs_.size(); ++i) {
192 auto &in_tensor = inputs_.at(i);
193 if (in_tensor.IsConst()) {
194 GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, &opencl_runtime_);
195 std::vector<char> weight(in_shape.Image2DSize, 0);
196 bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
197 PackNHWCToNHWC4(in_tensor.MutableData(), weight.data(), src_is_fp16, fp16_enable_, in_shape,
198 in_tensor.DataType());
199 DataType dtype =
200 fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
201 auto allocator = opencl_runtime_.GetAllocator();
202 if (allocator == nullptr) {
203 std::cerr << "GetAllocator fail.";
204 FreeWeight();
205 return kLiteError;
206 }
207 auto weight_ptr = allocator->Malloc(in_shape.width, in_shape.height, dtype);
208 if (weight_ptr == nullptr) {
209 std::cerr << "Malloc fail.";
210 FreeWeight();
211 return kLiteError;
212 }
213 weight_ptrs_.push_back(weight_ptr);
214 if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
215 std::cerr << "WriteImage fail.";
216 FreeWeight();
217 return kLiteError;
218 }
219 } else {
220 weight_ptrs_.push_back(nullptr);
221 }
222 }
223
224 int arg_idx = 3;
225 cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
226 if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
227 std::cerr << "Set kernel arg" << arg_idx << "failed.";
228 FreeWeight();
229 return kLiteError;
230 }
231
232 std::cout << kernel_name_ << " Prepare Done!" << std::endl;
233 return kSuccess;
234 }
235
236 // Execute is called to compute.
Execute()237 int Execute() override {
238 if (inputs_.size() != kDimSize2) {
239 return kLiteParamInvalid;
240 }
241 PreProcess();
242 std::cout << this->name() << " Running!" << std::endl;
243 auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
244 auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
245 int arg_idx = 0;
246 if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
247 std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
248 return kLiteError;
249 }
250 if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
251 std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
252 return kLiteError;
253 }
254 if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
255 std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
256 return kLiteError;
257 }
258 if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
259 std::cerr << "Run kernel failed.";
260 return kLiteError;
261 }
262
263 return kSuccess;
264 }
265
CheckSpecs()266 int CheckSpecs() {
267 for (auto &tensor : inputs_) {
268 if (tensor.DataType() != DataType::kNumberTypeFloat32 && tensor.DataType() != DataType::kNumberTypeFloat16) {
269 std::cerr << "ArithmeticOpenCLKernel only support fp32/fp16 input";
270 return kLiteError;
271 }
272 }
273 for (auto &tensor : outputs_) {
274 if (tensor.DataType() != DataType::kNumberTypeFloat32 && tensor.DataType() != DataType::kNumberTypeFloat16) {
275 std::cerr << "ArithmeticOpenCLKernel only support fp32/fp16 output";
276 return kLiteError;
277 }
278 }
279
280 if (inputs_.size() != kDimSize2 || outputs_.size() != 1) {
281 std::cerr << "in size: " << inputs_.size() << ", out size: " << outputs_.size();
282 return kLiteError;
283 }
284
285 for (int i = 0; i < inputs_.size(); ++i) {
286 auto &in_tensor = inputs_.at(i);
287 if (!in_tensor.IsConst()) {
288 if (fp16_enable_ && in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat32) {
289 std::cerr << "Inputs data type error, expectation kNumberTypeFloat16 but kNumberTypeFloat32.";
290 return kLiteError;
291 } else if (!fp16_enable_ && in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16) {
292 std::cerr << "Inputs data type error, expectation kNumberTypeFloat32 but kNumberTypeFloat16.";
293 return kLiteError;
294 }
295 }
296 }
297
298 return kSuccess;
299 }
300
301 // Resize is used to update some parameters if current node can change along with inputs.
ReSize()302 int ReSize() override {
303 if (CheckOutputs(outputs_) == kSuccess) {
304 return kSuccess;
305 }
306 auto status =
307 registry::RegisterKernelInterface::GetKernelInterface("", primitive_)->Infer(&inputs_, &outputs_, primitive_);
308 if (status != kSuccess) {
309 std::cerr << "infer failed." << std::endl;
310 return kLiteError;
311 }
312 auto ret = Prepare();
313 if (ret != kSuccess) {
314 std::cerr << "ReSize failed for kernel prepare!";
315 return ret;
316 }
317 return kSuccess;
318 }
319
320 private:
321 const bool fp16_enable_;
322 cl::Kernel kernel_;
323 cl::Event event_;
324 cl::NDRange global_range_{cl::NullRange};
325 cl::NDRange local_range_{cl::NullRange};
326 std::vector<void *> weight_ptrs_;
327 registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;
328
PreProcess()329 int PreProcess() {
330 int ret;
331 ret = ReSize();
332 if (ret != kSuccess) {
333 return ret;
334 }
335 for (auto i = 0; i < outputs_.size(); ++i) {
336 auto *output = &outputs_.at(i);
337 auto img_info = GpuTensorInfo(output, &opencl_runtime_);
338 auto allocator = output->allocator();
339 if (allocator == nullptr) {
340 std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
341 return kLiteError;
342 }
343 auto data_ptr = allocator->Malloc(img_info.width, img_info.height, output->DataType());
344 if (data_ptr == nullptr) {
345 std::cerr << "Malloc data failed";
346 return kLiteError;
347 }
348 output->SetData(data_ptr);
349 }
350 return kSuccess;
351 }
352
CheckOutputs(const std::vector<mindspore::MSTensor> & outputs)353 int CheckOutputs(const std::vector<mindspore::MSTensor> &outputs) {
354 for (auto &output : outputs) {
355 auto output_shape = output.Shape();
356 if (std::find(output_shape.begin(), output_shape.end(), -1) != output_shape.end()) {
357 return kLiteInferInvalid;
358 }
359 }
360 return kSuccess;
361 }
362
PackNHWCToNHWC4(void * src,void * dst,bool src_is_fp16,bool dst_is_fp16,const GpuTensorInfo & tensor,mindspore::DataType data_type)363 void PackNHWCToNHWC4(void *src, void *dst, bool src_is_fp16, bool dst_is_fp16, const GpuTensorInfo &tensor,
364 mindspore::DataType data_type) {
365 auto src_fp16 = static_cast<float16_t *>(src);
366 auto src_fp32 = static_cast<float32_t *>(src);
367 auto src_int32 = static_cast<int32_t *>(src);
368 auto dst_fp16 = static_cast<float16_t *>(dst);
369 auto dst_fp32 = static_cast<float32_t *>(dst);
370 auto dst_int32 = static_cast<int32_t *>(dst);
371 for (int n = 0, src_idx = 0; n < tensor.N; n++) {
372 for (int h = 0; h < tensor.H; ++h) {
373 for (int w = 0; w < tensor.W; ++w) {
374 for (int c = 0; c < tensor.C; ++c, ++src_idx) {
375 int dst_idx = ((n * tensor.H + h) * tensor.W + w) * tensor.Slice * C4NUM + c;
376 if (data_type == mindspore::DataType::kNumberTypeInt32) {
377 dst_int32[dst_idx] = src_int32[src_idx];
378 } else if (dst_is_fp16) {
379 dst_fp16[dst_idx] = src_is_fp16 ? src_fp16[src_idx] : static_cast<float16_t>(src_fp32[src_idx]);
380 } else {
381 dst_fp32[dst_idx] = src_is_fp16 ? static_cast<float32_t>(src_fp16[src_idx]) : src_fp32[src_idx];
382 }
383 }
384 }
385 }
386 }
387 // scalar
388 if (tensor.ElementsNum == 1) {
389 if (dst_is_fp16) {
390 dst_fp16[kDimIndex3] = dst_fp16[kDimIndex2] = dst_fp16[1] = dst_fp16[0];
391 } else {
392 dst_fp32[kDimIndex3] = dst_fp32[kDimIndex2] = dst_fp32[1] = dst_fp32[0];
393 }
394 }
395 }
396
FreeWeight()397 void FreeWeight() {
398 auto allocator = opencl_runtime_.GetAllocator();
399 if (allocator == nullptr) {
400 std::cerr << "GetAllocator fail.";
401 return;
402 }
403 for (auto &weight_ptr : weight_ptrs_) {
404 if (weight_ptr != nullptr) {
405 allocator->Free(weight_ptr);
406 weight_ptr = nullptr;
407 }
408 }
409 }
410 };
411
412 class CustomAddInfer : public kernel::KernelInterface {
413 public:
414 CustomAddInfer() = default;
415 ~CustomAddInfer() = default;
416
Infer(std::vector<mindspore::MSTensor> * inputs,std::vector<mindspore::MSTensor> * outputs,const schema::Primitive * primitive)417 Status Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
418 const schema::Primitive *primitive) override {
419 (*outputs)[0].SetFormat((*inputs)[0].format());
420 (*outputs)[0].SetDataType((*inputs)[0].DataType());
421 (*outputs)[0].SetShape((*inputs)[0].Shape());
422 return kSuccess;
423 }
424 };
425
426 namespace {
CustomAddCreator(const std::vector<MSTensor> & inputs,const std::vector<MSTensor> & outputs,const schema::Primitive * primitive,const mindspore::Context * ctx)427 std::shared_ptr<kernel::Kernel> CustomAddCreator(const std::vector<MSTensor> &inputs,
428 const std::vector<MSTensor> &outputs,
429 const schema::Primitive *primitive, const mindspore::Context *ctx) {
430 bool fp16_enable = false;
431
432 std::cout << "using fp32 add.\n" << std::endl;
433 return std::make_shared<CustomAddKernel>(inputs, outputs, primitive, ctx, fp16_enable);
434 }
435
CustomAddInferCreator()436 std::shared_ptr<kernel::KernelInterface> CustomAddInferCreator() { return std::make_shared<CustomAddInfer>(); }
437 } // namespace
438
439 REGISTER_CUSTOM_KERNEL_INTERFACE(BuiltInTest, Custom_Add, CustomAddInferCreator)
440 // Register custom “Custom_Add” operator
441 REGISTER_CUSTOM_KERNEL(GPU, BuiltInTest, kFloat32, Custom_Add, CustomAddCreator)
442
443 class TestGPURegistryCustomOp : public mindspore::CommonTest {
444 public:
445 TestGPURegistryCustomOp() = default;
446 };
447
TEST_F(TestGPURegistryCustomOp,TestGPUCustomAdd)448 TEST_F(TestGPURegistryCustomOp, TestGPUCustomAdd) {
449 auto meta_graph = std::make_shared<schema::MetaGraphT>();
450 meta_graph->name = "graph";
451
452 auto node = std::make_unique<schema::CNodeT>();
453 node->inputIndex = {0, 1};
454 node->outputIndex = {2};
455 node->primitive = std::make_unique<schema::PrimitiveT>();
456 node->primitive->value.type = schema::PrimitiveType_Custom;
457 auto primitive = new schema::CustomT;
458 primitive->type = "Custom_Add";
459 node->primitive->value.value = primitive;
460 node->name = "Add";
461 meta_graph->nodes.emplace_back(std::move(node));
462 meta_graph->inputIndex = {0, 1};
463 meta_graph->outputIndex = {2};
464
465 auto input0 = std::make_unique<schema::TensorT>();
466 input0->nodeType = lite::NodeType_Parameter;
467 input0->format = schema::Format_NHWC;
468 input0->dataType = TypeId::kNumberTypeFloat32;
469 input0->dims = {1, 28, 28, 3};
470 input0->offset = -1;
471 meta_graph->allTensors.emplace_back(std::move(input0));
472
473 auto weight = std::make_unique<schema::TensorT>();
474 weight->nodeType = lite::NodeType_ValueNode;
475 weight->format = schema::Format_NHWC;
476 weight->dataType = TypeId::kNumberTypeFloat32;
477 weight->dims = {1, 28, 28, 3};
478
479 weight->offset = -1;
480 meta_graph->allTensors.emplace_back(std::move(weight));
481
482 auto output = std::make_unique<schema::TensorT>();
483 output->nodeType = lite::NodeType_Parameter;
484 output->format = schema::Format_NHWC;
485 output->dataType = TypeId::kNumberTypeFloat32;
486 output->offset = -1;
487 meta_graph->allTensors.emplace_back(std::move(output));
488
489 flatbuffers::FlatBufferBuilder builder(1024);
490 auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
491 builder.Finish(offset);
492 schema::FinishMetaGraphBuffer(builder, offset);
493 size_t size = builder.GetSize();
494 const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
495
496 // create a context
497 auto context = std::make_shared<mindspore::Context>();
498 context->SetThreadNum(1);
499 context->SetEnableParallel(false);
500 context->SetThreadAffinity(lite::HIGHER_CPU);
501 auto &device_list = context->MutableDeviceInfo();
502
503 std::shared_ptr<CPUDeviceInfo> device_info = std::make_shared<CPUDeviceInfo>();
504 device_info->SetEnableFP16(false);
505 device_list.push_back(device_info);
506
507 std::shared_ptr<GPUDeviceInfo> provider_gpu_device_info = std::make_shared<GPUDeviceInfo>();
508 provider_gpu_device_info->SetEnableFP16(false);
509 provider_gpu_device_info->SetProviderDevice("GPU");
510 provider_gpu_device_info->SetProvider("BuiltInTest");
511 device_list.push_back(provider_gpu_device_info);
512
513 // build a model
514 auto model = std::make_shared<mindspore::Model>();
515 auto ret = model->Build(content, size, kMindIR_Lite, context);
516 ASSERT_EQ(kSuccess, ret.StatusCode());
517 auto inputs = model->GetInputs();
518 ASSERT_EQ(inputs.size(), 2);
519 auto inTensor = inputs.front();
520 auto impl = inTensor.impl();
521 ASSERT_NE(nullptr, impl);
522 float *in0_data = static_cast<float *>(inTensor.MutableData());
523 in0_data[0] = 10.0f;
524 auto inTensor1 = inputs.back();
525 impl = inTensor1.impl();
526 ASSERT_NE(nullptr, impl);
527 float *in1_data = static_cast<float *>(inTensor1.MutableData());
528 in1_data[0] = 20.0f;
529 std::vector<mindspore::MSTensor> outputs;
530 ret = model->Predict(inputs, &outputs);
531 ASSERT_EQ(kSuccess, ret.StatusCode());
532 ASSERT_EQ(outputs.size(), 1);
533 impl = outputs.front().impl();
534 ASSERT_NE(nullptr, impl);
535 ASSERT_EQ(28 * 28 * 3, outputs.front().ElementNum());
536 ASSERT_EQ(DataType::kNumberTypeFloat32, outputs.front().DataType());
537 auto *outData = reinterpret_cast<const float *>(outputs.front().Data().get());
538 ASSERT_NE(nullptr, outData);
539 ASSERT_EQ(30.0f, outData[0]);
540 MS_LOG(INFO) << "Register add op test pass.";
541 }
542 } // namespace mindspore
543