/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "executor.h"

#include "securec.h"

#include "common/utils.h"
#include "common/scoped_trace.h"


namespace OHOS {
namespace NeuralNetworkRuntime {
Executor::Executor(const Compilation* compilation)
    : m_modelInputs(compilation->GetInputTensors()),
      m_modelOutputs(compilation->GetOutputTensors()),
      m_executionPlan(compilation->GetExecutionPlan()) {}

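// Validates the input index and nnTensor, then builds inputTensor from nnTensor.
// The tensor must have a fixed shape and the same attributes as the corresponding
// model input; on success the model input's name is copied to the new tensor.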
OH_NN_ReturnCode Executor::BuildInputTensor(uint32_t index, const OH_NN_Tensor& nnTensor,
                                            std::shared_ptr<NNTensor> inputTensor) const
{
    // Note: the model inputs only carry shape information.
    if (index >= m_modelInputs.size()) {
        LOGE("BuildInputTensor failed, input index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    // Build a tensor from nnTensor.
    auto ret = inputTensor->BuildFromOHNNTensor(nnTensor);
    if (ret != OH_NN_SUCCESS) {
        LOGE("BuildInputTensor failed, please check input nnTensor.");
        return ret;
    }

    if (inputTensor->IsDynamicShape()) {
        LOGE("BuildInputTensor failed, input nnTensor must have fixed dimensions and cannot contain -1.");
        return OH_NN_INVALID_PARAMETER;
    }

    if (!m_modelInputs[index]->CompareAttribute(*inputTensor)) {
        LOGE("BuildInputTensor failed, input has different attributes from the one in the constructed model.");
        return OH_NN_INVALID_PARAMETER;
    }

    inputTensor->SetName(m_modelInputs[index]->GetName());
    return OH_NN_SUCCESS;
}


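// Reuses the device buffer already held by m_inputTensors[index]: copies the user data
// into it, moves the buffer to the new tensor, and replaces the stored tensor.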
OH_NN_ReturnCode Executor::SetInputTensorWithCurrentBuffer(uint32_t index,
                                                           std::shared_ptr<NNTensor> inputTensor,
                                                           const void* buffer,
                                                           size_t dataLength,
                                                           size_t curBufferLength)
{
    void* curBuffer = m_inputTensors[index].tensor->GetBuffer();
    errno_t status = memcpy_s(curBuffer, dataLength, buffer, dataLength);
    // The current buffer inside m_inputTensors is managed by the executor; it does not need
    // to be released here if memcpy_s fails.
    if (status != EOK) {
        LOGE("SetInputTensorWithCurrentBuffer failed, copy data from user buffer to device buffer failed. "
             "Error code: %d.", status);
        return OH_NN_MEMORY_ERROR;
    }

    // Set the new tensor with the buffer of the current tensor.
    inputTensor->SetBuffer(curBuffer, curBufferLength);

    // The memory is reused here. Thus, the current tensor's buffer must be set to nullptr, in case the memory
    // is released twice.
    m_inputTensors[index].tensor->SetBuffer(nullptr, 0);

    // Store the new tensor and release the current one.
    m_inputTensors[index].tensor = inputTensor;
    return OH_NN_SUCCESS;
}


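// Attaches inputBuffer to inputTensor and stores it at the given index, releasing any
// buffer previously allocated by the executor for this input. isInnerMem records
// whether the executor owns the new buffer.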
void Executor::SetInputTensorWithNewBuffer(uint32_t index,
                                           std::shared_ptr<NNTensor> inputTensor,
                                           const void* inputBuffer,
                                           size_t length,
                                           bool isInnerMem)
{
    // Release the memory inside the tensor first, if it was allocated by the executor during SetInput().
    if (m_inputTensors.find(index) != m_inputTensors.end()) {
        if (m_inputTensors[index].isInnerMem) {
            void* curBuffer = m_inputTensors[index].tensor->GetBuffer();
            std::shared_ptr<Device> inputDevice = m_executionPlan->GetInputDevice();
            inputDevice->ReleaseBuffer(curBuffer);
        }
        // Set the current tensor's buffer to nullptr in case the NNTensor releases the driver memory
        // in its destructor.
        m_inputTensors[index].tensor->SetBuffer(nullptr, 0);
    }

    // Set the new input tensor data buffer.
    inputTensor->SetBuffer(inputBuffer, length);

    // Create or update the input tensor.
    ExeTensor exeTensor{inputTensor, nullptr, 0, isInnerMem};
    m_inputTensors[index] = exeTensor;
}


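// Copies the user data in `buffer` to a device buffer for input `index`. A buffer
// allocated by a previous SetInput() call is reused when it is large enough;
// otherwise a new device buffer of `length` bytes is allocated.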
OH_NN_ReturnCode Executor::SetInput(uint32_t index, const OH_NN_Tensor& nnTensor, const void* buffer, size_t length)
{
    std::shared_ptr<NNTensor> inputTensor = CreateSharedPtr<NNTensor>();
    if (inputTensor == nullptr) {
        LOGE("SetInput failed, error happened when creating NNTensor.");
        return OH_NN_MEMORY_ERROR;
    }

    auto ret = BuildInputTensor(index, nnTensor, inputTensor);
    if (ret != OH_NN_SUCCESS) {
        LOGE("SetInput failed, please check input index or nnTensor.");
        return ret;
    }

    // dataLength is guaranteed to be larger than 0 after BuildInputTensor().
    size_t dataLength = inputTensor->GetDataLength();
    if (length == 0 || length < dataLength) {
        LOGE("SetInput failed, the given buffer length is too small to store the input nnTensor data.");
        return OH_NN_INVALID_PARAMETER;
    }

    // Get the length of the current buffer if it was allocated by a previous SetInput() call.
    size_t curBufferLength = 0;
    if ((m_inputTensors.find(index) != m_inputTensors.end()) && (m_inputTensors[index].isInnerMem)) {
        curBufferLength = m_inputTensors[index].tensor->GetBufferLength();
    }

    // (dataLength <= curBufferLength) is true if and only if the current buffer was allocated by a previous
    // SetInput() call and is large enough to hold the new input data.
    if (dataLength <= curBufferLength) {
        ret = SetInputTensorWithCurrentBuffer(index, inputTensor, buffer, dataLength, curBufferLength);
        if (ret != OH_NN_SUCCESS) {
            LOGE("SetInput failed, error happened when setting input with current buffer.");
            return ret;
        }
        m_isRun = false;
        return OH_NN_SUCCESS;
    }

    /**
     * The buffer needs to be allocated or reallocated if:
     *
     * - The current buffer is not large enough.
     * - SetInput() has not been called for this input before.
     * - The buffer held in m_inputTensors was allocated and set by CreateInputMemory() and SetInputFromMemory().
     */
    std::shared_ptr<Device> inputDevice = m_executionPlan->GetInputDevice();
    void* inputBuffer = inputDevice->AllocateBuffer(length);
    if (inputBuffer == nullptr) {
        LOGE("SetInput failed, error happened when allocating input device buffer.");
        return OH_NN_MEMORY_ERROR;
    }

    errno_t status = memcpy_s(inputBuffer, dataLength, buffer, dataLength);
    if (status != EOK) {
        LOGE("SetInput failed, copy data from user buffer failed. Error code: %d.", status);
        inputDevice->ReleaseBuffer(inputBuffer);
        return OH_NN_MEMORY_ERROR;
    }

    SetInputTensorWithNewBuffer(index, inputTensor, inputBuffer, length, true);
    m_isRun = false;
    return OH_NN_SUCCESS;
}


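// Binds input `index` directly to the caller-provided OH_NN_Memory region. No data is
// copied and the executor does not take ownership of the memory.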
OH_NN_ReturnCode Executor::SetInputFromMemory(uint32_t index, const OH_NN_Tensor& nnTensor, const OH_NN_Memory& memory)
{
    // Build an input tensor.
    std::shared_ptr<NNTensor> inputTensor = CreateSharedPtr<NNTensor>();
    if (inputTensor == nullptr) {
        LOGE("SetInputFromMemory failed, error happened when creating NNTensor.");
        return OH_NN_MEMORY_ERROR;
    }

    auto ret = BuildInputTensor(index, nnTensor, inputTensor);
    if (ret != OH_NN_SUCCESS) {
        LOGE("SetInputFromMemory failed, please check input index or nnTensor.");
        return ret;
    }

    // Check the data length.
    size_t dataLength = inputTensor->GetDataLength();
    if (memory.length == 0 || memory.length < dataLength) {
        LOGE("SetInputFromMemory failed,"
             " the length in the given memory is too small to store the input nnTensor data.");
        return OH_NN_INVALID_PARAMETER;
    }

    SetInputTensorWithNewBuffer(index, inputTensor, const_cast<const void*>(memory.data), memory.length, false);
    m_isRun = false;
    return OH_NN_SUCCESS;
}


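// Registers a user buffer for output `index`. The executor keeps an inner device buffer
// for inference and copies the result into the user buffer when Run() completes.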
OH_NN_ReturnCode Executor::SetOutput(uint32_t index, void* buffer, size_t length)
{
    if (index >= m_modelOutputs.size()) {
        LOGE("SetOutput failed, output index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    size_t dataLength = m_modelOutputs[index]->GetDataLength();
    if (length == 0 || length < dataLength) {
        LOGE("SetOutput failed, the given buffer length is too small to store the output tensor data.");
        return OH_NN_INVALID_PARAMETER;
    }

    // If the output tensor does not exist, or the inner device buffer is not large enough,
    // or the device buffer was set by SetOutputFromMemory() before,
    // allocate a new device buffer, set it on the output tensor and update the user buffer.
    std::shared_ptr<Device> outputDevice = m_executionPlan->GetOutputDevice();
    if (m_outputTensors.find(index) != m_outputTensors.end()) {
        if (m_outputTensors[index].isInnerMem) {
            size_t curBufferLength = m_outputTensors[index].tensor->GetBufferLength();
            if (length <= curBufferLength) {
                // If the current device buffer is large enough, only update the user buffer.
                m_outputTensors[index].userBuffer = buffer;
                m_outputTensors[index].userBufferLength = length;
                m_isRun = false;
                return OH_NN_SUCCESS;
            } else {
                // If the current device buffer is not large enough,
                // release it and then allocate a new one below.
                void* curBuffer = m_outputTensors[index].tensor->GetBuffer();
                outputDevice->ReleaseBuffer(curBuffer);
            }
        }
    } else {
        // If the output tensor does not exist, create a new entry backed by the model output tensor.
        ExeTensor exeTensor;
        m_outputTensors[index] = exeTensor;
        m_outputTensors[index].tensor = m_modelOutputs[index];
    }

    void* deviceOutputBuffer = outputDevice->AllocateBuffer(length);
    if (deviceOutputBuffer == nullptr) {
        LOGE("SetOutput failed, allocating output device buffer failed.");
        return OH_NN_MEMORY_ERROR;
    }

    m_outputTensors[index].tensor->SetBuffer(deviceOutputBuffer, length);
    m_outputTensors[index].userBuffer = buffer;
    m_outputTensors[index].userBufferLength = length;
    m_outputTensors[index].isInnerMem = true;
    m_isRun = false;
    return OH_NN_SUCCESS;
}


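// Binds output `index` directly to the caller-provided OH_NN_Memory region, releasing any
// inner device buffer previously allocated for this output. Run() then writes the result
// into the memory region without an extra copy.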
OH_NN_ReturnCode Executor::SetOutputFromMemory(uint32_t index, const OH_NN_Memory& memory)
{
    if (index >= m_modelOutputs.size()) {
        LOGE("SetOutputFromMemory failed, output index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    size_t dataLength = m_modelOutputs[index]->GetDataLength();
    if (memory.length == 0 || memory.length < dataLength) {
        LOGE("SetOutputFromMemory failed, the memory is too small to store the output tensor data.");
        return OH_NN_INVALID_PARAMETER;
    }

    if (m_outputTensors.find(index) != m_outputTensors.end()) {
        if (m_outputTensors[index].isInnerMem) {
            // If the current buffer is an inner buffer, release it.
            void* curBuffer = m_outputTensors[index].tensor->GetBuffer();
            std::shared_ptr<Device> outputDevice = m_executionPlan->GetOutputDevice();
            outputDevice->ReleaseBuffer(curBuffer);
        }
    } else {
        // If the output tensor does not exist, create a new entry backed by the model output tensor.
        ExeTensor exeTensor;
        m_outputTensors[index] = exeTensor;
        m_outputTensors[index].tensor = m_modelOutputs[index];
    }

    // Set the output tensor with the given memory.
    m_outputTensors[index].tensor->SetBuffer(const_cast<const void*>(memory.data), memory.length);
    m_outputTensors[index].userBuffer = nullptr;
    m_outputTensors[index].userBufferLength = 0;
    m_outputTensors[index].isInnerMem = false;
    m_isRun = false;
    return OH_NN_SUCCESS;
}


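// Returns the dimensions of output `index` after a successful Run(). The returned pointer
// refers to storage cached in m_outputDimensions and is owned by the executor.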
OH_NN_ReturnCode Executor::GetOutputShape(uint32_t index, int32_t** dimensions, uint32_t& dimensionCount)
{
    if (!m_isRun) {
        LOGE("GetOutputShape failed, cannot get output dimensions before Run.");
        return OH_NN_OPERATION_FORBIDDEN;
    }

    if (index >= m_modelOutputs.size()) {
        LOGE("GetOutputShape failed, output index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    if (m_outputTensors.find(index) == m_outputTensors.end()) {
        LOGE("GetOutputShape failed, output has not been set. Output index: %u.", index);
        return OH_NN_INVALID_PARAMETER;
    }

    m_outputDimensions[index] = m_outputTensors[index].tensor->GetDimensions();
    *dimensions = m_outputDimensions[index].data();
    dimensionCount = m_outputDimensions[index].size();

    return OH_NN_SUCCESS;
}


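// Allocates a device buffer of `length` bytes for input `index` and wraps it in an
// OH_NN_Memory object. The buffer address is recorded so that DestroyInputMemory()
// can verify and release it later.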
OH_NN_ReturnCode Executor::CreateInputMemory(uint32_t index, size_t length, OH_NN_Memory** memory)
{
    if (index >= m_modelInputs.size()) {
        LOGE("CreateInputMemory failed, input index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    // Allocate a device buffer.
    std::shared_ptr<Device> inputDevice = m_executionPlan->GetInputDevice();
    void* deviceInputBuffer = inputDevice->AllocateBuffer(length);
    if (deviceInputBuffer == nullptr) {
        LOGE("CreateInputMemory failed, allocating input device buffer failed.");
        return OH_NN_MEMORY_ERROR;
    }

    *memory = new(std::nothrow) OH_NN_Memory{deviceInputBuffer, length};
    if (*memory == nullptr) {
        LOGE("CreateInputMemory failed, constructing OH_NN_Memory failed.");
        inputDevice->ReleaseBuffer(deviceInputBuffer);
        return OH_NN_MEMORY_ERROR;
    }

    // Save the buffer address so it can be checked when the memory is destroyed.
    m_inputCreatedMem[index].emplace_back(deviceInputBuffer);

    return OH_NN_SUCCESS;
}


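// Releases a device buffer previously created by CreateInputMemory() for input `index`,
// deletes the OH_NN_Memory wrapper and resets the caller's pointer to nullptr.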
OH_NN_ReturnCode Executor::DestroyInputMemory(uint32_t index, OH_NN_Memory** memory)
{
    if (index >= m_modelInputs.size()) {
        LOGE("DestroyInputMemory failed, input index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    if (m_inputCreatedMem.find(index) == m_inputCreatedMem.end()) {
        LOGE("DestroyInputMemory failed, no memory has been created for this index.");
        return OH_NN_INVALID_PARAMETER;
    }

    std::vector<void*>& inputCreatedMem = m_inputCreatedMem[index];
    auto pos = std::find(inputCreatedMem.begin(), inputCreatedMem.end(), (*memory)->data);
    if (pos == inputCreatedMem.end()) {
        LOGE("DestroyInputMemory failed, the index does not match the memory.");
        return OH_NN_INVALID_PARAMETER;
    }

    std::shared_ptr<Device> inputDevice = m_executionPlan->GetInputDevice();
    auto ret = inputDevice->ReleaseBuffer((*memory)->data);
    if (ret != OH_NN_SUCCESS) {
        LOGE("Release input buffer failed.");
        return ret;
    }

    inputCreatedMem.erase(pos);
    delete *memory;
    *memory = nullptr;

    return OH_NN_SUCCESS;
}


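// Allocates a device buffer of `length` bytes for output `index` and wraps it in an
// OH_NN_Memory object. The buffer address is recorded so that DestroyOutputMemory()
// can verify and release it later.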
OH_NN_ReturnCode Executor::CreateOutputMemory(uint32_t index, size_t length, OH_NN_Memory** memory)
{
    if (index >= m_modelOutputs.size()) {
        LOGE("CreateOutputMemory failed, output index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    // Allocate a device buffer.
    std::shared_ptr<Device> outputDevice = m_executionPlan->GetOutputDevice();
    void* deviceOutputBuffer = outputDevice->AllocateBuffer(length);
    if (deviceOutputBuffer == nullptr) {
        LOGE("CreateOutputMemory failed, allocating output device buffer failed.");
        return OH_NN_MEMORY_ERROR;
    }

    *memory = new(std::nothrow) OH_NN_Memory{deviceOutputBuffer, length};
    if (*memory == nullptr) {
        LOGE("CreateOutputMemory failed, constructing OH_NN_Memory failed.");
        outputDevice->ReleaseBuffer(deviceOutputBuffer);
        return OH_NN_MEMORY_ERROR;
    }

    // Save the buffer address so it can be checked when the memory is destroyed.
    m_outputCreatedMem[index].emplace_back(deviceOutputBuffer);

    return OH_NN_SUCCESS;
}


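// Releases a device buffer previously created by CreateOutputMemory() for output `index`,
// deletes the OH_NN_Memory wrapper and resets the caller's pointer to nullptr.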
OH_NN_ReturnCode Executor::DestroyOutputMemory(uint32_t index, OH_NN_Memory** memory)
{
    if (index >= m_modelOutputs.size()) {
        LOGE("DestroyOutputMemory failed, output index is out of range.");
        return OH_NN_INVALID_PARAMETER;
    }

    if (m_outputCreatedMem.find(index) == m_outputCreatedMem.end()) {
        LOGE("DestroyOutputMemory failed, no memory has been created for this index.");
        return OH_NN_INVALID_PARAMETER;
    }

    std::vector<void*>& outputCreatedMem = m_outputCreatedMem[index];
    auto pos = std::find(outputCreatedMem.begin(), outputCreatedMem.end(), (*memory)->data);
    if (pos == outputCreatedMem.end()) {
        LOGE("DestroyOutputMemory failed, the index does not match the memory.");
        return OH_NN_INVALID_PARAMETER;
    }

    std::shared_ptr<Device> outputDevice = m_executionPlan->GetOutputDevice();
    auto ret = outputDevice->ReleaseBuffer((*memory)->data);
    if (ret != OH_NN_SUCCESS) {
        LOGE("Release output buffer failed.");
        return ret;
    }

    outputCreatedMem.erase(pos);
    delete *memory;
    *memory = nullptr;

    return OH_NN_SUCCESS;
}


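// Runs inference on the execution plan with the tensors registered through the SetInput*
// and SetOutput* calls, then copies executor-owned output buffers back into the user
// buffers registered by SetOutput().
//
// A minimal usage sketch (illustrative only; `compilation`, `nnTensor` and the data
// buffers are assumed to be prepared by the caller):
//
//     Executor executor(&compilation);
//     executor.SetInput(0, nnTensor, inputData, inputLength);
//     executor.SetOutput(0, outputData, outputLength);
//     if (executor.Run() == OH_NN_SUCCESS) {
//         int32_t* dims = nullptr;
//         uint32_t dimCount = 0;
//         executor.GetOutputShape(0, &dims, dimCount);
//     }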
OH_NN_ReturnCode Executor::Run()
{
    NNRT_TRACE_NAME("Execution");
    if (m_modelInputs.size() != m_inputTensors.size()) {
        LOGE("Run failed, some input tensors have not been set.");
        return OH_NN_INVALID_PARAMETER;
    }
    if (m_modelOutputs.size() != m_outputTensors.size()) {
        LOGE("Run failed, some output tensors have not been set.");
        return OH_NN_INVALID_PARAMETER;
    }

    // Build the NNTensor pointer vectors: inputTensors and outputTensors.
    std::vector<std::shared_ptr<NNTensor>> inputTensors;
    std::vector<std::shared_ptr<NNTensor>> outputTensors;
    size_t inputSize = m_inputTensors.size();
    size_t outputSize = m_outputTensors.size();
    for (size_t i = 0; i < inputSize; ++i) {
        inputTensors.emplace_back(m_inputTensors[i].tensor);
    }
    for (size_t i = 0; i < outputSize; ++i) {
        outputTensors.emplace_back(m_outputTensors[i].tensor);
    }

    // Predict.
    auto ret = m_executionPlan->Run(inputTensors, outputTensors);
    if (ret != OH_NN_SUCCESS) {
        LOGE("Run failed, error happened when executing the inference.");
        return ret;
    }

    errno_t status{EOK};
    // Copy the inner device buffer to the user buffer for outputs registered with SetOutput().
    for (size_t i = 0; i < outputSize; ++i) {
        if (m_outputTensors[i].isInnerMem) {
            auto size = outputTensors[i]->GetDataLength();
            if (size > m_outputTensors[i].userBufferLength) {
                LOGE("Output buffer is not large enough. Given buffer size=%zu, actual output size=%zu.",
                     m_outputTensors[i].userBufferLength, size);
                return OH_NN_INVALID_PARAMETER;
            }

            void* deviceBuffer = outputTensors[i]->GetBuffer();
            if (deviceBuffer == nullptr) {
                LOGE("Output buffer is nullptr.");
                return OH_NN_FAILED;
            }

            status = memcpy_s(m_outputTensors[i].userBuffer, m_outputTensors[i].userBufferLength, deviceBuffer, size);
            if (status != EOK) {
                LOGE("Run failed, memory copy from device buffer to user buffer failed. Error code: %d.", status);
                return OH_NN_MEMORY_ERROR;
            }
        }
    }

    m_isRun = true;
    return OH_NN_SUCCESS;
}

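// Releases all executor-owned device buffers, detaches shared buffers from the cached
// tensors and clears the internal bookkeeping containers.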
Executor::~Executor()
{
    std::shared_ptr<Device> inputDevice;
    for (auto& it : m_inputTensors) {
        inputDevice = m_executionPlan->GetInputDevice();
        if ((it.second).isInnerMem) {
            inputDevice->ReleaseBuffer((it.second).tensor->GetBuffer());
        }
        (it.second).tensor->SetBuffer(nullptr, 0);
        (it.second).tensor.reset();
        (it.second).userBuffer = nullptr;
    }
    m_inputTensors.clear();

    std::shared_ptr<Device> outputDevice;
    for (auto& it : m_outputTensors) {
        outputDevice = m_executionPlan->GetOutputDevice();
        if ((it.second).isInnerMem) {
            outputDevice->ReleaseBuffer((it.second).tensor->GetBuffer());
        }
        (it.second).tensor->SetBuffer(nullptr, 0);
        (it.second).tensor.reset();
        (it.second).userBuffer = nullptr;
    }
    m_outputTensors.clear();

    for (auto& it : m_inputCreatedMem) {
        it.second.clear();
    }
    m_inputCreatedMem.clear();

    for (auto& it : m_outputCreatedMem) {
        it.second.clear();
    }
    m_outputCreatedMem.clear();

    m_outputDimensions.clear();
    m_modelInputs.clear();
    m_modelOutputs.clear();
}
} // namespace NeuralNetworkRuntime
} // namespace OHOS