1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
// Define USE_ION_MEMORY to demonstrate the performance difference between ION
// and HLOS memory when sharing buffers with the ADSP.
18 #define USE_ION_MEMORY
19
#include "hexagon_controller.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "adspmsgd.h"
#include "dspCV.h"
#include "node_data_float.h"
#include "rpcmem.h"  // helper API's for shared buffer allocation
#include "soc_interface.h"
#include "tfm_log.h"
31
// Selects which pre-generated Inception dummy input is copied into the input
// buffer: float data when true, int (uint8) data when false. Int data is only
// for acceleration purposes; you may also need to change android.min.
static const bool USE_FLOAT_DATA = true;

// If true, node ids are included in the per-node log lines written while
// nodes are appended to a graph.
static const bool DBG_SHOW_ID = false;
38
// Capacity, in bytes, of each scratch line used to format a node's input and
// output parameters for logging.
static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000;

// Pre-generated Inception dummy inputs, defined in another translation unit.
extern uint8_t inception_dummy_int_data_224x224[];
extern uint8_t inception_dummy_int_data_299x299[];
extern float inception_dummy_float_data_299x299[];

#define HEXAGON_CONTROLLER_VERSION 101

// Size in bytes (2 MB) of the statically reserved graph print buffer.
// This is a macro, not a `static const` object, because the bound of a
// file-scope array must be an integer constant expression in C. A same-named
// `static const` object would be hidden by the macro and never used.
#define PRINT_BUFSIZE (2 * 1024 * 1024)

static unsigned char s_print_buf[PRINT_BUFSIZE];
54
// Number of slots in the input / output port tables below.
#define MAX_INPUTS 10
#define MAX_OUTPUTS 10

// Input port table. A slot whose max_buf_byte_size is 0 is treated as
// unallocated by the functions in this file.
static struct NodeDataFloat s_input_node_data_buffer[MAX_INPUTS];
// Output port tables, all indexed by port: the buffer pointer, its capacity in
// bytes (0 marks an unallocated slot), and the number of valid bytes recorded
// after the last graph execution.
static uint8_t* s_output_node_data_buffer[MAX_OUTPUTS];
static int s_output_node_data_buffer_max_byte_size[MAX_OUTPUTS];
static int s_output_node_data_array_byte_size[MAX_OUTPUTS];
// Graph id stored and returned by the Set/GetTargetGraphId accessors.
static uint32_t s_target_graph_id;

// Debug switches: when the flag is set, input port 0 is filled with the
// pre-generated Inception dummy data for the given version, and the ranking
// of the first output is checked after execution.
static bool s_dbg_use_inception_dummy_data = false;
static int s_dbg_inception_version = 3;
66
GetInputNodeCount()67 static int GetInputNodeCount() {
68 for (int i = 0; i < MAX_INPUTS; ++i) {
69 if (s_input_node_data_buffer[i].max_buf_byte_size == 0) {
70 return i;
71 }
72 }
73 return 0;
74 }
75
GetOutputNodeCount()76 static int GetOutputNodeCount() {
77 for (int i = 0; i < MAX_OUTPUTS; ++i) {
78 if (s_output_node_data_buffer_max_byte_size[i] == 0) {
79 return i;
80 }
81 }
82 return 0;
83 }
84
// Populates `tensordef` from the input buffer registered for `port`.
// Returns false (after logging) when `port` is not below the current input
// node count; `tensordef` is left untouched in that case.
static bool SetInputTensorDef(int port, hexagon_nn_tensordef* tensordef) {
  const int input_count = GetInputNodeCount();
  if (!(port < input_count)) {
    TFMLOGE("Error exceeds input count.");
    return false;
  }

  struct NodeDataFloat* src = &s_input_node_data_buffer[port];

  // Payload pointer and its length in bytes.
  tensordef->data = src->byte_array_data;
  tensordef->dataLen = src->array_byte_size;

  // Shape: x/y/z/d map to batches/height/width/depth.
  tensordef->batches = src->x;
  tensordef->height = src->y;
  tensordef->width = src->z;
  tensordef->depth = src->d;
  return true;
}
101
// Fills tensordef[0..node_count) from the allocated input buffers.
// `node_count` must equal the number of allocated input ports.
// Returns false if the count does not match or if any port fails to populate.
bool hexagon_controller_SetAllInputTensorDef(int node_count,
                                             hexagon_nn_tensordef* tensordef) {
  if (node_count != GetInputNodeCount()) {
    TFMLOGE("Error invalid input node count.");
    return false;
  }
  bool success = true;
  for (int i = 0; i < node_count; ++i) {
    // Keep going after a failure so every valid port is still populated, but
    // report the failure to the caller instead of dropping it.
    if (!SetInputTensorDef(i, &tensordef[i])) {
      success = false;
    }
  }
  return success;
}
114
// Points `tensordef` at the output buffer registered for `port` and records
// that buffer's capacity as dataLen.
// Returns false (after logging) when `port` is not below the current output
// node count.
static bool SetOutputTensorDef(int port, hexagon_nn_tensordef* tensordef) {
  const bool port_in_range = port < GetOutputNodeCount();
  if (!port_in_range) {
    TFMLOGE("Error exceeds output count.");
    return false;
  }

  tensordef->dataLen = s_output_node_data_buffer_max_byte_size[port];
  tensordef->data = s_output_node_data_buffer[port];
  return true;
}
124
// Fills tensordef[0..node_count) from the allocated output buffers.
// `node_count` must equal the number of allocated output ports.
// Returns false if the count does not match or if any port fails to populate.
bool hexagon_controller_SetAllOutputTensorDef(int node_count,
                                              hexagon_nn_tensordef* tensordef) {
  if (node_count != GetOutputNodeCount()) {
    TFMLOGE("Error invalid output node count. %d != %d", node_count,
            GetOutputNodeCount());
    return false;
  }
  bool success = true;
  for (int i = 0; i < node_count; ++i) {
    // Keep going after a failure so every valid port is still populated, but
    // report the failure to the caller instead of dropping it.
    if (!SetOutputTensorDef(i, &tensordef[i])) {
      success = false;
    }
  }
  return success;
}
138
hexagon_controller_InitInputNodeDataToInceptionDummyData(int version)139 void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) {
140 if (version == 1) {
141 if (USE_FLOAT_DATA) {
142 TFMLOGE("ERROR!!!! Do not use float data for v1");
143 return;
144 }
145 hexagon_controller_CopyByteNodeData(
146 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1,
147 INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, 1,
148 inception_dummy_int_data_224x224);
149 } else if (version == 3) {
150 if (USE_FLOAT_DATA) {
151 hexagon_controller_CopyByteNodeData(
152 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
153 INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, sizeof(float),
154 (uint8_t*)inception_dummy_float_data_299x299);
155 } else {
156 hexagon_controller_CopyByteNodeData(
157 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
158 INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, 1,
159 inception_dummy_int_data_299x299);
160 }
161 }
162 }
163
hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id,bool show_ranking)164 bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id,
165 bool show_ranking) {
166 const int input_node_count = GetInputNodeCount();
167 hexagon_nn_tensordef inputs[input_node_count];
168 const int output_node_count = GetOutputNodeCount();
169 if (output_node_count <= 0) {
170 TFMLOGI("Error output node count is 0.");
171 return false;
172 }
173 hexagon_nn_tensordef outputs[output_node_count];
174 hexagon_controller_SetAllInputTensorDef(input_node_count, inputs);
175 hexagon_controller_SetAllOutputTensorDef(output_node_count, outputs);
176 const bool success = hexagon_controller_ExecuteGraphWithMultipleInOut(
177 nn_id, input_node_count, inputs, output_node_count, outputs);
178 for (int i = 0; i < output_node_count; ++i) {
179 s_output_node_data_array_byte_size[i] = outputs[i].data_valid_len;
180 }
181
182 const hexagon_nn_tensordef* output0 = &outputs[0];
183
184 const uint32_t out_batches = output0->batches;
185 const uint32_t out_height = output0->height;
186 const uint32_t out_width = output0->width;
187 const uint32_t out_depth = output0->depth;
188 const uint32_t out_data_size = output0->data_valid_len;
189 const uint32_t out_buf_byte_size = output0->dataLen;
190
191 if (!success) {
192 TFMLOGE("Execution failed");
193 DumpNNId(nn_id);
194 return false;
195 } else if (!show_ranking) {
196 return true;
197 }
198
199 static const int OUT_RANKING_SIZE = 5;
200 int out_ranking[OUT_RANKING_SIZE];
201 hexagon_controller_PrintMaxNIdx(
202 (float*)s_output_node_data_buffer[0],
203 out_batches * out_height * out_width * out_depth, OUT_RANKING_SIZE,
204 out_ranking);
205 TFMLOGD("%d x %d x %d x %d, byte size = %d, buf size = %d\n", out_batches,
206 out_height, out_width, out_depth, out_data_size, out_buf_byte_size);
207 if (s_dbg_use_inception_dummy_data) {
208 // Check the result of inception with a dummy data. This step shouldn't
209 // be passed when show_ranking != true to avoid adding unnecessary
210 // additional computation cost.
211 if (out_ranking[0] == 169 && out_ranking[1] == 7) {
212 TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]);
213 return true;
214 } else {
215 TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
216 return false;
217 }
218 }
219 return true;
220 }
221
hexagon_controller_GetTargetGraphId()222 uint32_t hexagon_controller_GetTargetGraphId() { return s_target_graph_id; }
223
hexagon_controller_SetTargetGraphId(uint32_t graph_id)224 void hexagon_controller_SetTargetGraphId(uint32_t graph_id) {
225 s_target_graph_id = graph_id;
226 }
227
hexagon_controller_PrintGraph(uint32_t id)228 void hexagon_controller_PrintGraph(uint32_t id) {
229 int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE);
230 TFMLOGD("PrintGraph %s\n", s_print_buf);
231 if (retval) {
232 TFMLOGE("Error on print graph\n");
233 }
234 }
235
hexagon_controller_GetWrapperVersion()236 int hexagon_controller_GetWrapperVersion() {
237 return HEXAGON_CONTROLLER_VERSION;
238 }
239
// Returns the version written by hexagon_nn_version(). The result stays 0
// if that call leaves its output argument untouched.
int hexagon_controller_GetHexagonBinaryVersion() {
  int binary_version = 0;
  hexagon_nn_version(&binary_version);
  return binary_version;
}
245
hexagon_controller_AllocateInputNodeDataBuffers(int port,int input_buf_byte_size)246 bool hexagon_controller_AllocateInputNodeDataBuffers(int port,
247 int input_buf_byte_size) {
248 TFMLOGD("Allocate memory for input node data. port = %d, size = %d", port,
249 input_buf_byte_size);
250 if (s_input_node_data_buffer[port].max_buf_byte_size != 0) {
251 TFMLOGE("ERROR! input buffer is already allocated!!");
252 return false;
253 } else {
254 s_input_node_data_buffer[port].max_buf_byte_size = input_buf_byte_size;
255 posix_memalign((void**)&s_input_node_data_buffer[port].byte_array_data, 128,
256 input_buf_byte_size);
257 TFMLOGD("allocate input node data buffers done");
258 }
259 return true;
260 }
261
hexagon_controller_AllocateOutputNodeDataBuffers(int port,int output_buf_byte_size)262 bool hexagon_controller_AllocateOutputNodeDataBuffers(
263 int port, int output_buf_byte_size) {
264 TFMLOGD("Allocate memory for output node data. port = %d, size = %d", port,
265 output_buf_byte_size);
266 if (s_output_node_data_buffer_max_byte_size[port] != 0) {
267 TFMLOGE("ERROR! input buffer is already allocated!!");
268 return false;
269 } else {
270 // s_output_node_data_buffer = malloc(output_size * sizeof(float));
271 posix_memalign((void**)&s_output_node_data_buffer[port], 128,
272 output_buf_byte_size);
273 s_output_node_data_buffer_max_byte_size[port] = output_buf_byte_size;
274 s_output_node_data_array_byte_size[port] = 0;
275 TFMLOGD("allocate output node data buffers");
276 }
277 return true;
278 }
279
hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count,int * input_sizes,int output_count,int * output_sizes)280 bool hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count,
281 int* input_sizes,
282 int output_count,
283 int* output_sizes) {
284 bool success = true;
285 for (int i = 0; i < input_count; ++i) {
286 success &=
287 hexagon_controller_AllocateInputNodeDataBuffers(i, input_sizes[i]);
288 }
289 for (int i = 0; i < output_count; ++i) {
290 success &=
291 hexagon_controller_AllocateOutputNodeDataBuffers(i, output_sizes[i]);
292 }
293
294 if (s_dbg_use_inception_dummy_data) {
295 hexagon_controller_InitInputNodeDataToInceptionDummyData(
296 s_dbg_inception_version);
297 }
298 return success;
299 }
300
// Single-port convenience wrapper: allocates one input buffer of `input_size`
// bytes and one output buffer of `output_size` bytes by delegating to
// hexagon_controller_AllocateMultipleNodeDataBuffers().
bool hexagon_controller_AllocateNodeDataBuffers(int input_size,
                                                int output_size) {
  int input_sizes[1] = {input_size};
  int output_sizes[1] = {output_size};
  const bool allocated = hexagon_controller_AllocateMultipleNodeDataBuffers(
      1, input_sizes, 1, output_sizes);
  return allocated;
}
306
hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port)307 bool hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port) {
308 struct NodeDataFloat* input_node_data_buffer =
309 &s_input_node_data_buffer[port];
310 if (input_node_data_buffer->max_buf_byte_size == 0) {
311 TFMLOGE("ERROR! input buffer has not been allocated yet!!");
312 return false;
313 } else {
314 input_node_data_buffer->max_buf_byte_size = 0;
315 input_node_data_buffer->array_byte_size = 0;
316 free(input_node_data_buffer->byte_array_data);
317 }
318 return true;
319 }
320
hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port)321 bool hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port) {
322 if (s_output_node_data_buffer_max_byte_size[port] == 0) {
323 TFMLOGE("ERROR! output buffer has not been allocated yet!!");
324 return false;
325 } else {
326 s_output_node_data_buffer_max_byte_size[port] = 0;
327 s_output_node_data_array_byte_size[port] = 0;
328 free(s_output_node_data_buffer[port]);
329 }
330 return true;
331 }
332
hexagon_controller_ReleaseNodeDataBuffers()333 bool hexagon_controller_ReleaseNodeDataBuffers() {
334 bool success = true;
335 for (int i = 0; i < GetInputNodeCount(); ++i) {
336 success &= hexagon_controller_ReleaseInputNodeDataBuffersWithPort(i);
337 }
338 for (int i = 0; i < GetOutputNodeCount(); ++i) {
339 success &= hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(i);
340 }
341 return success;
342 }
343
hexagon_controller_CopyByteNodeData(int port,int x,int y,int z,int d,int type_byte_size,uint8_t * array_data)344 bool hexagon_controller_CopyByteNodeData(int port, int x, int y, int z, int d,
345 int type_byte_size,
346 uint8_t* array_data) {
347 int array_byte_size = x * y * z * d * type_byte_size;
348 TFMLOGD("--- %d, %d, %d, %d, %d, %d", x, y, z, d, type_byte_size,
349 array_byte_size);
350 struct NodeDataFloat* input_node_data_buffer = &s_input_node_data_buffer[0];
351
352 if (input_node_data_buffer->max_buf_byte_size < array_byte_size) {
353 TFMLOGE("ERROR! input buffer size is too small! %d < %d",
354 input_node_data_buffer->max_buf_byte_size, array_byte_size);
355 return false;
356 }
357 memcpy(input_node_data_buffer->byte_array_data, array_data, array_byte_size);
358 input_node_data_buffer->array_byte_size = array_byte_size;
359 input_node_data_buffer->x = x;
360 input_node_data_buffer->y = y;
361 input_node_data_buffer->z = z;
362 input_node_data_buffer->d = d;
363 return true;
364 }
365
hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,int bus_usage,int version)366 int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
367 int bus_usage,
368 int version) {
369 TFMLOGI("Init hexagon with max attributes (Controller version = %d)",
370 HEXAGON_CONTROLLER_VERSION);
371 const int MCPS = 1000;
372 const int MBPS = 12000;
373
374 adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096);
375
376 dspCV_Attribute attrib[] = {
377 // The below values will result in the maximum aDSP performance,
378 // at Turbo voltage.
379 // Slightly more MCPS than are available on current targets
380 {DSP_TOTAL_MCPS, MCPS},
381 // drive the clock to MAX on known targets
382 {DSP_MCPS_PER_THREAD, MCPS / 2},
383 // 12 GB/sec is slightly higher than the max realistic
384 // max BW on existing targets.
385 {PEAK_BUS_BANDWIDTH_MBPS, MBPS},
386 // This app is non-real time, and constantly reading/writing memory
387 {BUS_USAGE_PERCENT, bus_usage},
388 };
389 int retval = 0;
390 if (!enable_dcvs) {
391 retval = hexagon_nn_disable_dcvs();
392 if (retval) {
393 TFMLOGE("Failed to disable DSP DCVS: %x\n", retval);
394 }
395 }
396
397 retval =
398 dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0]));
399 TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval);
400
401 s_target_graph_id = 0;
402 s_dbg_inception_version = version;
403
404 return retval;
405 }
406
// Stops the aDSP message daemon, de-initialises the DSP with
// dspCV_deinitQ6(), and then releases the node data buffers.
// Returns the value returned by dspCV_deinitQ6(); the result of the buffer
// release is not reflected in the return value.
int hexagon_controller_DeInitHexagon() {
  adspmsgd_stop();
  TFMLOGI("Finalize hexagon");

  const int deinit_status = dspCV_deinitQ6();
  TFMLOGD("return value from dspCV_deinitQ6(): %d \n", deinit_status);

  hexagon_controller_ReleaseNodeDataBuffers();
  return deinit_status;
}
417
// Calls hexagon_nn_config(); any value it returns is discarded.
void hexagon_controller_GrowMemorySize() {
  (void)hexagon_nn_config();
}
419
hexagon_controller_GetInputNodeDataBuffer(int port)420 struct NodeDataFloat* hexagon_controller_GetInputNodeDataBuffer(int port) {
421 if (port >= GetInputNodeCount()) {
422 TFMLOGE("port should be less than 1");
423 }
424 return &s_input_node_data_buffer[port];
425 }
426
hexagon_controller_GetOutputNodeDataBuffer(int port,int * out_array_byte_size)427 uint8_t* hexagon_controller_GetOutputNodeDataBuffer(int port,
428 int* out_array_byte_size) {
429 if (port >= GetOutputNodeCount()) {
430 TFMLOGE("port should be less than 1");
431 }
432 *out_array_byte_size = s_output_node_data_array_byte_size[port];
433 return s_output_node_data_buffer[port];
434 }
435
436 // Append const node to the graph
hexagon_controller_AppendConstNode(const char * const name,int graph_id,int node_id,int batch,int height,int width,int depth,const uint8_t * const data,int data_length)437 int hexagon_controller_AppendConstNode(const char* const name, int graph_id,
438 int node_id, int batch, int height,
439 int width, int depth,
440 const uint8_t* const data,
441 int data_length) {
442 if (DBG_SHOW_ID) {
443 TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d", name, node_id, batch,
444 height, width, depth, data_length);
445 } else {
446 TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d", name, batch, height, width,
447 depth, data_length);
448 }
449 const int retval = hexagon_nn_append_const_node(
450 graph_id, node_id, batch, height, width, depth, data, data_length);
451 if (retval != 0) {
452 TFMLOGE("Failed to append const node %d", node_id);
453 return retval;
454 }
455 return retval;
456 }
457
458 // Append node to the graph
hexagon_controller_AppendNode(const char * const name,int graph_id,int node_id,int ops_id,int padding_id,const hexagon_nn_input * const inputs,int inputs_count,const hexagon_nn_output * const outputs,int outputs_count)459 int hexagon_controller_AppendNode(const char* const name, int graph_id,
460 int node_id, int ops_id, int padding_id,
461 const hexagon_nn_input* const inputs,
462 int inputs_count,
463 const hexagon_nn_output* const outputs,
464 int outputs_count) {
465 char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
466 memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
467 int pos = 0;
468 pos += snprintf(&input_param_buf[pos], 500, "in: ");
469 for (int i = 0; i < inputs_count; ++i) {
470 if (DBG_SHOW_ID) {
471 pos += snprintf(&input_param_buf[pos], 500, "(%d, %d), ",
472 inputs[i].src_id, inputs[i].output_idx);
473 } else {
474 pos +=
475 snprintf(&input_param_buf[pos], 500, "(%d), ", inputs[i].output_idx);
476 }
477 }
478
479 char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
480 memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
481 pos = 0;
482 pos += snprintf(&output_param_buf[pos], 500, "out: ");
483 for (int i = 0; i < outputs_count; ++i) {
484 pos += snprintf(&output_param_buf[pos], 500, "(%d), ", outputs[i].max_size);
485 }
486
487 if (DBG_SHOW_ID) {
488 TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id, ops_id,
489 padding_id, inputs_count, outputs_count, input_param_buf,
490 output_param_buf);
491 } else {
492 TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name, ops_id, padding_id,
493 inputs_count, outputs_count, input_param_buf, output_param_buf);
494 }
495 const int retval =
496 hexagon_nn_append_node(graph_id, node_id, ops_id, padding_id, inputs,
497 inputs_count, outputs, outputs_count);
498 if (retval != 0) {
499 TFMLOGE("Failed to append const node %d", node_id);
500 return retval;
501 }
502 return retval;
503 }
504
hexagon_controller_EnableDbgUseInceptionDummyData(bool enable)505 void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) {
506 s_dbg_use_inception_dummy_data = enable;
507 }
508
hexagon_controller_IsDbgUseInceptionDummyDataEnabled()509 bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() {
510 return s_dbg_use_inception_dummy_data;
511 }
512