• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // to demonstrate the performance difference between ION and HLOS memory
17 // for sharing with ADSP.
18 #define USE_ION_MEMORY
19 
20 #include "hexagon_controller.h"
21 
22 #include <stdlib.h>
23 #include <stdio.h>
24 
25 #include "adspmsgd.h"
26 #include "dspCV.h"
27 #include "node_data_float.h"
28 #include "rpcmem.h"  // helper API's for shared buffer allocation
29 #include "soc_interface.h"
30 #include "tfm_log.h"
31 
32 // if false, use int data as input.  This is only for acceleration purpose.
33 // Also you may need to change android.min.
34 static const bool USE_FLOAT_DATA = true;
35 
36 // if true, show id for each node
37 static const bool DBG_SHOW_ID = false;
38 
39 static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000;
40 
41 static const uint32_t PRINT_BUFSIZE = 2 * 1024 * 1024;
42 
43 // extern pre-generated inception dummy data
44 extern uint8_t inception_dummy_int_data_224x224[];
45 extern uint8_t inception_dummy_int_data_299x299[];
46 extern float inception_dummy_float_data_299x299[];
47 
48 #define HEXAGON_CONTROLLER_VERSION 101
49 
50 // allocate print bufsize in advance @MB
51 #define PRINT_BUFSIZE (2 * 1024 * 1024)
52 
53 static unsigned char s_print_buf[PRINT_BUFSIZE];
54 
55 #define MAX_INPUTS 10
56 #define MAX_OUTPUTS 10
57 
58 static struct NodeDataFloat s_input_node_data_buffer[MAX_INPUTS];
59 static uint8_t* s_output_node_data_buffer[MAX_OUTPUTS];
60 static int s_output_node_data_buffer_max_byte_size[MAX_OUTPUTS];
61 static int s_output_node_data_array_byte_size[MAX_OUTPUTS];
62 static uint32_t s_target_graph_id;
63 
64 static bool s_dbg_use_inception_dummy_data = false;
65 static int s_dbg_inception_version = 3;
66 
GetInputNodeCount()67 static int GetInputNodeCount() {
68   for (int i = 0; i < MAX_INPUTS; ++i) {
69     if (s_input_node_data_buffer[i].max_buf_byte_size == 0) {
70       return i;
71     }
72   }
73   return 0;
74 }
75 
GetOutputNodeCount()76 static int GetOutputNodeCount() {
77   for (int i = 0; i < MAX_OUTPUTS; ++i) {
78     if (s_output_node_data_buffer_max_byte_size[i] == 0) {
79       return i;
80     }
81   }
82   return 0;
83 }
84 
// Populates `tensordef` from the input buffer registered for `port`: the
// x/y/z/d fields become batches/height/width/depth, and the data pointer and
// length refer to the bytes currently stored in that buffer.
// Returns false (leaving `tensordef` untouched) when `port` is negative or
// not below the current input node count.
static bool SetInputTensorDef(int port, hexagon_nn_tensordef* tensordef) {
  // A negative port would index before the start of the static table.
  if (port < 0 || port >= GetInputNodeCount()) {
    TFMLOGE("Error exceeds input count.");
    return false;
  }
  const struct NodeDataFloat* input_node_data_buffer =
      &s_input_node_data_buffer[port];
  tensordef->batches = input_node_data_buffer->x;
  tensordef->height = input_node_data_buffer->y;
  tensordef->width = input_node_data_buffer->z;
  tensordef->depth = input_node_data_buffer->d;
  tensordef->data = input_node_data_buffer->byte_array_data;
  tensordef->dataLen = input_node_data_buffer->array_byte_size;

  return true;
}
101 
// Fills tensordef[0..node_count) from the registered input buffers.
// `node_count` must equal the current number of allocated input ports.
// Returns false if the count does not match or if any port fails to be set.
bool hexagon_controller_SetAllInputTensorDef(int node_count,
                                             hexagon_nn_tensordef* tensordef) {
  if (node_count != GetInputNodeCount()) {
    TFMLOGE("Error invalid input node count.");
    return false;
  }
  bool success = true;
  for (int i = 0; i < node_count; ++i) {
    // Propagate per-port failures instead of unconditionally reporting
    // success.
    success &= SetInputTensorDef(i, &tensordef[i]);
  }
  return success;
}
114 
// Points `tensordef` at the output buffer registered for `port` and sets its
// dataLen to that buffer's capacity. Other fields are left untouched.
// Returns false (leaving `tensordef` untouched) when `port` is negative or
// not below the current output node count.
static bool SetOutputTensorDef(int port, hexagon_nn_tensordef* tensordef) {
  // A negative port would index before the start of the static tables.
  if (port < 0 || port >= GetOutputNodeCount()) {
    TFMLOGE("Error exceeds output count.");
    return false;
  }
  tensordef->data = s_output_node_data_buffer[port];
  tensordef->dataLen = s_output_node_data_buffer_max_byte_size[port];
  return true;
}
124 
// Fills tensordef[0..node_count) from the registered output buffers.
// `node_count` must equal the current number of allocated output ports.
// Returns false if the count does not match or if any port fails to be set.
bool hexagon_controller_SetAllOutputTensorDef(int node_count,
                                              hexagon_nn_tensordef* tensordef) {
  const int allocated_count = GetOutputNodeCount();
  if (node_count != allocated_count) {
    TFMLOGE("Error invalid output node count. %d != %d", node_count,
            allocated_count);
    return false;
  }
  bool success = true;
  for (int i = 0; i < node_count; ++i) {
    // Propagate per-port failures instead of unconditionally reporting
    // success.
    success &= SetOutputTensorDef(i, &tensordef[i]);
  }
  return success;
}
138 
hexagon_controller_InitInputNodeDataToInceptionDummyData(int version)139 void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) {
140   if (version == 1) {
141     if (USE_FLOAT_DATA) {
142       TFMLOGE("ERROR!!!! Do not use float data for v1");
143       return;
144     }
145     hexagon_controller_CopyByteNodeData(
146         0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1,
147         INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, 1,
148         inception_dummy_int_data_224x224);
149   } else if (version == 3) {
150     if (USE_FLOAT_DATA) {
151       hexagon_controller_CopyByteNodeData(
152           0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
153           INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, sizeof(float),
154           (uint8_t*)inception_dummy_float_data_299x299);
155     } else {
156       hexagon_controller_CopyByteNodeData(
157           0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
158           INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, 1,
159           inception_dummy_int_data_299x299);
160     }
161   }
162 }
163 
hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id,bool show_ranking)164 bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id,
165                                                bool show_ranking) {
166   const int input_node_count = GetInputNodeCount();
167   hexagon_nn_tensordef inputs[input_node_count];
168   const int output_node_count = GetOutputNodeCount();
169   if (output_node_count <= 0) {
170     TFMLOGI("Error output node count is 0.");
171     return false;
172   }
173   hexagon_nn_tensordef outputs[output_node_count];
174   hexagon_controller_SetAllInputTensorDef(input_node_count, inputs);
175   hexagon_controller_SetAllOutputTensorDef(output_node_count, outputs);
176   const bool success = hexagon_controller_ExecuteGraphWithMultipleInOut(
177       nn_id, input_node_count, inputs, output_node_count, outputs);
178   for (int i = 0; i < output_node_count; ++i) {
179     s_output_node_data_array_byte_size[i] = outputs[i].data_valid_len;
180   }
181 
182   const hexagon_nn_tensordef* output0 = &outputs[0];
183 
184   const uint32_t out_batches = output0->batches;
185   const uint32_t out_height = output0->height;
186   const uint32_t out_width = output0->width;
187   const uint32_t out_depth = output0->depth;
188   const uint32_t out_data_size = output0->data_valid_len;
189   const uint32_t out_buf_byte_size = output0->dataLen;
190 
191   if (!success) {
192     TFMLOGE("Execution failed");
193     DumpNNId(nn_id);
194     return false;
195   } else if (!show_ranking) {
196     return true;
197   }
198 
199   static const int OUT_RANKING_SIZE = 5;
200   int out_ranking[OUT_RANKING_SIZE];
201   hexagon_controller_PrintMaxNIdx(
202       (float*)s_output_node_data_buffer[0],
203       out_batches * out_height * out_width * out_depth, OUT_RANKING_SIZE,
204       out_ranking);
205   TFMLOGD("%d x %d x %d x %d, byte size = %d, buf size = %d\n", out_batches,
206           out_height, out_width, out_depth, out_data_size, out_buf_byte_size);
207   if (s_dbg_use_inception_dummy_data) {
208     // Check the result of inception with a dummy data. This step shouldn't
209     // be passed when show_ranking != true to avoid adding unnecessary
210     // additional computation cost.
211     if (out_ranking[0] == 169 && out_ranking[1] == 7) {
212       TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]);
213       return true;
214     } else {
215       TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
216       return false;
217     }
218   }
219   return true;
220 }
221 
hexagon_controller_GetTargetGraphId()222 uint32_t hexagon_controller_GetTargetGraphId() { return s_target_graph_id; }
223 
hexagon_controller_SetTargetGraphId(uint32_t graph_id)224 void hexagon_controller_SetTargetGraphId(uint32_t graph_id) {
225   s_target_graph_id = graph_id;
226 }
227 
hexagon_controller_PrintGraph(uint32_t id)228 void hexagon_controller_PrintGraph(uint32_t id) {
229   int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE);
230   TFMLOGD("PrintGraph %s\n", s_print_buf);
231   if (retval) {
232     TFMLOGE("Error on print graph\n");
233   }
234 }
235 
hexagon_controller_GetWrapperVersion()236 int hexagon_controller_GetWrapperVersion() {
237   return HEXAGON_CONTROLLER_VERSION;
238 }
239 
// Returns the version written by hexagon_nn_version. The result starts at 0,
// so 0 is returned if that call leaves its output untouched.
int hexagon_controller_GetHexagonBinaryVersion() {
  int binary_version = 0;
  int* const version_out = &binary_version;
  hexagon_nn_version(version_out);
  return binary_version;
}
245 
hexagon_controller_AllocateInputNodeDataBuffers(int port,int input_buf_byte_size)246 bool hexagon_controller_AllocateInputNodeDataBuffers(int port,
247                                                      int input_buf_byte_size) {
248   TFMLOGD("Allocate memory for input node data. port = %d, size = %d", port,
249           input_buf_byte_size);
250   if (s_input_node_data_buffer[port].max_buf_byte_size != 0) {
251     TFMLOGE("ERROR! input buffer is already allocated!!");
252     return false;
253   } else {
254     s_input_node_data_buffer[port].max_buf_byte_size = input_buf_byte_size;
255     posix_memalign((void**)&s_input_node_data_buffer[port].byte_array_data, 128,
256                    input_buf_byte_size);
257     TFMLOGD("allocate input node data buffers done");
258   }
259   return true;
260 }
261 
hexagon_controller_AllocateOutputNodeDataBuffers(int port,int output_buf_byte_size)262 bool hexagon_controller_AllocateOutputNodeDataBuffers(
263     int port, int output_buf_byte_size) {
264   TFMLOGD("Allocate memory for output node data. port = %d, size = %d", port,
265           output_buf_byte_size);
266   if (s_output_node_data_buffer_max_byte_size[port] != 0) {
267     TFMLOGE("ERROR! input buffer is already allocated!!");
268     return false;
269   } else {
270     // s_output_node_data_buffer = malloc(output_size * sizeof(float));
271     posix_memalign((void**)&s_output_node_data_buffer[port], 128,
272                    output_buf_byte_size);
273     s_output_node_data_buffer_max_byte_size[port] = output_buf_byte_size;
274     s_output_node_data_array_byte_size[port] = 0;
275     TFMLOGD("allocate output node data buffers");
276   }
277   return true;
278 }
279 
hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count,int * input_sizes,int output_count,int * output_sizes)280 bool hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count,
281                                                         int* input_sizes,
282                                                         int output_count,
283                                                         int* output_sizes) {
284   bool success = true;
285   for (int i = 0; i < input_count; ++i) {
286     success &=
287         hexagon_controller_AllocateInputNodeDataBuffers(i, input_sizes[i]);
288   }
289   for (int i = 0; i < output_count; ++i) {
290     success &=
291         hexagon_controller_AllocateOutputNodeDataBuffers(i, output_sizes[i]);
292   }
293 
294   if (s_dbg_use_inception_dummy_data) {
295     hexagon_controller_InitInputNodeDataToInceptionDummyData(
296         s_dbg_inception_version);
297   }
298   return success;
299 }
300 
// Convenience wrapper for the single-input / single-output case: allocates
// input port 0 with `input_size` bytes and output port 0 with `output_size`
// bytes via hexagon_controller_AllocateMultipleNodeDataBuffers.
bool hexagon_controller_AllocateNodeDataBuffers(int input_size,
                                                int output_size) {
  int single_input_sizes[1] = {input_size};
  int single_output_sizes[1] = {output_size};
  const bool allocated = hexagon_controller_AllocateMultipleNodeDataBuffers(
      1, single_input_sizes, 1, single_output_sizes);
  return allocated;
}
306 
hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port)307 bool hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port) {
308   struct NodeDataFloat* input_node_data_buffer =
309       &s_input_node_data_buffer[port];
310   if (input_node_data_buffer->max_buf_byte_size == 0) {
311     TFMLOGE("ERROR! input buffer has not been allocated yet!!");
312     return false;
313   } else {
314     input_node_data_buffer->max_buf_byte_size = 0;
315     input_node_data_buffer->array_byte_size = 0;
316     free(input_node_data_buffer->byte_array_data);
317   }
318   return true;
319 }
320 
hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port)321 bool hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port) {
322   if (s_output_node_data_buffer_max_byte_size[port] == 0) {
323     TFMLOGE("ERROR! output buffer has not been allocated yet!!");
324     return false;
325   } else {
326     s_output_node_data_buffer_max_byte_size[port] = 0;
327     s_output_node_data_array_byte_size[port] = 0;
328     free(s_output_node_data_buffer[port]);
329   }
330   return true;
331 }
332 
// Releases every allocated input and output buffer.
// Returns true only if every individual release succeeded.
bool hexagon_controller_ReleaseNodeDataBuffers() {
  // Snapshot the counts before releasing anything. The counts are derived
  // from the first unallocated port, so releasing port 0 makes them drop to 0;
  // re-evaluating them in the loop condition would stop after the first
  // buffer and leak the rest.
  const int input_count = GetInputNodeCount();
  const int output_count = GetOutputNodeCount();

  bool success = true;
  for (int i = 0; i < input_count; ++i) {
    success &= hexagon_controller_ReleaseInputNodeDataBuffersWithPort(i);
  }
  for (int i = 0; i < output_count; ++i) {
    success &= hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(i);
  }
  return success;
}
343 
hexagon_controller_CopyByteNodeData(int port,int x,int y,int z,int d,int type_byte_size,uint8_t * array_data)344 bool hexagon_controller_CopyByteNodeData(int port, int x, int y, int z, int d,
345                                          int type_byte_size,
346                                          uint8_t* array_data) {
347   int array_byte_size = x * y * z * d * type_byte_size;
348   TFMLOGD("--- %d, %d, %d, %d, %d, %d", x, y, z, d, type_byte_size,
349           array_byte_size);
350   struct NodeDataFloat* input_node_data_buffer = &s_input_node_data_buffer[0];
351 
352   if (input_node_data_buffer->max_buf_byte_size < array_byte_size) {
353     TFMLOGE("ERROR! input buffer size is too small! %d < %d",
354             input_node_data_buffer->max_buf_byte_size, array_byte_size);
355     return false;
356   }
357   memcpy(input_node_data_buffer->byte_array_data, array_data, array_byte_size);
358   input_node_data_buffer->array_byte_size = array_byte_size;
359   input_node_data_buffer->x = x;
360   input_node_data_buffer->y = y;
361   input_node_data_buffer->z = z;
362   input_node_data_buffer->d = d;
363   return true;
364 }
365 
hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,int bus_usage,int version)366 int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
367                                                     int bus_usage,
368                                                     int version) {
369   TFMLOGI("Init hexagon with max attributes (Controller version = %d)",
370           HEXAGON_CONTROLLER_VERSION);
371   const int MCPS = 1000;
372   const int MBPS = 12000;
373 
374   adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096);
375 
376   dspCV_Attribute attrib[] = {
377       // The below values will result in the maximum aDSP performance,
378       // at Turbo voltage.
379       // Slightly more MCPS than are available on current targets
380       {DSP_TOTAL_MCPS, MCPS},
381       // drive the clock to MAX on known targets
382       {DSP_MCPS_PER_THREAD, MCPS / 2},
383       // 12 GB/sec is slightly higher than the max realistic
384       // max BW on existing targets.
385       {PEAK_BUS_BANDWIDTH_MBPS, MBPS},
386       // This app is non-real time, and constantly reading/writing memory
387       {BUS_USAGE_PERCENT, bus_usage},
388   };
389   int retval = 0;
390   if (!enable_dcvs) {
391     retval = hexagon_nn_disable_dcvs();
392     if (retval) {
393       TFMLOGE("Failed to disable DSP DCVS: %x\n", retval);
394     }
395   }
396 
397   retval =
398       dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0]));
399   TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval);
400 
401   s_target_graph_id = 0;
402   s_dbg_inception_version = version;
403 
404   return retval;
405 }
406 
// Stops adspmsgd, deinitializes the DSP through dspCV_deinitQ6 and releases
// all node data buffers. Returns the value returned by dspCV_deinitQ6; the
// result of the buffer release is not reported.
int hexagon_controller_DeInitHexagon() {
  adspmsgd_stop();
  TFMLOGI("Finalize hexagon");

  const int deinit_status = dspCV_deinitQ6();
  TFMLOGD("return value from dspCV_deinitQ6(): %d \n", deinit_status);

  (void)hexagon_controller_ReleaseNodeDataBuffers();

  return deinit_status;
}
417 
// Calls hexagon_nn_config(). Any value it returns is ignored.
// NOTE(review): the name suggests this grows DSP-side memory - confirm against
// the hexagon_nn documentation.
void hexagon_controller_GrowMemorySize() {
  hexagon_nn_config();
}
419 
hexagon_controller_GetInputNodeDataBuffer(int port)420 struct NodeDataFloat* hexagon_controller_GetInputNodeDataBuffer(int port) {
421   if (port >= GetInputNodeCount()) {
422     TFMLOGE("port should be less than 1");
423   }
424   return &s_input_node_data_buffer[port];
425 }
426 
hexagon_controller_GetOutputNodeDataBuffer(int port,int * out_array_byte_size)427 uint8_t* hexagon_controller_GetOutputNodeDataBuffer(int port,
428                                                     int* out_array_byte_size) {
429   if (port >= GetOutputNodeCount()) {
430     TFMLOGE("port should be less than 1");
431   }
432   *out_array_byte_size = s_output_node_data_array_byte_size[port];
433   return s_output_node_data_buffer[port];
434 }
435 
436 // Append const node to the graph
hexagon_controller_AppendConstNode(const char * const name,int graph_id,int node_id,int batch,int height,int width,int depth,const uint8_t * const data,int data_length)437 int hexagon_controller_AppendConstNode(const char* const name, int graph_id,
438                                        int node_id, int batch, int height,
439                                        int width, int depth,
440                                        const uint8_t* const data,
441                                        int data_length) {
442   if (DBG_SHOW_ID) {
443     TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d", name, node_id, batch,
444             height, width, depth, data_length);
445   } else {
446     TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d", name, batch, height, width,
447             depth, data_length);
448   }
449   const int retval = hexagon_nn_append_const_node(
450       graph_id, node_id, batch, height, width, depth, data, data_length);
451   if (retval != 0) {
452     TFMLOGE("Failed to append const node %d", node_id);
453     return retval;
454   }
455   return retval;
456 }
457 
458 // Append node to the graph
hexagon_controller_AppendNode(const char * const name,int graph_id,int node_id,int ops_id,int padding_id,const hexagon_nn_input * const inputs,int inputs_count,const hexagon_nn_output * const outputs,int outputs_count)459 int hexagon_controller_AppendNode(const char* const name, int graph_id,
460                                   int node_id, int ops_id, int padding_id,
461                                   const hexagon_nn_input* const inputs,
462                                   int inputs_count,
463                                   const hexagon_nn_output* const outputs,
464                                   int outputs_count) {
465   char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
466   memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
467   int pos = 0;
468   pos += snprintf(&input_param_buf[pos], 500, "in: ");
469   for (int i = 0; i < inputs_count; ++i) {
470     if (DBG_SHOW_ID) {
471       pos += snprintf(&input_param_buf[pos], 500, "(%d, %d), ",
472                       inputs[i].src_id, inputs[i].output_idx);
473     } else {
474       pos +=
475           snprintf(&input_param_buf[pos], 500, "(%d), ", inputs[i].output_idx);
476     }
477   }
478 
479   char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
480   memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
481   pos = 0;
482   pos += snprintf(&output_param_buf[pos], 500, "out: ");
483   for (int i = 0; i < outputs_count; ++i) {
484     pos += snprintf(&output_param_buf[pos], 500, "(%d), ", outputs[i].max_size);
485   }
486 
487   if (DBG_SHOW_ID) {
488     TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id, ops_id,
489             padding_id, inputs_count, outputs_count, input_param_buf,
490             output_param_buf);
491   } else {
492     TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name, ops_id, padding_id,
493             inputs_count, outputs_count, input_param_buf, output_param_buf);
494   }
495   const int retval =
496       hexagon_nn_append_node(graph_id, node_id, ops_id, padding_id, inputs,
497                              inputs_count, outputs, outputs_count);
498   if (retval != 0) {
499     TFMLOGE("Failed to append const node %d", node_id);
500     return retval;
501   }
502   return retval;
503 }
504 
hexagon_controller_EnableDbgUseInceptionDummyData(bool enable)505 void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) {
506   s_dbg_use_inception_dummy_data = enable;
507 }
508 
hexagon_controller_IsDbgUseInceptionDummyDataEnabled()509 bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() {
510   return s_dbg_use_inception_dummy_data;
511 }
512