1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
// Define USE_ION_MEMORY to demonstrate the performance difference between ION
// and HLOS memory for sharing with the ADSP.
18 #define USE_ION_MEMORY
19
20 #include <limits.h>
21 #include <stdio.h>
22
23 #include "hexagon_controller.h"
24 #include "hexagon_nn.h"
25 #include "tfm_log.h"
26
// Capacity, in entries, of the perf-info arrays filled by the dump functions.
static const uint32_t MAX_NODES = 2048;
// Event-count limit.
static const uint32_t MAX_EVENT_COUNT = 256;

// When true, hexagon_controller_PrintMaxNIdx logs every value it is given.
static const bool DUMP_OUTPUT = false;
// When true, the execute functions emit verbose progress logs.
static const bool DBG_EXECUTION = true;

// Number of ranking slots computed by the dummy-data execution check.
static const int OUT_RANKING_SIZE = 5;

// Output buffer used by the dummy-data execution check. Visible only inside
// this file.
// TODO(satok): allocate dynamically
static float s_output_values[300 * 300 * 3 * 4];

// Graph builders and dummy input data that are declared here but defined
// outside this file.
extern void init_graph(uint32_t id);
extern void init_graph_v1(uint32_t id);
extern uint8_t inception_dummy_int_data_299x299[];
extern uint8_t inception_dummy_int_data_224x224[];
extern float inception_dummy_float_data_299x299[];
44
// Inception model variants for which a graph can be initialized.
enum InceptionVersion {
  INCEPTION_V1 = 0,
  INCEPTION_V3 = 1,
};

// Variant chosen by the last hexagon_controller_InitGraph call that was given
// a supported version; V3 until then.
static enum InceptionVersion s_inception_version = INCEPTION_V3;
51
52 /////////////////////////////////////////////////
53 // file local functions
54
// Maps a graph node id to a node name for log output.
// Placeholder: every id currently yields "?".
static const char* ConvertGraphInfoIdToName(unsigned int id) {
  // TODO(satok): implement
  (void)id;  // Not consulted until the lookup is implemented.
  return "?";
}
59
// Maps a graph node id to an op name for log output.
// Placeholder: every id currently yields "?".
static const char* ConvertGraphInfoIdToOpName(unsigned int id) {
  // TODO(satok): implement
  (void)id;  // Not consulted until the lookup is implemented.
  return "?";
}
64
65 /////////////////////////////////////////////////
66 // file local utilities
// Returns the index of the largest value among data[0..entries) whose index
// is not listed in exclude_idx[0..exclude_size).
//
//   data:         values to scan; only read when entries > 0.
//   entries:      number of elements in data.
//   exclude_size: number of elements in exclude_idx; may be 0.
//   exclude_idx:  indices to skip; not read when exclude_size is 0, so NULL
//                 is acceptable then. Negative values never match an index.
//
// On ties the lowest index wins. Returns 0 when there is no candidate at all
// (entries is 0, or every index is excluded).
static uint32_t FindMaxIdxWithExcludeList(const float* data, uint32_t entries,
                                          const int exclude_size,
                                          const int* exclude_idx) {
  // The running maximum is seeded from the first index that is NOT excluded.
  // Seeding it from data[0] unconditionally would let an excluded element 0
  // win again whenever it holds the overall maximum.
  bool found = false;
  float maxval = 0.0f;
  uint32_t maxidx = 0;
  for (uint32_t i = 0; i < entries; ++i) {
    bool exclude = false;
    for (int j = 0; j < exclude_size; ++j) {
      if (exclude_idx[j] >= 0 && (uint32_t)exclude_idx[j] == i) {
        exclude = true;
        break;
      }
    }
    if (exclude) {
      continue;
    }
    if (!found || maxval < data[i]) {
      found = true;
      maxval = data[i];
      maxidx = i;
    }
  }
  return maxidx;
}
91
// Returns the index chosen by FindMaxIdxWithExcludeList over the first
// `entries` values of data when nothing is excluded.
static uint32_t FindMaxIdx(const float* data, uint32_t entries) {
  const int* const no_exclusions = NULL;
  return FindMaxIdxWithExcludeList(data, entries, /*exclude_size=*/0,
                                   no_exclusions);
}
95
hexagon_controller_PrintMaxNIdx(const float * data,const uint32_t entries,const int n,int * out_ranking)96 void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries,
97 const int n, int* out_ranking) {
98 if (DUMP_OUTPUT) {
99 for (int i = 0; i < entries; ++i) {
100 TFMLOGD("%d: val = %f", i, data[i]);
101 }
102 }
103 if (n >= entries) {
104 TFMLOGD("Too many N %d >= %d", n, entries);
105 }
106 for (int i = 0; i < n; ++i) {
107 out_ranking[i] = INT_MAX;
108 }
109 for (int i = 0; i < n; ++i) {
110 out_ranking[i] = FindMaxIdxWithExcludeList(data, entries, n, out_ranking);
111 }
112 TFMLOGD("=== RANKING ===");
113 for (int i = 0; i < n; ++i) {
114 TFMLOGD("%d: id = %d, val = %f", i, out_ranking[i], data[out_ranking[i]]);
115 }
116 }
117
// Combines the two halves of a perf-info counter into one value:
// counter_hi supplies bits 32 and up, counter_lo is OR-ed into the low bits.
static inline unsigned long long int GetCounter(hexagon_nn_perfinfo s) {
  const unsigned long long int high_part = s.counter_hi;
  return (high_part << 32) | s.counter_lo;
}
125
CompareCycle(const void * va,const void * vb)126 static int CompareCycle(const void* va, const void* vb) {
127 const hexagon_nn_perfinfo* a = va;
128 const hexagon_nn_perfinfo* b = vb;
129 unsigned long long int acount = GetCounter(*a);
130 unsigned long long int bcount = GetCounter(*b);
131 if (acount < bcount) {
132 return -1;
133 } else if (acount > bcount) {
134 return 1;
135 } else {
136 return 0;
137 }
138 }
139
140 /////////////////////////////////////////////////
141 // Graph functions
142
// Obtains a new graph id from hexagon_nn_init, sets that graph's debug level
// to 0 and returns the id.
uint32_t hexagon_controller_InstantiateGraph() {
  // TODO(satok): make this as argument
  const int debug_level = 0;
  const uint32_t graph_id = hexagon_nn_init();
  hexagon_nn_set_debug_level(graph_id, debug_level);
  return graph_id;
}
149
hexagon_controller_InitGraph(int version,uint32_t nn_id)150 void hexagon_controller_InitGraph(int version, uint32_t nn_id) {
151 if (version == 1) {
152 s_inception_version = INCEPTION_V1;
153 } else if (version == 3) {
154 s_inception_version = INCEPTION_V3;
155 } else {
156 TFMLOGE("Unsupported inception version %d", version);
157 return;
158 }
159 if (s_inception_version == INCEPTION_V3) {
160 init_graph(nn_id);
161 } else if (s_inception_version == INCEPTION_V1) {
162 init_graph_v1(nn_id);
163 }
164 TFMLOGD("Init graph (inception version = %d) done.", version);
165 }
166
// Runs hexagon_nn_prepare on the graph nn_id.
// Returns true when prepare reports 0. Otherwise logs the error code, calls
// DumpNNId for the graph and returns false.
bool hexagon_controller_ConstructGraph(uint32_t nn_id) {
  const int status = hexagon_nn_prepare(nn_id);
  if (status == 0) {
    TFMLOGD("Prepare success!\n");
    return true;
  }
  TFMLOGE("Prepare failed! returned 0x%x\n", status);
  DumpNNId(nn_id);
  return false;
}
178
// Convenience wrapper: instantiates a graph, initializes it for the given
// inception version and prepares it.
// Returns the new graph id. The id is returned even when preparation fails;
// the result of hexagon_controller_ConstructGraph is not propagated.
uint32_t hexagon_controller_SetupGraph(int version) {
  const uint32_t graph_id = hexagon_controller_InstantiateGraph();
  hexagon_controller_InitGraph(version, graph_id);
  (void)hexagon_controller_ConstructGraph(graph_id);
  return graph_id;
}
185
// Executes graph nn_id through hexagon_nn_execute_new with the given input
// and output tensor definitions.
//
//   inputs / input_count:   input tensordefs and their count.
//   outputs / output_count: output tensordefs and their count.
//
// Returns true when the call reports 0, false otherwise. Progress and failure
// details are logged only while DBG_EXECUTION is true.
bool hexagon_controller_ExecuteGraphWithMultipleInOut(
    const uint32_t nn_id, const int input_count, hexagon_nn_tensordef* inputs,
    const int output_count, hexagon_nn_tensordef* outputs) {
  if (DBG_EXECUTION) {
    TFMLOGD("Preparing to execute... in = %d, out = %d", input_count,
            output_count);
    LogDHexagon("Execute graph!");
  }

  const int status =
      hexagon_nn_execute_new(nn_id, inputs, input_count, outputs, output_count);
  const bool succeeded = (status == 0);

  if (DBG_EXECUTION) {
    if (succeeded) {
      LogDHexagon("Execution succeeded!");
    } else {
      LogDHexagon("Execution failed!");
      TFMLOGE("execute got err: %d\n", status);
      DumpNNId(nn_id);
    }
  }
  return succeeded;
}
211
hexagon_controller_ExecuteGraph(const uint32_t nn_id,const uint32_t batches,const uint32_t height,const uint32_t width,const uint32_t depth,uint8_t * int_data,const uint32_t int_data_size,uint32_t * out_batches,uint32_t * out_height,uint32_t * out_width,uint32_t * out_depth,uint8_t * out_vals,const uint32_t output_val_byte_size,uint32_t * out_data_byte_size)212 bool hexagon_controller_ExecuteGraph(
213 const uint32_t nn_id, const uint32_t batches, const uint32_t height,
214 const uint32_t width, const uint32_t depth, uint8_t* int_data,
215 const uint32_t int_data_size, uint32_t* out_batches, uint32_t* out_height,
216 uint32_t* out_width, uint32_t* out_depth, uint8_t* out_vals,
217 const uint32_t output_val_byte_size, uint32_t* out_data_byte_size) {
218 if (DBG_EXECUTION) {
219 TFMLOGD("Preparing to execute...");
220 TFMLOGD("Input: %d, %d, %d, %d, %d, %d", batches, height, width, depth,
221 int_data[0], int_data_size);
222 TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals);
223 LogDHexagon("Execute graph!");
224 }
225
226 hexagon_nn_tensordef input;
227 hexagon_nn_tensordef output;
228
229 input.batches = batches;
230 input.height = height;
231 input.width = width;
232 input.depth = depth;
233 input.data = int_data;
234 input.dataLen = int_data_size;
235
236 output.data = out_vals;
237 output.dataLen = output_val_byte_size;
238
239 if (!hexagon_controller_ExecuteGraphWithMultipleInOut(nn_id, 1, &input, 1,
240 &output)) {
241 return false;
242 } else {
243 *out_batches = output.batches;
244 *out_height = output.height;
245 *out_width = output.width;
246 *out_depth = output.depth;
247 *out_data_byte_size = output.dataLen;
248
249 if (DBG_EXECUTION) {
250 LogDHexagon("Execution succeeded!");
251 TFMLOGD("%d x %d x %d x %d, byte size = %d\n", *out_batches, *out_height,
252 *out_width, *out_depth, *out_data_byte_size);
253 }
254 return true;
255 }
256 }
257
hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id)258 bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id) {
259 uint32_t out_batches, out_height, out_width, out_depth;
260 uint32_t out_data_size;
261 // s_output_values = 300 * 300 * 3 * 4 * 4
262 const bool success = hexagon_controller_ExecuteGraph(
263 nn_id, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
264 INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH,
265 (uint8_t*)inception_dummy_int_data_299x299,
266 INCEPTION_PARAM_HEIGHT_V3 * INCEPTION_PARAM_WIDTH_V3 *
267 INCEPTION_PARAM_DEPTH,
268 &out_batches, &out_height, &out_width, &out_depth,
269 (uint8_t*)s_output_values, sizeof(s_output_values), &out_data_size);
270 if (success) {
271 int out_ranking[OUT_RANKING_SIZE];
272 hexagon_controller_PrintMaxNIdx(
273 s_output_values, out_batches * out_height * out_width * out_depth,
274 OUT_RANKING_SIZE, out_ranking);
275 TFMLOGD("%d x %d x %d x %d, size = %d\n", out_batches, out_height,
276 out_width, out_depth, out_data_size);
277 TFMLOGD("max idx: %d\n",
278 FindMaxIdx(s_output_values,
279 out_batches * out_height * out_width * out_depth));
280 if (out_ranking[0] == 169 && out_ranking[1] == 7) {
281 return true;
282 } else {
283 TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
284 return false;
285 }
286 } else {
287 return false;
288 }
289 }
290
hexagon_controller_DumpPerf(uint32_t nn_id)291 void hexagon_controller_DumpPerf(uint32_t nn_id) {
292 hexagon_nn_perfinfo info[MAX_NODES];
293 unsigned long long int total_cycles = 0;
294 unsigned long long int cum_cycles = 0;
295 unsigned long long int counter = 0;
296 unsigned int n_nodes;
297 int i;
298 TFMLOGD("Perf dump follows:");
299 if (hexagon_nn_get_perfinfo(nn_id, info, MAX_NODES, &n_nodes) != 0) {
300 TFMLOGE("perf info failure");
301 return;
302 }
303 TFMLOGD("Total %d nodes.", n_nodes);
304 qsort(info, n_nodes, sizeof(info[0]), CompareCycle);
305 for (i = 0; i < n_nodes; i++) {
306 total_cycles += GetCounter(info[i]);
307 }
308 TFMLOGD("Total %lld cycles.", total_cycles);
309 for (i = 0; i < n_nodes; i++) {
310 counter = GetCounter(info[i]);
311 cum_cycles += counter;
312 TFMLOGD(
313 "node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%,"
314 "cum_cycles,%lld,%f %%\n",
315 info[i].node_id, ConvertGraphInfoIdToName(info[i].node_id),
316 ConvertGraphInfoIdToOpName(info[i].node_id), info[i].executions,
317 counter, 100 * ((double)counter) / total_cycles, cum_cycles,
318 100 * ((double)cum_cycles) / total_cycles);
319 }
320 #ifdef ENABLE_HVX_FULL_DEBUG
321 DumpAllPerf(nn_id);
322 #endif
323 }
324
hexagon_controller_DumpNodeName(uint32_t nn_id)325 void hexagon_controller_DumpNodeName(uint32_t nn_id) {
326 TFMLOGD("Show node name");
327 const uint32_t id = nn_id;
328 hexagon_nn_perfinfo info[MAX_NODES];
329 unsigned long long int total_cycles = 0;
330 unsigned long long int cum_cycles = 0;
331 unsigned long long int counter = 0;
332 unsigned int node_count;
333 int i;
334 TFMLOGD("Perf dump follows:");
335 if (hexagon_nn_get_perfinfo(id, info, MAX_NODES, &node_count) != 0) {
336 TFMLOGD("perf info failure");
337 return;
338 }
339 TFMLOGD("Total %d nodes.", node_count);
340 qsort(info, node_count, sizeof(info[0]), CompareCycle);
341 for (i = 0; i < node_count; i++) {
342 total_cycles += GetCounter(info[i]);
343 }
344 TFMLOGD("Total %lld cycles.", total_cycles);
345 for (i = 0; i < node_count; i++) {
346 counter = GetCounter(info[i]);
347 cum_cycles += counter;
348 TFMLOGD(
349 "node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%,"
350 "cum_cycles,%lld,%f %%",
351 info[i].node_id, ConvertGraphInfoIdToName(info[i].node_id),
352 ConvertGraphInfoIdToOpName(info[i].node_id), info[i].executions,
353 counter, 100 * ((double)counter) / total_cycles, cum_cycles,
354 100 * ((double)cum_cycles) / total_cycles);
355 }
356 }
357
// Tears down graph nn_id by forwarding to hexagon_nn_teardown.
void hexagon_controller_Teardown(uint32_t nn_id) {
  hexagon_nn_teardown(nn_id);
}
359