1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.h"
16
17 #include <vector>
18
19 #include "tensorflow/lite/builtin_ops.h"
20 #include "tensorflow/lite/c/builtin_op_data.h"
21 #include "tensorflow/lite/c/common.h"
22 #include "tensorflow/lite/context_util.h"
23 #include "tensorflow/lite/delegates/hexagon/hexagon_implementation.h"
24 #include "tensorflow/lite/delegates/hexagon/utils.h"
25 #include "tensorflow/lite/kernels/kernel_util.h"
26
27 namespace tflite {
28
29 namespace {
30 // Returns uint64 representing total cycles in 'perf_info' by
31 // combining lo and hi counters.
GetCycles(const hexagon_nn_perfinfo & perf_info)32 inline uint64_t GetCycles(const hexagon_nn_perfinfo& perf_info) {
33 uint64_t res = perf_info.counter_hi;
34 res <<= 32;
35 res |= perf_info.counter_lo;
36 return res;
37 }
38 } // namespace
39
// Reports 'msg' as a kernel failure through the TfLite context, after first
// dumping the device-side NNLib log so the failure context is visible.
void HexagonDelegateKernel::ReportError(TfLiteContext* context,
                                        const std::string& msg) {
  // Print the Hexagon log before the error message itself.
  PrintLog();
  TF_LITE_KERNEL_LOG(context, "Failed: %s.", msg.c_str());
}
45
Init(TfLiteContext * context,const TfLiteDelegateParams * params)46 TfLiteStatus HexagonDelegateKernel::Init(TfLiteContext* context,
47 const TfLiteDelegateParams* params) {
48 hexagon_nn_ = HexagonNNImplementation();
49 if (hexagon_nn_ == nullptr) {
50 TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
51 return kTfLiteError;
52 }
53
54 // Ensure Hexagon NNLib is ready to start working.
55 int error = hexagon_nn_->hexagon_nn_config();
56 if (error != 0) {
57 TF_LITE_KERNEL_LOG(context, "hexagon_nn_config failed. Error: %d", error);
58 return kTfLiteError;
59 }
60
61 // Initialize an empty graph.
62 error = hexagon_nn_->hexagon_nn_init(&graph_id_);
63 if (error != 0) {
64 ReportError(context, "failed to init");
65 return kTfLiteError;
66 }
67 error =
68 hexagon_nn_->hexagon_nn_set_debug_level(graph_id_, params_.debug_level);
69 if (error != 0) {
70 TF_LITE_KERNEL_LOG(context, "Failed to set debug level, error: %d", error);
71 return kTfLiteError;
72 }
73 error = hexagon_nn_->hexagon_nn_set_powersave_level(params_.powersave_level);
74 if (error != 0) {
75 TF_LITE_KERNEL_LOG(context, "Failed to set powersave level, error %d",
76 error);
77 return kTfLiteError;
78 }
79
80 for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
81 nodes_.push_back(node_index);
82 }
83
84 TF_LITE_ENSURE_STATUS(
85 BuildGraph(context, params->input_tensors, params->output_tensors));
86 return kTfLiteOk;
87 }
88
Eval(TfLiteContext * context,TfLiteNode * node)89 TfLiteStatus HexagonDelegateKernel::Eval(TfLiteContext* context,
90 TfLiteNode* node) {
91 if (hexagon_nn_ == nullptr) {
92 TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
93 return kTfLiteError;
94 }
95 // Allocate inputs.
96 std::vector<hexagon_nn_tensordef> input_tensors;
97 for (int input_idx = 0; input_idx < node->inputs->size; ++input_idx) {
98 const auto tensor_index = node->inputs->data[input_idx];
99 if (tensor_index == kTfLiteOptionalTensor) {
100 continue;
101 }
102 TfLiteTensor* tensor = &context->tensors[tensor_index];
103 // Const tensors should have been handled at delegation time..
104 if (tensor->allocation_type != kTfLiteMmapRo) {
105 char* data_ptr = tensor->data.raw;
106
107 if (tensor->dims->size > 4) {
108 ReportError(context, "Only up to 4d tensor are supported.");
109 return kTfLiteError;
110 }
111 input_tensors.emplace_back();
112 auto& input_tensor = input_tensors.back();
113 input_tensor.data = reinterpret_cast<unsigned char*>(data_ptr);
114 input_tensor.dataLen = tensor->bytes;
115 input_tensor.data_valid_len = tensor->bytes;
116 TF_LITE_ENSURE_STATUS(
117 Get4DShape(&input_tensor.batches, &input_tensor.height,
118 &input_tensor.width, &input_tensor.depth, tensor->dims));
119 }
120 }
121
122 // Allocate outputs.
123 std::vector<hexagon_nn_tensordef> output_tensors;
124 for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
125 if (tensor_index == kTfLiteOptionalTensor) {
126 continue;
127 }
128 TfLiteTensor* tensor = &context->tensors[tensor_index];
129 if (tensor->allocation_type != kTfLiteMmapRo) {
130 if (tensor->dims->size > 4) {
131 ReportError(context, "Only up to 4d tensor are supported.");
132 return kTfLiteError;
133 }
134 output_tensors.emplace_back();
135 auto& output_tensor = output_tensors.back();
136 output_tensor.data = reinterpret_cast<unsigned char*>(tensor->data.raw);
137 output_tensor.dataLen = tensor->bytes;
138 }
139 }
140
141 if (params_.print_graph_profile) {
142 hexagon_nn_->hexagon_nn_reset_perfinfo(graph_id_, 0);
143 }
144
145 // Execute.
146 int error = hexagon_nn_->hexagon_nn_execute_new(
147 graph_id_, input_tensors.data(), input_tensors.size(),
148 output_tensors.data(), output_tensors.size());
149 if (error != 0) {
150 ReportError(context, "Failed to execute graph.");
151 return kTfLiteError;
152 }
153
154 if (params_.print_graph_profile) {
155 PrintPerformanceData(reinterpret_cast<Profiler*>(context->profiler));
156 }
157 return kTfLiteOk;
158 }
159
ResizeOutputTensors(TfLiteContext * context,TfLiteNode * node)160 TfLiteStatus HexagonDelegateKernel::ResizeOutputTensors(TfLiteContext* context,
161 TfLiteNode* node) {
162 if (!params_.enable_dynamic_batch_size) return kTfLiteError;
163 int new_batch = -1;
164 for (int i = 0; i < params_.input_batch_dimensions->size; ++i) {
165 // If this input has no dynamic shape skip it.
166 if (params_.input_batch_dimensions->data[i] == -1) continue;
167 int input_tensor_index = node->inputs->data[i];
168 TfLiteTensor* input_tensor = &context->tensors[input_tensor_index];
169 new_batch =
170 input_tensor->dims->data[params_.input_batch_dimensions->data[i]];
171 break;
172 }
173 if (new_batch == -1) {
174 TF_LITE_KERNEL_LOG(context, "Invalid Batch size.");
175 return kTfLiteError;
176 }
177 for (int i = 0; i < node->outputs->size; ++i) {
178 // If this output has no dynamic shape skip it.
179 if (params_.output_batch_dimensions->data[i] == -1) continue;
180 int output_tensor_index = node->outputs->data[i];
181 TfLiteTensor* output_tensor = &context->tensors[output_tensor_index];
182 TfLiteIntArray* new_shape = TfLiteIntArrayCopy(output_tensor->dims);
183 new_shape->data[params_.output_batch_dimensions->data[i]] = new_batch;
184 TF_LITE_ENSURE_OK(context,
185 context->ResizeTensor(context, output_tensor, new_shape));
186 }
187 return kTfLiteOk;
188 }
189
Prepare(TfLiteContext * context,TfLiteNode * node)190 TfLiteStatus HexagonDelegateKernel::Prepare(TfLiteContext* context,
191 TfLiteNode* node) {
192 if (graph_prepared_) {
193 if (!params_.enable_dynamic_batch_size)
194 TF_LITE_KERNEL_LOG(context, "Calling prepare multiple times");
195 // Graph already prepared, but we must resize TFLite output tensors
196 // based on the new input shape.
197 return ResizeOutputTensors(context, node);
198 }
199 if (hexagon_nn_ == nullptr) {
200 ReportError(context, "Hexagon interface not available. prepare");
201 return kTfLiteError;
202 }
203 int status = hexagon_nn_->hexagon_nn_prepare(graph_id_);
204 if (status != 0) {
205 ReportError(context, "Failed to prepare graph.\n");
206 return kTfLiteError;
207 }
208
209 // Check input/output tensors.
210 std::vector<int> tensors;
211 for (auto tensor_index : TfLiteIntArrayView(node->inputs)) {
212 tensors.push_back(tensor_index);
213 }
214 for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
215 tensors.push_back(tensor_index);
216 }
217 for (auto tensor_index : tensors) {
218 if (tensor_index == kTfLiteOptionalTensor) {
219 continue;
220 }
221 TfLiteTensor* tensor = &context->tensors[tensor_index];
222 // Const tensors should be added as const nodes during graph construction.
223 if (tensor->allocation_type != kTfLiteMmapRo && tensor->dims->size > 4) {
224 ReportError(context, "Only up to 4d tensor are supported.");
225 return kTfLiteError;
226 }
227 }
228
229 if (params_.print_graph_debug) {
230 PrintDebuggingGraph();
231 }
232
233 // Mark graph as prepared, since we can't prepare it multiple times.
234 graph_prepared_ = true;
235
236 return kTfLiteOk;
237 }
238
BuildGraph(TfLiteContext * context,const TfLiteIntArray * input_tensors,const TfLiteIntArray * output_tensors)239 TfLiteStatus HexagonDelegateKernel::BuildGraph(
240 TfLiteContext* context, const TfLiteIntArray* input_tensors,
241 const TfLiteIntArray* output_tensors) {
242 builder_.reset(
243 new delegates::hexagon::GraphBuilder(hexagon_nn_, context, graph_id_));
244 if (params_.enable_dynamic_batch_size) {
245 builder_->AddBatchSeqConfig(params_.max_batch_size,
246 params_.input_batch_dimensions,
247 params_.output_batch_dimensions);
248 }
249 // Add inputs to the graph.
250 TF_LITE_ENSURE_STATUS(builder_->AddInputTensors(input_tensors, context));
251
252 // Add all ops.
253 TfLiteNode* node;
254 TfLiteRegistration* reg;
255 for (int node_index : nodes_) {
256 TF_LITE_ENSURE_STATUS(
257 context->GetNodeAndRegistration(context, node_index, &node, ®));
258 // Const inputs needs to be added to the hexagon graph as const nodes.
259 // Adding them earlier here to the graph
260 // - Simplifies separate builders
261 // - Simplifies int8 vs uint8 cases, builders don't need to handle them.
262 for (int i = 0; i < node->inputs->size; ++i) {
263 const int tensor_id = node->inputs->data[i];
264 if (tensor_id == -1) continue;
265 const auto& input_tensor = context->tensors[tensor_id];
266 if (input_tensor.allocation_type == kTfLiteMmapRo) {
267 builder_->AddConstNodeWithData(
268 tensor_id, input_tensor,
269 /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8));
270 }
271 }
272 auto* op_builder =
273 builder_->AddNodeFromTfLiteOp(reg->builtin_code, node, node_index);
274 TF_LITE_ENSURE_STATUS(
275 op_builder->PopulateSubGraph(node->inputs, node->outputs, context));
276 TF_LITE_ENSURE_STATUS(op_builder->RegisterOutputs(node->outputs, context));
277 }
278
279 // Add Outputs.
280 TF_LITE_ENSURE_STATUS(builder_->AddOutputTensors(output_tensors, context));
281
282 builder_->Build();
283
284 return kTfLiteOk;
285 }
286
~HexagonDelegateKernel()287 HexagonDelegateKernel::~HexagonDelegateKernel() {
288 if (graph_id_ != -1) {
289 hexagon_nn_->hexagon_nn_teardown(graph_id_);
290 }
291 }
292
PrintLog()293 void HexagonDelegateKernel::PrintLog() {
294 std::vector<unsigned char> buf(3000000);
295 time_t my_time = time(nullptr);
296 hexagon_nn_->hexagon_nn_getlog(graph_id_, buf.data(), buf.size());
297 printf("----------------\n");
298 printf("Timestamp: %s\n\n", ctime(&my_time));
299 printf("Log\n%s\n", buf.data());
300 printf("----------------\n");
301 fflush(stdout);
302 }
303
PrintPerformanceData(Profiler * profiler)304 void HexagonDelegateKernel::PrintPerformanceData(Profiler* profiler) {
305 if (profiler == nullptr) {
306 return;
307 }
308 const int kMaxNodes = 2048;
309 const int kMaxNameLen = 100;
310 std::vector<hexagon_nn_perfinfo> perf_data(kMaxNodes);
311 std::vector<char> op_name(kMaxNameLen);
312 uint64_t counter = 0;
313 unsigned int num_nodes;
314 if (hexagon_nn_->hexagon_nn_get_perfinfo(graph_id_, perf_data.data(),
315 kMaxNodes, &num_nodes) != 0) {
316 printf("Failed fetching perf data.\n");
317 return;
318 }
319 for (int i = 0; i < num_nodes; i++) {
320 counter = GetCycles(perf_data[i]);
321 int op_type_id = builder_->GetOpTypeId(perf_data[i].node_id);
322 if (op_type_id >= 0 && hexagon_nn_->hexagon_nn_op_id_to_name(
323 op_type_id, op_name.data(), kMaxNameLen) != 0) {
324 printf("Failed to fetch name for %u with type %d\n", perf_data[i].node_id,
325 op_type_id);
326 continue;
327 }
328 int node_id = builder_->GetTFLiteNodeID(perf_data[i].node_id);
329 if (node_id != -1 && op_type_id >= 0) {
330 profiler->AddEvent((op_type_id < 0 ? "" : op_name.data()),
331 Profiler::EventType::OPERATOR_INVOKE_EVENT, 0, counter,
332 node_id);
333 }
334 }
335 }
336
PrintDebuggingGraph()337 void HexagonDelegateKernel::PrintDebuggingGraph() {
338 const int kMaxBufLen = 100000;
339 std::vector<unsigned char> buf(kMaxBufLen);
340 if (hexagon_nn_->hexagon_nn_snpprint(graph_id_, buf.data(), kMaxBufLen) !=
341 0) {
342 printf("Error fetching graph debug details.\n");
343 return;
344 }
345 printf("------- Graph Debugging Start -------\n");
346 printf("%s\n", buf.data());
347 printf("------- Graph Debugging End -------\n");
348 }
349
Teardown()350 void HexagonDelegateKernel::Teardown() {
351 auto* hexagon_nn = HexagonNNImplementation();
352 if (hexagon_nn != nullptr) {
353 hexagon_nn->hexagon_nn_global_teardown();
354 }
355 }
356
InitState()357 void HexagonDelegateKernel::InitState() {
358 auto* hexagon_nn = HexagonNNImplementation();
359 if (hexagon_nn != nullptr) {
360 hexagon_nn->hexagon_nn_global_init();
361 }
362 }
363 } // namespace tflite
364