/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"

#include "tensorflow/core/framework/graph_transfer_info.pb.h"
#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
#include "tensorflow/core/kernels/hexagon/soc_interface.h"
#include "tensorflow/core/platform/profile_utils/cpu_utils.h"

namespace tensorflow {

constexpr const char* const OUTPUT_OP_NAME = "OUTPUT";
constexpr const char* const REMOTE_FUSED_GRAPH_NODE_NAME_PREFIX =
    "hexagon_remote_fused_graph";
/* static */ constexpr const char* const
    HexagonControlWrapper::REMOTE_FUSED_GRAPH_EXECUTOR_NAME;

constexpr int ALIGNMENT_BYTES = 16;
constexpr int MAX_IN_OUT_COUNT = 128;

const bool DBG_DUMP_VERIFICATION_STRING = false;
const int DBG_LEVEL = 0;  // -2: verbose, -1: debug, 0: info
const bool DBG_USE_DUMMY_INPUT = false;
const bool DBG_USE_SAMPLE_INPUT = false;
const int64 FLAG_ENABLE_PANDA_BINARY_INPUT = 0x01;
const bool DBG_DUMP_INPUT_TENSOR_AS_FLOAT_DATA = false;

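// Normalizes a node name to the canonical "name:port" form by appending an
// explicit ":0" output port when none is present, so names can be used as
// consistent map keys.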
static string AddPort(const string& node_name) {
  if (node_name.find(':') != string::npos) {
    return node_name;
  } else {
    return strings::StrCat(node_name, ":", 0);
  }
}

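// Rounds a pointer up to the next ALIGNMENT_BYTES boundary. Callers
// over-allocate their buffers by at least ALIGNMENT_BYTES - 1 bytes so the
// shifted pointer still leaves room for the payload.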
static uint8* FindAlignedPointer(uint8* ptr) {
  const uintptr_t data_ptr_int = reinterpret_cast<uintptr_t>(ptr);
  const int shift_count =
      (ALIGNMENT_BYTES - data_ptr_int % ALIGNMENT_BYTES) % ALIGNMENT_BYTES;
  uint8* data_ptr = ptr + shift_count;
  return data_ptr;
}

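// Linearly scans node_info for a node with the given name; returns nullptr
// if no such node exists. This only runs during setup, so the linear scan is
// not performance critical.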
/* static */ GraphTransferNodeInfo* HexagonControlWrapper::FindNodeInfo(
    const string& name, GraphTransferInfo* graph_transfer_info) {
  for (GraphTransferNodeInfo& node_info :
       *graph_transfer_info->mutable_node_info()) {
    if (node_info.name() == name) {
      return &node_info;
    }
  }
  return nullptr;
}

int HexagonControlWrapper::GetVersion() {
  return soc_interface_GetSocControllerVersion();
}

bool HexagonControlWrapper::Init(const RemoteFusedGraphExecuteInfo& info) {
  soc_interface_SetLogLevel(DBG_LEVEL);
  if (DBG_USE_SAMPLE_INPUT) {
    soc_interface_SetDebugFlag(FLAG_ENABLE_PANDA_BINARY_INPUT);
  }
  if (info.serialized_executor_parameters().empty()) {
    std::vector<std::pair<string, Tensor>> inputs;
    std::vector<string> outputs;
    RemoteFusedGraphExecuteUtils::BuildRemoteGraphInputsAndOutputsFromProto(
        info, &inputs, &outputs);
    Status status = graph_transferer_.LoadGraphFromProto(
        HexagonOpsDefinitions::getInstance(), info.remote_graph(), inputs,
        outputs,
        false  // shape_inference_for_unknown_shape
    );
    TF_CHECK_OK(status) << status;
  } else {
    // If graph transfer info is attached, just import it.
    graph_transferer_.SetSerializedGraphTransferInfo(
        info.serialized_executor_parameters());
  }
  execute_info_ = &info;
  bool success = soc_interface_Init();
  if (!success) {
    LOG(ERROR) << "Hexagon initialization failed. See log output.";
    return false;
  }
  std::vector<int> input_sizes;
  std::vector<int> output_sizes;
  CHECK_NOTNULL(execute_info_);
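  // Map each graph input to its port index and compute its buffer size in
  // bytes (the element size of its dtype times the product of its default
  // shape's dimensions). Outputs are handled the same way below.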
  for (int i = 0; i < execute_info_->graph_input_node_name_size(); ++i) {
    const string& input = execute_info_->graph_input_node_name(i);
    LOG(INFO) << "Add input: " << input << ", " << i;
    CHECK(input_port_map_.emplace(AddPort(input), i).second);
    const RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& shape_type =
        execute_info_->default_graph_input_tensor_shape(i);
    int64 buf_size = DataTypeSize(shape_type.dtype());
    for (const TensorShapeProto::Dim& dim : shape_type.shape().dim()) {
      buf_size *= dim.size();
    }
    input_sizes.emplace_back(static_cast<int>(buf_size));
  }
  for (int i = 0; i < execute_info_->graph_output_node_name_size(); ++i) {
    const string& output = execute_info_->graph_output_node_name(i);
    CHECK(output_port_map_.emplace(AddPort(output), i).second);
    const RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& shape_type =
        execute_info_->default_graph_output_tensor_shape(i);

    int64 buf_size = DataTypeSize(shape_type.dtype());
    for (const TensorShapeProto::Dim& dim : shape_type.shape().dim()) {
      buf_size *= dim.size();
    }
    output_sizes.emplace_back(static_cast<int>(buf_size));
  }

  LOG(INFO) << "Allocate inout buffer";
  success &= soc_interface_AllocateInOutNodeBuffers(
      input_sizes.size(), input_sizes.data(), output_sizes.size(),
      output_sizes.data());
  return success;
}

bool HexagonControlWrapper::Finalize() { return soc_interface_Finalize(); }
bool HexagonControlWrapper::SetupGraph() {
  // Fetch the graph transfer info and modify it in place to adapt it to the
  // hexnn library.
  GraphTransferInfo& graph_transfer_info =
      graph_transferer_.GetMutableGraphTransferInfo();

  // Verify that every graph input node exists in the transfer info.
  for (const GraphTransferGraphInputNodeInfo& graph_input :
       graph_transfer_info.graph_input_node_info()) {
    GraphTransferNodeInfo* node_info =
        FindNodeInfo(graph_input.name(), &graph_transfer_info);
    CHECK_NE(node_info, nullptr);
  }

  // Generate a new output node which is connected to the graph output node
  // TODO(satok): Support multiple output nodes
  CHECK_EQ(graph_transfer_info.graph_output_node_info_size(), 1);
  for (const GraphTransferGraphOutputNodeInfo& graph_output :
       graph_transfer_info.graph_output_node_info()) {
    const int new_output_node_id = graph_transfer_info.node_info_size() +
                                   graph_transfer_info.const_node_info_size() +
                                   2 /* offset for ids */;
    // Register a new output node
    GraphTransferNodeInfo& new_output_node_info =
        *graph_transfer_info.add_node_info();
    new_output_node_info.set_name(OUTPUT_OP_NAME);
    new_output_node_info.set_node_id(new_output_node_id);
    new_output_node_info.set_type_name(OUTPUT_OP_NAME);
    new_output_node_info.set_soc_op_id(
        HexagonOpsDefinitions::getInstance().GetOpIdFor(OUTPUT_OP_NAME, {}));
    new_output_node_info.set_padding_id(0 /* PADDING_NA_ID */);
    new_output_node_info.set_input_count(1);
    new_output_node_info.set_output_count(0);

    const TensorId tid = ParseTensorName(graph_output.name());
    const string node_name(tid.first);
    const int port = tid.second;
    // Register node input for the new output node
    const GraphTransferNodeInfo* node_info =
        FindNodeInfo(node_name, &graph_transfer_info);
    CHECK_NE(node_info, nullptr);
    GraphTransferNodeInputInfo& node_input_info =
        *graph_transfer_info.add_node_input_info();
    node_input_info.set_node_id(new_output_node_id);
    GraphTransferNodeInput& node_input = *node_input_info.add_node_input();
    node_input.set_node_id(node_info->node_id());
    node_input.set_output_port(port);
  }

  if (DBG_DUMP_VERIFICATION_STRING) {
    GraphTransferer gt;
    gt.SetSerializedGraphTransferInfo(graph_transfer_info.SerializeAsString());
    gt.DumpVerificationStringOfNodeTransferParams();
  }

  int inputs_count = 0;
  int outputs_count = 0;
  for (const GraphTransferNodeInputInfo& input_params :
       graph_transfer_info.node_input_info()) {
    inputs_count += input_params.node_input_size();
  }

  for (const GraphTransferNodeOutputInfo& output_params :
       graph_transfer_info.node_output_info()) {
    outputs_count += output_params.max_byte_size_size();
  }
  // Allocate memory for node inputs and node outputs
  soc_interface_AllocateNodeInputAndNodeOutputArray(inputs_count,
                                                    outputs_count);

  // Construct node input parameters
  std::unordered_map<int, std::tuple<void*, int>> inputs_map;
  for (const GraphTransferNodeInputInfo& input_params :
       graph_transfer_info.node_input_info()) {
    const int count = input_params.node_input_size();
    CHECK(count <= MAX_IN_OUT_COUNT);
    int node_ids[MAX_IN_OUT_COUNT];
    int ports[MAX_IN_OUT_COUNT];
    for (int i = 0; i < count; ++i) {
      const GraphTransferNodeInput& node_input = input_params.node_input(i);
      node_ids[i] = node_input.node_id() + NODE_ID_OFFSET;
      ports[i] = node_input.output_port();
    }
    void* inputs_ptr = soc_interface_SetOneNodeInputs(count, node_ids, ports);
    const int node_id = input_params.node_id();
    CHECK(inputs_map.count(node_id) == 0);
    inputs_map.emplace(node_id, std::make_tuple(inputs_ptr, count));
  }

  // Construct node output parameters
  std::unordered_map<int, std::tuple<void*, int>> outputs_map;
  for (const GraphTransferNodeOutputInfo& output_params :
       graph_transfer_info.node_output_info()) {
    const int count = output_params.max_byte_size_size();
    CHECK(count <= MAX_IN_OUT_COUNT);
    int sizes[MAX_IN_OUT_COUNT];
    for (int i = 0; i < count; ++i) {
      const int size = output_params.max_byte_size(i);
      sizes[i] = size;
    }
    void* outputs_ptr = soc_interface_SetOneNodeOutputs(count, sizes);
    const int node_id = output_params.node_id();
    CHECK(outputs_map.count(node_id) == 0);
    outputs_map.emplace(node_id, std::make_tuple(outputs_ptr, count));
  }

  // Instantiate graph
  soc_interface_InstantiateGraph();

  // Initialize graph
  // 1. Setup const nodes
  for (const GraphTransferConstNodeInfo& params :
       graph_transfer_info.const_node_info()) {
    const int node_id = params.node_id();
    // TODO(satok): Stop assuming shape size is 4.
    CHECK(params.shape_size() == 4);
    const int64 shape_0 = params.shape(0);
    const int64 shape_1 = params.shape(1);
    const int64 shape_2 = params.shape(2);
    const int64 shape_3 = params.shape(3);
    const int data_size = params.data().length();
    CHECK(dummy_const_data_.count(node_id) == 0);
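    // Keep an owned copy of the const data in dummy_const_data_ so the
    // buffer outlives graph construction, over-allocating by
    // ALIGNMENT_BYTES - 1 so an ALIGNMENT_BYTES-aligned pointer fits inside.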
    auto data = dummy_const_data_.emplace(
        std::piecewise_construct, std::make_tuple(node_id), std::make_tuple());
    CHECK(data.second);
    data.first->second.resize(data_size + ALIGNMENT_BYTES - 1);
    uint8* data_ptr = FindAlignedPointer(data.first->second.data());
    std::memcpy(data_ptr, params.data().data(), data_size);
    soc_interface_AppendConstNode(params.name().c_str(),
                                  node_id + NODE_ID_OFFSET, shape_0, shape_1,
                                  shape_2, shape_3, data_ptr, data_size);
  }

  // 2. Setup op nodes
  for (const GraphTransferNodeInfo& params : graph_transfer_info.node_info()) {
    const int node_id = params.node_id();
    const int op_id = params.soc_op_id();
    CHECK(inputs_map.count(node_id) == 1);
    CHECK(outputs_map.count(node_id) <= 1);
    // Only the output node has no outputs
    const bool has_output = outputs_map.count(node_id) == 1;
    const auto& input_ptr_and_count = inputs_map.at(node_id);
    const void* input_ptr = std::get<0>(input_ptr_and_count);
    const int input_count = std::get<1>(input_ptr_and_count);
    void* output_ptr = nullptr;
    int output_count = 0;
    if (has_output) {
      const auto& output_ptr_and_count = outputs_map.at(node_id);
      output_ptr = std::get<0>(output_ptr_and_count);
      output_count = std::get<1>(output_ptr_and_count);
      // CHECK(output_count > 0);
    }
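    // Translate the TensorFlow padding enum into the hexnn padding id:
    // 0 = NA, 1 = SAME, 2 = VALID.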
    int padding_id = -1;
    if (params.padding_id() == 0) {
      padding_id = 0;
    } else if (params.padding_id() == Padding::SAME) {
      padding_id = 1;
    } else if (params.padding_id() == Padding::VALID) {
      padding_id = 2;
    } else {
      LOG(FATAL);
    }
    soc_interface_AppendNode(params.name().c_str(), node_id + NODE_ID_OFFSET,
                             op_id, padding_id, input_ptr, input_count,
                             output_ptr, output_count);
  }

  LOG(INFO) << "Setup graph completed";

  // 3. Construct graph
  return soc_interface_ConstructGraph();

  // Keep following comment to use dummy graph construction
  // return soc_interface_setupDummyGraph(3 /* inception version */);
}

bool HexagonControlWrapper::ExecuteGraph() {
  return soc_interface_ExecuteGraph();
}

bool HexagonControlWrapper::TeardownGraph() {
  soc_interface_ReleaseNodeInputAndNodeOutputArray();
  return soc_interface_TeardownGraph();
}

bool HexagonControlWrapper::FillInputNode(
    const string& node_name,
    const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>& shape,
    const ConstByteArray bytes) {
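  // `bytes` is a (data pointer, byte length, data type) tuple. The payload is
  // copied into an owned, ALIGNMENT_BYTES-aligned staging buffer before being
  // handed to the SOC interface.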
  const string tensor_name = AddPort(node_name);
  CHECK(input_port_map_.count(tensor_name) > 0);
  const int port = input_port_map_.at(tensor_name);
  if (input_tensor_data_.count(port) <= 0) {
    input_tensor_data_.emplace(port, std::vector<uint8>{});
  }
  std::vector<uint8>& input_tensor_data = input_tensor_data_.at(port);

  // hexagon only supports 32bit dimension
  const int x = static_cast<int>(shape[0]);
  const int y = static_cast<int>(shape[1]);
  const int z = static_cast<int>(shape[2]);
  const int d = static_cast<int>(shape[3]);

  const uint64 byte_size = x * y * z * d * DataTypeSize(std::get<2>(bytes));
  CHECK_EQ(byte_size, std::get<1>(bytes));
  input_tensor_data.resize(byte_size + ALIGNMENT_BYTES);
  uint8* data_ptr = FindAlignedPointer(input_tensor_data.data());

  if (DBG_USE_DUMMY_INPUT) {
    std::memset(data_ptr, 0, byte_size);
  } else {
    std::memcpy(data_ptr, std::get<0>(bytes), byte_size);
  }

  return soc_interface_FillInputNodeWithPort(port, x, y, z, d, data_ptr,
                                             byte_size);
}

bool HexagonControlWrapper::ReadOutputNode(
    const string& node_name, TensorAllocatorFunc tensor_allocator) {
  CHECK_NE(execute_info_, nullptr);
  TensorShape output_shape;
  // TODO(satok): Switch shape corresponding to input shape
  for (int i = 0; i < execute_info_->graph_output_node_name_size(); ++i) {
    if (execute_info_->graph_output_node_name(i) == node_name) {
      for (const TensorShapeProto::Dim& dim :
           execute_info_->default_graph_output_tensor_shape(i).shape().dim()) {
        output_shape.AddDim(dim.size());
      }
      break;
    }
  }
  std::vector<ByteArray> outputs;
  ReadOutputNode(node_name, &outputs);
  CHECK_EQ(1, outputs.size());
  ByteArray& output = outputs[0];
  Tensor* output_tensor = tensor_allocator(output_shape);
  CHECK(output_tensor->TotalBytes() >= std::get<1>(output))
      << output_tensor->TotalBytes() << ", " << std::get<1>(output);
  TF_CHECK_OK(RemoteFusedGraphExecuteUtils::CopyByteArrayToTensor(
      std::get<0>(output), std::get<1>(output), output_tensor));
  return true;
}

bool HexagonControlWrapper::ReadOutputNode(
    const string& node_name, std::vector<ByteArray>* const outputs) {
  CHECK(outputs != nullptr);
  ByteArray output;
  const string tensor_name = AddPort(node_name);
  CHECK(output_port_map_.count(tensor_name) > 0);
  const int port = output_port_map_.at(tensor_name);
  soc_interface_ReadOutputNodeWithPort(
      port, &std::get<0>(output),
      reinterpret_cast<uint64_t*>(&std::get<1>(output)));
  // TODO: Accept all results
  // std::get<2>(output) = DT_FLOAT;
  outputs->emplace_back(output);
  return true;
}

Status HexagonControlWrapper::FuseRemoteGraph(
    const GraphDef& original_graph_def, const std::vector<string>& inputs,
    const std::vector<string>& outputs, GraphDef* fused_graph_def) {
  const std::unordered_set<string> fused_node_names =
      RemoteFusedGraphExecuteUtils::BuildNodeMapFromOpsDefinitions(
          original_graph_def, HexagonOpsDefinitions::getInstance());
  // TODO(satok): We may want to place shape and type inside this function
  // if they are not placed in the given graph.
  TF_RETURN_IF_ERROR(RemoteFusedGraphExecuteUtils::FuseRemoteGraphByNodeNames(
      original_graph_def, inputs, outputs, REMOTE_FUSED_GRAPH_NODE_NAME_PREFIX,
      fused_node_names, REMOTE_FUSED_GRAPH_EXECUTOR_NAME,
      /*require_shape_type=*/true, fused_graph_def));
  return Status::OK();
}

bool HexagonControlWrapper::FillInputNode(const string& node_name,
                                          const Tensor& tensor) {
  StringPiece tensor_data = tensor.tensor_data();
  const ConstByteArray ba =
      ConstByteArray(reinterpret_cast<const uint8*>(tensor_data.data()),
                     tensor_data.size(), tensor.dtype());
  if (DBG_DUMP_INPUT_TENSOR_AS_FLOAT_DATA) {
    LOG(INFO) << "Input tensor data: element size = " << tensor.NumElements()
              << ", byte size = " << tensor.TotalBytes();
    std::stringstream line;
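    // Dump the tensor as float data, three elements per log line (flushing
    // any remainder at the final element).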
    for (int i = 0; i < tensor.NumElements(); ++i) {
      line << tensor.flat<float>().data()[i] << ", ";
      if ((i - 2) % 3 == 0 || i == tensor.NumElements() - 1) {
        LOG(INFO) << "(" << ((i - 2) / 3) << ") " << line.str();
        line.str("");
        line.clear();
      }
    }
  }
  const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE> shape =
      GraphTransferer::ToTensorShapeArray(tensor.shape());
  FillInputNode(node_name, shape, ba);
  return true;
}

bool HexagonControlWrapper::IsEnabled() const { return true; }
}  // namespace tensorflow