• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
/* Before calling this test program, download the model and the label file and
push them to the device as follows.
$ curl \
    https://storage.googleapis.com/download.tensorflow.org/models/tensorflow_inception_v3_stripped_optimized_quantized.pb \
    -o /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb
$ adb push /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb \
    /data/local/tmp
$ curl \
    https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_graph_label_strings.txt \
    -o /tmp/imagenet_comp_graph_label_strings.txt
$ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
*/
26 
27 // define EIGEN_USE_THREADS to include quantization_utils.h
28 #define EIGEN_USE_THREADS
29 
30 #include <memory>
31 
32 #include "tensorflow/core/framework/tensor_shape.pb.h"
33 #include "tensorflow/core/framework/tensor_testutil.h"
34 #include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
35 #include "tensorflow/core/kernels/hexagon/graph_transferer.h"
36 #include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
37 #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
38 #include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
39 #include "tensorflow/core/kernels/i_remote_fused_graph_ops_definitions.h"
40 #include "tensorflow/core/kernels/quantization_utils.h"
41 #include "tensorflow/core/lib/core/casts.h"
42 #include "tensorflow/core/lib/core/status.h"
43 #include "tensorflow/core/lib/core/status_test_util.h"
44 #include "tensorflow/core/lib/io/path.h"
45 #include "tensorflow/core/lib/strings/str_util.h"
46 #include "tensorflow/core/platform/env.h"
47 #include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
48 #include "tensorflow/core/platform/test.h"
49 #include "tensorflow/core/public/session.h"
50 #include "tensorflow/core/public/session_options.h"
51 
52 namespace tensorflow {
53 
54 using ByteArray = HexagonControlWrapper::ByteArray;
55 
// Files that are expected to exist on the device before the tests run.
constexpr const char* IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
constexpr const char* MODEL_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized.pb";
constexpr const char* MODEL_WITH_QUANTIZED_INPUT_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_with_quantized_input"
    ".pb";
constexpr const char* FUSED_MODEL_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_fused_hexagon.pb";

// Name of the node that is fetched when running a fused graph.
constexpr const char* REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME =
    "remote_fused_graph_execute_node";

constexpr bool USE_SHAPE_INFERENCE = false;

// When true, LoadImage logs every converted pixel.
constexpr bool DBG_DUMP_FLOAT_DATA = false;

// Dimensions of the input image tensor.
constexpr int WIDTH = 299;
constexpr int HEIGHT = 299;
constexpr int DEPTH = 3;

// Index that is expected to have the highest score for the test image.
constexpr int EXPECTED_FIRST_RESULT_ID = 59;

// Number of graph executions used to compute the average execution time.
constexpr int EXECUTION_REPEAT_COUNT = 10;
76 
CheckHexagonControllerVersion()77 static void CheckHexagonControllerVersion() {
78   HexagonControlWrapper hexagon_control_wrapper;
79   const int version = hexagon_control_wrapper.GetVersion();
80   ASSERT_GE(version, 1);
81   LOG(INFO) << "Hexagon controller version is " << version;
82 }
83 
DumpTop10Results(const int byte_size,const float * const float_array)84 static void DumpTop10Results(const int byte_size,
85                              const float* const float_array) {
86   const int element_count = byte_size / sizeof(float);
87   const string label_filename =
88       "/data/local/tmp/imagenet_comp_graph_label_strings.txt";
89   string label_str;
90   TF_CHECK_OK(ReadFileToString(Env::Default(), label_filename, &label_str));
91   std::vector<string> labels = str_util::Split(label_str, '\n');
92   GraphTransferUtils::DumpTopNFloatResults(
93       float_array, labels.data(),
94       std::min(element_count, static_cast<int>(labels.size())),
95       10 /* show top_n results */);
96 }
97 
DumpTop10Results(const std::vector<ByteArray> & outputs)98 static void DumpTop10Results(const std::vector<ByteArray>& outputs) {
99   CHECK(outputs.size() == 1);
100   const int byte_size = std::get<1>(outputs.at(0));
101   const float* float_array =
102       reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
103   DumpTop10Results(byte_size, float_array);
104 }
105 
CheckFirstResult(const std::vector<ByteArray> & outputs,const int expected_first_id)106 static void CheckFirstResult(const std::vector<ByteArray>& outputs,
107                              const int expected_first_id) {
108   EXPECT_GE(outputs.size(), 1);
109   const int byte_size = std::get<1>(outputs.at(0));
110   const int element_count = byte_size / sizeof(float);
111   const float* float_array =
112       reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
113   EXPECT_GE(element_count, 1);
114   std::vector<string> labels(element_count);
115   std::priority_queue<std::tuple<float, int, string>> queue =
116       GraphTransferUtils::GetTopNFloatResults(float_array, labels.data(),
117                                               element_count);
118   const std::tuple<float, int, string>& entry = queue.top();
119   EXPECT_EQ(expected_first_id, std::get<1>(entry));
120 }
121 
LoadImage(std::vector<float> * img_floats_ptr)122 static void LoadImage(std::vector<float>* img_floats_ptr) {
123   CHECK(img_floats_ptr != nullptr);
124   std::vector<float>& img_floats = *img_floats_ptr;
125   // Read the data from the bitmap file into memory
126   string bmp;
127   TF_CHECK_OK(ReadFileToString(Env::Default(), IMAGE_FILENAME, &bmp));
128   const int fsize = bmp.size();
129   LOG(INFO) << "Read " << IMAGE_FILENAME << ", size = " << fsize << "bytes";
130   const int64 pixel_count = WIDTH * HEIGHT * DEPTH;
131   CHECK(fsize >= 22 /* pos of height */ + sizeof(int));
132   CHECK(bmp.data() != nullptr);
133   uint8* const img_bytes = bit_cast<uint8*>(bmp.data());
134   const int header_size = *(reinterpret_cast<int*>(img_bytes + 10));
135   LOG(INFO) << "header size = " << header_size;
136   const int size = *(reinterpret_cast<int*>(img_bytes + 14));
137   LOG(INFO) << "image size = " << size;
138   const int width = *(reinterpret_cast<int*>(img_bytes + 18));
139   LOG(INFO) << "width = " << width;
140   const int height = *(reinterpret_cast<int*>(img_bytes + 22));
141   LOG(INFO) << "height = " << height;
142   CHECK(fsize >= (WIDTH + 1) * WIDTH * 3 + header_size);
143 
144   uint8* const bmp_pixels = &img_bytes[header_size];
145 
146   img_floats.resize(pixel_count);
147   int src_pixel_index = 0;
148   CHECK(pixel_count % 3 == 0);
149   for (int i = 0; i < pixel_count / 3; ++i) {
150     const int src_pos = 3 * src_pixel_index;
151     const int dst_pos = 3 * i;
152     ++src_pixel_index;
153     CHECK(src_pos + 2 + header_size < fsize);
154     CHECK(dst_pos + 2 < pixel_count);
155     // Convert (B, G, R) in bitmap to (R, G, B)
156     img_floats[dst_pos] =
157         (static_cast<float>(bmp_pixels[src_pos + 2]) - 128.0f) / 128.0f;
158     img_floats[dst_pos + 1] =
159         (static_cast<float>(bmp_pixels[src_pos + 1]) - 128.0f) / 128.0f;
160     img_floats[dst_pos + 2] =
161         (static_cast<float>(bmp_pixels[src_pos]) - 128.0f) / 128.0f;
162     if (DBG_DUMP_FLOAT_DATA) {
163       LOG(INFO) << i << " (" << img_floats[dst_pos] << ", "
164                 << img_floats[dst_pos + 1] << ", " << img_floats[dst_pos + 2]
165                 << ") (" << static_cast<int>(bmp_pixels[src_pos + 2]) << ", "
166                 << static_cast<int>(bmp_pixels[src_pos + 1]) << ", "
167                 << static_cast<int>(bmp_pixels[src_pos]) << ")";
168     }
169     if (src_pixel_index % (WIDTH + 1) == (WIDTH - 1)) {
170       // skip bmp padding
171       ++src_pixel_index;
172     }
173   }
174 }
175 
QuantizeImage(const std::vector<float> & float_vec,std::vector<quint8> * quint8_vec)176 static void QuantizeImage(const std::vector<float>& float_vec,
177                           std::vector<quint8>* quint8_vec) {
178   quint8_vec->resize(float_vec.size());
179   for (int i = 0; i < float_vec.size(); ++i) {
180     quint8_vec->at(i) = FloatToQuantized<quint8>(float_vec[i], -1.0f, 1.0f);
181   }
182 }
183 
BuildImageTensor(const std::vector<float> & img_floats)184 static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
185   LOG(INFO) << "Loading image finished.";
186   Tensor img_tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH});
187   CHECK_EQ(WIDTH * HEIGHT * DEPTH, img_floats.size());
188   CHECK_EQ(img_tensor.TotalBytes(), img_floats.size() * sizeof(float));
189   LOG(INFO) << "Copy data to tensor.";
190   std::memcpy(img_tensor.flat<float>().data(), img_floats.data(),
191               img_tensor.TotalBytes());
192   return img_tensor;
193 }
194 
BuildQuantizedImageTensor(const std::vector<quint8> & quantized_img)195 static Tensor BuildQuantizedImageTensor(
196     const std::vector<quint8>& quantized_img) {
197   LOG(INFO) << "Loading image finished.";
198   Tensor img_tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH});
199   CHECK_EQ(WIDTH * HEIGHT * DEPTH, quantized_img.size());
200   CHECK_EQ(img_tensor.TotalBytes(), quantized_img.size() * sizeof(quint8));
201   LOG(INFO) << "Copy data to tensor.";
202   std::memcpy(img_tensor.flat<quint8>().data(), quantized_img.data(),
203               img_tensor.TotalBytes());
204   return img_tensor;
205 }
206 
207 /* static */ RemoteFusedGraphExecuteInfo
BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(const GraphTransferInfo & graph_transfer_info)208 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
209     const GraphTransferInfo& graph_transfer_info) {
210   RemoteFusedGraphExecuteInfo execute_info;
211   execute_info.set_executor_name("build_hexagon_remote_fused_graph_executor");
212   for (const GraphTransferInfo::GraphInputNodeInfo& input :
213        graph_transfer_info.graph_input_node_info()) {
214     execute_info.add_graph_input_node_name(input.name());
215     RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
216         *execute_info.add_default_graph_input_tensor_shape();
217     tensor_shape_type.set_dtype(input.dtype());
218     TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
219     for (const int64 dim : input.shape()) {
220       tensor_shape_proto.add_dim()->set_size(dim);
221     }
222   }
223 
224   for (const GraphTransferInfo::GraphOutputNodeInfo& output :
225        graph_transfer_info.graph_output_node_info()) {
226     execute_info.add_graph_output_node_name(output.name());
227     RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
228         *execute_info.add_default_graph_output_tensor_shape();
229     tensor_shape_type.set_dtype(output.dtype());
230     TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
231     for (const int64 dim : output.shape()) {
232       tensor_shape_proto.add_dim()->set_size(dim);
233     }
234   }
235 
236   execute_info.set_serialized_executor_parameters(
237       graph_transfer_info.SerializeAsString());
238   return execute_info;
239 }
240 
RunInferenceByHexagonControlWrapper(const GraphTransferer & gt,const Tensor & img_tensor)241 static void RunInferenceByHexagonControlWrapper(const GraphTransferer& gt,
242                                                 const Tensor& img_tensor) {
243   const RemoteFusedGraphExecuteInfo execute_info =
244       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
245           gt.GetGraphTransferInfo());
246 
247   HexagonControlWrapper hexagon_control_wrapper;
248   // 1. Initialize hexagon
249   hexagon_control_wrapper.Init(execute_info);
250 
251   // 2. Setup graph in hexagon
252   hexagon_control_wrapper.SetupGraph();
253 
254   // 3. Fill input node's output
255   hexagon_control_wrapper.FillInputNode("Mul", img_tensor);
256 
257   // 4. Execute graph
258   const int64 start_time_us = Env::Default()->NowMicros();
259   for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
260     hexagon_control_wrapper.ExecuteGraph();
261   }
262   const int64 end_time_us = Env::Default()->NowMicros();
263 
264   // 5-1. Read output node's outputs
265   std::vector<ByteArray> outputs;
266   hexagon_control_wrapper.ReadOutputNode("softmax", &outputs);
267 
268   // 5-2. Dump results
269   DumpTop10Results(outputs);
270   CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
271   LOG(INFO) << "Average execution time = "
272             << (end_time_us - start_time_us) / EXECUTION_REPEAT_COUNT << "us";
273 
274   // 6. Teardown graph in hexagon
275   hexagon_control_wrapper.TeardownGraph();
276 
277   // 7. Finalize hexagon
278   hexagon_control_wrapper.Finalize();
279 }
280 
RunFusedGraph(const GraphDef & fused_graph_def)281 static void RunFusedGraph(const GraphDef& fused_graph_def) {
282   // Setup input tensor
283   std::vector<float> img_floats;
284   LoadImage(&img_floats);
285 
286   LOG(INFO) << "Ioading image finished.";
287   const Tensor img_tensor = BuildImageTensor(img_floats);
288 
289   // Setup session
290   std::vector<Tensor> output_tensors;
291   SessionOptions session_options;
292   session_options.env = Env::Default();
293   std::unique_ptr<Session> session =
294       std::unique_ptr<Session>(NewSession(session_options));
295   TF_ASSERT_OK(session->Create(fused_graph_def));
296 
297   // Setup session arguments
298   RunOptions run_options;
299   run_options.set_trace_level(RunOptions::FULL_TRACE);
300   RunMetadata run_metadata;
301 
302   std::vector<std::pair<string, tensorflow::Tensor>> input_tensors;
303   input_tensors.emplace_back("Mul", img_tensor);
304   std::vector<string> output_node_names;
305   output_node_names.emplace_back(REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME);
306 
307   LOG(INFO) << "Run graph";
308   // Run inference with all node as output
309   TF_ASSERT_OK(session->Run(run_options, input_tensors, output_node_names, {},
310                             &output_tensors, &run_metadata));
311   ASSERT_EQ(1, output_tensors.size());
312   const Tensor& output_tensor = output_tensors.at(0);
313   LOG(INFO) << "Output byte size = " << output_tensor.TotalBytes();
314   LOG(INFO) << "Output shape = " << output_tensor.shape().DebugString();
315   DumpTop10Results(
316       output_tensor.TotalBytes(),
317       reinterpret_cast<const float*>(output_tensor.flat<float>().data()));
318 }
319 
CompareGraphTransferInfo(const GraphTransferInfo & gfi0,const GraphTransferInfo & gfi1)320 static void CompareGraphTransferInfo(const GraphTransferInfo& gfi0,
321                                      const GraphTransferInfo& gfi1) {
322   LOG(INFO) << "(1) node count: " << gfi1.node_info_size() << ", "
323             << gfi1.const_node_info_size();
324 
325   // 1. check node_info
326   ASSERT_EQ(gfi0.node_info_size(), gfi1.node_info_size());
327   for (int i = 0; i < gfi0.node_info_size(); ++i) {
328     const GraphTransferInfo::NodeInfo& ni0 = gfi0.node_info(i);
329     const GraphTransferInfo::NodeInfo& ni1 = gfi1.node_info(i);
330     EXPECT_EQ(ni0.DebugString(), ni1.DebugString());
331     EXPECT_EQ(ni0.ByteSizeLong(), ni1.ByteSizeLong());
332   }
333 
334   // 2. check const_node_info
335   ASSERT_EQ(gfi0.const_node_info_size(), gfi1.const_node_info_size());
336   for (int i = 0; i < gfi0.const_node_info_size(); ++i) {
337     const GraphTransferInfo::ConstNodeInfo& cni0 = gfi0.const_node_info(i);
338     const GraphTransferInfo::ConstNodeInfo& cni1 = gfi1.const_node_info(i);
339     ASSERT_EQ(cni0.shape_size(), cni1.shape_size());
340     for (int j = 0; j < cni0.shape_size(); ++j) {
341       EXPECT_EQ(cni0.shape(j), cni1.shape(j));
342     }
343     EXPECT_EQ(cni0.ByteSizeLong(), cni1.ByteSizeLong());
344     EXPECT_EQ(cni0.DebugString(), cni1.DebugString());
345   }
346 
347   // 3. check node_input_info
348   ASSERT_EQ(gfi0.node_input_info_size(), gfi1.node_input_info_size());
349   for (int i = 0; i < gfi0.node_input_info_size(); ++i) {
350     const GraphTransferInfo::NodeInputInfo& nii0 = gfi0.node_input_info(i);
351     const GraphTransferInfo::NodeInputInfo& nii1 = gfi1.node_input_info(i);
352     EXPECT_EQ(nii0.ByteSizeLong(), nii1.ByteSizeLong());
353     EXPECT_EQ(nii0.DebugString(), nii1.DebugString());
354   }
355 
356   // 4. check node_output_info
357   ASSERT_EQ(gfi0.node_output_info_size(), gfi1.node_output_info_size());
358   for (int i = 0; i < gfi0.node_output_info_size(); ++i) {
359     const GraphTransferInfo::NodeOutputInfo& noi0 = gfi0.node_output_info(i);
360     const GraphTransferInfo::NodeOutputInfo& noi1 = gfi1.node_output_info(i);
361     ASSERT_EQ(noi0.max_byte_size_size(), noi1.max_byte_size_size());
362     for (int j = 0; j < noi0.max_byte_size_size(); ++j) {
363       EXPECT_EQ(noi0.max_byte_size(j), noi1.max_byte_size(j));
364     }
365     EXPECT_EQ(noi0.ByteSizeLong(), noi1.ByteSizeLong());
366     EXPECT_EQ(noi0.DebugString(), noi1.DebugString());
367   }
368 
369   // 5. check graph_input_node_info
370   ASSERT_EQ(gfi0.graph_input_node_info_size(),
371             gfi1.graph_input_node_info_size());
372   for (int i = 0; i < gfi0.graph_input_node_info_size(); ++i) {
373     const GraphTransferInfo::GraphInputNodeInfo& gini0 =
374         gfi0.graph_input_node_info(i);
375     const GraphTransferInfo::GraphInputNodeInfo& gini1 =
376         gfi0.graph_input_node_info(i);
377     EXPECT_EQ(gini0.ByteSizeLong(), gini1.ByteSizeLong());
378     EXPECT_EQ(gini0.DebugString(), gini1.DebugString());
379   }
380 
381   // 6. check graph_output_node_info
382   ASSERT_EQ(gfi0.graph_output_node_info_size(),
383             gfi1.graph_output_node_info_size());
384   for (int i = 0; i < gfi0.graph_output_node_info_size(); ++i) {
385     const GraphTransferInfo::GraphOutputNodeInfo& goni0 =
386         gfi0.graph_output_node_info(i);
387     const GraphTransferInfo::GraphOutputNodeInfo& goni1 =
388         gfi0.graph_output_node_info(i);
389     EXPECT_EQ(goni0.ByteSizeLong(), goni1.ByteSizeLong());
390     EXPECT_EQ(goni0.DebugString(), goni1.DebugString());
391   }
392 }
393 
// CAVEAT: This test only runs when you specify the hexagon library using the
// makefile.
// CAVEAT: This test is disabled by default because hexagon can keep only two
// inception graphs in memory, and those are allocated by two other tests.
// The memory of these graphs is currently not released until the process is
// killed.
// TODO(satok): Figure out how to release memory on hexagon without process
// termination.
401 #ifdef USE_HEXAGON_LIBS
// Loads the inception v3 model with a dry run for unknown shapes and runs it
// on the test image through the hexagon control wrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapper) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& ops_definitions =
      HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_node_names = {"softmax"};

  GraphTransferer gt;
  gt.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);

  // Measure how long loading the graph takes.
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  const Status load_status = gt.LoadGraphFromProtoFile(
      ops_definitions, MODEL_FILENAME, inputs, output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> img_floats;
  LoadImage(&img_floats);
  const Tensor img_tensor = BuildImageTensor(img_floats);
  RunInferenceByHexagonControlWrapper(gt, img_tensor);
}
433 
// Loads the inception v3 model that takes a quantized input and runs it on
// the quantized test image through the hexagon control wrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperQuantizedInput) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller "
            << "with quantized input";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& ops_definitions =
      HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_node_names = {"softmax"};

  GraphTransferer gt;
  gt.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);

  // Measure how long loading the graph takes.
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  const Status load_status = gt.LoadGraphFromProtoFile(
      ops_definitions, MODEL_WITH_QUANTIZED_INPUT_FILENAME, inputs,
      output_node_names,
      false,  // is_text_proto
      false,  // shape_inference_for_unknown_shape
      true    // dry_run_for_unknown_shape
  );
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  // The image is loaded as floats first and quantized afterwards.
  std::vector<float> img_floats;
  LoadImage(&img_floats);
  std::vector<quint8> quantized_img;
  QuantizeImage(img_floats, &quantized_img);
  const Tensor img_tensor = BuildQuantizedImageTensor(quantized_img);
  RunInferenceByHexagonControlWrapper(gt, img_tensor);
}
468 
// Loads the inception v3 model with shape inference for unknown shapes (and
// without a dry run) and runs it on the test image through the hexagon
// control wrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperShapeInference) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& ops_definitions =
      HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_node_names = {"softmax"};

  GraphTransferer gt;
  gt.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);

  // Measure how long loading the graph takes.
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  const Status load_status = gt.LoadGraphFromProtoFile(
      ops_definitions, MODEL_FILENAME, inputs, output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/true,
      /*dry_run_for_unknown_shape=*/false);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> img_floats;
  LoadImage(&img_floats);
  const Tensor img_tensor = BuildImageTensor(img_floats);
  RunInferenceByHexagonControlWrapper(gt, img_tensor);
}
500 
// Reads the inception v3 model, fuses it into a graph whose fetched node is
// REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME and runs the fused graph.
//
// The test image is loaded by RunFusedGraph, so it is not loaded here.
TEST(GraphTransferer, RunInceptionV3OnHexagonExampleWithTfRuntime) {
  LOG(INFO) << "Fuse and run inception v3 on hexagon with tf runtime";
  CheckHexagonControllerVersion();

  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> outputs = {"softmax"};

  GraphDef graph_def;
  Status status = ReadBinaryProto(Env::Default(), MODEL_FILENAME, &graph_def);
  // Print the status so that a missing or broken model file is easy to spot.
  ASSERT_TRUE(status.ok()) << status;

  LOG(INFO) << "Build fused graph";
  GraphDef fused_graph_def = GraphTransferUtils::BuildFusedGraphDef(
      HexagonOpsDefinitions::getInstance(),
      REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME, inputs, outputs, &graph_def);

  RunFusedGraph(fused_graph_def);
}
528 
// Reads an already fused inception v3 graph from FUSED_MODEL_FILENAME and runs
// it.
TEST(GraphTransferer, DISABLED_RunInceptionV3OnHexagonExampleWithFusedGraph) {
  LOG(INFO) << "Run inception v3 with fused graph";
  CheckHexagonControllerVersion();

  GraphDef fused_graph_def;
  // Stop here if the model cannot be read instead of running an empty graph.
  TF_ASSERT_OK(
      ReadBinaryProto(Env::Default(), FUSED_MODEL_FILENAME, &fused_graph_def));
  RunFusedGraph(fused_graph_def);
}
538 
TEST(GraphTransferer,DISABLED_CheckShapeInferencePerformance)539 TEST(GraphTransferer, DISABLED_CheckShapeInferencePerformance) {
540   CheckHexagonControllerVersion();
541   profile_utils::CpuUtils::EnableClockCycleProfiling(true);
542 
543   const IRemoteFusedGraphOpsDefinitions* ops_definitions =
544       &HexagonOpsDefinitions::getInstance();
545   std::vector<std::pair<string, Tensor>> inputs;
546   inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
547   std::vector<string> output_node_names = {"softmax"};
548 
549   RemoteFusedGraphExecuteUtils::TensorShapeMap output_tensor_info0;
550   GraphTransferer gt0;
551   gt0.EnableStrictCheckMode(false);
552   ClockCycleProfiler prof0;
553   prof0.Start();
554   Status status = gt0.LoadGraphFromProtoFile(
555       *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
556       false,  // is_text_proto
557       false,  // shape_inference_for_unknown_shape
558       true    // dry_run_for_unknown_shape
559   );
560   const GraphTransferInfo& gfi0 = gt0.GetGraphTransferInfo();
561 
562   ASSERT_TRUE(status.ok());
563   prof0.Stop();
564   prof0.DumpStatistics("Estimate shape by dryrun");
565 
566   LOG(INFO) << "(0) node count: " << gfi0.node_info_size() << ", "
567             << gfi0.const_node_info_size();
568 
569   RemoteFusedGraphExecuteUtils::TensorShapeMap output_tensor_info1;
570   GraphTransferer gt1;
571   gt1.EnableStrictCheckMode(true);
572   ClockCycleProfiler prof1;
573   prof1.Start();
574   status = gt1.LoadGraphFromProtoFile(
575       *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
576       false,  // is_text_proto
577       true,   // shape_inference_for_unknown_shape
578       false   // dry_run_for_unknown_shape
579   );
580   const GraphTransferInfo& gfi1 = gt1.GetGraphTransferInfo();
581 
582   ASSERT_TRUE(status.ok());
583   prof1.Stop();
584   prof1.DumpStatistics("Estiame shape by shape inference");
585 
586   CompareGraphTransferInfo(gfi0, gfi1);
587 
588   const RemoteFusedGraphExecuteInfo ei0 =
589       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi0);
590   const RemoteFusedGraphExecuteInfo ei1 =
591       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi1);
592 
593   GraphTransferInfo rgfi0;
594   rgfi0.ParseFromString(ei0.serialized_executor_parameters());
595   GraphTransferInfo rgfi1;
596   rgfi1.ParseFromString(ei1.serialized_executor_parameters());
597 
598   CompareGraphTransferInfo(rgfi0, rgfi1);
599   CompareGraphTransferInfo(gfi0, rgfi0);
600   CompareGraphTransferInfo(gfi1, rgfi1);
601 }
602 #endif
603 
604 }  // namespace tensorflow
605