1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
/* Before calling this test program, download a model as follows.
$ curl https://storage.googleapis.com/download.tensorflow.org/models/tensorflow_inception_v3_stripped_optimized_quantized.pb \
  -o /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb
$ adb push /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb \
  /data/local/tmp
$ curl https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_graph_label_strings.txt \
  -o /tmp/imagenet_comp_graph_label_strings.txt
$ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
*/
26
27 // define EIGEN_USE_THREADS to include quantization_utils.h
28 #define EIGEN_USE_THREADS
29
30 #include <memory>
31
32 #include "tensorflow/core/framework/tensor_shape.pb.h"
33 #include "tensorflow/core/framework/tensor_testutil.h"
34 #include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
35 #include "tensorflow/core/kernels/hexagon/graph_transferer.h"
36 #include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
37 #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
38 #include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
39 #include "tensorflow/core/kernels/i_remote_fused_graph_ops_definitions.h"
40 #include "tensorflow/core/kernels/quantization_utils.h"
41 #include "tensorflow/core/lib/core/casts.h"
42 #include "tensorflow/core/lib/core/status.h"
43 #include "tensorflow/core/lib/core/status_test_util.h"
44 #include "tensorflow/core/lib/io/path.h"
45 #include "tensorflow/core/lib/strings/str_util.h"
46 #include "tensorflow/core/platform/env.h"
47 #include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
48 #include "tensorflow/core/platform/test.h"
49 #include "tensorflow/core/public/session.h"
50 #include "tensorflow/core/public/session_options.h"
51
52 namespace tensorflow {
53
// Element type of the output list that this file passes to
// HexagonControlWrapper::ReadOutputNode.
using ByteArray = HexagonControlWrapper::ByteArray;

// Bitmap read by LoadImage().
constexpr const char* const IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
// Model passed to LoadGraphFromProtoFile / ReadBinaryProto by the tests that
// feed a float input.
constexpr const char* const MODEL_FILENAME =
    "/data/local/tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb";
// Model loaded by the test that feeds a DT_QUINT8 input.
constexpr const char* const MODEL_WITH_QUANTIZED_INPUT_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_with_quantized_input."
    "pb";
// Already-fused graph read by the "WithFusedGraph" test.
constexpr const char* const FUSED_MODEL_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_fused_hexagon.pb";
// Node name given to BuildFusedGraphDef and requested as the session output
// in RunFusedGraph().
constexpr const char* const REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME =
    "remote_fused_graph_execute_node";
// NOTE(review): not referenced anywhere in the visible part of this file.
constexpr bool USE_SHAPE_INFERENCE = false;

// When true, LoadImage() logs every converted pixel.
const bool DBG_DUMP_FLOAT_DATA = false;
// Input tensors in this file are shaped {1, WIDTH, HEIGHT, DEPTH}.
const int WIDTH = 299;
const int HEIGHT = 299;
const int DEPTH = 3;
// Index expected at the top of the result ranking (see CheckFirstResult).
const int EXPECTED_FIRST_RESULT_ID = 59;
// Number of ExecuteGraph() calls timed in RunInferenceByHexagonControlWrapper.
const int EXECUTION_REPEAT_COUNT = 10;
76
CheckHexagonControllerVersion()77 static void CheckHexagonControllerVersion() {
78 HexagonControlWrapper hexagon_control_wrapper;
79 const int version = hexagon_control_wrapper.GetVersion();
80 ASSERT_GE(version, 1);
81 LOG(INFO) << "Hexagon controller version is " << version;
82 }
83
DumpTop10Results(const int byte_size,const float * const float_array)84 static void DumpTop10Results(const int byte_size,
85 const float* const float_array) {
86 const int element_count = byte_size / sizeof(float);
87 const string label_filename =
88 "/data/local/tmp/imagenet_comp_graph_label_strings.txt";
89 string label_str;
90 TF_CHECK_OK(ReadFileToString(Env::Default(), label_filename, &label_str));
91 std::vector<string> labels = str_util::Split(label_str, '\n');
92 GraphTransferUtils::DumpTopNFloatResults(
93 float_array, labels.data(),
94 std::min(element_count, static_cast<int>(labels.size())),
95 10 /* show top_n results */);
96 }
97
DumpTop10Results(const std::vector<ByteArray> & outputs)98 static void DumpTop10Results(const std::vector<ByteArray>& outputs) {
99 CHECK(outputs.size() == 1);
100 const int byte_size = std::get<1>(outputs.at(0));
101 const float* float_array =
102 reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
103 DumpTop10Results(byte_size, float_array);
104 }
105
CheckFirstResult(const std::vector<ByteArray> & outputs,const int expected_first_id)106 static void CheckFirstResult(const std::vector<ByteArray>& outputs,
107 const int expected_first_id) {
108 EXPECT_GE(outputs.size(), 1);
109 const int byte_size = std::get<1>(outputs.at(0));
110 const int element_count = byte_size / sizeof(float);
111 const float* float_array =
112 reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
113 EXPECT_GE(element_count, 1);
114 std::vector<string> labels(element_count);
115 std::priority_queue<std::tuple<float, int, string>> queue =
116 GraphTransferUtils::GetTopNFloatResults(float_array, labels.data(),
117 element_count);
118 const std::tuple<float, int, string>& entry = queue.top();
119 EXPECT_EQ(expected_first_id, std::get<1>(entry));
120 }
121
LoadImage(std::vector<float> * img_floats_ptr)122 static void LoadImage(std::vector<float>* img_floats_ptr) {
123 CHECK(img_floats_ptr != nullptr);
124 std::vector<float>& img_floats = *img_floats_ptr;
125 // Read the data from the bitmap file into memory
126 string bmp;
127 TF_CHECK_OK(ReadFileToString(Env::Default(), IMAGE_FILENAME, &bmp));
128 const int fsize = bmp.size();
129 LOG(INFO) << "Read " << IMAGE_FILENAME << ", size = " << fsize << "bytes";
130 const int64 pixel_count = WIDTH * HEIGHT * DEPTH;
131 CHECK(fsize >= 22 /* pos of height */ + sizeof(int));
132 CHECK(bmp.data() != nullptr);
133 uint8* const img_bytes = bit_cast<uint8*>(bmp.data());
134 const int header_size = *(reinterpret_cast<int*>(img_bytes + 10));
135 LOG(INFO) << "header size = " << header_size;
136 const int size = *(reinterpret_cast<int*>(img_bytes + 14));
137 LOG(INFO) << "image size = " << size;
138 const int width = *(reinterpret_cast<int*>(img_bytes + 18));
139 LOG(INFO) << "width = " << width;
140 const int height = *(reinterpret_cast<int*>(img_bytes + 22));
141 LOG(INFO) << "height = " << height;
142 CHECK(fsize >= (WIDTH + 1) * WIDTH * 3 + header_size);
143
144 uint8* const bmp_pixels = &img_bytes[header_size];
145
146 img_floats.resize(pixel_count);
147 int src_pixel_index = 0;
148 CHECK(pixel_count % 3 == 0);
149 for (int i = 0; i < pixel_count / 3; ++i) {
150 const int src_pos = 3 * src_pixel_index;
151 const int dst_pos = 3 * i;
152 ++src_pixel_index;
153 CHECK(src_pos + 2 + header_size < fsize);
154 CHECK(dst_pos + 2 < pixel_count);
155 // Convert (B, G, R) in bitmap to (R, G, B)
156 img_floats[dst_pos] =
157 (static_cast<float>(bmp_pixels[src_pos + 2]) - 128.0f) / 128.0f;
158 img_floats[dst_pos + 1] =
159 (static_cast<float>(bmp_pixels[src_pos + 1]) - 128.0f) / 128.0f;
160 img_floats[dst_pos + 2] =
161 (static_cast<float>(bmp_pixels[src_pos]) - 128.0f) / 128.0f;
162 if (DBG_DUMP_FLOAT_DATA) {
163 LOG(INFO) << i << " (" << img_floats[dst_pos] << ", "
164 << img_floats[dst_pos + 1] << ", " << img_floats[dst_pos + 2]
165 << ") (" << static_cast<int>(bmp_pixels[src_pos + 2]) << ", "
166 << static_cast<int>(bmp_pixels[src_pos + 1]) << ", "
167 << static_cast<int>(bmp_pixels[src_pos]) << ")";
168 }
169 if (src_pixel_index % (WIDTH + 1) == (WIDTH - 1)) {
170 // skip bmp padding
171 ++src_pixel_index;
172 }
173 }
174 }
175
QuantizeImage(const std::vector<float> & float_vec,std::vector<quint8> * quint8_vec)176 static void QuantizeImage(const std::vector<float>& float_vec,
177 std::vector<quint8>* quint8_vec) {
178 quint8_vec->resize(float_vec.size());
179 for (int i = 0; i < float_vec.size(); ++i) {
180 quint8_vec->at(i) = FloatToQuantized<quint8>(float_vec[i], -1.0f, 1.0f);
181 }
182 }
183
BuildImageTensor(const std::vector<float> & img_floats)184 static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
185 LOG(INFO) << "Loading image finished.";
186 Tensor img_tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH});
187 CHECK_EQ(WIDTH * HEIGHT * DEPTH, img_floats.size());
188 CHECK_EQ(img_tensor.TotalBytes(), img_floats.size() * sizeof(float));
189 LOG(INFO) << "Copy data to tensor.";
190 std::memcpy(img_tensor.flat<float>().data(), img_floats.data(),
191 img_tensor.TotalBytes());
192 return img_tensor;
193 }
194
BuildQuantizedImageTensor(const std::vector<quint8> & quantized_img)195 static Tensor BuildQuantizedImageTensor(
196 const std::vector<quint8>& quantized_img) {
197 LOG(INFO) << "Loading image finished.";
198 Tensor img_tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH});
199 CHECK_EQ(WIDTH * HEIGHT * DEPTH, quantized_img.size());
200 CHECK_EQ(img_tensor.TotalBytes(), quantized_img.size() * sizeof(quint8));
201 LOG(INFO) << "Copy data to tensor.";
202 std::memcpy(img_tensor.flat<quint8>().data(), quantized_img.data(),
203 img_tensor.TotalBytes());
204 return img_tensor;
205 }
206
207 /* static */ RemoteFusedGraphExecuteInfo
BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(const GraphTransferInfo & graph_transfer_info)208 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
209 const GraphTransferInfo& graph_transfer_info) {
210 RemoteFusedGraphExecuteInfo execute_info;
211 execute_info.set_executor_name("build_hexagon_remote_fused_graph_executor");
212 for (const GraphTransferInfo::GraphInputNodeInfo& input :
213 graph_transfer_info.graph_input_node_info()) {
214 execute_info.add_graph_input_node_name(input.name());
215 RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
216 *execute_info.add_default_graph_input_tensor_shape();
217 tensor_shape_type.set_dtype(input.dtype());
218 TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
219 for (const int64 dim : input.shape()) {
220 tensor_shape_proto.add_dim()->set_size(dim);
221 }
222 }
223
224 for (const GraphTransferInfo::GraphOutputNodeInfo& output :
225 graph_transfer_info.graph_output_node_info()) {
226 execute_info.add_graph_output_node_name(output.name());
227 RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
228 *execute_info.add_default_graph_output_tensor_shape();
229 tensor_shape_type.set_dtype(output.dtype());
230 TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
231 for (const int64 dim : output.shape()) {
232 tensor_shape_proto.add_dim()->set_size(dim);
233 }
234 }
235
236 execute_info.set_serialized_executor_parameters(
237 graph_transfer_info.SerializeAsString());
238 return execute_info;
239 }
240
RunInferenceByHexagonControlWrapper(const GraphTransferer & gt,const Tensor & img_tensor)241 static void RunInferenceByHexagonControlWrapper(const GraphTransferer& gt,
242 const Tensor& img_tensor) {
243 const RemoteFusedGraphExecuteInfo execute_info =
244 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
245 gt.GetGraphTransferInfo());
246
247 HexagonControlWrapper hexagon_control_wrapper;
248 // 1. Initialize hexagon
249 hexagon_control_wrapper.Init(execute_info);
250
251 // 2. Setup graph in hexagon
252 hexagon_control_wrapper.SetupGraph();
253
254 // 3. Fill input node's output
255 hexagon_control_wrapper.FillInputNode("Mul", img_tensor);
256
257 // 4. Execute graph
258 const int64 start_time_us = Env::Default()->NowMicros();
259 for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
260 hexagon_control_wrapper.ExecuteGraph();
261 }
262 const int64 end_time_us = Env::Default()->NowMicros();
263
264 // 5-1. Read output node's outputs
265 std::vector<ByteArray> outputs;
266 hexagon_control_wrapper.ReadOutputNode("softmax", &outputs);
267
268 // 5-2. Dump results
269 DumpTop10Results(outputs);
270 CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
271 LOG(INFO) << "Average execution time = "
272 << (end_time_us - start_time_us) / EXECUTION_REPEAT_COUNT << "us";
273
274 // 6. Teardown graph in hexagon
275 hexagon_control_wrapper.TeardownGraph();
276
277 // 7. Finalize hexagon
278 hexagon_control_wrapper.Finalize();
279 }
280
RunFusedGraph(const GraphDef & fused_graph_def)281 static void RunFusedGraph(const GraphDef& fused_graph_def) {
282 // Setup input tensor
283 std::vector<float> img_floats;
284 LoadImage(&img_floats);
285
286 LOG(INFO) << "Ioading image finished.";
287 const Tensor img_tensor = BuildImageTensor(img_floats);
288
289 // Setup session
290 std::vector<Tensor> output_tensors;
291 SessionOptions session_options;
292 session_options.env = Env::Default();
293 std::unique_ptr<Session> session =
294 std::unique_ptr<Session>(NewSession(session_options));
295 TF_ASSERT_OK(session->Create(fused_graph_def));
296
297 // Setup session arguments
298 RunOptions run_options;
299 run_options.set_trace_level(RunOptions::FULL_TRACE);
300 RunMetadata run_metadata;
301
302 std::vector<std::pair<string, tensorflow::Tensor>> input_tensors;
303 input_tensors.emplace_back("Mul", img_tensor);
304 std::vector<string> output_node_names;
305 output_node_names.emplace_back(REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME);
306
307 LOG(INFO) << "Run graph";
308 // Run inference with all node as output
309 TF_ASSERT_OK(session->Run(run_options, input_tensors, output_node_names, {},
310 &output_tensors, &run_metadata));
311 ASSERT_EQ(1, output_tensors.size());
312 const Tensor& output_tensor = output_tensors.at(0);
313 LOG(INFO) << "Output byte size = " << output_tensor.TotalBytes();
314 LOG(INFO) << "Output shape = " << output_tensor.shape().DebugString();
315 DumpTop10Results(
316 output_tensor.TotalBytes(),
317 reinterpret_cast<const float*>(output_tensor.flat<float>().data()));
318 }
319
CompareGraphTransferInfo(const GraphTransferInfo & gfi0,const GraphTransferInfo & gfi1)320 static void CompareGraphTransferInfo(const GraphTransferInfo& gfi0,
321 const GraphTransferInfo& gfi1) {
322 LOG(INFO) << "(1) node count: " << gfi1.node_info_size() << ", "
323 << gfi1.const_node_info_size();
324
325 // 1. check node_info
326 ASSERT_EQ(gfi0.node_info_size(), gfi1.node_info_size());
327 for (int i = 0; i < gfi0.node_info_size(); ++i) {
328 const GraphTransferInfo::NodeInfo& ni0 = gfi0.node_info(i);
329 const GraphTransferInfo::NodeInfo& ni1 = gfi1.node_info(i);
330 EXPECT_EQ(ni0.DebugString(), ni1.DebugString());
331 EXPECT_EQ(ni0.ByteSizeLong(), ni1.ByteSizeLong());
332 }
333
334 // 2. check const_node_info
335 ASSERT_EQ(gfi0.const_node_info_size(), gfi1.const_node_info_size());
336 for (int i = 0; i < gfi0.const_node_info_size(); ++i) {
337 const GraphTransferInfo::ConstNodeInfo& cni0 = gfi0.const_node_info(i);
338 const GraphTransferInfo::ConstNodeInfo& cni1 = gfi1.const_node_info(i);
339 ASSERT_EQ(cni0.shape_size(), cni1.shape_size());
340 for (int j = 0; j < cni0.shape_size(); ++j) {
341 EXPECT_EQ(cni0.shape(j), cni1.shape(j));
342 }
343 EXPECT_EQ(cni0.ByteSizeLong(), cni1.ByteSizeLong());
344 EXPECT_EQ(cni0.DebugString(), cni1.DebugString());
345 }
346
347 // 3. check node_input_info
348 ASSERT_EQ(gfi0.node_input_info_size(), gfi1.node_input_info_size());
349 for (int i = 0; i < gfi0.node_input_info_size(); ++i) {
350 const GraphTransferInfo::NodeInputInfo& nii0 = gfi0.node_input_info(i);
351 const GraphTransferInfo::NodeInputInfo& nii1 = gfi1.node_input_info(i);
352 EXPECT_EQ(nii0.ByteSizeLong(), nii1.ByteSizeLong());
353 EXPECT_EQ(nii0.DebugString(), nii1.DebugString());
354 }
355
356 // 4. check node_output_info
357 ASSERT_EQ(gfi0.node_output_info_size(), gfi1.node_output_info_size());
358 for (int i = 0; i < gfi0.node_output_info_size(); ++i) {
359 const GraphTransferInfo::NodeOutputInfo& noi0 = gfi0.node_output_info(i);
360 const GraphTransferInfo::NodeOutputInfo& noi1 = gfi1.node_output_info(i);
361 ASSERT_EQ(noi0.max_byte_size_size(), noi1.max_byte_size_size());
362 for (int j = 0; j < noi0.max_byte_size_size(); ++j) {
363 EXPECT_EQ(noi0.max_byte_size(j), noi1.max_byte_size(j));
364 }
365 EXPECT_EQ(noi0.ByteSizeLong(), noi1.ByteSizeLong());
366 EXPECT_EQ(noi0.DebugString(), noi1.DebugString());
367 }
368
369 // 5. check graph_input_node_info
370 ASSERT_EQ(gfi0.graph_input_node_info_size(),
371 gfi1.graph_input_node_info_size());
372 for (int i = 0; i < gfi0.graph_input_node_info_size(); ++i) {
373 const GraphTransferInfo::GraphInputNodeInfo& gini0 =
374 gfi0.graph_input_node_info(i);
375 const GraphTransferInfo::GraphInputNodeInfo& gini1 =
376 gfi0.graph_input_node_info(i);
377 EXPECT_EQ(gini0.ByteSizeLong(), gini1.ByteSizeLong());
378 EXPECT_EQ(gini0.DebugString(), gini1.DebugString());
379 }
380
381 // 6. check graph_output_node_info
382 ASSERT_EQ(gfi0.graph_output_node_info_size(),
383 gfi1.graph_output_node_info_size());
384 for (int i = 0; i < gfi0.graph_output_node_info_size(); ++i) {
385 const GraphTransferInfo::GraphOutputNodeInfo& goni0 =
386 gfi0.graph_output_node_info(i);
387 const GraphTransferInfo::GraphOutputNodeInfo& goni1 =
388 gfi0.graph_output_node_info(i);
389 EXPECT_EQ(goni0.ByteSizeLong(), goni1.ByteSizeLong());
390 EXPECT_EQ(goni0.DebugString(), goni1.DebugString());
391 }
392 }
393
// CAVEAT: This test only runs when you specify the hexagon library using
// the makefile.
// CAVEAT: This test is disabled by default because hexagon can keep only
// two inception graphs in memory, and those are allocated by the other two
// tests. For now, the memory of these graphs is not released until the
// process is killed.
// TODO(satok): Figure out how to release memory on hexagon without process
// termination.
401 #ifdef USE_HEXAGON_LIBS
// Loads MODEL_FILENAME into a GraphTransferer (unknown shapes resolved by dry
// run), then runs the image through the graph with HexagonControlWrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapper) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  // Graph boundary: a float tensor fed to "Mul", results read from "softmax".
  std::vector<std::pair<string, Tensor>> input_specs;
  input_specs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_names = {"softmax"};
  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();

  GraphTransferer transferer;
  transferer.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status load_status = transferer.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_FILENAME, input_specs, output_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> pixels;
  LoadImage(&pixels);
  const Tensor image = BuildImageTensor(pixels);
  RunInferenceByHexagonControlWrapper(transferer, image);
}
433
// Same flow as the float-input wrapper test, but loads
// MODEL_WITH_QUANTIZED_INPUT_FILENAME and feeds the image as DT_QUINT8.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperQuantizedInput) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller "
            << "with quantized input";
  CheckHexagonControllerVersion();

  // Graph boundary: a quint8 tensor fed to "Mul", results read from
  // "softmax".
  std::vector<std::pair<string, Tensor>> input_specs;
  input_specs.emplace_back("Mul",
                           Tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_names = {"softmax"};
  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();

  GraphTransferer transferer;
  transferer.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status load_status = transferer.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_WITH_QUANTIZED_INPUT_FILENAME, input_specs,
      output_names,
      false,  // is_text_proto
      false,  // shape_inference_for_unknown_shape
      true    // dry_run_for_unknown_shape
  );
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  // Load the image as floats first, then quantize it for the quint8 input.
  std::vector<float> pixels;
  LoadImage(&pixels);
  std::vector<quint8> quantized_pixels;
  QuantizeImage(pixels, &quantized_pixels);
  const Tensor image = BuildQuantizedImageTensor(quantized_pixels);
  RunInferenceByHexagonControlWrapper(transferer, image);
}
468
// Same flow as the float-input wrapper test, but resolves unknown shapes via
// shape inference instead of a dry run.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperShapeInference) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  // Graph boundary: a float tensor fed to "Mul", results read from "softmax".
  std::vector<std::pair<string, Tensor>> input_specs;
  input_specs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_names = {"softmax"};
  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();

  GraphTransferer transferer;
  transferer.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status load_status = transferer.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_FILENAME, input_specs, output_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/true,
      /*dry_run_for_unknown_shape=*/false);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> pixels;
  LoadImage(&pixels);
  const Tensor image = BuildImageTensor(pixels);
  RunInferenceByHexagonControlWrapper(transferer, image);
}
500
// Reads MODEL_FILENAME, fuses the subgraph between "Mul" and "softmax" into a
// single node named REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME, and runs the fused
// graph through a TensorFlow session.
TEST(GraphTransferer, RunInceptionV3OnHexagonExampleWithTfRuntime) {
  LOG(INFO) << "Fuse and run inception v3 on hexagon with tf runtime";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions* ops_definitions =
      &HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> outputs = {"softmax"};

  // The image is not loaded here: RunFusedGraph() loads it itself.
  GraphDef graph_def;
  const Status status =
      ReadBinaryProto(Env::Default(), MODEL_FILENAME, &graph_def);
  // Include the status in the failure output so the cause is visible.
  ASSERT_TRUE(status.ok()) << status;

  LOG(INFO) << "Build fused graph";
  GraphDef fused_graph_def = GraphTransferUtils::BuildFusedGraphDef(
      *ops_definitions, REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME, inputs, outputs,
      &graph_def);

  RunFusedGraph(fused_graph_def);
}
528
// Reads an already-fused graph from FUSED_MODEL_FILENAME and runs it through
// a TensorFlow session.
TEST(GraphTransferer, DISABLED_RunInceptionV3OnHexagonExampleWithFusedGraph) {
  LOG(INFO) << "Run inception v3 with fused graph";
  CheckHexagonControllerVersion();

  GraphDef fused_graph_def;
  // Stop here if the file cannot be read instead of running an empty graph.
  TF_ASSERT_OK(
      ReadBinaryProto(Env::Default(), FUSED_MODEL_FILENAME, &fused_graph_def));
  RunFusedGraph(fused_graph_def);
}
538
// Loads MODEL_FILENAME twice - once resolving unknown shapes by dry run and
// once by shape inference - profiles both loads, and expects both to produce
// the same GraphTransferInfo. Also expects the GraphTransferInfo to survive a
// round trip through the serialized executor parameters of
// RemoteFusedGraphExecuteInfo.
TEST(GraphTransferer, DISABLED_CheckShapeInferencePerformance) {
  CheckHexagonControllerVersion();
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);

  const IRemoteFusedGraphOpsDefinitions* ops_definitions =
      &HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_node_names = {"softmax"};

  // (0) Shapes estimated by dry run.
  GraphTransferer gt0;
  gt0.EnableStrictCheckMode(false);
  ClockCycleProfiler prof0;
  prof0.Start();
  Status status = gt0.LoadGraphFromProtoFile(
      *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
      false,  // is_text_proto
      false,  // shape_inference_for_unknown_shape
      true    // dry_run_for_unknown_shape
  );
  // Stop right after the load so only the load itself is measured.
  prof0.Stop();
  ASSERT_TRUE(status.ok()) << status;
  prof0.DumpStatistics("Estimate shape by dryrun");
  const GraphTransferInfo& gfi0 = gt0.GetGraphTransferInfo();

  LOG(INFO) << "(0) node count: " << gfi0.node_info_size() << ", "
            << gfi0.const_node_info_size();

  // (1) Shapes estimated by shape inference.
  GraphTransferer gt1;
  gt1.EnableStrictCheckMode(true);
  ClockCycleProfiler prof1;
  prof1.Start();
  status = gt1.LoadGraphFromProtoFile(
      *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
      false,  // is_text_proto
      true,   // shape_inference_for_unknown_shape
      false   // dry_run_for_unknown_shape
  );
  prof1.Stop();
  ASSERT_TRUE(status.ok()) << status;
  prof1.DumpStatistics("Estimate shape by shape inference");
  const GraphTransferInfo& gfi1 = gt1.GetGraphTransferInfo();

  CompareGraphTransferInfo(gfi0, gfi1);

  const RemoteFusedGraphExecuteInfo ei0 =
      BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi0);
  const RemoteFusedGraphExecuteInfo ei1 =
      BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi1);

  // Round trip through the serialized form; a parse failure would otherwise
  // leave a partially filled message to compare against.
  GraphTransferInfo rgfi0;
  ASSERT_TRUE(rgfi0.ParseFromString(ei0.serialized_executor_parameters()));
  GraphTransferInfo rgfi1;
  ASSERT_TRUE(rgfi1.ParseFromString(ei1.serialized_executor_parameters()));

  CompareGraphTransferInfo(rgfi0, rgfi1);
  CompareGraphTransferInfo(gfi0, rgfi0);
  CompareGraphTransferInfo(gfi1, rgfi1);
}
602 #endif
603
604 } // namespace tensorflow
605