1 /* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/tensorflow_profiler_logger.h"
17
18 #include <stdlib.h>
19
20 #include <algorithm>
21 #include <memory>
22 #include <string>
23
24 #include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"
25 #include "tensorflow/core/profiler/lib/traceme.h"
26 #include "tensorflow/lite/c/common.h"
27 #include "tensorflow/lite/kernels/kernel_util.h"
28
29 namespace tflite {
30 namespace {
31
// Running byte counters for one allocator kind.
struct Statistics {
  // Bytes currently accounted for: increased on each recorded allocation and
  // decreased on each recorded deallocation.
  uint64_t total_bytes_allocated{0};
  // Largest value `total_bytes_allocated` has reached so far.
  uint64_t peak_bytes_in_use{0};
};
36 static Statistics g_stat_dynamic;
37 static Statistics g_stat_arena;
38
39 static char g_current_op_name[256];
40
41 // Adds memory trace information for TensorFlow profiler.
42 // `stat`: Statistics object for the (de)allocation.
43 // `is_allocating`: Whether memory is being allocated or deallocated.
44 // `allocation_bytes`: The number of bytes being allocated or deallocated.
45 // `requested_bytes`: The number of bytes requested for allocation/deallocation.
46 // `tensor_id`: A unique ID for the tensor being allocated or deallocated.
47 // Usually the memory address should be used.
48 // `name`: The name of the tensor being allocated or deallocated.
49 // `dims`: The dimension of the tensor in a string form.
AddTraceMeInternal(Statistics * stat,bool is_allocating,const std::string & allocator_name,int64_t tensor_id,const std::string & name,const std::string & dims,int64_t allocation_bytes,int64_t requested_bytes)50 std::string AddTraceMeInternal(Statistics* stat, bool is_allocating,
51 const std::string& allocator_name,
52 int64_t tensor_id, const std::string& name,
53 const std::string& dims,
54 int64_t allocation_bytes,
55 int64_t requested_bytes) {
56 if (is_allocating) {
57 stat->total_bytes_allocated += allocation_bytes;
58 } else {
59 stat->total_bytes_allocated -= allocation_bytes;
60 }
61 stat->peak_bytes_in_use =
62 std::max(stat->peak_bytes_in_use, stat->total_bytes_allocated);
63 int64_t total_bytes_allocated = stat->total_bytes_allocated;
64 int64_t peak_bytes_in_use = stat->peak_bytes_in_use;
65
66 std::string res = tensorflow::profiler::TraceMeEncode(
67 is_allocating ? "MemoryAllocation" : "MemoryDeallocation",
68 // Note that all of these fields are necessary for profiling UI.
69 {{"allocator_name", allocator_name}, // name shown on 'Memory ID'
70 {"bytes_allocated", total_bytes_allocated},
71 {"peak_bytes_in_use", peak_bytes_in_use},
72 {"requested_bytes", requested_bytes},
73 {"allocation_bytes", allocation_bytes},
74 // Note: addr is used as a key to match alloc and dealloc.
75 {"addr", tensor_id},
76 // Note that we're using tensor.name not op name here.
77 {"tf_op", name},
78 {"shape", dims}});
79 // Note: bytes_reserved, fragmentation, data_type, region_type, id
80 // can be potentially useful but not added.
81 return res;
82 }
83
AddTraceMe(bool is_allocating,TfLiteTensor * tensor,size_t allocation_bytes)84 void AddTraceMe(bool is_allocating, TfLiteTensor* tensor,
85 size_t allocation_bytes) {
86 if (tensor == nullptr || allocation_bytes == 0) return;
87 int64_t tensor_id = reinterpret_cast<int64_t>(tensor->data.raw);
88 std::string name;
89 if (g_current_op_name[0]) {
90 name = g_current_op_name;
91 }
92 if (tensor->name) {
93 name += ":";
94 name += tensor->name;
95 }
96 std::string dims = tensor->dims ? GetShapeDebugString(tensor->dims) : "[]";
97 int64_t requested_bytes = is_allocating ? allocation_bytes : 0;
98 const std::string allocator_name = "_tflite_native_dynamic";
99
100 tensorflow::profiler::TraceMe::InstantActivity(
101 [is_allocating, allocator_name, tensor_id, name, dims, allocation_bytes,
102 requested_bytes]() {
103 return AddTraceMeInternal(&g_stat_dynamic, is_allocating,
104 allocator_name, tensor_id, name, dims,
105 allocation_bytes, requested_bytes);
106 },
107 /*level=*/tensorflow::profiler::TraceMeLevel::kInfo);
108 }
109
110 } // namespace
111
OnTfLiteOpPrepare(const char * op_name,int subgraph_index,int node_index)112 void OnTfLiteOpPrepare(const char* op_name, int subgraph_index,
113 int node_index) {
114 snprintf(g_current_op_name, sizeof(g_current_op_name), "%sPrepare_%d",
115 op_name, node_index);
116 // Updates TF's current annotation object by creating scoped annotation obj.
117 tensorflow::profiler::ScopedMemoryDebugAnnotation annotation(
118 g_current_op_name);
119 }
120
OnTfLiteSubgraphInvoke(const char * name,int index)121 tensorflow::profiler::TraceMe* OnTfLiteSubgraphInvoke(const char* name,
122 int index) {
123 tensorflow::profiler::TraceMe* trace_me =
124 new tensorflow::profiler::TraceMe([name, index]() {
125 char eventName[256];
126 snprintf(eventName, sizeof(eventName), "Subgraph%d", index);
127 return tensorflow::profiler::TraceMeEncode(
128 eventName, {{"subgraph_name", name}, {"subgraph_index", index}});
129 });
130 return trace_me;
131 }
132
OnTfLiteSubgraphInvokeEnd(tensorflow::profiler::TraceMe * trace_me)133 void OnTfLiteSubgraphInvokeEnd(tensorflow::profiler::TraceMe* trace_me) {
134 delete trace_me;
135 }
136
OnTfLiteOpInvoke(const char * op_name,int subgraph_index,int node_index)137 tensorflow::profiler::TraceMe* OnTfLiteOpInvoke(const char* op_name,
138 int subgraph_index,
139 int node_index) {
140 snprintf(g_current_op_name, sizeof(g_current_op_name), "%s_%d", op_name,
141 node_index);
142 // Updates TF's current annotation object by creating scoped annotation obj.
143 tensorflow::profiler::ScopedMemoryDebugAnnotation annotation(
144 g_current_op_name);
145
146 tensorflow::profiler::TraceMe* trace_me = new tensorflow::profiler::TraceMe(
147 [op_name, subgraph_index, node_index]() {
148 char eventName[256];
149 // TF ops should have "<detail>:<op_name>" format.
150 snprintf(eventName, sizeof(eventName), "%s:%s", op_name, op_name);
151 return tensorflow::profiler::TraceMeEncode(
152 eventName, {{"is_eager", 0},
153 {"subgraph_index", subgraph_index},
154 {"node_index", node_index}});
155 });
156 return trace_me;
157 }
158
OnTfLiteOpInvokeEnd(tensorflow::profiler::TraceMe * trace_me)159 void OnTfLiteOpInvokeEnd(tensorflow::profiler::TraceMe* trace_me) {
160 delete trace_me;
161 }
162
OnTfLiteTensorAlloc(TfLiteTensor * tensor,size_t num_bytes)163 void OnTfLiteTensorAlloc(TfLiteTensor* tensor, size_t num_bytes) {
164 AddTraceMe(/*is_allocating=*/true, tensor, num_bytes);
165 }
166
OnTfLiteTensorDealloc(TfLiteTensor * tensor)167 void OnTfLiteTensorDealloc(TfLiteTensor* tensor) {
168 if (tensor != nullptr) {
169 size_t num_bytes = tensor->bytes;
170 AddTraceMe(/*is_allocating=*/false, tensor, num_bytes);
171 }
172 }
173
AddArenaTrace(bool is_allocating,int subgraph_index,int arena_id,size_t allocation_bytes)174 void AddArenaTrace(bool is_allocating, int subgraph_index, int arena_id,
175 size_t allocation_bytes) {
176 std::string name = "Subgraph" + std::to_string(subgraph_index);
177 int64_t tensor_id = arena_id;
178 std::string dims = "";
179 int64_t requested_bytes = is_allocating ? allocation_bytes : 0;
180 const std::string allocator_name = "_tflite_arena";
181
182 tensorflow::profiler::TraceMe::InstantActivity(
183 [is_allocating, allocator_name, tensor_id, name, dims, allocation_bytes,
184 requested_bytes]() {
185 return AddTraceMeInternal(&g_stat_arena, is_allocating, allocator_name,
186 tensor_id, name, dims, allocation_bytes,
187 requested_bytes);
188 },
189 /*level=*/tensorflow::profiler::TraceMeLevel::kInfo);
190 }
191
OnTfLiteArenaAlloc(int subgraph_index,int arena_id,size_t num_bytes)192 void OnTfLiteArenaAlloc(int subgraph_index, int arena_id, size_t num_bytes) {
193 if (num_bytes == 0) return;
194 AddArenaTrace(/*is_allocating=*/true, subgraph_index, arena_id, num_bytes);
195 }
196
OnTfLiteArenaDealloc(int subgraph_index,int arena_id,size_t num_bytes)197 void OnTfLiteArenaDealloc(int subgraph_index, int arena_id, size_t num_bytes) {
198 if (num_bytes == 0) return;
199 AddArenaTrace(/*is_allocating=*/false, subgraph_index, arena_id, num_bytes);
200 }
201
202 } // namespace tflite
203