/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tensorflow_profiler_logger.h"

#include <stdio.h>  // snprintf
#include <stdlib.h>

#include <algorithm>
#include <memory>
#include <string>

#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace {

struct Statistics {
  uint64_t total_bytes_allocated = 0LL;
  uint64_t peak_bytes_in_use = 0LL;
};
static Statistics g_stat_dynamic;
static Statistics g_stat_arena;

static char g_current_op_name[256];

// Adds memory trace information for TensorFlow profiler.
// `stat`: Statistics object for the (de)allocation.
// `is_allocating`: Whether memory is being allocated or deallocated.
// `allocation_bytes`: The number of bytes being allocated or deallocated.
// `requested_bytes`: The number of bytes requested for allocation/deallocation.
// `tensor_id`: A unique ID for the tensor being allocated or deallocated.
//              Usually the memory address should be used.
// `name`: The name of the tensor being allocated or deallocated.
// `dims`: The dimension of the tensor in a string form.
std::string AddTraceMeInternal(Statistics* stat, bool is_allocating,
                               const std::string& allocator_name,
                               int64_t tensor_id, const std::string& name,
                               const std::string& dims,
                               int64_t allocation_bytes,
                               int64_t requested_bytes) {
  if (is_allocating) {
    stat->total_bytes_allocated += allocation_bytes;
  } else {
    stat->total_bytes_allocated -= allocation_bytes;
  }
  stat->peak_bytes_in_use =
      std::max(stat->peak_bytes_in_use, stat->total_bytes_allocated);
  int64_t total_bytes_allocated = stat->total_bytes_allocated;
  int64_t peak_bytes_in_use = stat->peak_bytes_in_use;

  std::string res = tensorflow::profiler::TraceMeEncode(
      is_allocating ? "MemoryAllocation" : "MemoryDeallocation",
      // Note that all of these fields are necessary for profiling UI.
      {{"allocator_name", allocator_name},  // name shown on 'Memory ID'
       {"bytes_allocated", total_bytes_allocated},
       {"peak_bytes_in_use", peak_bytes_in_use},
       {"requested_bytes", requested_bytes},
       {"allocation_bytes", allocation_bytes},
       // Note: addr is used as a key to match alloc and dealloc.
       {"addr", tensor_id},
       // Note that we're using tensor.name not op name here.
       {"tf_op", name},
       {"shape", dims}});
  // Note: bytes_reserved, fragmentation, data_type, region_type, id
  // can be potentially useful but not added.
  return res;
}
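
// Illustration only: a hypothetical 1024-byte dynamic-tensor allocation could
// be recorded with a call like the one below; it bumps g_stat_dynamic and
// returns an encoded "MemoryAllocation" event carrying the running totals.
// All argument values here are made-up examples, not taken from a real run.
//
//   std::string event = AddTraceMeInternal(
//       &g_stat_dynamic, /*is_allocating=*/true,
//       /*allocator_name=*/"_tflite_native_dynamic",
//       /*tensor_id=*/0x7f0000001000, /*name=*/"Conv2D_3:output",
//       /*dims=*/"[1, 224, 224, 3]",
//       /*allocation_bytes=*/1024, /*requested_bytes=*/1024);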

void AddTraceMe(bool is_allocating, TfLiteTensor* tensor,
                size_t allocation_bytes) {
  if (tensor == nullptr || allocation_bytes == 0) return;
  int64_t tensor_id = reinterpret_cast<int64_t>(tensor->data.raw);
  std::string name;
  if (g_current_op_name[0]) {
    name = g_current_op_name;
  }
  if (tensor->name) {
    name += ":";
    name += tensor->name;
  }
  std::string dims = tensor->dims ? GetShapeDebugString(tensor->dims) : "[]";
  int64_t requested_bytes = is_allocating ? allocation_bytes : 0;
  const std::string allocator_name = "_tflite_native_dynamic";

  tensorflow::profiler::TraceMe::InstantActivity(
      [is_allocating, allocator_name, tensor_id, name, dims, allocation_bytes,
       requested_bytes]() {
        return AddTraceMeInternal(&g_stat_dynamic, is_allocating,
                                  allocator_name, tensor_id, name, dims,
                                  allocation_bytes, requested_bytes);
      },
      /*level=*/tensorflow::profiler::TraceMeLevel::kInfo);
}

}  // namespace

void OnTfLiteOpPrepare(const char* op_name, int subgraph_index,
                       int node_index) {
  snprintf(g_current_op_name, sizeof(g_current_op_name), "%sPrepare_%d",
           op_name, node_index);
  // Updates TF's current annotation object by creating scoped annotation obj.
  tensorflow::profiler::ScopedMemoryDebugAnnotation annotation(
      g_current_op_name);
}

tensorflow::profiler::TraceMe* OnTfLiteSubgraphInvoke(const char* name,
                                                      int index) {
  tensorflow::profiler::TraceMe* trace_me =
      new tensorflow::profiler::TraceMe([name, index]() {
        char eventName[256];
        snprintf(eventName, sizeof(eventName), "Subgraph%d", index);
        return tensorflow::profiler::TraceMeEncode(
            eventName, {{"subgraph_name", name}, {"subgraph_index", index}});
      });
  return trace_me;
}

void OnTfLiteSubgraphInvokeEnd(tensorflow::profiler::TraceMe* trace_me) {
  delete trace_me;
}
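
// Sketch of how a caller would pair the two subgraph hooks above; illustration
// only, since the actual call sites are expected to live in the TFLite
// runtime, not in this file. Name and index below are made-up.
//
//   tensorflow::profiler::TraceMe* trace =
//       OnTfLiteSubgraphInvoke(/*name=*/"main", /*index=*/0);
//   // ... execute the subgraph's nodes ...
//   OnTfLiteSubgraphInvokeEnd(trace);  // deletes the TraceMe, ending the span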

tensorflow::profiler::TraceMe* OnTfLiteOpInvoke(const char* op_name,
                                                int subgraph_index,
                                                int node_index) {
  snprintf(g_current_op_name, sizeof(g_current_op_name), "%s_%d", op_name,
           node_index);
  // Updates TF's current annotation object by creating scoped annotation obj.
  tensorflow::profiler::ScopedMemoryDebugAnnotation annotation(
      g_current_op_name);

  tensorflow::profiler::TraceMe* trace_me = new tensorflow::profiler::TraceMe(
      [op_name, subgraph_index, node_index]() {
        char eventName[256];
        // TF ops should have "<detail>:<op_name>" format.
        snprintf(eventName, sizeof(eventName), "%s:%s", op_name, op_name);
        return tensorflow::profiler::TraceMeEncode(
            eventName, {{"is_eager", 0},
                        {"subgraph_index", subgraph_index},
                        {"node_index", node_index}});
      });
  return trace_me;
}

void OnTfLiteOpInvokeEnd(tensorflow::profiler::TraceMe* trace_me) {
  delete trace_me;
}

void OnTfLiteTensorAlloc(TfLiteTensor* tensor, size_t num_bytes) {
  AddTraceMe(/*is_allocating=*/true, tensor, num_bytes);
}

void OnTfLiteTensorDealloc(TfLiteTensor* tensor) {
  if (tensor != nullptr) {
    size_t num_bytes = tensor->bytes;
    AddTraceMe(/*is_allocating=*/false, tensor, num_bytes);
  }
}
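
// Illustration of the tensor-level hooks above: the allocation hook should run
// after the new buffer is installed (tensor->data.raw is used as the "addr"
// key), and the deallocation hook before the buffer is freed, so the two
// events match up. Hypothetical usage with a TfLiteTensor* `tensor`:
//
//   OnTfLiteTensorAlloc(tensor, tensor->bytes);  // data.raw already assigned
//   // ... later ...
//   OnTfLiteTensorDealloc(tensor);               // before releasing data.raw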

void AddArenaTrace(bool is_allocating, int subgraph_index, int arena_id,
                   size_t allocation_bytes) {
  std::string name = "Subgraph" + std::to_string(subgraph_index);
  int64_t tensor_id = arena_id;
  std::string dims = "";
  int64_t requested_bytes = is_allocating ? allocation_bytes : 0;
  const std::string allocator_name = "_tflite_arena";

  tensorflow::profiler::TraceMe::InstantActivity(
      [is_allocating, allocator_name, tensor_id, name, dims, allocation_bytes,
       requested_bytes]() {
        return AddTraceMeInternal(&g_stat_arena, is_allocating, allocator_name,
                                  tensor_id, name, dims, allocation_bytes,
                                  requested_bytes);
      },
      /*level=*/tensorflow::profiler::TraceMeLevel::kInfo);
}

void OnTfLiteArenaAlloc(int subgraph_index, int arena_id, size_t num_bytes) {
  if (num_bytes == 0) return;
  AddArenaTrace(/*is_allocating=*/true, subgraph_index, arena_id, num_bytes);
}

void OnTfLiteArenaDealloc(int subgraph_index, int arena_id, size_t num_bytes) {
  if (num_bytes == 0) return;
  AddArenaTrace(/*is_allocating=*/false, subgraph_index, arena_id, num_bytes);
}
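
// Sketch of the expected pairing for the arena hooks above (illustration only;
// the sizes and indices are made-up). Both hooks ignore zero-byte updates and
// funnel into AddArenaTrace, which records the event under the "_tflite_arena"
// allocator.
//
//   OnTfLiteArenaAlloc(/*subgraph_index=*/0, /*arena_id=*/1,
//                      /*num_bytes=*/64 * 1024);    // arena grows by 64 KiB
//   OnTfLiteArenaDealloc(/*subgraph_index=*/0, /*arena_id=*/1,
//                        /*num_bytes=*/64 * 1024);  // matching release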

}  // namespace tflite