// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/android/orderfile/orderfile_instrumentation.h"

#include <time.h>
#include <unistd.h>

#include <atomic>
#include <cstdio>
#include <cstring>
#include <string>
#include <thread>
#include <vector>

#include "base/android/library_loader/anchor_functions.h"
#include "base/android/orderfile/orderfile_buildflags.h"
#include "base/files/file.h"
#include "base/format_macros.h"
#include "base/json/json_writer.h"
#include "base/logging.h"
#include "base/strings/stringprintf.h"
#include "base/values.h"
#include "build/build_config.h"

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
#include <sstream>

#include "base/command_line.h"
#include "base/time/time.h"
#include "base/trace_event/memory_dump_manager.h"   // no-presubmit-check
#include "base/trace_event/memory_dump_provider.h"  // no-presubmit-check
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

#if !BUILDFLAG(SUPPORTS_CODE_ORDERING)
#error Only supported on architectures supporting code ordering (arm/arm64).
#endif  // !BUILDFLAG(SUPPORTS_CODE_ORDERING)

// Must be applied to all functions within this file.
#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
#define INLINE_AND_NO_INSTRUMENT_FUNCTION \
  __attribute__((always_inline, no_instrument_function))

namespace base {
namespace android {
namespace orderfile {

namespace {

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
// This is defined in content/public/common/content_switches.h, which is not
// accessible in ::base.
constexpr const char kProcessTypeSwitch[] = "type";
#else
// Constant used for StartDelayedDump().
constexpr int kDelayInSeconds = 30;
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

constexpr size_t kMaxTextSizeInBytes = 1 << 27;
constexpr size_t kMaxElements = kMaxTextSizeInBytes / 4;
// Native code currently has ~850k symbols, hence recording up to 1M symbols
// covers all possible callee symbols.
constexpr size_t kMaxReachedSymbols = 1 << 20;
// Up to 3 callers are recorded per callee.
constexpr size_t kCallerBuckets = 3;
// The last bucket counts misses and callers from outside the native code
// bounds.
constexpr size_t kMissesBucketIndex = 3;
constexpr size_t kTotalBuckets = 4;

std::atomic<uint32_t> callee_map[kMaxElements];
static_assert(sizeof(callee_map) == 128 * (1 << 20), "");
// Contains caller offsets: 4 buckets of callers per callee, where the last
// bucket is for misses.
std::atomic<uint32_t> g_caller_offset[kMaxReachedSymbols * kTotalBuckets];
static_assert(sizeof(g_caller_offset) == 16 * (1 << 20), "");
// Corresponding counts for |g_caller_offset|.
std::atomic<uint32_t> g_caller_count[kMaxReachedSymbols * kTotalBuckets];
static_assert(sizeof(g_caller_count) == 16 * (1 << 20), "");
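// A worked check of the sizes asserted above (illustrative arithmetic only):
// the instrumented .text is capped at 2^27 bytes (128 MiB), and callee
// offsets are bucketed at 4-byte granularity, so there are at most
// kMaxElements = 2^27 / 4 = 2^25 callee slots, i.e. 4 * 2^25 bytes = 128 MiB
// for |callee_map|. Each of the up to 2^20 reached callees gets
// kTotalBuckets = 4 slots, so |g_caller_offset| and |g_caller_count| each
// hold 2^22 uint32_t entries, i.e. 16 MiB apiece.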
// Index for |g_caller_offset| and |g_caller_count|.
std::atomic<uint32_t> g_callers_index;
std::atomic<uint32_t> g_calls_count;
std::atomic<bool> g_disabled;

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
// Dump offsets when a memory dump is requested. Used only if
// switches::kDevtoolsInstrumentationDumping is set.
class OrderfileMemoryDumpHook : public base::trace_event::MemoryDumpProvider {
  NO_INSTRUMENT_FUNCTION bool OnMemoryDump(
      const base::trace_event::MemoryDumpArgs& args,
      base::trace_event::ProcessMemoryDump* pmd) override {
    if (!Disable())
      return true;  // A dump has already been started.

    std::string process_type =
        base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
            kProcessTypeSwitch);
    if (process_type.empty())
      process_type = "browser";

    Dump(process_type);
    return true;  // If something goes awry, a fatal error will be created
                  // internally.
  }
};
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

// This is not racy. It is guaranteed that any number of threads concurrently
// calling this function in any order will always end up with the same count
// at the end. It returns |element|'s value before the increment.
INLINE_AND_NO_INSTRUMENT_FUNCTION uint32_t
AtomicIncrement(std::atomic<uint32_t>* element) {
  return element->fetch_add(1, std::memory_order_relaxed);
}

// Increment the miss bucket for a callee. |index| is the first bucket of
// callers for this callee.
INLINE_AND_NO_INSTRUMENT_FUNCTION void RecordMiss(size_t index) {
  AtomicIncrement(g_caller_count + index + kMissesBucketIndex);
}

// Increment the caller count if the caller has previously been registered.
// If it hasn't, search for an empty bucket and register the caller.
// If no bucket is available, return false.
// |index| is the first bucket to register callers for a certain callee.
INLINE_AND_NO_INSTRUMENT_FUNCTION bool RecordCaller(size_t index,
                                                    size_t caller_offset) {
  for (size_t i = index; i < index + kCallerBuckets; i++) {
    auto offset = g_caller_offset[i].load(std::memory_order_relaxed);
    // This check is racy; a write could have happened between the load and the
    // check.
    if (offset == caller_offset) {
      // Caller already recorded, increment the count.
      AtomicIncrement(g_caller_count + i);
      return true;
    }
  }

  for (size_t i = index; i < index + kCallerBuckets; i++) {
    auto offset = g_caller_offset[i].load(std::memory_order_relaxed);
    size_t expected = 0;
    if (!offset) {
      // This is not racy as the compare and exchange is done atomically.
      // It is impossible to reset a bucket once it has been set. The value in
      // |g_caller_offset[i]| is exchanged with |caller_offset| if
      // |g_caller_offset[i] == expected|.
      // Otherwise, it returns false and sets |expected = g_caller_offset[i]|.
      if (g_caller_offset[i].compare_exchange_strong(
              expected, caller_offset, std::memory_order_relaxed,
              std::memory_order_relaxed)) {
        AtomicIncrement(g_caller_count + i);
        return true;
      }
    }
    // This will decrease the chances that we miss something due to unseen
    // changes made by another thread.
    if (offset == caller_offset || expected == caller_offset) {
      AtomicIncrement(g_caller_count + i);
      return true;
    }
  }
  return false;
}
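// A sketch of how the buckets fill up for one callee (illustrative, offsets
// chosen arbitrarily): with kCallerBuckets == 3, the first three distinct
// caller offsets seen for a callee each claim one bucket and have their
// counts incremented on every call. A fourth distinct caller finds no
// matching or empty bucket, RecordCaller() returns false, and RecordAddress()
// below records the call in the misses bucket instead, alongside callers
// that lie outside the native code bounds.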

template <bool for_testing>
__attribute__((always_inline, no_instrument_function)) void RecordAddress(
    size_t callee_address,
    size_t caller_address) {
  bool disabled = g_disabled.load(std::memory_order_relaxed);
  if (disabled)
    return;

  const size_t start =
      for_testing ? kStartOfTextForTesting : base::android::kStartOfText;
  const size_t end =
      for_testing ? kEndOfTextForTesting : base::android::kEndOfText;

  if (UNLIKELY(callee_address < start || callee_address > end)) {
    // Only the code in the native library is instrumented. Callees are
    // expected to be within the native library bounds.
    Disable();
    ImmediateCrash();
  }

  size_t offset = callee_address - start;
  static_assert(sizeof(int) == 4,
                "Collection and processing code assumes that sizeof(int) == 4");

  size_t offset_index = offset / 4;

  if (UNLIKELY(offset_index >= kMaxElements))
    return;

  std::atomic<uint32_t>* element = callee_map + offset_index;
  uint32_t callers_index = element->load(std::memory_order_relaxed);

  // Racy check.
  if (callers_index == 0) {
    // Fragmentation is possible as we increment |g_callers_index| based on a
    // racy check.
    uint32_t insertion_index = AtomicIncrement(&g_callers_index) + 1;
    if (UNLIKELY(insertion_index >= kMaxReachedSymbols))
      return;

    uint32_t expected = 0;
    // Exchanges the value in |element| with |insertion_index| if the value in
    // |element| equals |expected|. Otherwise, sets |expected| to the value in
    // |element|.
    element->compare_exchange_strong(expected, insertion_index,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
    // If |expected| is set, then this callee has previously been seen and
    // already has a corresponding index in the callers array.
    callers_index = expected == 0 ? insertion_index : expected;
  }

  AtomicIncrement(&g_calls_count);
  callers_index *= kTotalBuckets;
  if (caller_address <= start || caller_address > end ||
      !RecordCaller(callers_index, caller_address - start)) {
    // Record a miss if the caller is not within the bounds of the native code
    // or there are no empty buckets left to record one more caller for this
    // callee.
    RecordMiss(callers_index);
  }
}
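// To illustrate the mapping above (a sketch, numbers chosen arbitrarily): a
// callee at |start| + 0x40 has offset_index 0x10, so callee_map[0x10] holds
// its 1-based slot in the callers arrays. If that slot is, say, 5, its three
// caller buckets and one miss bucket live at g_caller_offset[20..23] and
// g_caller_count[20..23] (5 * kTotalBuckets == 20).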

NO_INSTRUMENT_FUNCTION bool DumpToFile(const base::FilePath& path) {
  auto file =
      base::File(path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
  if (!file.IsValid()) {
    PLOG(ERROR) << "Could not open " << path;
    return false;
  }

  if (g_callers_index == 0) {
    LOG(ERROR) << "No entries to dump";
    return false;
  }

  // This can get very large as it constructs the whole data structure in
  // memory before dumping it to the file.
  uint32_t total_calls_count = g_calls_count.load(std::memory_order_relaxed);
  auto root = base::Value::Dict().Set(
      "total_calls_count", base::StringPrintf("%" PRIu32, total_calls_count));
  auto call_graph = base::Value::List();
  for (size_t i = 0; i < kMaxElements; i++) {
    auto caller_index =
        callee_map[i].load(std::memory_order_relaxed) * kTotalBuckets;
    if (!caller_index) {
      // This callee was never called.
      continue;
    }

    uint32_t callee_offset = i * 4;
    auto callee_element =
        base::Value::Dict()
            .Set("index", base::StringPrintf("%" PRIuS, caller_index))
            .Set("callee_offset",
                 base::StringPrintf("%" PRIu32, callee_offset));
    std::string offset_str;
    auto callers_list = base::Value::List();
    for (size_t j = 0; j < kTotalBuckets; j++) {
      uint32_t caller_offset =
          g_caller_offset[caller_index + j].load(std::memory_order_relaxed);

      // The last bucket is for misses or callers outside the native library;
      // the caller_offset for this bucket is 0.
      if (j != kMissesBucketIndex && !caller_offset)
        continue;

      uint32_t count =
          g_caller_count[caller_index + j].load(std::memory_order_relaxed);
      // The count can only be 0 for the misses bucket. Otherwise,
      // if |caller_offset| is set then the count must be >= 1.
      CHECK_EQ(count || j == kMissesBucketIndex, true);
      if (!count) {
        // No misses.
        continue;
      }

      auto caller_count =
          base::Value::Dict()
              .Set("caller_offset",
                   base::StringPrintf("%" PRIu32, caller_offset))
              .Set("count", base::StringPrintf("%" PRIu32, count));
      callers_list.Append(std::move(caller_count));
    }
    callee_element.Set("caller_and_count", std::move(callers_list));
    call_graph.Append(std::move(callee_element));
  }

  root.Set("call_graph", std::move(call_graph));
  std::string output_js;
  if (!JSONWriter::Write(root, &output_js)) {
    LOG(FATAL) << "Error getting JSON string";
  }
  if (file.WriteAtCurrentPos(output_js.c_str(),
                             static_cast<int>(output_js.size())) < 0) {
    // If the file could be opened, but writing has failed, it's likely that
    // data was partially written. Producing incomplete profiling data would
    // lead to a poorly performing orderfile, but might not otherwise be
    // noticed. So we crash instead.
    LOG(FATAL) << "Error writing profile data";
  }
  return true;
}
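// Roughly, the JSON written above has the following shape (values elided;
// this is a sketch derived from the code above, not a schema defined
// elsewhere):
//   {
//     "total_calls_count": "...",
//     "call_graph": [
//       {
//         "index": "...",
//         "callee_offset": "...",
//         "caller_and_count": [ { "caller_offset": "...", "count": "..." } ]
//       }
//     ]
//   }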

// Stops recording and dumps the collected data to a file under
// /data/local/tmp/chrome/orderfile/.
NO_INSTRUMENT_FUNCTION void StopAndDumpToFile(int pid,
                                              uint64_t start_ns_since_epoch,
                                              const std::string& tag) {
  std::string tag_str;
  if (!tag.empty())
    tag_str = base::StringPrintf("%s-", tag.c_str());
  auto path = base::StringPrintf(
      "/data/local/tmp/chrome/orderfile/profile-hitmap-%s%d-%" PRIu64 ".txt",
      tag_str.c_str(), pid, start_ns_since_epoch);

  if (!DumpToFile(base::FilePath(path))) {
    LOG(ERROR) << "Problem with dump (" << tag << ")";
  }
}

}  // namespace

// It is safe to call any function after |Disable()| has been called; there is
// no risk of infinite recursion. Returns true only for the call that actually
// transitioned |g_disabled| from false to true.
NO_INSTRUMENT_FUNCTION bool Disable() {
  bool disabled = g_disabled.exchange(true, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return !disabled;
}

NO_INSTRUMENT_FUNCTION void StartDelayedDump() {
#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
  static auto* g_orderfile_memory_dump_hook = new OrderfileMemoryDumpHook();
  base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
      g_orderfile_memory_dump_hook, "Orderfile", nullptr);
// Return, letting devtools tracing handle any dumping.
#else
  // Using std::thread and not using base::TimeTicks() in order to not call
  // too many base:: symbols that would pollute the reached symbol dumps.
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    PLOG(FATAL) << "clock_gettime.";
  uint64_t start_ns_since_epoch =
      static_cast<uint64_t>(ts.tv_sec) * 1000 * 1000 * 1000 + ts.tv_nsec;
  int pid = getpid();
  std::thread([pid, start_ns_since_epoch]() {
    sleep(kDelayInSeconds);
    if (Disable())
      StopAndDumpToFile(pid, start_ns_since_epoch, "");
  }).detach();
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
}

NO_INSTRUMENT_FUNCTION void Dump(const std::string& tag) {
  // As profiling has been disabled, none of the uses of ::base symbols below
  // will enter the symbol dump.
  StopAndDumpToFile(
      getpid(), (base::Time::Now() - base::Time::UnixEpoch()).InNanoseconds(),
      tag);
}

NO_INSTRUMENT_FUNCTION void ResetForTesting() {
  Disable();
  memset(reinterpret_cast<uint32_t*>(callee_map), 0,
         sizeof(uint32_t) * kMaxElements);
  memset(reinterpret_cast<uint32_t*>(g_caller_offset), 0,
         sizeof(uint32_t) * kMaxReachedSymbols * kTotalBuckets);
  memset(reinterpret_cast<uint32_t*>(g_caller_count), 0,
         sizeof(uint32_t) * kMaxReachedSymbols * kTotalBuckets);
  g_callers_index = 0;
  g_disabled = false;
}

NO_INSTRUMENT_FUNCTION void RecordAddressForTesting(size_t callee_address,
                                                    size_t caller_address) {
  return RecordAddress<true>(callee_address, caller_address);
}

// Returns a flattened vector where each callee gets 9 consecutive entries:
// the callee offset, followed by [caller offset, count] pairs for each of
// the 4 buckets.
NO_INSTRUMENT_FUNCTION std::vector<size_t> GetOrderedOffsetsForTesting() {
  std::vector<size_t> result;
  for (size_t i = 0; i < kMaxElements; i++) {
    auto caller_index =
        callee_map[i].load(std::memory_order_relaxed) * kTotalBuckets;
    if (!caller_index)
      continue;

    result.push_back(i * 4);
    for (size_t j = 0; j < kTotalBuckets; j++) {
      uint32_t count =
          g_caller_count[caller_index + j].load(std::memory_order_relaxed);

      uint32_t caller_offset =
          g_caller_offset[caller_index + j].load(std::memory_order_relaxed);
      result.push_back(caller_offset);
      result.push_back(count);
    }
  }
  return result;
}

}  // namespace orderfile
}  // namespace android
}  // namespace base

extern "C" {

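// Entry hook invoked by the compiler's function-entry instrumentation
// (presumably something like clang's -finstrument-function-entry-bare; the
// exact build flag is not visible from this file). At this point,
// __builtin_return_address(0) is an address inside the instrumented callee
// and __builtin_return_address(1) is an address inside that callee's caller,
// which is how RecordAddress() interprets its two arguments.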
NO_INSTRUMENT_FUNCTION void __cyg_profile_func_enter_bare() {
  base::android::orderfile::RecordAddress<false>(
      reinterpret_cast<size_t>(__builtin_return_address(0)),
      reinterpret_cast<size_t>(__builtin_return_address(1)));
}

}  // extern "C"