// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/android/orderfile/orderfile_instrumentation.h"

#include <time.h>
#include <unistd.h>

#include <atomic>
#include <cstdio>
#include <cstring>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

#include "base/android/library_loader/anchor_functions.h"
#include "base/android/orderfile/orderfile_buildflags.h"
#include "base/files/file.h"
#include "base/format_macros.h"
#include "base/logging.h"
#include "base/strings/stringprintf.h"
#include "build/build_config.h"

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
#include "base/command_line.h"
#include "base/time/time.h"
#include "base/trace_event/memory_dump_manager.h"   // no-presubmit-check
#include "base/trace_event/memory_dump_provider.h"  // no-presubmit-check
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

#if !BUILDFLAG(SUPPORTS_CODE_ORDERING)
#error Requires code ordering support (arm/arm64/x86/x86_64).
#endif  // !BUILDFLAG(SUPPORTS_CODE_ORDERING)

// Must be applied to all functions within this file.
#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))

namespace base {
namespace android {
namespace orderfile {

namespace {
// Constants used for StartDelayedDump().
constexpr int kDelayInSeconds = 30;
constexpr int kInitialDelayInSeconds = kPhases == 1 ? kDelayInSeconds : 5;
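// Resulting schedule (see StartDelayedDump() below): with a single phase, one
// dump happens 30 s after startup; with kPhases == 2, the first phase ends
// after 5 s, and the final dump of both phases happens 30 s later, at ~35 s.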

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
// This is defined in content/public/common/content_switches.h, which is not
// accessible in ::base.
constexpr const char kProcessTypeSwitch[] = "type";
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

// These are large overestimates, which is not an issue, as the data is
// allocated in .bss, and on Linux doesn't take any actual memory when it's
// not touched.
constexpr size_t kBitfieldSize = 1 << 22;
constexpr size_t kMaxTextSizeInBytes = kBitfieldSize * (4 * 32);
constexpr size_t kMaxElements = 1 << 20;
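// Sizing, for reference: each bit of |offsets| below covers one 4-byte-aligned
// .text offset, and each of its 2^22 uint32_t words covers 32 of them, hence
// kMaxTextSizeInBytes = 2^22 * 4 * 32 bytes = 512 MiB of code. The bitfield
// itself occupies 2^22 * 4 bytes = 16 MiB of .bss per phase.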

// Data required to log reached offsets.
struct LogData {
  std::atomic<uint32_t> offsets[kBitfieldSize];
  std::atomic<size_t> ordered_offsets[kMaxElements];
  std::atomic<size_t> index;
};

LogData g_data[kPhases];
std::atomic<int> g_data_index;

// Number of unexpected addresses, that is, addresses that are not within
// [start, end) bounds for the executable code.
//
// This should be exactly 0, since the start and end of .text should be known
// perfectly by the linker, but it does happen. See crbug.com/1186598.
std::atomic<int> g_unexpected_addresses;

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
// Dump offsets when a memory dump is requested. Used only if
// switches::kDevtoolsInstrumentationDumping is set.
class OrderfileMemoryDumpHook : public base::trace_event::MemoryDumpProvider {
  NO_INSTRUMENT_FUNCTION bool OnMemoryDump(
      const base::trace_event::MemoryDumpArgs& args,
      base::trace_event::ProcessMemoryDump* pmd) override {
    // Disable instrumentation now to cut down on orderfile pollution.
    if (!Disable()) {
      return true;  // A dump has already been started.
    }
    Dump(base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
        kProcessTypeSwitch));
    return true;  // If something goes awry, a fatal error will be created
                  // internally.
  }
};
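// The hook is registered in StartDelayedDump(). OnMemoryDump() then runs
// whenever a memory-infra dump is requested, e.g. from a devtools tracing
// session, which is what ends the profiling phase in this mode.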
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

// |RecordAddress()| adds an element to a concurrent bitset and to a concurrent
// append-only list of offsets.
//
// Ordering:
// Two consecutive calls to |RecordAddress()| from the same thread will be
// ordered in the same way in the result, as written by
// |StopAndDumpToFile()|. The result will contain exactly one instance of each
// unique offset relative to |kStartOfText| passed to |RecordAddress()|.
//
// Implementation:
// The "set" part is implemented with a bitfield, |g_data[phase].offsets|. The
// insertion order is recorded in |g_data[phase].ordered_offsets|.
// This is not a class, to make sure there isn't a static constructor, as it
// would cause issues with an instrumented static constructor calling this
// code.
//
// Limitations:
// - Only records offsets to addresses between |kStartOfText| and |kEndOfText|.
// - Capacity of the set is limited by |kMaxElements|.
// - Some insertions at the end of collection may be lost.
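//
// Worked example: for |address| = kStartOfText + 0x104, offset = 0x104 and
// offset_index = 0x41 (65), so bit 65 % 32 = 1 of offsets[65 / 32], that is
// offsets[2], is set, and 0x104 is appended to |ordered_offsets| if the bit
// was previously clear.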

// Records that |address| has been reached, if recording is enabled.
// To avoid infinite recursion, this *must* *never* call any instrumented
// function, unless |Disable()| is called first.
template <bool for_testing>
__attribute__((always_inline, no_instrument_function)) void RecordAddress(
    size_t address) {
  int index = g_data_index.load(std::memory_order_relaxed);
  if (index >= kPhases)
    return;

  const size_t start =
      for_testing ? kStartOfTextForTesting : base::android::kStartOfText;
  const size_t end =
      for_testing ? kEndOfTextForTesting : base::android::kEndOfText;
  if (UNLIKELY(address < start || address > end)) {
    if (!AreAnchorsSane()) {
      // Something is really wrong with the anchors, and this is likely to be
      // triggered from within a static constructor, where logging is likely
      // to deadlock. By crashing immediately we at least have a chance to get
      // a stack trace from the system to give some clue about the nature of
      // the problem.
      ImmediateCrash();
    }

    // We should really crash at the first instance, but it does happen on
    // bots, for a mysterious reason. Give it some leeway. Note that since we
    // don't remember the caller address, if a single function is misplaced
    // but we get many calls to it, then we still crash. If this is the case,
    // add deduplication.
    //
    // Bumped to 100 temporarily as part of the crbug.com/1265928
    // investigation.
    if (g_unexpected_addresses.fetch_add(1, std::memory_order_relaxed) < 100) {
      return;
    }

    Disable();
    LOG(FATAL) << "Too many unexpected addresses! start = " << std::hex
               << start << " end = " << end << " address = " << address;
  }

  size_t offset = address - start;
  static_assert(
      sizeof(int) == 4,
      "Collection and processing code assumes that sizeof(int) == 4");
  size_t offset_index = offset / 4;

  auto* offsets = g_data[index].offsets;
  // Atomically set the corresponding bit in the array.
  std::atomic<uint32_t>* element = offsets + (offset_index / 32);
  // First, a racy check. This saves a CAS if the bit is already set, and
  // allows the cache line to remain shared across CPUs in this case.
  uint32_t value = element->load(std::memory_order_relaxed);
  uint32_t mask = 1 << (offset_index % 32);
  if (value & mask)
    return;

  auto before = element->fetch_or(mask, std::memory_order_relaxed);
  if (before & mask)
    return;

  // We were the first one to set the element; record it in the ordered
  // elements list.
  // Use relaxed ordering, as the value is not published or used for
  // synchronization.
  auto* ordered_offsets = g_data[index].ordered_offsets;
  auto& ordered_offsets_index = g_data[index].index;
  size_t insertion_index =
      ordered_offsets_index.fetch_add(1, std::memory_order_relaxed);
  if (UNLIKELY(insertion_index >= kMaxElements)) {
    Disable();
    LOG(FATAL) << "Too many reached offsets";
  }
  ordered_offsets[insertion_index].store(offset, std::memory_order_relaxed);
}

NO_INSTRUMENT_FUNCTION bool DumpToFile(const base::FilePath& path,
                                       const LogData& data) {
  auto file = base::File(
      path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
  if (!file.IsValid()) {
    PLOG(ERROR) << "Could not open " << path;
    return false;
  }

  if (data.index == 0) {
    LOG(ERROR) << "No entries to dump";
    return false;
  }

  size_t count = data.index - 1;
  for (size_t i = 0; i < count; i++) {
    // |ordered_offsets| is initialized to 0, so a 0 in the middle of it
    // indicates a case where the index was incremented, but the write is not
    // visible in this thread yet. Safe to skip, also because the function at
    // the start of .text is never called.
    auto offset = data.ordered_offsets[i].load(std::memory_order_relaxed);
    if (!offset)
      continue;
    auto offset_str = base::StringPrintf("%" PRIuS "\n", offset);
    if (file.WriteAtCurrentPos(offset_str.c_str(),
                               static_cast<int>(offset_str.size())) < 0) {
      // If the file could be opened, but writing has failed, it's likely that
      // data was partially written. Producing incomplete profiling data would
      // lead to a poorly performing orderfile, but might not be otherwise
      // noticed. So we crash instead.
      LOG(FATAL) << "Error writing profile data";
    }
  }
  return true;
}
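// The resulting file holds one decimal byte offset (relative to
// |kStartOfText|) per line, in first-reached order, e.g.:
//   1024
//   132
//   4100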

// Stops recording, and dumps the data for each phase to a file.
NO_INSTRUMENT_FUNCTION void StopAndDumpToFile(int pid,
                                              uint64_t start_ns_since_epoch,
                                              const std::string& tag) {
  Disable();

  for (int phase = 0; phase < kPhases; phase++) {
    std::string tag_str;
    if (!tag.empty())
      tag_str = base::StringPrintf("%s-", tag.c_str());
    auto path = base::StringPrintf(
        "/data/local/tmp/chrome/orderfile/profile-hitmap-%s%d-%" PRIu64
        ".txt_%d",
        tag_str.c_str(), pid, start_ns_since_epoch, phase);
    if (!DumpToFile(base::FilePath(path), g_data[phase])) {
      LOG(ERROR) << "Problem with dump " << phase << " (" << tag << ")";
    }
  }

  int unexpected_addresses =
      g_unexpected_addresses.load(std::memory_order_relaxed);
  if (unexpected_addresses != 0) {
    LOG(WARNING) << "Got " << unexpected_addresses << " unexpected addresses!";
  }
}
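// For instance, a dump tagged "browser" from pid 1234 started at time t ends
// up in /data/local/tmp/chrome/orderfile/profile-hitmap-browser-1234-<t>.txt_0
// (plus .txt_1 etc. when there are several phases); the tag prefix is simply
// omitted when |tag| is empty.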

}  // namespace

// After a call to Disable(), any function can be called, as reentrancy into
// the instrumentation function will be mitigated.
NO_INSTRUMENT_FUNCTION bool Disable() {
  auto old_phase = g_data_index.exchange(kPhases, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return old_phase != kPhases;
}
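// Note: the seq_cst fence is presumably there so that the new index is
// visible to other threads before the caller starts reading the shared dump
// buffers; threads still inside RecordAddress() may race with the dump, which
// is why insertions at the very end of collection can be lost.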

NO_INSTRUMENT_FUNCTION void SanityChecks() {
  CHECK_LT(base::android::kEndOfText - base::android::kStartOfText,
           kMaxTextSizeInBytes);
  CHECK(base::android::IsOrderingSane());
}

NO_INSTRUMENT_FUNCTION bool SwitchToNextPhaseOrDump(
    int pid,
    uint64_t start_ns_since_epoch) {
  int before = g_data_index.fetch_add(1, std::memory_order_relaxed);
  if (before + 1 == kPhases) {
    StopAndDumpToFile(pid, start_ns_since_epoch, "");
    return true;
  }
  return false;
}
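// For example, with kPhases == 2: the first call moves recording from phase 0
// to phase 1 and returns false; the second call moves the index to kPhases,
// which stops recording and dumps both phases.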

NO_INSTRUMENT_FUNCTION void StartDelayedDump() {
  // Using std::thread and not base::TimeTicks(), in order not to call too
  // many base:: symbols that would pollute the reached symbol dumps.
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    PLOG(FATAL) << "clock_gettime.";
  uint64_t start_ns_since_epoch =
      static_cast<uint64_t>(ts.tv_sec) * 1000 * 1000 * 1000 + ts.tv_nsec;
  int pid = getpid();

#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
  static auto* g_orderfile_memory_dump_hook = new OrderfileMemoryDumpHook();
  base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
      g_orderfile_memory_dump_hook, "Orderfile", nullptr);
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)

  std::thread([pid, start_ns_since_epoch]() {
    sleep(kInitialDelayInSeconds);
#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
    SwitchToNextPhaseOrDump(pid, start_ns_since_epoch);
    // Return, letting devtools tracing handle any post-startup phases.
#else
    while (!SwitchToNextPhaseOrDump(pid, start_ns_since_epoch))
      sleep(kDelayInSeconds);
#endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
  }).detach();
}

NO_INSTRUMENT_FUNCTION void Dump(const std::string& tag) {
  // As profiling has been disabled, none of the uses of ::base symbols below
  // will enter the symbol dump.
  StopAndDumpToFile(
      getpid(), (base::Time::Now() - base::Time::UnixEpoch()).InNanoseconds(),
      tag);
}

NO_INSTRUMENT_FUNCTION void ResetForTesting() {
  Disable();
  g_data_index = 0;
  for (int i = 0; i < kPhases; i++) {
    memset(reinterpret_cast<uint32_t*>(g_data[i].offsets), 0,
           sizeof(uint32_t) * kBitfieldSize);
    // |ordered_offsets| holds std::atomic<size_t>, so the element size is
    // sizeof(size_t), not sizeof(uint32_t).
    memset(reinterpret_cast<size_t*>(g_data[i].ordered_offsets), 0,
           sizeof(size_t) * kMaxElements);
    g_data[i].index.store(0);
  }

  g_unexpected_addresses.store(0, std::memory_order_relaxed);
}

NO_INSTRUMENT_FUNCTION void RecordAddressForTesting(size_t address) {
  return RecordAddress<true>(address);
}

NO_INSTRUMENT_FUNCTION std::vector<size_t> GetOrderedOffsetsForTesting() {
  std::vector<size_t> result;
  size_t max_index = g_data[0].index.load(std::memory_order_relaxed);
  for (size_t i = 0; i < max_index; ++i) {
    auto value = g_data[0].ordered_offsets[i].load(std::memory_order_relaxed);
    if (value)
      result.push_back(value);
  }
  return result;
}
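// Minimal usage sketch for the test hooks above, assuming the offsets fall
// below |kEndOfTextForTesting|:
//   ResetForTesting();
//   RecordAddressForTesting(kStartOfTextForTesting + 8);
//   RecordAddressForTesting(kStartOfTextForTesting + 4);
//   RecordAddressForTesting(kStartOfTextForTesting + 8);  // Duplicate, dropped.
//   GetOrderedOffsetsForTesting();  // Returns {8, 4}: insertion order, deduped.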

}  // namespace orderfile
}  // namespace android
}  // namespace base

extern "C" {

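// Entry hook emitted by the compiler's function instrumentation (with Clang,
// presumably -finstrument-function-entry-bare, which inserts a bare call with
// no arguments). The instrumented function is identified through
// __builtin_return_address(0), which points just past the call site near that
// function's entry.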
NO_INSTRUMENT_FUNCTION void __cyg_profile_func_enter_bare() {
  base::android::orderfile::RecordAddress<false>(
      reinterpret_cast<size_t>(__builtin_return_address(0)));
}

}  // extern "C"