• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "base/trace_event/cfi_backtrace_android.h"
11 
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 
15 #include "base/android/apk_assets.h"
16 #include "base/android/library_loader/anchor_functions.h"
17 #include "build/build_config.h"
18 
19 #if !defined(ARCH_CPU_ARMEL)
20 #error This file should not be built for this architecture.
21 #endif
22 
23 /*
24 Basics of unwinding:
25 For each instruction in a function we need to know what is the offset of SP
26 (Stack Pointer) to reach the previous function's stack frame. To know which
27 function is being invoked, we need the return address of the next function. The
28 CFI information for an instruction is made up of 2 offsets, CFA (Call Frame
29 Address) offset and RA (Return Address) offset. The CFA offset is the change in
30 SP made by the function till the current instruction. This depends on amount of
31 memory allocated on stack by the function plus some registers that the function
32 stores that needs to be restored at the end of function. So, at each instruction
33 the CFA offset tells the offset from original SP before the function call. The
34 RA offset tells us the offset from the previous SP into the current function
35 where the return address is stored.
36 
37 The unwind table file has 2 tables UNW_INDEX and UNW_DATA, inspired from ARM
38 EHABI format. The first table contains function addresses and an index into the
39 UNW_DATA table. The second table contains one or more rows for the function
40 unwind information.
41 
42 UNW_INDEX contains two columns of N rows each, where N is the number of
43 functions.
44   1. First column 4 byte rows of all the function start address as offset from
45      start of the binary, in sorted order.
46   2. For each function addr, the second column contains 2 byte indices in order.
47      The indices are offsets (in count of 2 bytes) of the CFI data from start of
48      UNW_DATA.
49 The last entry in the table always contains CANT_UNWIND index to specify the
50 end address of the last function.
51 
52 UNW_DATA contains data of all the functions. Each function data contains N rows.
53 The data found at the address pointed from UNW_INDEX will be:
54   2 bytes: N - number of rows that belong to current function.
55   N * 4 bytes: N rows of data. 16 bits : Address offset from function start.
56                                14 bits : CFA offset / 4.
57                                 2 bits : RA offset / 4.
58 If the RA offset of a row is 0, then use the offset of the previous rows in the
59 same function.
60 TODO(ssid): Make sure RA offset is always present.
61 
62 See extract_unwind_tables.py for details about how this data is extracted from
63 breakpad symbol files.
64 */
65 
66 extern "C" {
67 
68 // The address of |__executable_start| gives the start address of the
69 // executable or shared library. This value is used to find the offset address
70 // of the instruction in binary from PC.
71 extern char __executable_start;
72 
73 }
74 
75 namespace base {
76 namespace trace_event {
77 
78 namespace {
79 
80 // The value of index when the function does not have unwind information.
81 constexpr uint32_t kCantUnwind = 0xFFFF;
82 
83 // The mask on the CFI row data that is used to get the high 14 bits and
84 // multiply it by 4 to get CFA offset. Since the last 2 bits are masked out, a
85 // shift is not necessary.
86 constexpr uint16_t kCFAMask = 0xfffc;
87 
88 // The mask on the CFI row data that is used to get the low 2 bits and multiply
89 // it by 4 to get the RA offset.
90 constexpr uint16_t kRAMask = 0x3;
91 constexpr uint16_t kRAShift = 2;
92 
93 // The code in this file assumes we are running in 32-bit builds since all the
94 // addresses in the unwind table are specified in 32 bits.
95 static_assert(sizeof(uintptr_t) == 4,
96               "The unwind table format is only valid for 32 bit builds.");
97 
98 // The CFI data in UNW_DATA table starts with number of rows (N) and then
99 // followed by N rows of 4 bytes long. The CFIUnwindDataRow represents a single
100 // row of CFI data of a function in the table. Since we cast the memory at the
101 // address after the address of number of rows, into an array of
102 // CFIUnwindDataRow, the size of the struct should be 4 bytes and the order of
103 // the members is fixed according to the given format. The first 2 bytes tell
104 // the address of function and last 2 bytes give the CFI data for the offset.
105 struct CFIUnwindDataRow {
106   // The address of the instruction in terms of offset from the start of the
107   // function.
108   uint16_t addr_offset;
109   // Represents the CFA and RA offsets to get information about next stack
110   // frame. This is the CFI data at the point before executing the instruction
111   // at |addr_offset| from the start of the function.
112   uint16_t cfi_data;
113 
114   // Return the RA offset for the current unwind row.
ra_offsetbase::trace_event::__anonc4e82ff50111::CFIUnwindDataRow115   uint16_t ra_offset() const {
116     return static_cast<uint16_t>((cfi_data & kRAMask) << kRAShift);
117   }
118 
119   // Returns the CFA offset for the current unwind row.
cfa_offsetbase::trace_event::__anonc4e82ff50111::CFIUnwindDataRow120   uint16_t cfa_offset() const { return cfi_data & kCFAMask; }
121 };
122 
123 static_assert(
124     sizeof(CFIUnwindDataRow) == 4,
125     "The CFIUnwindDataRow struct must be exactly 4 bytes for searching.");
126 
127 constinit thread_local CFIBacktraceAndroid::CFICache cfi_cache;
128 
129 }  // namespace
130 
131 // static
GetInitializedInstance()132 CFIBacktraceAndroid* CFIBacktraceAndroid::GetInitializedInstance() {
133   static CFIBacktraceAndroid* instance = new CFIBacktraceAndroid();
134   return instance;
135 }
136 
137 // static
is_chrome_address(uintptr_t pc)138 bool CFIBacktraceAndroid::is_chrome_address(uintptr_t pc) {
139   return pc >= base::android::kStartOfText && pc < executable_end_addr();
140 }
141 
142 // static
executable_start_addr()143 uintptr_t CFIBacktraceAndroid::executable_start_addr() {
144   return reinterpret_cast<uintptr_t>(&__executable_start);
145 }
146 
147 // static
executable_end_addr()148 uintptr_t CFIBacktraceAndroid::executable_end_addr() {
149   return base::android::kEndOfText;
150 }
151 
CFIBacktraceAndroid()152 CFIBacktraceAndroid::CFIBacktraceAndroid() {
153   // This file name is defined by extract_unwind_tables.gni.
154   static constexpr char kCfiFileName[] = "assets/unwind_cfi_32";
155   static constexpr char kSplitName[] = "stack_unwinder";
156 
157   MemoryMappedFile::Region cfi_region;
158   int fd = base::android::OpenApkAsset(kCfiFileName, kSplitName, &cfi_region);
159   if (fd < 0) {
160     return;
161   }
162   cfi_mmap_ = std::make_unique<MemoryMappedFile>();
163   // The CFI region starts at |cfi_region.offset|.
164   if (!cfi_mmap_->Initialize(base::File(fd), cfi_region)) {
165     return;
166   }
167 
168   ParseCFITables();
169   can_unwind_stack_frames_ = true;
170 }
171 
172 CFIBacktraceAndroid::~CFIBacktraceAndroid() = default;
173 
ParseCFITables()174 void CFIBacktraceAndroid::ParseCFITables() {
175   // The first 4 bytes in the file is the number of entries in UNW_INDEX table.
176   size_t unw_index_size = 0;
177   memcpy(&unw_index_size, cfi_mmap_->data(), sizeof(unw_index_size));
178   // UNW_INDEX table starts after 4 bytes.
179   unw_index_function_col_ =
180       reinterpret_cast<const uintptr_t*>(cfi_mmap_->data()) + 1;
181   unw_index_row_count_ = unw_index_size;
182   unw_index_indices_col_ = reinterpret_cast<const uint16_t*>(
183       (unw_index_function_col_ + unw_index_row_count_).get());
184 
185   // The UNW_DATA table data is right after the end of UNW_INDEX table.
186   // Interpret the UNW_DATA table as an array of 2 byte numbers since the
187   // indexes we have from the UNW_INDEX table are in terms of 2 bytes.
188   unw_data_start_addr_ = unw_index_indices_col_ + unw_index_row_count_;
189 }
190 
Unwind(const void ** out_trace,size_t max_depth)191 size_t CFIBacktraceAndroid::Unwind(const void** out_trace, size_t max_depth) {
192   // This function walks the stack using the call frame information to find the
193   // return addresses of all the functions that belong to current binary in call
194   // stack. For each function the CFI table defines the offset of the previous
195   // call frame and offset where the return address is stored.
196   if (!can_unwind_stack_frames())
197     return 0;
198 
199   // Get the current register state. This register state can be taken at any
200   // point in the function and the unwind information would be for this point.
201   // Define local variables before trying to get the current PC and SP to make
202   // sure the register state obtained is consistent with each other.
203   uintptr_t pc = 0, sp = 0;
204   asm volatile("mov %0, pc" : "=r"(pc));
205   asm volatile("mov %0, sp" : "=r"(sp));
206 
207   return Unwind(pc, sp, /*lr=*/0, out_trace, max_depth);
208 }
209 
Unwind(uintptr_t pc,uintptr_t sp,uintptr_t lr,const void ** out_trace,size_t max_depth)210 size_t CFIBacktraceAndroid::Unwind(uintptr_t pc,
211                                    uintptr_t sp,
212                                    uintptr_t lr,
213                                    const void** out_trace,
214                                    size_t max_depth) {
215   if (!can_unwind_stack_frames())
216     return 0;
217 
218   // We can only unwind as long as the pc is within the chrome.so.
219   size_t depth = 0;
220   while (is_chrome_address(pc) && depth < max_depth) {
221     out_trace[depth++] = reinterpret_cast<void*>(pc);
222     // The offset of function from the start of the chrome.so binary:
223     uintptr_t func_addr = pc - executable_start_addr();
224     CFIRow cfi{};
225     if (!FindCFIRowForPC(func_addr, &cfi)) {
226       if (depth == 1 && lr != 0 && pc != lr) {
227         // If CFI data is not found for the frame, then we stopped in prolog of
228         // a function. The return address is stored in LR when in function
229         // prolog. So, update the PC with address in LR and do not update SP
230         // since SP was not updated by the prolog yet.
231         // TODO(ssid): Write tests / add info to detect if we are actually in
232         // function prolog. https://crbug.com/898276
233         pc = lr;
234         continue;
235       }
236       break;
237     }
238 
239     // The rules for unwinding using the CFI information are:
240     // SP_prev = SP_cur + cfa_offset and
241     // PC_prev = * (SP_prev - ra_offset).
242     sp = sp + cfi.cfa_offset;
243     memcpy(&pc, reinterpret_cast<uintptr_t*>(sp - cfi.ra_offset),
244            sizeof(uintptr_t));
245   }
246   return depth;
247 }
248 
FindCFIRowForPC(uintptr_t func_addr,CFIBacktraceAndroid::CFIRow * cfi)249 bool CFIBacktraceAndroid::FindCFIRowForPC(uintptr_t func_addr,
250                                           CFIBacktraceAndroid::CFIRow* cfi) {
251   if (!can_unwind_stack_frames())
252     return false;
253 
254   *cfi = {0};
255   if (cfi_cache.Find(func_addr, cfi)) {
256     return true;
257   }
258 
259   // Consider each column of UNW_INDEX table as arrays of uintptr_t (function
260   // addresses) and uint16_t (indices). Define start and end iterator on the
261   // first column array (addresses) and use std::lower_bound() to binary search
262   // on this array to find the required function address.
263   static const uintptr_t* const unw_index_fn_end =
264       unw_index_function_col_ + unw_index_row_count_;
265   const uintptr_t* found = std::lower_bound(unw_index_function_col_.get(),
266                                             unw_index_fn_end, func_addr);
267 
268   // If found is start, then the given function is not in the table. If the
269   // given pc is start of a function then we cannot unwind.
270   if (found == unw_index_function_col_ || *found == func_addr)
271     return false;
272 
273   // std::lower_bound() returns the iter that corresponds to the first address
274   // that is greater than the given address. So, the required iter is always one
275   // less than the value returned by std::lower_bound().
276   --found;
277   uintptr_t func_start_addr = *found;
278   size_t row_num = static_cast<size_t>(found - unw_index_function_col_);
279   uint16_t index = unw_index_indices_col_[row_num];
280   DCHECK_LE(func_start_addr, func_addr);
281   // If the index is CANT_UNWIND then we do not have unwind infomation for the
282   // function.
283   if (index == kCantUnwind)
284     return false;
285 
286   // The unwind data for the current function is at an offsset of the index
287   // found in UNW_INDEX table.
288   const uint16_t* unwind_data = unw_data_start_addr_ + index;
289   // The value of first 2 bytes is the CFI data row count for the function.
290   uint16_t row_count = 0;
291   memcpy(&row_count, unwind_data, sizeof(row_count));
292   // And the actual CFI rows start after 2 bytes from the |unwind_data|. Cast
293   // the data into an array of CFIUnwindDataRow since the struct is designed to
294   // represent each row. We should be careful to read only |row_count| number of
295   // elements in the array.
296   const CFIUnwindDataRow* function_data =
297       reinterpret_cast<const CFIUnwindDataRow*>(unwind_data + 1);
298 
299   // Iterate through the CFI rows of the function to find the row that gives
300   // offset for the given instruction address.
301   CFIUnwindDataRow cfi_row = {0, 0};
302   uint16_t ra_offset = 0;
303   for (uint16_t i = 0; i < row_count; ++i) {
304     CFIUnwindDataRow row;
305     memcpy(&row, function_data + i, sizeof(CFIUnwindDataRow));
306     // The return address of the function is the instruction that is not yet
307     // been executed. The cfi row specifies the unwind info before executing the
308     // given instruction. If the given address is equal to the instruction
309     // offset, then use the current row. Or use the row with highest address
310     // less than the given address.
311     if (row.addr_offset + func_start_addr > func_addr)
312       break;
313 
314     cfi_row = row;
315     // The ra offset of the last specified row should be used, if unspecified.
316     // So, keep updating the RA offset till we reach the correct CFI row.
317     // TODO(ssid): This should be fixed in the format and we should always
318     // output ra offset.
319     if (cfi_row.ra_offset())
320       ra_offset = cfi_row.ra_offset();
321   }
322   DCHECK_NE(0u, cfi_row.addr_offset);
323   *cfi = {cfi_row.cfa_offset(), ra_offset};
324   DCHECK(cfi->cfa_offset);
325   DCHECK(cfi->ra_offset);
326 
327   // safe to update since the cache is thread local.
328   cfi_cache.Add(func_addr, *cfi);
329   return true;
330 }
331 
Add(uintptr_t address,CFIRow cfi)332 void CFIBacktraceAndroid::CFICache::Add(uintptr_t address, CFIRow cfi) {
333   cache_[address % kLimit] = {address, cfi};
334 }
335 
Find(uintptr_t address,CFIRow * cfi)336 bool CFIBacktraceAndroid::CFICache::Find(uintptr_t address, CFIRow* cfi) {
337   if (cache_[address % kLimit].address == address) {
338     *cfi = cache_[address % kLimit].cfi;
339     return true;
340   }
341   return false;
342 }
343 
344 }  // namespace trace_event
345 }  // namespace base
346