/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

#include <atomic>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <vector>

#include <unwindstack/Global.h>
#include <unwindstack/Maps.h>

#include "Check.h"
#include "GlobalDebugInterface.h"
#include "MemoryCache.h"
#include "MemoryRange.h"

// This implements the JIT Compilation Interface.
// See https://sourceware.org/gdb/onlinedocs/gdb/JIT-Interface.html
//
// We use it to get in-memory ELF files created by the ART compiler,
// but we also use it to get the list of DEX files used by the runtime.

namespace unwindstack {

// Implementation templated for ELF/DEX and for different architectures.
template <typename Symfile, typename Uintptr_T, typename Uint64_T>
class GlobalDebugImpl : public GlobalDebugInterface<Symfile>, public Global {
 public:
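  // Bounds on retries when the target's linked list is being concurrently modified:
  // kMaxRaceRetries limits full re-reads of the list after a detected race, and
  // kMaxHeadRetries limits how many times new head entries are re-read before giving up.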
  static constexpr int kMaxRaceRetries = 16;
  static constexpr int kMaxHeadRetries = 16;
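  // Magic value in JITDescriptor that marks the extended Android-specific (V2) layout.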
  static constexpr uint8_t kMagic[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '2'};

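  // Mirrors GDB's jit_code_entry, with Android-specific fields appended.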
  struct JITCodeEntry {
    Uintptr_T next;
    Uintptr_T prev;
    Uintptr_T symfile_addr;
    Uint64_T symfile_size;
    // Android-specific fields:
    Uint64_T timestamp;
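    // An odd value, or a value that changes between reads, means the entry's data
    // must not be trusted (see ReadNextField and CheckSeqlock below).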
    uint32_t seqlock;
  };

  static constexpr size_t kSizeOfCodeEntryV1 = offsetof(JITCodeEntry, timestamp);
  static constexpr size_t kSizeOfCodeEntryV2 = sizeof(JITCodeEntry);

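  // Mirrors GDB's jit_descriptor, with Android-specific fields appended.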
  struct JITDescriptor {
    uint32_t version;
    uint32_t action_flag;
    Uintptr_T relevant_entry;
    Uintptr_T first_entry;
    // Android-specific fields:
    uint8_t magic[8];
    uint32_t flags;
    uint32_t sizeof_descriptor;
    uint32_t sizeof_entry;
    uint32_t seqlock;
    Uint64_T timestamp;
  };

  static constexpr size_t kSizeOfDescriptorV1 = offsetof(JITDescriptor, magic);
  static constexpr size_t kSizeOfDescriptorV2 = sizeof(JITDescriptor);

  // This uniquely identifies an entry in the presence of concurrent modifications.
  // Each newly created JIT entry gets a distinct (address, seqlock) pair.
  struct UID {
    uint64_t address;  // Address of the JITCodeEntry in memory.
    uint32_t seqlock;  // This serves as a "version" for the given address.

    bool operator<(const UID& other) const {
      return std::tie(address, seqlock) < std::tie(other.address, other.seqlock);
    }
  };

  GlobalDebugImpl(ArchEnum arch, std::shared_ptr<Memory>& memory,
                  std::vector<std::string>& search_libs, const char* global_variable_name)
      : Global(memory, search_libs), global_variable_name_(global_variable_name) {
    SetArch(arch);
  }

  bool ReadDescriptor(uint64_t addr) {
    JITDescriptor desc{};
    // Try to read the full descriptor, including the Android-specific fields.
    if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV2)) {
      // Fall back to just the minimal descriptor.
      // This will make the magic check below fail.
      if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV1)) {
        return false;
      }
    }

    if (desc.version != 1 || desc.first_entry == 0) {
      // Either unknown version, or no jit entries.
      return false;
    }

    // Check if there are extra Android-specific fields.
    if (memcmp(desc.magic, kMagic, sizeof(kMagic)) == 0) {
      jit_entry_size_ = kSizeOfCodeEntryV2;
      seqlock_offset_ = offsetof(JITCodeEntry, seqlock);
    } else {
      jit_entry_size_ = kSizeOfCodeEntryV1;
      seqlock_offset_ = 0;
    }
    descriptor_addr_ = addr;
    return true;
  }

  void ProcessArch() {}

  bool ReadVariableData(uint64_t ptr) { return ReadDescriptor(ptr); }

  // Invoke the callback for all symfiles that contain the given PC.
  // Returns true if any callback returns true (which also aborts the iteration).
  template <typename Callback /* (Symfile*) -> bool */>
  bool ForEachSymfile(Maps* maps, uint64_t pc, Callback callback) {
    // Use a single lock; this object is used so infrequently that
    // fine-grained locking is unnecessary.
    std::lock_guard<std::mutex> guard(lock_);
    if (descriptor_addr_ == 0) {
      FindAndReadVariable(maps, global_variable_name_);
      if (descriptor_addr_ == 0) {
        return false;
      }
    }

    // Try to find the entry in the already loaded symbol files.
    for (auto& it : entries_) {
      Symfile* symfile = it.second.get();
      // Check the seqlock to make sure the entry is still valid (it may be very old).
      if (symfile->IsValidPc(pc) && CheckSeqlock(it.first) && callback(symfile)) {
        return true;
      }
    }

    // Update all entries and retry.
    ReadAllEntries(maps);
    for (auto& it : entries_) {
      Symfile* symfile = it.second.get();
      // Note that the entry could have become invalid since the ReadAllEntries call above,
      // but that is ok. We don't want to fail or refresh the entries yet again.
      // This is as if we had found the entry in time and it became invalid after returning.
      // This is relevant when ART moves/packs JIT entries. That is, the entry is
      // technically deleted, but only because it was copied into a merged uber-entry.
      // So the JIT method is still alive and the deleted data is still correct.
      if (symfile->IsValidPc(pc) && callback(symfile)) {
        return true;
      }
    }

    return false;
  }

  bool GetFunctionName(Maps* maps, uint64_t pc, SharedString* name, uint64_t* offset) {
    // NB: If symfiles overlap in PC ranges, this will check all of them.
    return ForEachSymfile(maps, pc, [pc, name, offset](Symfile* file) {
      return file->GetFunctionName(pc, name, offset);
    });
  }

  Symfile* Find(Maps* maps, uint64_t pc) {
    // NB: If symfiles overlap in PC ranges (which can happen for both ELF and DEX),
    // this will check all of them and return one that also has a matching function.
    Symfile* result = nullptr;
    bool found = ForEachSymfile(maps, pc, [pc, &result](Symfile* file) {
      result = file;
      SharedString name;
      uint64_t offset;
      return file->GetFunctionName(pc, &name, &offset);
    });
    if (found) {
      return result;  // Found symfile with symbol that also matches the PC.
    }
    // There is no matching symbol, so return any symfile for which the PC is valid.
    // This is a useful fallback for tests, which often have symfiles with no functions.
    return result;
  }

  // Read all entries from the process and cache them locally.
  // The linked list might be concurrently modified. We detect races and retry.
  bool ReadAllEntries(Maps* maps) {
    for (int i = 0; i < kMaxRaceRetries; i++) {
      bool race = false;
      if (!ReadAllEntries(maps, &race)) {
        if (race) {
          continue;  // Retry due to concurrent modification of the linked list.
        }
        return false;  // Failed to read entries.
      }
      return true;  // Success.
    }
    return false;  // Too many retries.
  }

  // Read all JIT entries while assuming there might be concurrent modifications.
  // If there is a race, the method will fail and the caller should retry the call.
  bool ReadAllEntries(Maps* maps, bool* race) {
    // New entries might be added while we iterate over the linked list.
    // In particular, an entry could be effectively moved from end to start due to
    // the ART repacking algorithm, which groups smaller entries into a big one.
    // Therefore keep reading the most recent entries until we reach a fixed point.
    std::map<UID, std::shared_ptr<Symfile>> entries;
    for (size_t i = 0; i < kMaxHeadRetries; i++) {
      size_t old_size = entries.size();
      if (!ReadNewEntries(maps, &entries, race)) {
        return false;
      }
      if (entries.size() == old_size) {
        entries_.swap(entries);
        return true;
      }
    }
    return false;  // Too many retries.
  }

  // Read new JIT entries (head of the linked list) until we find one that we have seen before.
  // This method uses seqlocks extensively to ensure safety in case of concurrent modifications.
  bool ReadNewEntries(Maps* maps, std::map<UID, std::shared_ptr<Symfile>>* entries, bool* race) {
    // Read the address of the head entry in the linked list.
    UID uid;
    if (!ReadNextField(descriptor_addr_ + offsetof(JITDescriptor, first_entry), &uid, race)) {
      return false;
    }

    // Follow the linked list.
    while (uid.address != 0) {
      // Check if we have reached an already cached entry (we restart from the head repeatedly).
      if (entries->count(uid) != 0) {
        return true;
      }

      // Read the entry.
      JITCodeEntry data{};
      if (!memory_->ReadFully(uid.address, &data, jit_entry_size_)) {
        return false;
      }
      data.symfile_addr = StripAddressTag(data.symfile_addr);

      // Check the seqlock to verify the symfile_addr and symfile_size.
      if (!CheckSeqlock(uid, race)) {
        return false;
      }

      // Copy and load the symfile.
      auto it = entries_.find(uid);
      if (it != entries_.end()) {
        // The symfile was already loaded - just copy the reference.
        entries->emplace(uid, it->second);
      } else if (data.symfile_addr != 0) {
        std::shared_ptr<Symfile> symfile;
        bool ok = this->Load(maps, memory_, data.symfile_addr, data.symfile_size.value, symfile);
        // Check the seqlock first because the load can fail due to a race (we want to retry).
        // TODO: Extract the memory copy code before the load, so that it is immune to races.
        if (!CheckSeqlock(uid, race)) {
          return false;  // The ELF/DEX data was removed before we loaded it.
        }
        // Exclude symbol files that fail to load (but continue loading other files).
        if (ok) {
          entries->emplace(uid, symfile);
        }
      }

      // Go to the next entry.
      UID next_uid;
      if (!ReadNextField(uid.address + offsetof(JITCodeEntry, next), &next_uid, race)) {
        return false;  // The next pointer was modified while we were reading it.
      }
      if (!CheckSeqlock(uid, race)) {
        return false;  // This entry was deleted before we moved to the next one.
      }
      uid = next_uid;
    }

    return true;
  }

  // Read the address and seqlock of an entry from the next field of the linked list.
  // This is non-trivial since they need to be consistent (as if we read both atomically).
  //
  // We're reading pointers, which can point at heap-allocated structures (the
  // case for the __dex_debug_descriptor pointers at the time of writing).
  // On 64 bit systems, the target process might have top-byte heap pointer
  // tagging enabled, so we need to mask out the tag. We also know that the
  // address must point to userspace, so the top byte of the address must be
  // zero on both x64 and aarch64 without tagging. Therefore the masking can be
  // done unconditionally.
  bool ReadNextField(uint64_t next_field_addr, UID* uid, bool* race) {
    Uintptr_T address[2]{0, 0};
    uint32_t seqlock[2]{0, 0};
    // Read all data twice: address[0], seqlock[0], address[1], seqlock[1].
    for (int i = 0; i < 2; i++) {
      std::atomic_thread_fence(std::memory_order_acquire);
      if (!(memory_->ReadFully(next_field_addr, &address[i], sizeof(address[i])))) {
        return false;
      }
      address[i] = StripAddressTag(address[i]);
      if (seqlock_offset_ == 0) {
        // There is no seqlock field.
        *uid = UID{.address = address[0], .seqlock = 0};
        return true;
      }
      if (address[i] != 0) {
        std::atomic_thread_fence(std::memory_order_acquire);
        if (!memory_->ReadFully(address[i] + seqlock_offset_, &seqlock[i], sizeof(seqlock[i]))) {
          return false;
        }
      }
    }
    // Check that both reads returned identical values, and that the entry is live.
    if (address[0] != address[1] || seqlock[0] != seqlock[1] || (seqlock[0] & 1) == 1) {
      *race = true;
      return false;
    }
    // Since address[1] is sandwiched between two seqlock reads, we know that
    // at the time of the address[1] read, the entry had the given seqlock value.
    *uid = UID{.address = address[1], .seqlock = seqlock[1]};
    return true;
  }

  // Check that the given entry has not been deleted (or replaced by a new entry at the
  // same address).
  bool CheckSeqlock(UID uid, bool* race = nullptr) {
    if (seqlock_offset_ == 0) {
      // There is no seqlock field.
      return true;
    }
    // This is required for memory synchronization if we are working with local memory.
    // For other types of memory (e.g. remote) this is a no-op and has no significant effect.
    std::atomic_thread_fence(std::memory_order_acquire);
    uint32_t seen_seqlock;
    if (!memory_->Read32(uid.address + seqlock_offset_, &seen_seqlock)) {
      return false;
    }
    if (seen_seqlock != uid.seqlock) {
      if (race != nullptr) {
        *race = true;
      }
      return false;
    }
    return true;
  }

  // AArch64 has an address tagging (aka Top Byte Ignore) feature, which is used by
  // HWASAN and MTE to store metadata in the address. We need to remove the tag.
  Uintptr_T StripAddressTag(Uintptr_T addr) {
    if (arch() == ARCH_ARM64) {
      // Make the value signed so it will be sign-extended if necessary.
      return static_cast<Uintptr_T>((static_cast<int64_t>(addr) << 8) >> 8);
    }
    return addr;
  }

 private:
  const char* global_variable_name_ = nullptr;
  uint64_t descriptor_addr_ = 0;  // Non-zero if we have found a (non-empty) descriptor.
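  // Entry size and seqlock offset in the target process, as chosen by ReadDescriptor:
  // the V2 layout if the Android magic is present, the V1 layout otherwise.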
  uint32_t jit_entry_size_ = 0;
  uint32_t seqlock_offset_ = 0;
  std::map<UID, std::shared_ptr<Symfile>> entries_;  // Cached loaded entries.

  std::mutex lock_;
};

// uint64_t values on x86 are not naturally aligned,
// but uint64_t values on ARM are naturally aligned.
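// Wrapping uint64_t in these structs lets JITCodeEntry/JITDescriptor reproduce the
// target ABI's field offsets and sizes on the host (see the static_asserts below).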
struct Uint64_P {
  uint64_t value;
} __attribute__((packed));
struct Uint64_A {
  uint64_t value;
} __attribute__((aligned(8)));

template <typename Symfile>
std::unique_ptr<GlobalDebugInterface<Symfile>> CreateGlobalDebugImpl(
    ArchEnum arch, std::shared_ptr<Memory>& memory, std::vector<std::string> search_libs,
    const char* global_variable_name) {
  CHECK(arch != ARCH_UNKNOWN);

  // The interface needs to see real-time changes in memory for synchronization with the
  // concurrently running ART JIT compiler. Skip caching and read the memory directly.
  std::shared_ptr<Memory> jit_memory;
  MemoryCacheBase* cached_memory = memory->AsMemoryCacheBase();
  if (cached_memory != nullptr) {
    jit_memory = cached_memory->UnderlyingMemory();
  } else {
    jit_memory = memory;
  }

  switch (arch) {
    case ARCH_X86: {
      using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_P>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 12, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 28, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 32, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    case ARCH_ARM: {
      using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_A>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 16, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 32, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 40, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    case ARCH_ARM64:
    case ARCH_X86_64:
    case ARCH_RISCV64: {
      using Impl = GlobalDebugImpl<Symfile, uint64_t, Uint64_A>;
      static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 24, "layout");
      static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 40, "layout");
      static_assert(sizeof(typename Impl::JITCodeEntry) == 48, "layout");
      static_assert(sizeof(typename Impl::JITDescriptor) == 56, "layout");
      return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
    }
    default:
      abort();
  }
}

}  // namespace unwindstack