/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #include "read_elf.h"
18 #include "read_apk.h"
19
20 #include <stdio.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24
25 #include <algorithm>
26 #include <limits>
27
28 #include <android-base/file.h>
29 #include <android-base/logging.h>
30
31 #pragma clang diagnostic push
32 #pragma clang diagnostic ignored "-Wunused-parameter"
33
34 #include <llvm/ADT/StringRef.h>
35 #include <llvm/Object/Binary.h>
36 #include <llvm/Object/ELFObjectFile.h>
37 #include <llvm/Object/ObjectFile.h>
38
39 #pragma clang diagnostic pop
40
41 #include "utils.h"
42
43 #define ELF_NOTE_GNU "GNU"
44 #define NT_GNU_BUILD_ID 3
45
operator <<(std::ostream & os,const ElfStatus & status)46 std::ostream& operator<<(std::ostream& os, const ElfStatus& status) {
47 switch (status) {
48 case ElfStatus::NO_ERROR:
49 os << "No error";
50 break;
51 case ElfStatus::FILE_NOT_FOUND:
52 os << "File not found";
53 break;
54 case ElfStatus::READ_FAILED:
55 os << "Read failed";
56 break;
57 case ElfStatus::FILE_MALFORMED:
58 os << "Malformed file";
59 break;
60 case ElfStatus::NO_SYMBOL_TABLE:
61 os << "No symbol table";
62 break;
63 case ElfStatus::NO_BUILD_ID:
64 os << "No build id";
65 break;
66 case ElfStatus::BUILD_ID_MISMATCH:
67 os << "Build id mismatch";
68 break;
69 case ElfStatus::SECTION_NOT_FOUND:
70 os << "Section not found";
71 break;
72 }
73 return os;
74 }
75
IsValidElfFile(int fd)76 ElfStatus IsValidElfFile(int fd) {
77 static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
78 char buf[4];
79 if (!android::base::ReadFully(fd, buf, 4)) {
80 return ElfStatus::READ_FAILED;
81 }
82 if (memcmp(buf, elf_magic, 4) != 0) {
83 return ElfStatus::FILE_MALFORMED;
84 }
85 return ElfStatus::NO_ERROR;
86 }
87
IsValidElfPath(const std::string & filename)88 ElfStatus IsValidElfPath(const std::string& filename) {
89 if (!IsRegularFile(filename)) {
90 return ElfStatus::FILE_NOT_FOUND;
91 }
92 std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
93 FILE* fp = fopen(filename.c_str(), mode.c_str());
94 if (fp == nullptr) {
95 return ElfStatus::READ_FAILED;
96 }
97 ElfStatus result = IsValidElfFile(fileno(fp));
98 fclose(fp);
99 return result;
100 }
101
GetBuildIdFromNoteSection(const char * section,size_t section_size,BuildId * build_id)102 bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) {
103 const char* p = section;
104 const char* end = p + section_size;
105 while (p < end) {
106 if (p + 12 >= end) {
107 return false;
108 }
109 uint32_t namesz;
110 uint32_t descsz;
111 uint32_t type;
112 MoveFromBinaryFormat(namesz, p);
113 MoveFromBinaryFormat(descsz, p);
114 MoveFromBinaryFormat(type, p);
115 namesz = Align(namesz, 4);
116 descsz = Align(descsz, 4);
117 if ((type == NT_GNU_BUILD_ID) && (p < end) && (strcmp(p, ELF_NOTE_GNU) == 0)) {
118 const char* desc_start = p + namesz;
119 const char* desc_end = desc_start + descsz;
120 if (desc_start > p && desc_start < desc_end && desc_end <= end) {
121 *build_id = BuildId(p + namesz, descsz);
122 return true;
123 } else {
124 return false;
125 }
126 }
127 p += namesz + descsz;
128 }
129 return false;
130 }
131
GetBuildIdFromNoteFile(const std::string & filename,BuildId * build_id)132 ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) {
133 std::string content;
134 if (!android::base::ReadFileToString(filename, &content)) {
135 return ElfStatus::READ_FAILED;
136 }
137 if (!GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id)) {
138 return ElfStatus::NO_BUILD_ID;
139 }
140 return ElfStatus::NO_ERROR;
141 }
142
143 template <class ELFT>
GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT> * elf,BuildId * build_id)144 ElfStatus GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) {
145 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
146 const llvm::object::ELFSectionRef& section_ref = *it;
147 if (section_ref.getType() == llvm::ELF::SHT_NOTE) {
148 llvm::StringRef data;
149 if (it->getContents(data)) {
150 return ElfStatus::READ_FAILED;
151 }
152 if (GetBuildIdFromNoteSection(data.data(), data.size(), build_id)) {
153 return ElfStatus::NO_ERROR;
154 }
155 }
156 }
157 return ElfStatus::NO_BUILD_ID;
158 }
159
GetBuildIdFromObjectFile(llvm::object::ObjectFile * obj,BuildId * build_id)160 static ElfStatus GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
161 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
162 return GetBuildIdFromELFFile(elf, build_id);
163 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
164 return GetBuildIdFromELFFile(elf, build_id);
165 }
166 return ElfStatus::FILE_MALFORMED;
167 }
168
169 struct BinaryWrapper {
170 llvm::object::OwningBinary<llvm::object::Binary> binary;
171 llvm::object::ObjectFile* obj;
172
BinaryWrapperBinaryWrapper173 BinaryWrapper() : obj(nullptr) {
174 }
175 };
176
OpenObjectFile(const std::string & filename,uint64_t file_offset,uint64_t file_size,BinaryWrapper * wrapper)177 static ElfStatus OpenObjectFile(const std::string& filename, uint64_t file_offset,
178 uint64_t file_size, BinaryWrapper* wrapper) {
179 FileHelper fhelper = FileHelper::OpenReadOnly(filename);
180 if (!fhelper) {
181 return ElfStatus::READ_FAILED;
182 }
183 if (file_size == 0) {
184 file_size = GetFileSize(filename);
185 if (file_size == 0) {
186 return ElfStatus::READ_FAILED;
187 }
188 }
189 auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset);
190 if (!buffer_or_err) {
191 return ElfStatus::READ_FAILED;
192 }
193 auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef());
194 if (!binary_or_err) {
195 return ElfStatus::READ_FAILED;
196 }
197 wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
198 std::move(buffer_or_err.get()));
199 wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());
200 if (wrapper->obj == nullptr) {
201 return ElfStatus::FILE_MALFORMED;
202 }
203 return ElfStatus::NO_ERROR;
204 }
205
OpenObjectFileFromString(const std::string & s,BinaryWrapper * wrapper)206 static ElfStatus OpenObjectFileFromString(const std::string& s, BinaryWrapper* wrapper) {
207 auto buffer = llvm::MemoryBuffer::getMemBuffer(s);
208 auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef());
209 if (!binary_or_err) {
210 return ElfStatus::FILE_MALFORMED;
211 }
212 wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
213 std::move(buffer));
214 wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());
215 if (wrapper->obj == nullptr) {
216 return ElfStatus::FILE_MALFORMED;
217 }
218 return ElfStatus::NO_ERROR;
219 }
220
GetBuildIdFromElfFile(const std::string & filename,BuildId * build_id)221 ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
222 ElfStatus result = IsValidElfPath(filename);
223 if (result != ElfStatus::NO_ERROR) {
224 return result;
225 }
226 return GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id);
227 }
228
GetBuildIdFromEmbeddedElfFile(const std::string & filename,uint64_t file_offset,uint32_t file_size,BuildId * build_id)229 ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
230 uint32_t file_size, BuildId* build_id) {
231 BinaryWrapper wrapper;
232 ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper);
233 if (result != ElfStatus::NO_ERROR) {
234 return result;
235 }
236 return GetBuildIdFromObjectFile(wrapper.obj, build_id);
237 }
238
239 template <class ELFT>
ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT> * elf,const std::string & section_name,std::string * content)240 ElfStatus ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, const std::string& section_name,
241 std::string* content) {
242 for (llvm::object::section_iterator it = elf->section_begin(); it != elf->section_end(); ++it) {
243 llvm::StringRef name;
244 if (it->getName(name) || name != section_name) {
245 continue;
246 }
247 llvm::StringRef data;
248 std::error_code err = it->getContents(data);
249 if (err) {
250 return ElfStatus::READ_FAILED;
251 }
252 *content = data;
253 return ElfStatus::NO_ERROR;
254 }
255 return ElfStatus::SECTION_NOT_FOUND;
256 }
257
// Returns true if |name| is an ARM/AArch64 mapping symbol as described in
// "ELF for ARM Architecture" and "ELF for ARM 64-bit Architecture", i.e. it
// matches the regular expression ^\$(a|d|t|x)(\..*)?$
//
// |name| must be a NUL-terminated string. No byte past the terminator is read.
bool IsArmMappingSymbol(const char* name) {
  if (name[0] != '$') {
    return false;
  }
  const char kind = name[1];
  // strchr() treats the haystack's terminating NUL as part of the string, so
  // a bare "$" must be rejected explicitly; otherwise it would "match" and
  // name[2] would be read beyond the end of the symbol name.
  if (kind == '\0' || strchr("adtx", kind) == nullptr) {
    return false;
  }
  return name[2] == '\0' || name[2] == '.';
}
264
ReadSymbolTable(llvm::object::symbol_iterator sym_begin,llvm::object::symbol_iterator sym_end,const std::function<void (const ElfFileSymbol &)> & callback,bool is_arm)265 void ReadSymbolTable(llvm::object::symbol_iterator sym_begin,
266 llvm::object::symbol_iterator sym_end,
267 const std::function<void(const ElfFileSymbol&)>& callback,
268 bool is_arm) {
269 for (; sym_begin != sym_end; ++sym_begin) {
270 ElfFileSymbol symbol;
271 auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin);
272 llvm::Expected<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection();
273 if (!section_it_or_err) {
274 continue;
275 }
276
277 llvm::StringRef section_name;
278 if (section_it_or_err.get()->getName(section_name) || section_name.empty()) {
279 continue;
280 }
281 if (section_name == ".text") {
282 symbol.is_in_text_section = true;
283 }
284 llvm::Expected<llvm::StringRef> symbol_name_or_err = symbol_ref->getName();
285 if (!symbol_name_or_err || symbol_name_or_err.get().empty()) {
286 continue;
287 }
288
289 symbol.name = symbol_name_or_err.get();
290 symbol.vaddr = symbol_ref->getValue();
291 if ((symbol.vaddr & 1) != 0 && is_arm) {
292 // Arm sets bit 0 to mark it as thumb code, remove the flag.
293 symbol.vaddr &= ~1;
294 }
295 symbol.len = symbol_ref->getSize();
296 llvm::object::SymbolRef::Type symbol_type = *symbol_ref->getType();
297 if (symbol_type == llvm::object::SymbolRef::ST_Function) {
298 symbol.is_func = true;
299 } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) {
300 if (symbol.is_in_text_section) {
301 symbol.is_label = true;
302 if (is_arm) {
303 // Remove mapping symbols in arm.
304 const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0)
305 ? symbol.name.c_str() + linker_prefix.size()
306 : symbol.name.c_str();
307 if (IsArmMappingSymbol(p)) {
308 symbol.is_label = false;
309 }
310 }
311 }
312 }
313
314 callback(symbol);
315 }
316 }
317
318 template <class ELFT>
AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT> * elf,const std::function<void (const ElfFileSymbol &)> & callback)319 void AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT>* elf,
320 const std::function<void(const ElfFileSymbol&)>& callback) {
321 // We may sample instructions in .plt section if the program
322 // calls functions from shared libraries. Different architectures use
323 // different formats to store .plt section, so it needs a lot of work to match
324 // instructions in .plt section to symbols. As samples in .plt section rarely
325 // happen, and .plt section can hardly be a performance bottleneck, we can
326 // just use a symbol @plt to represent instructions in .plt section.
327 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
328 const llvm::object::ELFSectionRef& section_ref = *it;
329 llvm::StringRef section_name;
330 std::error_code err = section_ref.getName(section_name);
331 if (err || section_name != ".plt") {
332 continue;
333 }
334 const auto* shdr = elf->getSection(section_ref.getRawDataRefImpl());
335 if (shdr == nullptr) {
336 return;
337 }
338 ElfFileSymbol symbol;
339 symbol.vaddr = shdr->sh_addr;
340 symbol.len = shdr->sh_size;
341 symbol.is_func = true;
342 symbol.is_label = true;
343 symbol.is_in_text_section = true;
344 symbol.name = "@plt";
345 callback(symbol);
346 return;
347 }
348 }
349
350 template <class ELFT>
CheckSymbolSections(const llvm::object::ELFObjectFile<ELFT> * elf,bool * has_symtab,bool * has_dynsym)351 void CheckSymbolSections(const llvm::object::ELFObjectFile<ELFT>* elf,
352 bool* has_symtab, bool* has_dynsym) {
353 *has_symtab = false;
354 *has_dynsym = false;
355 for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
356 const llvm::object::ELFSectionRef& section_ref = *it;
357 llvm::StringRef section_name;
358 std::error_code err = section_ref.getName(section_name);
359 if (err) {
360 continue;
361 }
362 if (section_name == ".dynsym") {
363 *has_dynsym = true;
364 } else if (section_name == ".symtab") {
365 *has_symtab = true;
366 }
367 }
368 }
369
370 template <class ELFT>
ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT> * elf,const std::function<void (const ElfFileSymbol &)> & callback)371 ElfStatus ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf,
372 const std::function<void(const ElfFileSymbol&)>& callback) {
373 auto machine = elf->getELFFile()->getHeader()->e_machine;
374 bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64);
375 AddSymbolForPltSection(elf, callback);
376 // Some applications deliberately ship elf files with broken section tables.
377 // So check the existence of .symtab section and .dynsym section before reading symbols.
378 bool has_symtab;
379 bool has_dynsym;
380 CheckSymbolSections(elf, &has_symtab, &has_dynsym);
381 if (has_symtab && elf->symbol_begin() != elf->symbol_end()) {
382 ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm);
383 return ElfStatus::NO_ERROR;
384 } else if (has_dynsym &&
385 elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) {
386 ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm);
387 }
388 std::string debugdata;
389 ElfStatus result = ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata);
390 if (result == ElfStatus::SECTION_NOT_FOUND) {
391 return ElfStatus::NO_SYMBOL_TABLE;
392 } else if (result == ElfStatus::NO_ERROR) {
393 std::string decompressed_data;
394 if (XzDecompress(debugdata, &decompressed_data)) {
395 BinaryWrapper wrapper;
396 result = OpenObjectFileFromString(decompressed_data, &wrapper);
397 if (result == ElfStatus::NO_ERROR) {
398 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
399 return ParseSymbolsFromELFFile(elf, callback);
400 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
401 return ParseSymbolsFromELFFile(elf, callback);
402 } else {
403 return ElfStatus::FILE_MALFORMED;
404 }
405 }
406 }
407 }
408 return result;
409 }
410
MatchBuildId(llvm::object::ObjectFile * obj,const BuildId & expected_build_id)411 ElfStatus MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id) {
412 if (expected_build_id.IsEmpty()) {
413 return ElfStatus::NO_ERROR;
414 }
415 BuildId real_build_id;
416 ElfStatus result = GetBuildIdFromObjectFile(obj, &real_build_id);
417 if (result != ElfStatus::NO_ERROR) {
418 return result;
419 }
420 if (expected_build_id != real_build_id) {
421 return ElfStatus::BUILD_ID_MISMATCH;
422 }
423 return ElfStatus::NO_ERROR;
424 }
425
ParseSymbolsFromElfFile(const std::string & filename,const BuildId & expected_build_id,const std::function<void (const ElfFileSymbol &)> & callback)426 ElfStatus ParseSymbolsFromElfFile(const std::string& filename,
427 const BuildId& expected_build_id,
428 const std::function<void(const ElfFileSymbol&)>& callback) {
429 ElfStatus result = IsValidElfPath(filename);
430 if (result != ElfStatus::NO_ERROR) {
431 return result;
432 }
433 return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback);
434 }
435
ParseSymbolsFromEmbeddedElfFile(const std::string & filename,uint64_t file_offset,uint32_t file_size,const BuildId & expected_build_id,const std::function<void (const ElfFileSymbol &)> & callback)436 ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
437 uint32_t file_size, const BuildId& expected_build_id,
438 const std::function<void(const ElfFileSymbol&)>& callback) {
439 BinaryWrapper wrapper;
440 ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper);
441 if (result != ElfStatus::NO_ERROR) {
442 return result;
443 }
444 result = MatchBuildId(wrapper.obj, expected_build_id);
445 if (result != ElfStatus::NO_ERROR) {
446 return result;
447 }
448 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
449 return ParseSymbolsFromELFFile(elf, callback);
450 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
451 return ParseSymbolsFromELFFile(elf, callback);
452 }
453 return ElfStatus::FILE_MALFORMED;
454 }
455
456 template <class ELFT>
ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT> * elf,uint64_t * p_vaddr)457 ElfStatus ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) {
458 bool has_vaddr = false;
459 uint64_t min_addr = std::numeric_limits<uint64_t>::max();
460 for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) {
461 if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) {
462 if (it->p_vaddr < min_addr) {
463 min_addr = it->p_vaddr;
464 has_vaddr = true;
465 }
466 }
467 }
468 if (!has_vaddr) {
469 return ElfStatus::FILE_MALFORMED;
470 }
471 *p_vaddr = min_addr;
472 return ElfStatus::NO_ERROR;
473 }
474
ReadMinExecutableVirtualAddressFromElfFile(const std::string & filename,const BuildId & expected_build_id,uint64_t * min_vaddr)475 ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
476 const BuildId& expected_build_id,
477 uint64_t* min_vaddr) {
478 ElfStatus result = IsValidElfPath(filename);
479 if (result != ElfStatus::NO_ERROR) {
480 return result;
481 }
482 BinaryWrapper wrapper;
483 result = OpenObjectFile(filename, 0, 0, &wrapper);
484 if (result != ElfStatus::NO_ERROR) {
485 return result;
486 }
487 result = MatchBuildId(wrapper.obj, expected_build_id);
488 if (result != ElfStatus::NO_ERROR) {
489 return result;
490 }
491
492 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
493 return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
494 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
495 return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
496 } else {
497 return ElfStatus::FILE_MALFORMED;
498 }
499 }
500
ReadSectionFromElfFile(const std::string & filename,const std::string & section_name,std::string * content)501 ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
502 std::string* content) {
503 ElfStatus result = IsValidElfPath(filename);
504 if (result != ElfStatus::NO_ERROR) {
505 return result;
506 }
507 BinaryWrapper wrapper;
508 result = OpenObjectFile(filename, 0, 0, &wrapper);
509 if (result != ElfStatus::NO_ERROR) {
510 return result;
511 }
512 if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
513 return ReadSectionFromELFFile(elf, section_name, content);
514 } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
515 return ReadSectionFromELFFile(elf, section_name, content);
516 } else {
517 return ElfStatus::FILE_MALFORMED;
518 }
519 }
520