1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Allow dynamic symbol lookup in an in-memory Elf image.
16 //
17
18 #include "absl/debugging/internal/elf_mem_image.h"
19
20 #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
21
22 #include <string.h>
23 #include <cassert>
24 #include <cstddef>
25 #include "absl/base/internal/raw_logging.h"
26
27 // From binutils/include/elf/common.h (this doesn't appear to be documented
28 // anywhere else).
29 //
30 // /* This flag appears in a Versym structure. It means that the symbol
31 // is hidden, and is only visible with an explicit version number.
32 // This is a GNU extension. */
33 // #define VERSYM_HIDDEN 0x8000
34 //
35 // /* This is the mask for the rest of the Versym information. */
36 // #define VERSYM_VERSION 0x7fff
37
38 #define VERSYM_VERSION 0x7fff
39
40 namespace absl {
41 ABSL_NAMESPACE_BEGIN
42 namespace debugging_internal {
43
44 namespace {
45
// Word-size dependent helpers.  kElfClass is the EI_CLASS value an image must
// carry to be readable by this build; ElfBind()/ElfType() decode the binding
// and type fields packed into a symbol's st_info byte.
#if __WORDSIZE == 32
const int kElfClass = ELFCLASS32;
int ElfBind(const ElfW(Sym) *symbol) {
  const unsigned char packed_info = symbol->st_info;
  return ELF32_ST_BIND(packed_info);
}
int ElfType(const ElfW(Sym) *symbol) {
  const unsigned char packed_info = symbol->st_info;
  return ELF32_ST_TYPE(packed_info);
}
#elif __WORDSIZE == 64
const int kElfClass = ELFCLASS64;
int ElfBind(const ElfW(Sym) *symbol) {
  const unsigned char packed_info = symbol->st_info;
  return ELF64_ST_BIND(packed_info);
}
int ElfType(const ElfW(Sym) *symbol) {
  const unsigned char packed_info = symbol->st_info;
  return ELF64_ST_TYPE(packed_info);
}
#else
// Neither a 32-bit nor a 64-bit target: no ELF class can match, and any
// attempt to decode a symbol is a fatal error.
const int kElfClass = -1;
int ElfBind(const ElfW(Sym) *) {
  ABSL_RAW_LOG(FATAL, "Unexpected word size");
  return 0;
}
int ElfType(const ElfW(Sym) *) {
  ABSL_RAW_LOG(FATAL, "Unexpected word size");
  return 0;
}
#endif
65
// Returns a pointer to entry `index` of an ELF table, viewed as a `T`.
// The table starts `table_offset` bytes past `ehdr` and each entry occupies
// `element_size` bytes.  This is plain pointer arithmetic followed by a cast;
// no bounds checking is done here, so callers must validate `index`.
template <typename T>
const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
                         ElfW(Word) element_size, size_t index) {
  const char *const image_start = reinterpret_cast<const char *>(ehdr);
  const char *const element =
      image_start + table_offset + index * element_size;
  return reinterpret_cast<const T *>(element);
}
76
77 } // namespace
78
79 // The value of this variable doesn't matter; it's used only for its
80 // unique address.
81 const int ElfMemImage::kInvalidBaseSentinel = 0;
82
ElfMemImage(const void * base)83 ElfMemImage::ElfMemImage(const void *base) {
84 ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
85 Init(base);
86 }
87
GetNumSymbols() const88 int ElfMemImage::GetNumSymbols() const {
89 if (!hash_) {
90 return 0;
91 }
92 // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
93 return hash_[1];
94 }
95
ElfW(Sym)96 const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
97 ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
98 return dynsym_ + index;
99 }
100
ElfW(Versym)101 const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
102 ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
103 return versym_ + index;
104 }
105
ElfW(Phdr)106 const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
107 ABSL_RAW_CHECK(index < ehdr_->e_phnum, "index out of range");
108 return GetTableElement<ElfW(Phdr)>(ehdr_,
109 ehdr_->e_phoff,
110 ehdr_->e_phentsize,
111 index);
112 }
113
GetDynstr(ElfW (Word)offset) const114 const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
115 ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
116 return dynstr_ + offset;
117 }
118
GetSymAddr(const ElfW (Sym)* sym) const119 const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
120 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
121 // Symbol corresponds to "special" (e.g. SHN_ABS) section.
122 return reinterpret_cast<const void *>(sym->st_value);
123 }
124 ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
125 return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
126 }
127
ElfW(Verdef)128 const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
129 ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
130 "index out of range");
131 const ElfW(Verdef) *version_definition = verdef_;
132 while (version_definition->vd_ndx < index && version_definition->vd_next) {
133 const char *const version_definition_as_char =
134 reinterpret_cast<const char *>(version_definition);
135 version_definition =
136 reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
137 version_definition->vd_next);
138 }
139 return version_definition->vd_ndx == index ? version_definition : nullptr;
140 }
141
ElfW(Verdaux)142 const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
143 const ElfW(Verdef) *verdef) const {
144 return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
145 }
146
GetVerstr(ElfW (Word)offset) const147 const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
148 ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
149 return dynstr_ + offset;
150 }
151
Init(const void * base)152 void ElfMemImage::Init(const void *base) {
153 ehdr_ = nullptr;
154 dynsym_ = nullptr;
155 dynstr_ = nullptr;
156 versym_ = nullptr;
157 verdef_ = nullptr;
158 hash_ = nullptr;
159 strsize_ = 0;
160 verdefnum_ = 0;
161 link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
162 if (!base) {
163 return;
164 }
165 const char *const base_as_char = reinterpret_cast<const char *>(base);
166 if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
167 base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
168 assert(false);
169 return;
170 }
171 int elf_class = base_as_char[EI_CLASS];
172 if (elf_class != kElfClass) {
173 assert(false);
174 return;
175 }
176 switch (base_as_char[EI_DATA]) {
177 case ELFDATA2LSB: {
178 if (__LITTLE_ENDIAN != __BYTE_ORDER) {
179 assert(false);
180 return;
181 }
182 break;
183 }
184 case ELFDATA2MSB: {
185 if (__BIG_ENDIAN != __BYTE_ORDER) {
186 assert(false);
187 return;
188 }
189 break;
190 }
191 default: {
192 assert(false);
193 return;
194 }
195 }
196
197 ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
198 const ElfW(Phdr) *dynamic_program_header = nullptr;
199 for (int i = 0; i < ehdr_->e_phnum; ++i) {
200 const ElfW(Phdr) *const program_header = GetPhdr(i);
201 switch (program_header->p_type) {
202 case PT_LOAD:
203 if (!~link_base_) {
204 link_base_ = program_header->p_vaddr;
205 }
206 break;
207 case PT_DYNAMIC:
208 dynamic_program_header = program_header;
209 break;
210 }
211 }
212 if (!~link_base_ || !dynamic_program_header) {
213 assert(false);
214 // Mark this image as not present. Can not recur infinitely.
215 Init(nullptr);
216 return;
217 }
218 ptrdiff_t relocation =
219 base_as_char - reinterpret_cast<const char *>(link_base_);
220 ElfW(Dyn) *dynamic_entry =
221 reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
222 relocation);
223 for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
224 const ElfW(Xword) value = dynamic_entry->d_un.d_val + relocation;
225 switch (dynamic_entry->d_tag) {
226 case DT_HASH:
227 hash_ = reinterpret_cast<ElfW(Word) *>(value);
228 break;
229 case DT_SYMTAB:
230 dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
231 break;
232 case DT_STRTAB:
233 dynstr_ = reinterpret_cast<const char *>(value);
234 break;
235 case DT_VERSYM:
236 versym_ = reinterpret_cast<ElfW(Versym) *>(value);
237 break;
238 case DT_VERDEF:
239 verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
240 break;
241 case DT_VERDEFNUM:
242 verdefnum_ = dynamic_entry->d_un.d_val;
243 break;
244 case DT_STRSZ:
245 strsize_ = dynamic_entry->d_un.d_val;
246 break;
247 default:
248 // Unrecognized entries explicitly ignored.
249 break;
250 }
251 }
252 if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
253 !verdef_ || !verdefnum_ || !strsize_) {
254 assert(false); // invalid VDSO
255 // Mark this image as not present. Can not recur infinitely.
256 Init(nullptr);
257 return;
258 }
259 }
260
LookupSymbol(const char * name,const char * version,int type,SymbolInfo * info_out) const261 bool ElfMemImage::LookupSymbol(const char *name,
262 const char *version,
263 int type,
264 SymbolInfo *info_out) const {
265 for (const SymbolInfo& info : *this) {
266 if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
267 ElfType(info.symbol) == type) {
268 if (info_out) {
269 *info_out = info;
270 }
271 return true;
272 }
273 }
274 return false;
275 }
276
LookupSymbolByAddress(const void * address,SymbolInfo * info_out) const277 bool ElfMemImage::LookupSymbolByAddress(const void *address,
278 SymbolInfo *info_out) const {
279 for (const SymbolInfo& info : *this) {
280 const char *const symbol_start =
281 reinterpret_cast<const char *>(info.address);
282 const char *const symbol_end = symbol_start + info.symbol->st_size;
283 if (symbol_start <= address && address < symbol_end) {
284 if (info_out) {
285 // Client wants to know details for that symbol (the usual case).
286 if (ElfBind(info.symbol) == STB_GLOBAL) {
287 // Strong symbol; just return it.
288 *info_out = info;
289 return true;
290 } else {
291 // Weak or local. Record it, but keep looking for a strong one.
292 *info_out = info;
293 }
294 } else {
295 // Client only cares if there is an overlapping symbol.
296 return true;
297 }
298 }
299 }
300 return false;
301 }
302
SymbolIterator(const void * const image,int index)303 ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
304 : index_(index), image_(image) {
305 }
306
operator ->() const307 const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
308 return &info_;
309 }
310
operator *() const311 const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
312 return info_;
313 }
314
operator ==(const SymbolIterator & rhs) const315 bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
316 return this->image_ == rhs.image_ && this->index_ == rhs.index_;
317 }
318
operator !=(const SymbolIterator & rhs) const319 bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
320 return !(*this == rhs);
321 }
322
operator ++()323 ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
324 this->Update(1);
325 return *this;
326 }
327
begin() const328 ElfMemImage::SymbolIterator ElfMemImage::begin() const {
329 SymbolIterator it(this, 0);
330 it.Update(0);
331 return it;
332 }
333
end() const334 ElfMemImage::SymbolIterator ElfMemImage::end() const {
335 return SymbolIterator(this, GetNumSymbols());
336 }
337
Update(int increment)338 void ElfMemImage::SymbolIterator::Update(int increment) {
339 const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
340 ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
341 if (!image->IsPresent()) {
342 return;
343 }
344 index_ += increment;
345 if (index_ >= image->GetNumSymbols()) {
346 index_ = image->GetNumSymbols();
347 return;
348 }
349 const ElfW(Sym) *symbol = image->GetDynsym(index_);
350 const ElfW(Versym) *version_symbol = image->GetVersym(index_);
351 ABSL_RAW_CHECK(symbol && version_symbol, "");
352 const char *const symbol_name = image->GetDynstr(symbol->st_name);
353 const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
354 const ElfW(Verdef) *version_definition = nullptr;
355 const char *version_name = "";
356 if (symbol->st_shndx == SHN_UNDEF) {
357 // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
358 // version_index could well be greater than verdefnum_, so calling
359 // GetVerdef(version_index) may trigger assertion.
360 } else {
361 version_definition = image->GetVerdef(version_index);
362 }
363 if (version_definition) {
364 // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
365 // optional 2nd if the version has a parent.
366 ABSL_RAW_CHECK(
367 version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
368 "wrong number of entries");
369 const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
370 version_name = image->GetVerstr(version_aux->vda_name);
371 }
372 info_.name = symbol_name;
373 info_.version = version_name;
374 info_.address = image->GetSymAddr(symbol);
375 info_.symbol = symbol;
376 }
377
378 } // namespace debugging_internal
379 ABSL_NAMESPACE_END
380 } // namespace absl
381
382 #endif // ABSL_HAVE_ELF_MEM_IMAGE
383