1 /*
2 * Copyright 2014-2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ac_rtld.h"
25
26 #include "ac_binary.h"
27 #include "ac_gpu_info.h"
28 #include "util/compiler.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h"
31
32 #include <gelf.h>
33 #include <libelf.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 #ifndef EM_AMDGPU
40 // Old distributions may not have this enum constant
41 #define EM_AMDGPU 224
42 #endif
43
44 #ifndef STT_AMDGPU_LDS
45 #define STT_AMDGPU_LDS 13 // this is deprecated -- remove
46 #endif
47
48 #ifndef SHN_AMDGPU_LDS
49 #define SHN_AMDGPU_LDS 0xff00
50 #endif
51
52 #ifndef R_AMDGPU_NONE
53 #define R_AMDGPU_NONE 0
54 #define R_AMDGPU_ABS32_LO 1
55 #define R_AMDGPU_ABS32_HI 2
56 #define R_AMDGPU_ABS64 3
57 #define R_AMDGPU_REL32 4
58 #define R_AMDGPU_REL64 5
59 #define R_AMDGPU_ABS32 6
60 #define R_AMDGPU_GOTPCREL 7
61 #define R_AMDGPU_GOTPCREL32_LO 8
62 #define R_AMDGPU_GOTPCREL32_HI 9
63 #define R_AMDGPU_REL32_LO 10
64 #define R_AMDGPU_REL32_HI 11
65 #define R_AMDGPU_RELATIVE64 13
66 #endif
67
68 /* For the UMR disassembler. */
69 #define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
70 #define DEBUGGER_NUM_MARKERS 5
71
/* Per-section layout info: where an ELF section ends up in the final
 * read-execute (rx) memory image. One entry per ELF section index. */
struct ac_rtld_section {
   bool is_rx : 1;          /* part of the read-execute image (SHF_ALLOC, non-NOTE) */
   bool is_pasted_text : 1; /* a ".text" section concatenated with other parts' .text */
   uint64_t offset;         /* byte offset of the section within the rx image */
   const char *name;        /* section name from the section header string table */
};
78
/* State of a single shader part: its parsed ELF handle plus the layout
 * info of its sections (indexed by ELF section index). */
struct ac_rtld_part {
   Elf *elf;                         /* owned; released by ac_rtld_close via elf_end */
   struct ac_rtld_section *sections; /* calloc'ed array of num_sections entries */
   unsigned num_sections;
};
84
/* Print an "ac_rtld error: ..." message (with trailing newline) to stderr. */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
93
94 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
95
report_errorf(const char * fmt,...)96 static void report_errorf(const char *fmt, ...)
97 {
98 va_list va;
99 va_start(va, fmt);
100 report_errorvf(fmt, va);
101 va_end(va);
102 }
103
104 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
105
report_elf_errorf(const char * fmt,...)106 static void report_elf_errorf(const char *fmt, ...)
107 {
108 va_list va;
109 va_start(va, fmt);
110 report_errorvf(fmt, va);
111 va_end(va);
112
113 fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
114 }
115
116 /**
117 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
118 * \p part_idx.
119 */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)120 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
121 const char *name, unsigned part_idx)
122 {
123 util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
124 if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
125 return symbol;
126 }
127 return 0;
128 }
129
compare_symbol_by_align(const void * lhsp,const void * rhsp)130 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
131 {
132 const struct ac_rtld_symbol *lhs = lhsp;
133 const struct ac_rtld_symbol *rhs = rhsp;
134 if (rhs->align > lhs->align)
135 return 1;
136 if (rhs->align < lhs->align)
137 return -1;
138 return 0;
139 }
140
141 /**
142 * Sort the given symbol list by decreasing alignment and assign offsets.
143 */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)144 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
145 uint64_t *ptotal_size)
146 {
147 qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
148
149 uint64_t total_size = *ptotal_size;
150
151 for (unsigned i = 0; i < num_symbols; ++i) {
152 struct ac_rtld_symbol *s = &symbols[i];
153 assert(util_is_power_of_two_nonzero(s->align));
154
155 total_size = align64(total_size, s->align);
156 s->offset = total_size;
157
158 if (total_size + s->size < total_size) {
159 report_errorf("%s: size overflow", __FUNCTION__);
160 return false;
161 }
162
163 total_size += s->size;
164 }
165
166 *ptotal_size = total_size;
167 return true;
168 }
169
170 /**
171 * Read LDS symbols from the given \p section of the ELF of \p part and append
172 * them to the LDS symbols list.
173 *
174 * Shared LDS symbols are filtered out.
175 */
read_private_lds_symbols(struct ac_rtld_binary * binary,unsigned part_idx,Elf_Scn * section,uint32_t * lds_end_align)176 static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
177 Elf_Scn *section, uint32_t *lds_end_align)
178 {
179 #define report_if(cond) \
180 do { \
181 if ((cond)) { \
182 report_errorf(#cond); \
183 return false; \
184 } \
185 } while (false)
186 #define report_elf_if(cond) \
187 do { \
188 if ((cond)) { \
189 report_elf_errorf(#cond); \
190 return false; \
191 } \
192 } while (false)
193
194 struct ac_rtld_part *part = &binary->parts[part_idx];
195 Elf64_Shdr *shdr = elf64_getshdr(section);
196 uint32_t strtabidx = shdr->sh_link;
197 Elf_Data *symbols_data = elf_getdata(section, NULL);
198 report_elf_if(!symbols_data);
199
200 const Elf64_Sym *symbol = symbols_data->d_buf;
201 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
202
203 for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
204 struct ac_rtld_symbol s = {0};
205
206 if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
207 /* old-style LDS symbols from initial prototype -- remove eventually */
208 s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
209 } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
210 s.align = MIN2(symbol->st_value, 1u << 16);
211 report_if(!util_is_power_of_two_nonzero(s.align));
212 } else
213 continue;
214
215 report_if(symbol->st_size > 1u << 29);
216
217 s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
218 s.size = symbol->st_size;
219 s.part_idx = part_idx;
220
221 if (!strcmp(s.name, "__lds_end")) {
222 report_elf_if(s.size != 0);
223 *lds_end_align = MAX2(*lds_end_align, s.align);
224 continue;
225 }
226
227 const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
228 if (shared) {
229 report_elf_if(s.align > shared->align);
230 report_elf_if(s.size > shared->size);
231 continue;
232 }
233
234 util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
235 }
236
237 return true;
238
239 #undef report_if
240 #undef report_elf_if
241 }
242
/**
 * Open a binary consisting of one or more shader parts.
 *
 * Pre-computes the layout of the final memory image without touching GPU
 * memory: where each ELF section will land in the read-execute (rx) image,
 * the total rx size, and the offsets of all shared and private LDS symbols.
 * The actual copy happens later in ac_rtld_upload.
 *
 * \param binary the uninitialized struct
 * \param i binary opening parameters
 * \return false (after logging and releasing partial state) on any failure
 */
bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
{
   /* One of the libelf implementations
    * (http://www.mr511.de/software/english.htm) requires calling
    * elf_version() before elf_memory().
    */
   elf_version(EV_CURRENT);

   memset(binary, 0, sizeof(*binary));
   memcpy(&binary->options, &i.options, sizeof(binary->options));
   binary->wave_size = i.wave_size;
   binary->num_parts = i.num_parts;
   binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
   if (!binary->parts)
      return false;

   uint64_t pasted_text_size = 0;
   uint64_t rx_align = 1;
   uint64_t rx_size = 0;
   uint64_t exec_size = 0;

   /* On failure these jump to `fail`, where ac_rtld_close releases
    * everything set up so far. */
#define report_if(cond)                                                                            \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         goto fail;                                                                                \
      }                                                                                            \
   } while (false)
#define report_elf_if(cond)                                                                        \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_elf_errorf(#cond);                                                                 \
         goto fail;                                                                                \
      }                                                                                            \
   } while (false)

   /* Copy and layout shared LDS symbols. */
   if (i.num_shared_lds_symbols) {
      if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
                                i.num_shared_lds_symbols))
         goto fail;

      memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
   }

   /* part_idx == ~0u marks a symbol as shared among all parts. */
   util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
      symbol->part_idx = ~0u;

   unsigned max_lds_size = 64 * 1024;

   if (i.info->chip_class == GFX6 ||
       (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
      max_lds_size = 32 * 1024;

   uint64_t shared_lds_size = 0;
   if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
      goto fail;

   if (shared_lds_size > max_lds_size) {
      fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
              (unsigned)shared_lds_size, max_lds_size);
      goto fail;
   }
   binary->lds_size = shared_lds_size;

   /* First pass over all parts: open ELFs, pre-determine the placement of
    * sections in the memory image, and collect and layout private LDS symbols. */
   uint32_t lds_end_align = 0;

   /* Reserve room for the s_sethalt instruction written by ac_rtld_upload. */
   if (binary->options.halt_at_entry)
      pasted_text_size += 4;

   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
      struct ac_rtld_part *part = &binary->parts[part_idx];
      unsigned part_lds_symbols_begin =
         util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);

      part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
      report_elf_if(!part->elf);

      const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
      report_elf_if(!ehdr);
      report_if(ehdr->e_machine != EM_AMDGPU);

      size_t section_str_index;
      size_t num_shdrs;
      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
      report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);

      part->num_sections = num_shdrs;
      part->sections = calloc(sizeof(*part->sections), num_shdrs);
      report_if(!part->sections);

      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
         s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
         report_elf_if(!s->name);

         /* Cannot actually handle linked objects yet */
         report_elf_if(shdr->sh_addr != 0);

         /* Alignment must be 0 or a power of two */
         report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
         uint64_t sh_align = MAX2(shdr->sh_addralign, 1);

         if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
            report_if(shdr->sh_flags & SHF_WRITE);

            s->is_rx = true;

            if (shdr->sh_flags & SHF_EXECINSTR) {
               /* Instruction streams must be a multiple of 4 bytes. */
               report_elf_if(shdr->sh_size & 3);

               if (!strcmp(s->name, ".text"))
                  s->is_pasted_text = true;

               exec_size += shdr->sh_size;
            }

            if (s->is_pasted_text) {
               s->offset = pasted_text_size;
               pasted_text_size += shdr->sh_size;
            } else {
               /* Non-pasted rx sections get offsets relative to the start
                * of the non-pasted region; fixed up in the second pass. */
               rx_align = align(rx_align, sh_align);
               rx_size = align(rx_size, sh_align);
               s->offset = rx_size;
               rx_size += shdr->sh_size;
            }
         } else if (shdr->sh_type == SHT_SYMTAB) {
            if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
               goto fail;
         }
      }

      /* Private LDS symbols of this part are laid out after the shared
       * ones; the binary's LDS size is the maximum over all parts. */
      uint64_t part_lds_size = shared_lds_size;
      if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
                                                part_lds_symbols_begin),
                          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
                             part_lds_symbols_begin,
                          &part_lds_size))
         goto fail;
      binary->lds_size = MAX2(binary->lds_size, part_lds_size);
   }

   /* Reserve room for the UMR end-of-code markers after the pasted text. */
   binary->rx_end_markers = pasted_text_size;
   pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;

   /* __lds_end is a special symbol that points at the end of the memory
    * occupied by other LDS symbols. Its alignment is taken as the
    * maximum of its alignment over all shader parts where it occurs.
    */
   if (lds_end_align) {
      binary->lds_size = align(binary->lds_size, lds_end_align);

      struct ac_rtld_symbol *lds_end =
         util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
      lds_end->name = "__lds_end";
      lds_end->size = 0;
      lds_end->align = lds_end_align;
      lds_end->offset = binary->lds_size;
      lds_end->part_idx = ~0u;
   }

   if (binary->lds_size > max_lds_size) {
      fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
              (unsigned)binary->lds_size, max_lds_size);
      goto fail;
   }

   /* Second pass: Adjust offsets of non-pasted text sections. */
   binary->rx_size = pasted_text_size;
   binary->rx_size = align(binary->rx_size, rx_align);

   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
      struct ac_rtld_part *part = &binary->parts[part_idx];
      size_t num_shdrs;
      elf_getshdrnum(part->elf, &num_shdrs);

      for (unsigned j = 0; j < num_shdrs; ++j) {
         struct ac_rtld_section *s = &part->sections[j];
         if (s->is_rx && !s->is_pasted_text)
            s->offset += binary->rx_size;
      }
   }

   binary->rx_size += rx_size;
   binary->exec_size = exec_size;

   /* The SQ fetches up to N cache lines of 16 dwords
    * ahead of the PC, configurable by SH_MEM_CONFIG and
    * S_INST_PREFETCH. This can cause two issues:
    *
    * (1) Crossing a page boundary to an unmapped page. The logic
    * does not distinguish between a required fetch and a "mere"
    * prefetch and will fault.
    *
    * (2) Prefetching instructions that will be changed for a
    * different shader.
    *
    * (2) is not currently an issue because we flush the I$ at IB
    * boundaries, but (1) needs to be addressed. Due to buffer
    * suballocation, we just play it safe.
    */
   unsigned prefetch_distance = 0;

   if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
      prefetch_distance = 16;
   else if (i.info->chip_class >= GFX10)
      prefetch_distance = 3;

   if (prefetch_distance)
      binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);

   return true;

#undef report_if
#undef report_elf_if

fail:
   ac_rtld_close(binary);
   return false;
}
473
ac_rtld_close(struct ac_rtld_binary * binary)474 void ac_rtld_close(struct ac_rtld_binary *binary)
475 {
476 for (unsigned i = 0; i < binary->num_parts; ++i) {
477 struct ac_rtld_part *part = &binary->parts[i];
478 free(part->sections);
479 elf_end(part->elf);
480 }
481
482 util_dynarray_fini(&binary->lds_symbols);
483 free(binary->parts);
484 binary->parts = NULL;
485 binary->num_parts = 0;
486 }
487
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)488 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
489 size_t *nbytes)
490 {
491 for (unsigned i = 0; i < part->num_sections; ++i) {
492 struct ac_rtld_section *s = &part->sections[i];
493 if (s->name && !strcmp(name, s->name)) {
494 Elf_Scn *target_scn = elf_getscn(part->elf, i);
495 Elf_Data *target_data = elf_getdata(target_scn, NULL);
496 if (!target_data) {
497 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
498 return false;
499 }
500
501 *data = target_data->d_buf;
502 *nbytes = target_data->d_size;
503 return true;
504 }
505 }
506 return false;
507 }
508
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)509 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
510 size_t *nbytes)
511 {
512 assert(binary->num_parts == 1);
513 return get_section_by_name(&binary->parts[0], name, data, nbytes);
514 }
515
ac_rtld_read_config(const struct radeon_info * info,struct ac_rtld_binary * binary,struct ac_shader_config * config)516 bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
517 struct ac_shader_config *config)
518 {
519 for (unsigned i = 0; i < binary->num_parts; ++i) {
520 struct ac_rtld_part *part = &binary->parts[i];
521 const char *config_data;
522 size_t config_nbytes;
523
524 if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
525 return false;
526
527 /* TODO: be precise about scratch use? */
528 struct ac_shader_config c = {0};
529 ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);
530
531 config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
532 config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
533 config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
534 config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
535 config->scratch_bytes_per_wave =
536 MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
537
538 assert(i == 0 || config->float_mode == c.float_mode);
539 config->float_mode = c.float_mode;
540
541 /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
542 * the main shader part is used. */
543 assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
544 config->spi_ps_input_ena = c.spi_ps_input_ena;
545 config->spi_ps_input_addr = c.spi_ps_input_addr;
546
547 /* TODO: consistently use LDS symbols for this */
548 config->lds_size = MAX2(config->lds_size, c.lds_size);
549
550 /* TODO: Should we combine these somehow? It's currently only
551 * used for radeonsi's compute, where multiple parts aren't used. */
552 assert(config->rsrc1 == 0 && config->rsrc2 == 0);
553 config->rsrc1 = c.rsrc1;
554 config->rsrc2 = c.rsrc2;
555 }
556
557 return true;
558 }
559
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)560 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
561 const Elf64_Sym *sym, const char *name, uint64_t *value)
562 {
563 /* TODO: properly disentangle the undef and the LDS cases once
564 * STT_AMDGPU_LDS is retired. */
565 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
566 const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
567
568 if (lds_sym) {
569 *value = lds_sym->offset;
570 return true;
571 }
572
573 /* TODO: resolve from other parts */
574
575 if (u->get_external_symbol(u->cb_data, name, value))
576 return true;
577
578 report_errorf("symbol %s: unknown", name);
579 return false;
580 }
581
582 struct ac_rtld_part *part = &u->binary->parts[part_idx];
583 if (sym->st_shndx >= part->num_sections) {
584 report_errorf("symbol %s: section out of bounds", name);
585 return false;
586 }
587
588 struct ac_rtld_section *s = &part->sections[sym->st_shndx];
589 if (!s->is_rx) {
590 report_errorf("symbol %s: bad section", name);
591 return false;
592 }
593
594 uint64_t section_base = u->rx_va + s->offset;
595
596 *value = section_base + sym->st_value;
597 return true;
598 }
599
/**
 * Apply all relocations from one SHT_REL section to its target section in
 * the uploaded image.
 *
 * Addends are read from the original ELF data (never from the destination,
 * which may be in write-combined VRAM) and results are written to
 * u->rx_ptr in little-endian byte order.
 *
 * \param reloc_shdr header of the SHT_REL section
 * \param reloc_data data of the SHT_REL section (array of Elf64_Rel)
 * \return false (after logging) on malformed input or unresolved symbols
 */
static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
                         const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
{
#define report_if(cond)                                                                            \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         return false;                                                                             \
      }                                                                                            \
   } while (false)
#define report_elf_if(cond)                                                                        \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_elf_errorf(#cond);                                                                 \
         return false;                                                                             \
      }                                                                                            \
   } while (false)

   struct ac_rtld_part *part = &u->binary->parts[part_idx];
   /* For SHT_REL sections, sh_info is the index of the section the
    * relocations apply to. */
   Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
   report_elf_if(!target_scn);

   Elf_Data *target_data = elf_getdata(target_scn, NULL);
   report_elf_if(!target_data);

   /* sh_link is the index of the associated symbol table. */
   Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
   report_elf_if(!symbols_scn);

   Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
   report_elf_if(!symbols_shdr);
   uint32_t strtabidx = symbols_shdr->sh_link;

   Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
   report_elf_if(!symbols_data);

   const Elf64_Sym *symbols = symbols_data->d_buf;
   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);

   /* Relocations may only target sections that are part of the rx image. */
   struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
   report_if(!s->is_rx);

   const char *orig_base = target_data->d_buf;  /* original ELF bytes (addend source) */
   char *dst_base = u->rx_ptr + s->offset;      /* uploaded copy (write target) */
   uint64_t va_base = u->rx_va + s->offset;     /* GPU virtual address of the section */

   Elf64_Rel *rel = reloc_data->d_buf;
   size_t num_relocs = reloc_data->d_size / sizeof(*rel);
   for (size_t i = 0; i < num_relocs; ++i, ++rel) {
      size_t r_sym = ELF64_R_SYM(rel->r_info);
      unsigned r_type = ELF64_R_TYPE(rel->r_info);

      const char *orig_ptr = orig_base + rel->r_offset;
      char *dst_ptr = dst_base + rel->r_offset;
      uint64_t va = va_base + rel->r_offset;

      uint64_t symbol;
      uint64_t addend;

      if (r_sym == STN_UNDEF) {
         symbol = 0;
      } else {
         report_elf_if(r_sym >= num_symbols);

         const Elf64_Sym *sym = &symbols[r_sym];
         const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
         report_elf_if(!symbol_name);

         if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
            return false;
      }

      /* TODO: Should we also support .rela sections, where the
       * addend is part of the relocation record? */

      /* Load the addend from the ELF instead of the destination,
       * because the destination may be in VRAM. */
      switch (r_type) {
      case R_AMDGPU_ABS32:
      case R_AMDGPU_ABS32_LO:
      case R_AMDGPU_ABS32_HI:
      case R_AMDGPU_REL32:
      case R_AMDGPU_REL32_LO:
      case R_AMDGPU_REL32_HI:
         addend = *(const uint32_t *)orig_ptr;
         break;
      case R_AMDGPU_ABS64:
      case R_AMDGPU_REL64:
         addend = *(const uint64_t *)orig_ptr;
         break;
      default:
         report_errorf("unsupported r_type == %u", r_type);
         return false;
      }

      uint64_t abs = symbol + addend;

      /* ABS* write the absolute value, REL* write the PC-relative delta;
       * *_LO/*_HI select the low/high 32 bits of the 64-bit result. */
      switch (r_type) {
      case R_AMDGPU_ABS32:
         assert((uint32_t)abs == abs);
         FALLTHROUGH;
      case R_AMDGPU_ABS32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
         break;
      case R_AMDGPU_ABS32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
         break;
      case R_AMDGPU_ABS64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
         break;
      case R_AMDGPU_REL32:
         assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
         FALLTHROUGH;
      case R_AMDGPU_REL32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
         break;
      case R_AMDGPU_REL32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
         break;
      case R_AMDGPU_REL64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
         break;
      default:
         unreachable("bad r_type");
      }
   }

   return true;

#undef report_if
#undef report_elf_if
}
731
/**
 * Upload the binary or binaries to the provided GPU buffers, including
 * relocations.
 *
 * Expects u->rx_ptr to point at a CPU-writable mapping of at least
 * binary->rx_size bytes, with u->rx_va its GPU virtual address.
 *
 * \return the number of bytes written to the rx buffer, or -1 on error
 */
int ac_rtld_upload(struct ac_rtld_upload_info *u)
{
#define report_if(cond)                                                                            \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         return -1;                                                                                \
      }                                                                                            \
   } while (false)
/* NOTE(review): unlike the other report_elf_if macros in this file, this
 * one calls report_errorf and thus never prints the libelf error string --
 * confirm whether that is intentional. */
#define report_elf_if(cond)                                                                        \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         return -1;                                                                                \
      }                                                                                            \
   } while (false)

   /* Running high-water mark of bytes written to u->rx_ptr. */
   int size = 0;
   if (u->binary->options.halt_at_entry) {
      /* s_sethalt 1 */
      *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
   }

   /* First pass: copy the raw data of all rx sections into the buffer at
    * the offsets computed by ac_rtld_open. */
   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
      struct ac_rtld_part *part = &u->binary->parts[i];

      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];

         if (!s->is_rx)
            continue;

         report_if(shdr->sh_type != SHT_PROGBITS);

         Elf_Data *data = elf_getdata(section, NULL);
         report_elf_if(!data || data->d_size != shdr->sh_size);
         memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);

         size = MAX2(size, s->offset + shdr->sh_size);
      }
   }

   /* Append the UMR end-of-code markers after the pasted text. */
   if (u->binary->rx_end_markers) {
      uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
      for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
         *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
      size += 4 * DEBUGGER_NUM_MARKERS;
   }

   /* Second pass: handle relocations, overwriting uploaded data where
    * appropriate. */
   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
      struct ac_rtld_part *part = &u->binary->parts[i];
      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         if (shdr->sh_type == SHT_REL) {
            Elf_Data *relocs = elf_getdata(section, NULL);
            report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
            if (!apply_relocs(u, i, shdr, relocs))
               return -1;
         } else if (shdr->sh_type == SHT_RELA) {
            report_errorf("SHT_RELA not supported");
            return -1;
         }
      }
   }

   return size;

#undef report_if
#undef report_elf_if
}
812