1 /*
2 * Copyright 2014-2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ac_rtld.h"
25
26 #include "ac_binary.h"
27 #include "ac_gpu_info.h"
28 #include "util/u_dynarray.h"
29 #include "util/u_math.h"
30
31 #include <gelf.h>
32 #include <libelf.h>
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37
// Old distributions may not have this enum constant
#define MY_EM_AMDGPU 224 /* ELF e_machine value identifying AMDGPU objects */

#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
#endif

#ifndef SHN_AMDGPU_LDS
/* Special section index used by newer toolchains to mark LDS symbols. */
#define SHN_AMDGPU_LDS 0xff00
#endif

#ifndef R_AMDGPU_NONE
/* AMDGPU relocation types; defined here in case the system elf.h predates them. */
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
#define R_AMDGPU_ABS32_HI 2
#define R_AMDGPU_ABS64 3
#define R_AMDGPU_REL32 4
#define R_AMDGPU_REL64 5
#define R_AMDGPU_ABS32 6
#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO 10
#define R_AMDGPU_REL32_HI 11
#define R_AMDGPU_RELATIVE64 13
#endif

/* For the UMR disassembler. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5
68
/* Per-section layout state, filled in while placing a shader part in the
 * read-execute (rx) memory image. */
struct ac_rtld_section {
   bool is_rx : 1;          /* allocated in the rx memory image (SHF_ALLOC, non-note) */
   bool is_pasted_text : 1; /* executable ".text" that is concatenated with the
                             * .text of the other parts */
   uint64_t offset;         /* offset within the rx image (for pasted text: offset
                             * within the pasted-text region) */
   const char *name;        /* section name from the ELF section-header string table */
};
75
/* One shader part: an opened libelf handle plus per-section layout info. */
struct ac_rtld_part {
   Elf *elf;                         /* from elf_memory(); released by ac_rtld_close via elf_end */
   struct ac_rtld_section *sections; /* indexed by ELF section index; calloc'ed, freed on close */
   unsigned num_sections;
};
81
/* Print a formatted runtime-linker error message to stderr (va_list flavor). */
static void report_erroraf(const char *fmt, va_list va)
{
   char *msg = NULL;

   /* Format into a heap buffer; on failure fall back to a fixed notice. */
   if (vasprintf(&msg, fmt, va) < 0) {
      fprintf(stderr, "ac_rtld error: %s\n", "(vasprintf failed)");
      return;
   }

   fprintf(stderr, "ac_rtld error: %s\n", msg);
   free(msg);
}
94
95 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
96
report_errorf(const char * fmt,...)97 static void report_errorf(const char *fmt, ...)
98 {
99 va_list va;
100 va_start(va, fmt);
101 report_erroraf(fmt, va);
102 va_end(va);
103 }
104
105 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
106
report_elf_errorf(const char * fmt,...)107 static void report_elf_errorf(const char *fmt, ...)
108 {
109 va_list va;
110 va_start(va, fmt);
111 report_erroraf(fmt, va);
112 va_end(va);
113
114 fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
115 }
116
117 /**
118 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
119 * \p part_idx.
120 */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)121 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
122 const char *name, unsigned part_idx)
123 {
124 util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
125 if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
126 return symbol;
127 }
128 return 0;
129 }
130
compare_symbol_by_align(const void * lhsp,const void * rhsp)131 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
132 {
133 const struct ac_rtld_symbol *lhs = lhsp;
134 const struct ac_rtld_symbol *rhs = rhsp;
135 if (rhs->align > lhs->align)
136 return 1;
137 if (rhs->align < lhs->align)
138 return -1;
139 return 0;
140 }
141
142 /**
143 * Sort the given symbol list by decreasing alignment and assign offsets.
144 */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)145 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
146 uint64_t *ptotal_size)
147 {
148 qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
149
150 uint64_t total_size = *ptotal_size;
151
152 for (unsigned i = 0; i < num_symbols; ++i) {
153 struct ac_rtld_symbol *s = &symbols[i];
154 assert(util_is_power_of_two_nonzero(s->align));
155
156 total_size = align64(total_size, s->align);
157 s->offset = total_size;
158
159 if (total_size + s->size < total_size) {
160 report_errorf("%s: size overflow", __FUNCTION__);
161 return false;
162 }
163
164 total_size += s->size;
165 }
166
167 *ptotal_size = total_size;
168 return true;
169 }
170
/**
 * Read LDS symbols from the given \p section of the ELF of \p part and append
 * them to the LDS symbols list.
 *
 * Shared LDS symbols are filtered out.
 *
 * \param lds_end_align accumulates the maximum alignment requested for the
 *                      special "__lds_end" symbol across all parts
 */
static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
                                     Elf_Scn *section, uint32_t *lds_end_align)
{
/* Local error helpers: print the failed condition text and bail out. */
#define report_if(cond)                                                                            \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         return false;                                                                             \
      }                                                                                            \
   } while (false)
#define report_elf_if(cond)                                                                        \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_elf_errorf(#cond);                                                                 \
         return false;                                                                             \
      }                                                                                            \
   } while (false)

   struct ac_rtld_part *part = &binary->parts[part_idx];
   Elf64_Shdr *shdr = elf64_getshdr(section);
   /* sh_link of a symbol table section is the index of its string table. */
   uint32_t strtabidx = shdr->sh_link;
   Elf_Data *symbols_data = elf_getdata(section, NULL);
   report_elf_if(!symbols_data);

   const Elf64_Sym *symbol = symbols_data->d_buf;
   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);

   for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
      struct ac_rtld_symbol s = {0};

      if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
         /* old-style LDS symbols from initial prototype -- remove eventually */
         /* Alignment is encoded as a power-of-two exponent in the high bits
          * of st_other; clamp to 64 KiB. */
         s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
      } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
         /* New-style LDS symbols carry their alignment in st_value. */
         s.align = MIN2(symbol->st_value, 1u << 16);
         report_if(!util_is_power_of_two_nonzero(s.align));
      } else
         continue; /* not an LDS symbol */

      /* Reject implausibly large LDS objects (> 2^29 bytes). */
      report_if(symbol->st_size > 1u << 29);

      /* NOTE(review): elf_strptr can return NULL on a corrupt ELF; the
       * strcmp below assumes a valid name -- consider adding a check. */
      s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
      s.size = symbol->st_size;
      s.part_idx = part_idx;

      if (!strcmp(s.name, "__lds_end")) {
         /* __lds_end contributes only its alignment; it is laid out last
          * by ac_rtld_open, never appended here. */
         report_elf_if(s.size != 0);
         *lds_end_align = MAX2(*lds_end_align, s.align);
         continue;
      }

      /* If a shared LDS symbol of the same name exists, the private one is
       * subsumed by it -- but it must fit within the shared declaration. */
      const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
      if (shared) {
         report_elf_if(s.align > shared->align);
         report_elf_if(s.size > shared->size);
         continue;
      }

      util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
   }

   return true;

#undef report_if
#undef report_elf_if
}
243
/**
 * Open a binary consisting of one or more shader parts.
 *
 * Opens each part's ELF, computes the layout of the combined read-execute
 * (rx) memory image (pasted .text first, then the other rx sections), and
 * lays out shared and private LDS symbols. Nothing is uploaded yet; see
 * ac_rtld_upload.
 *
 * \param binary the uninitialized struct
 * \param i binary opening parameters
 * \return false on failure; \p binary is cleaned up via ac_rtld_close then
 */
bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
{
   /* One of the libelf implementations
    * (http://www.mr511.de/software/english.htm) requires calling
    * elf_version() before elf_memory().
    */
   elf_version(EV_CURRENT);

   memset(binary, 0, sizeof(*binary));
   memcpy(&binary->options, &i.options, sizeof(binary->options));
   binary->wave_size = i.wave_size;
   binary->num_parts = i.num_parts;
   binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
   if (!binary->parts)
      return false;

   uint64_t pasted_text_size = 0; /* running size of concatenated .text sections */
   uint64_t rx_align = 1;         /* max alignment over non-pasted rx sections */
   uint64_t rx_size = 0;          /* running size of non-pasted rx sections */
   uint64_t exec_size = 0;        /* total size of executable code */

/* Local error helpers: print the failed condition text and jump to cleanup. */
#define report_if(cond)                                                                            \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         goto fail;                                                                                \
      }                                                                                            \
   } while (false)
#define report_elf_if(cond)                                                                        \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_elf_errorf(#cond);                                                                 \
         goto fail;                                                                                \
      }                                                                                            \
   } while (false)

   /* Copy and layout shared LDS symbols. */
   if (i.num_shared_lds_symbols) {
      if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
                                i.num_shared_lds_symbols))
         goto fail;

      memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
   }

   /* Mark the copied symbols as shared (~0u matches every part in find_symbol). */
   util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
      symbol->part_idx = ~0u;

   unsigned max_lds_size = 64 * 1024;

   if (i.info->chip_class == GFX6 ||
       (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
      max_lds_size = 32 * 1024;

   uint64_t shared_lds_size = 0;
   if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
      goto fail;

   if (shared_lds_size > max_lds_size) {
      fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
              (unsigned)shared_lds_size, max_lds_size);
      goto fail;
   }
   binary->lds_size = shared_lds_size;

   /* First pass over all parts: open ELFs, pre-determine the placement of
    * sections in the memory image, and collect and layout private LDS symbols. */
   uint32_t lds_end_align = 0;

   if (binary->options.halt_at_entry)
      pasted_text_size += 4; /* reserve one dword for the s_sethalt instruction */

   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
      struct ac_rtld_part *part = &binary->parts[part_idx];
      unsigned part_lds_symbols_begin =
         util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);

      part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
      report_elf_if(!part->elf);

      const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
      report_elf_if(!ehdr);
      report_if(ehdr->e_machine != MY_EM_AMDGPU);

      size_t section_str_index;
      size_t num_shdrs;
      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
      report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);

      part->num_sections = num_shdrs;
      part->sections = calloc(sizeof(*part->sections), num_shdrs);
      report_if(!part->sections);

      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
         s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
         report_elf_if(!s->name);

         /* Cannot actually handle linked objects yet */
         report_elf_if(shdr->sh_addr != 0);

         /* Alignment must be 0 or a power of two */
         report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
         uint64_t sh_align = MAX2(shdr->sh_addralign, 1);

         if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
            /* The rx image is read-execute; writable sections are rejected. */
            report_if(shdr->sh_flags & SHF_WRITE);

            s->is_rx = true;

            if (shdr->sh_flags & SHF_EXECINSTR) {
               /* Code size must be a multiple of one dword (4 bytes). */
               report_elf_if(shdr->sh_size & 3);

               if (!strcmp(s->name, ".text"))
                  s->is_pasted_text = true;

               exec_size += shdr->sh_size;
            }

            if (s->is_pasted_text) {
               /* .text sections are concatenated back-to-back. */
               s->offset = pasted_text_size;
               pasted_text_size += shdr->sh_size;
            } else {
               /* Other rx sections are packed after the pasted text; their
                * offsets are made absolute in the second pass below. */
               rx_align = align(rx_align, sh_align);
               rx_size = align(rx_size, sh_align);
               s->offset = rx_size;
               rx_size += shdr->sh_size;
            }
         } else if (shdr->sh_type == SHT_SYMTAB) {
            if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
               goto fail;
         }
      }

      /* Lay out this part's private LDS symbols on top of the shared ones;
       * the binary's LDS size is the maximum over all parts. */
      uint64_t part_lds_size = shared_lds_size;
      if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
                                                part_lds_symbols_begin),
                          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
                             part_lds_symbols_begin,
                          &part_lds_size))
         goto fail;
      binary->lds_size = MAX2(binary->lds_size, part_lds_size);
   }

   /* Reserve room for the UMR end-of-code markers after the pasted text. */
   binary->rx_end_markers = pasted_text_size;
   pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;

   /* __lds_end is a special symbol that points at the end of the memory
    * occupied by other LDS symbols. Its alignment is taken as the
    * maximum of its alignment over all shader parts where it occurs.
    */
   if (lds_end_align) {
      binary->lds_size = align(binary->lds_size, lds_end_align);

      struct ac_rtld_symbol *lds_end =
         util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
      lds_end->name = "__lds_end";
      lds_end->size = 0;
      lds_end->align = lds_end_align;
      lds_end->offset = binary->lds_size;
      lds_end->part_idx = ~0u;
   }

   if (binary->lds_size > max_lds_size) {
      fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
              (unsigned)binary->lds_size, max_lds_size);
      goto fail;
   }

   /* Second pass: Adjust offsets of non-pasted text sections. */
   binary->rx_size = pasted_text_size;
   binary->rx_size = align(binary->rx_size, rx_align);

   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
      struct ac_rtld_part *part = &binary->parts[part_idx];
      size_t num_shdrs;
      /* NOTE(review): return value unchecked here; it was verified for the
       * same ELF in the first pass. */
      elf_getshdrnum(part->elf, &num_shdrs);

      for (unsigned j = 0; j < num_shdrs; ++j) {
         struct ac_rtld_section *s = &part->sections[j];
         if (s->is_rx && !s->is_pasted_text)
            s->offset += binary->rx_size;
      }
   }

   binary->rx_size += rx_size;
   binary->exec_size = exec_size;

   if (i.info->chip_class >= GFX10) {
      /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
       * ahead of the PC, configurable by SH_MEM_CONFIG and
       * S_INST_PREFETCH. This can cause two issues:
       *
       * (1) Crossing a page boundary to an unmapped page. The logic
       * does not distinguish between a required fetch and a "mere"
       * prefetch and will fault.
       *
       * (2) Prefetching instructions that will be changed for a
       * different shader.
       *
       * (2) is not currently an issue because we flush the I$ at IB
       * boundaries, but (1) needs to be addressed. Due to buffer
       * suballocation, we just play it safe.
       */
      binary->rx_size = align(binary->rx_size + 3 * 64, 64);
   }

   return true;

#undef report_if
#undef report_elf_if

fail:
   ac_rtld_close(binary);
   return false;
}
468
ac_rtld_close(struct ac_rtld_binary * binary)469 void ac_rtld_close(struct ac_rtld_binary *binary)
470 {
471 for (unsigned i = 0; i < binary->num_parts; ++i) {
472 struct ac_rtld_part *part = &binary->parts[i];
473 free(part->sections);
474 elf_end(part->elf);
475 }
476
477 util_dynarray_fini(&binary->lds_symbols);
478 free(binary->parts);
479 binary->parts = NULL;
480 binary->num_parts = 0;
481 }
482
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)483 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
484 size_t *nbytes)
485 {
486 for (unsigned i = 0; i < part->num_sections; ++i) {
487 struct ac_rtld_section *s = &part->sections[i];
488 if (s->name && !strcmp(name, s->name)) {
489 Elf_Scn *target_scn = elf_getscn(part->elf, i);
490 Elf_Data *target_data = elf_getdata(target_scn, NULL);
491 if (!target_data) {
492 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
493 return false;
494 }
495
496 *data = target_data->d_buf;
497 *nbytes = target_data->d_size;
498 return true;
499 }
500 }
501 return false;
502 }
503
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)504 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
505 size_t *nbytes)
506 {
507 assert(binary->num_parts == 1);
508 return get_section_by_name(&binary->parts[0], name, data, nbytes);
509 }
510
/**
 * Parse and merge the ".AMDGPU.config" section of every part into \p config.
 *
 * Register counts, spills, scratch and LDS use are combined by taking the
 * maximum; float_mode, SPI_PS_INPUT_* and rsrc1/rsrc2 are taken from a
 * single part (asserted to be unique/consistent across parts).
 *
 * \param config must be zero-initialized by the caller -- the asserts below
 *               rely on fields starting at 0
 * \return false if any part lacks an ".AMDGPU.config" section
 */
bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
                         struct ac_shader_config *config)
{
   for (unsigned i = 0; i < binary->num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      const char *config_data;
      size_t config_nbytes;

      if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
         return false;

      /* TODO: be precise about scratch use? */
      struct ac_shader_config c = {0};
      ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);

      /* Resource-style limits combine as the maximum over all parts. */
      config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
      config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
      config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
      config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
      config->scratch_bytes_per_wave =
         MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);

      /* All parts must agree on the float mode. */
      assert(i == 0 || config->float_mode == c.float_mode);
      config->float_mode = c.float_mode;

      /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
       * the main shader part is used. */
      assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
      config->spi_ps_input_ena = c.spi_ps_input_ena;
      config->spi_ps_input_addr = c.spi_ps_input_addr;

      /* TODO: consistently use LDS symbols for this */
      config->lds_size = MAX2(config->lds_size, c.lds_size);

      /* TODO: Should we combine these somehow? It's currently only
       * used for radeonsi's compute, where multiple parts aren't used. */
      assert(config->rsrc1 == 0 && config->rsrc2 == 0);
      config->rsrc1 = c.rsrc1;
      config->rsrc2 = c.rsrc2;
   }

   return true;
}
554
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)555 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
556 const Elf64_Sym *sym, const char *name, uint64_t *value)
557 {
558 /* TODO: properly disentangle the undef and the LDS cases once
559 * STT_AMDGPU_LDS is retired. */
560 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
561 const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
562
563 if (lds_sym) {
564 *value = lds_sym->offset;
565 return true;
566 }
567
568 /* TODO: resolve from other parts */
569
570 if (u->get_external_symbol(u->cb_data, name, value))
571 return true;
572
573 report_errorf("symbol %s: unknown", name);
574 return false;
575 }
576
577 struct ac_rtld_part *part = &u->binary->parts[part_idx];
578 if (sym->st_shndx >= part->num_sections) {
579 report_errorf("symbol %s: section out of bounds", name);
580 return false;
581 }
582
583 struct ac_rtld_section *s = &part->sections[sym->st_shndx];
584 if (!s->is_rx) {
585 report_errorf("symbol %s: bad section", name);
586 return false;
587 }
588
589 uint64_t section_base = u->rx_va + s->offset;
590
591 *value = section_base + sym->st_value;
592 return true;
593 }
594
/**
 * Apply all relocations from \p reloc_data (a SHT_REL section of \p part)
 * to the uploaded copy of the target section (reloc_shdr->sh_info).
 *
 * Addends are implicit (REL-style) and are read from the original ELF bytes
 * rather than from the destination, since the destination may live in VRAM.
 */
static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
                         const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
{
/* Local error helpers: print the failed condition text and bail out. */
#define report_if(cond)                                                                            \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_errorf(#cond);                                                                     \
         return false;                                                                             \
      }                                                                                            \
   } while (false)
#define report_elf_if(cond)                                                                        \
   do {                                                                                            \
      if ((cond)) {                                                                                \
         report_elf_errorf(#cond);                                                                 \
         return false;                                                                             \
      }                                                                                            \
   } while (false)

   struct ac_rtld_part *part = &u->binary->parts[part_idx];
   /* sh_info of a REL section indexes the section to be patched. */
   Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
   report_elf_if(!target_scn);

   Elf_Data *target_data = elf_getdata(target_scn, NULL);
   report_elf_if(!target_data);

   /* sh_link of a REL section indexes the associated symbol table. */
   Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
   report_elf_if(!symbols_scn);

   Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
   report_elf_if(!symbols_shdr);
   uint32_t strtabidx = symbols_shdr->sh_link; /* string table of the symtab */

   Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
   report_elf_if(!symbols_data);

   const Elf64_Sym *symbols = symbols_data->d_buf;
   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);

   struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
   report_if(!s->is_rx); /* only rx sections are uploaded and thus patchable */

   const char *orig_base = target_data->d_buf; /* unpatched bytes from the ELF */
   char *dst_base = u->rx_ptr + s->offset;     /* uploaded copy being patched */
   uint64_t va_base = u->rx_va + s->offset;    /* GPU virtual address of the copy */

   Elf64_Rel *rel = reloc_data->d_buf;
   size_t num_relocs = reloc_data->d_size / sizeof(*rel);
   for (size_t i = 0; i < num_relocs; ++i, ++rel) {
      size_t r_sym = ELF64_R_SYM(rel->r_info);
      unsigned r_type = ELF64_R_TYPE(rel->r_info);

      const char *orig_ptr = orig_base + rel->r_offset;
      char *dst_ptr = dst_base + rel->r_offset;
      uint64_t va = va_base + rel->r_offset;

      uint64_t symbol;
      uint64_t addend;

      if (r_sym == STN_UNDEF) {
         symbol = 0;
      } else {
         report_elf_if(r_sym >= num_symbols);

         const Elf64_Sym *sym = &symbols[r_sym];
         const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
         report_elf_if(!symbol_name);

         if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
            return false;
      }

      /* TODO: Should we also support .rela sections, where the
       * addend is part of the relocation record? */

      /* Load the addend from the ELF instead of the destination,
       * because the destination may be in VRAM. */
      switch (r_type) {
      case R_AMDGPU_ABS32:
      case R_AMDGPU_ABS32_LO:
      case R_AMDGPU_ABS32_HI:
      case R_AMDGPU_REL32:
      case R_AMDGPU_REL32_LO:
      case R_AMDGPU_REL32_HI:
         addend = *(const uint32_t *)orig_ptr;
         break;
      case R_AMDGPU_ABS64:
      case R_AMDGPU_REL64:
         addend = *(const uint64_t *)orig_ptr;
         break;
      default:
         report_errorf("unsupported r_type == %u", r_type);
         return false;
      }

      uint64_t abs = symbol + addend;

      switch (r_type) {
      case R_AMDGPU_ABS32:
         assert((uint32_t)abs == abs);
         /* fallthrough: after the range check, ABS32 is written like ABS32_LO */
      case R_AMDGPU_ABS32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
         break;
      case R_AMDGPU_ABS32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
         break;
      case R_AMDGPU_ABS64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
         break;
      case R_AMDGPU_REL32:
         assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
         /* fallthrough: after the range check, REL32 is written like REL32_LO */
      case R_AMDGPU_REL32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
         break;
      case R_AMDGPU_REL32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
         break;
      case R_AMDGPU_REL64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
         break;
      default:
         unreachable("bad r_type");
      }
   }

   return true;

#undef report_if
#undef report_elf_if
}
724
725 /**
726 * Upload the binary or binaries to the provided GPU buffers, including
727 * relocations.
728 */
ac_rtld_upload(struct ac_rtld_upload_info * u)729 bool ac_rtld_upload(struct ac_rtld_upload_info *u)
730 {
731 #define report_if(cond) \
732 do { \
733 if ((cond)) { \
734 report_errorf(#cond); \
735 return false; \
736 } \
737 } while (false)
738 #define report_elf_if(cond) \
739 do { \
740 if ((cond)) { \
741 report_errorf(#cond); \
742 return false; \
743 } \
744 } while (false)
745
746 if (u->binary->options.halt_at_entry) {
747 /* s_sethalt 1 */
748 *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
749 }
750
751 /* First pass: upload raw section data and lay out private LDS symbols. */
752 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
753 struct ac_rtld_part *part = &u->binary->parts[i];
754
755 Elf_Scn *section = NULL;
756 while ((section = elf_nextscn(part->elf, section))) {
757 Elf64_Shdr *shdr = elf64_getshdr(section);
758 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
759
760 if (!s->is_rx)
761 continue;
762
763 report_if(shdr->sh_type != SHT_PROGBITS);
764
765 Elf_Data *data = elf_getdata(section, NULL);
766 report_elf_if(!data || data->d_size != shdr->sh_size);
767 memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
768 }
769 }
770
771 if (u->binary->rx_end_markers) {
772 uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
773 for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
774 *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
775 }
776
777 /* Second pass: handle relocations, overwriting uploaded data where
778 * appropriate. */
779 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
780 struct ac_rtld_part *part = &u->binary->parts[i];
781 Elf_Scn *section = NULL;
782 while ((section = elf_nextscn(part->elf, section))) {
783 Elf64_Shdr *shdr = elf64_getshdr(section);
784 if (shdr->sh_type == SHT_REL) {
785 Elf_Data *relocs = elf_getdata(section, NULL);
786 report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
787 if (!apply_relocs(u, i, shdr, relocs))
788 return false;
789 } else if (shdr->sh_type == SHT_RELA) {
790 report_errorf("SHT_RELA not supported");
791 return false;
792 }
793 }
794 }
795
796 return true;
797
798 #undef report_if
799 #undef report_elf_if
800 }
801