• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2024 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radv_private.h"
8 
9 #include "util/hash_table.h"
10 #include "util/strndup.h"
11 #include "util/u_printf.h"
12 
13 #include "nir.h"
14 #include "nir_builder.h"
15 
/* Maps a nir_shader pointer to the radv_device it was associated with by
 * radv_device_associate_nir(), so that radv_build_printf() can find the device
 * starting from a nir_builder. Stays NULL until the first association.
 */
static struct hash_table *device_ht;
17 
18 VkResult
radv_printf_data_init(struct radv_device * device)19 radv_printf_data_init(struct radv_device *device)
20 {
21    util_dynarray_init(&device->printf.formats, NULL);
22 
23    device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
24    if (device->printf.buffer_size < sizeof(struct radv_printf_buffer_header))
25       return VK_SUCCESS;
26 
27    VkBufferCreateInfo buffer_create_info = {
28       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
29       .pNext =
30          &(VkBufferUsageFlags2CreateInfoKHR){
31             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
32             .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR,
33          },
34       .size = device->printf.buffer_size,
35    };
36 
37    VkDevice _device = radv_device_to_handle(device);
38    VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->printf.buffer);
39    if (result != VK_SUCCESS)
40       return result;
41 
42    VkMemoryRequirements requirements;
43    device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->printf.buffer, &requirements);
44 
45    VkMemoryAllocateInfo alloc_info = {
46       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
47       .allocationSize = requirements.size,
48       .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
49                                                                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
50                                                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
51    };
52 
53    result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);
54    if (result != VK_SUCCESS)
55       return result;
56 
57    result = device->vk.dispatch_table.MapMemory(_device, device->printf.memory, 0, VK_WHOLE_SIZE, 0,
58                                                 (void **)&device->printf.data);
59    if (result != VK_SUCCESS)
60       return result;
61 
62    result = device->vk.dispatch_table.BindBufferMemory(_device, device->printf.buffer, device->printf.memory, 0);
63    if (result != VK_SUCCESS)
64       return result;
65 
66    struct radv_printf_buffer_header *header = device->printf.data;
67    header->offset = sizeof(struct radv_printf_buffer_header);
68    header->size = device->printf.buffer_size;
69 
70    VkBufferDeviceAddressInfo addr_info = {
71       .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
72       .buffer = device->printf.buffer,
73    };
74    device->printf.buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
75 
76    return VK_SUCCESS;
77 }
78 
79 void
radv_printf_data_finish(struct radv_device * device)80 radv_printf_data_finish(struct radv_device *device)
81 {
82    VkDevice _device = radv_device_to_handle(device);
83 
84    device->vk.dispatch_table.DestroyBuffer(_device, device->printf.buffer, NULL);
85    if (device->printf.memory)
86       device->vk.dispatch_table.UnmapMemory(_device, device->printf.memory);
87    device->vk.dispatch_table.FreeMemory(_device, device->printf.memory, NULL);
88 
89    util_dynarray_foreach (&device->printf.formats, struct radv_printf_format, format)
90       free(format->string);
91 
92    util_dynarray_fini(&device->printf.formats);
93 }
94 
95 void
radv_build_printf(nir_builder * b,nir_def * cond,const char * format_string,...)96 radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
97 {
98    if (!device_ht)
99       return;
100 
101    struct radv_device *device = _mesa_hash_table_search(device_ht, b->shader)->data;
102    if (!device->printf.buffer_addr)
103       return;
104 
105    struct radv_printf_format format = {0};
106    format.string = strdup(format_string);
107    if (!format.string)
108       return;
109 
110    uint32_t format_index = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);
111 
112    if (cond)
113       nir_push_if(b, cond);
114 
115    nir_def *size = nir_imm_int(b, 4);
116 
117    va_list arg_list;
118    va_start(arg_list, format_string);
119 
120    uint32_t num_args = 0;
121    for (uint32_t i = 0; i < strlen(format_string); i++)
122       if (format_string[i] == '%')
123          num_args++;
124 
125    nir_def **args = malloc(num_args * sizeof(nir_def *));
126    nir_def **strides = malloc(num_args * sizeof(nir_def *));
127 
128    nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
129    nir_def *active_invocation_count = nir_bit_count(b, ballot);
130 
131    for (uint32_t i = 0; i < num_args; i++) {
132       nir_def *arg = va_arg(arg_list, nir_def *);
133 
134       if (arg->bit_size == 1)
135          arg = nir_b2i32(b, arg);
136 
137       args[i] = arg;
138 
139       uint32_t arg_size = arg->bit_size == 1 ? 32 : arg->bit_size / 8;
140       format.element_sizes[i] = arg_size;
141 
142       nir_update_instr_divergence(b->shader, arg->parent_instr);
143 
144       if (arg->divergent) {
145          strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
146          format.divergence_mask |= BITFIELD_BIT(i);
147       } else {
148          strides[i] = nir_imm_int(b, arg_size);
149       }
150 
151       size = nir_iadd(b, size, strides[i]);
152    }
153 
154    va_end(arg_list);
155 
156    nir_def *offset;
157    nir_def *undef;
158 
159    nir_push_if(b, nir_elect(b, 1));
160    {
161       offset = nir_global_atomic(
162          b, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
163          .atomic_op = nir_atomic_op_iadd);
164    }
165    nir_push_else(b, NULL);
166    {
167       undef = nir_undef(b, 1, 32);
168    }
169    nir_pop_if(b, NULL);
170 
171    offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));
172 
173    nir_def *buffer_size = nir_load_global(
174       b, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)), 4, 1, 32);
175 
176    nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
177    {
178       nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), device->printf.buffer_addr);
179 
180       /* header */
181       nir_store_global(b, addr, 4, nir_ior_imm(b, active_invocation_count, format_index << 16), 1);
182       addr = nir_iadd_imm(b, addr, 4);
183 
184       for (uint32_t i = 0; i < num_args; i++) {
185          nir_def *arg = args[i];
186 
187          if (arg->divergent) {
188             nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
189             nir_store_global(
190                b, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))), 4, arg,
191                1);
192          } else {
193             nir_store_global(b, addr, 4, arg, 1);
194          }
195 
196          addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
197       }
198    }
199    nir_pop_if(b, NULL);
200 
201    if (cond)
202       nir_pop_if(b, NULL);
203 
204    free(args);
205    free(strides);
206 
207    util_dynarray_append(&device->printf.formats, struct radv_printf_format, format);
208 }
209 
210 void
radv_dump_printf_data(struct radv_device * device)211 radv_dump_printf_data(struct radv_device *device)
212 {
213    if (!device->printf.data)
214       return;
215 
216    device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));
217 
218    struct radv_printf_buffer_header *header = device->printf.data;
219    uint8_t *data = device->printf.data;
220 
221    for (uint32_t offset = sizeof(struct radv_printf_buffer_header); offset < header->offset;) {
222       uint32_t printf_header = *(uint32_t *)&data[offset];
223       offset += sizeof(uint32_t);
224 
225       uint32_t format_index = printf_header >> 16;
226       struct radv_printf_format *printf_format =
227          util_dynarray_element(&device->printf.formats, struct radv_printf_format, format_index);
228 
229       uint32_t invocation_count = printf_header & 0xFFFF;
230 
231       uint32_t num_args = 0;
232       for (uint32_t i = 0; i < strlen(printf_format->string); i++)
233          if (printf_format->string[i] == '%')
234             num_args++;
235 
236       char *format = printf_format->string;
237 
238       for (uint32_t i = 0; i <= num_args; i++) {
239          size_t spec_pos = util_printf_next_spec_pos(format, 0);
240 
241          if (spec_pos == -1) {
242             printf("%s", format);
243             continue;
244          }
245 
246          const char *token = util_printf_prev_tok(&format[spec_pos]);
247          char *next_format = &format[spec_pos + 1];
248 
249          /* print the part before the format token */
250          if (token != format) {
251             fwrite(format, token - format, 1, stdout);
252             fflush(stdout);
253          }
254 
255          char *print_str = strndup(token, next_format - token);
256          /* rebase spec_pos so we can use it with print_str */
257          spec_pos += format - token;
258 
259          size_t element_size = printf_format->element_sizes[i];
260          bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;
261 
262          uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
263          for (uint32_t lane = 0; lane < lane_count; lane++) {
264             switch (element_size) {
265             case 1: {
266                uint8_t v;
267                memcpy(&v, &data[offset], element_size);
268                printf(print_str, v);
269                break;
270             }
271             case 2: {
272                uint16_t v;
273                memcpy(&v, &data[offset], element_size);
274                printf(print_str, v);
275                break;
276             }
277             case 4: {
278                if (is_float) {
279                   float v;
280                   memcpy(&v, &data[offset], element_size);
281                   printf(print_str, v);
282                } else {
283                   uint32_t v;
284                   memcpy(&v, &data[offset], element_size);
285                   printf(print_str, v);
286                }
287                break;
288             }
289             case 8: {
290                if (is_float) {
291                   double v;
292                   memcpy(&v, &data[offset], element_size);
293                   printf(print_str, v);
294                } else {
295                   uint64_t v;
296                   memcpy(&v, &data[offset], element_size);
297                   printf(print_str, v);
298                }
299                break;
300             }
301             default:
302                unreachable("Unsupported data type");
303             }
304 
305             if (lane != lane_count - 1)
306                printf(" ");
307 
308             offset += element_size;
309          }
310 
311          /* rebase format */
312          format = next_format;
313          free(print_str);
314       }
315    }
316 
317    header->offset = sizeof(struct radv_printf_buffer_header);
318 }
319 
320 void
radv_device_associate_nir(struct radv_device * device,nir_shader * nir)321 radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
322 {
323    if (!device->printf.buffer_addr)
324       return;
325 
326    if (!device_ht)
327       device_ht = _mesa_pointer_hash_table_create(NULL);
328 
329    _mesa_hash_table_insert(device_ht, nir, device);
330 }
331