1 /*
2 * Copyright © 2024 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radv_printf.h"
8 #include "radv_device.h"
9 #include "radv_physical_device.h"
10
11 #include "util/hash_table.h"
12 #include "util/strndup.h"
13 #include "util/u_printf.h"
14
15 #include "nir.h"
16 #include "nir_builder.h"
17
/* File-scope map from a nir_shader pointer to the radv_device registered for it by
 * radv_device_associate_nir(). radv_build_printf() looks the builder's shader up here to
 * find the device whose printf buffer it should target. The table is created on the first
 * registration and is never destroyed in this file; no locking is done around it here.
 */
static struct hash_table *device_ht = NULL;
19
20 VkResult
radv_printf_data_init(struct radv_device * device)21 radv_printf_data_init(struct radv_device *device)
22 {
23 const struct radv_physical_device *pdev = radv_device_physical(device);
24
25 util_dynarray_init(&device->printf.formats, NULL);
26
27 device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
28 if (device->printf.buffer_size < sizeof(struct radv_printf_buffer_header))
29 return VK_SUCCESS;
30
31 VkBufferCreateInfo buffer_create_info = {
32 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
33 .pNext =
34 &(VkBufferUsageFlags2CreateInfo){
35 .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
36 .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
37 },
38 .size = device->printf.buffer_size,
39 };
40
41 VkDevice _device = radv_device_to_handle(device);
42 VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->printf.buffer);
43 if (result != VK_SUCCESS)
44 return result;
45
46 VkMemoryRequirements requirements;
47 device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->printf.buffer, &requirements);
48
49 VkMemoryAllocateInfo alloc_info = {
50 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
51 .allocationSize = requirements.size,
52 .memoryTypeIndex =
53 radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
54 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
55 };
56
57 result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);
58 if (result != VK_SUCCESS)
59 return result;
60
61 result = device->vk.dispatch_table.MapMemory(_device, device->printf.memory, 0, VK_WHOLE_SIZE, 0,
62 (void **)&device->printf.data);
63 if (result != VK_SUCCESS)
64 return result;
65
66 result = device->vk.dispatch_table.BindBufferMemory(_device, device->printf.buffer, device->printf.memory, 0);
67 if (result != VK_SUCCESS)
68 return result;
69
70 struct radv_printf_buffer_header *header = device->printf.data;
71 header->offset = sizeof(struct radv_printf_buffer_header);
72 header->size = device->printf.buffer_size;
73
74 VkBufferDeviceAddressInfo addr_info = {
75 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
76 .buffer = device->printf.buffer,
77 };
78 device->printf.buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
79
80 return VK_SUCCESS;
81 }
82
83 void
radv_printf_data_finish(struct radv_device * device)84 radv_printf_data_finish(struct radv_device *device)
85 {
86 VkDevice _device = radv_device_to_handle(device);
87
88 device->vk.dispatch_table.DestroyBuffer(_device, device->printf.buffer, NULL);
89 if (device->printf.memory)
90 device->vk.dispatch_table.UnmapMemory(_device, device->printf.memory);
91 device->vk.dispatch_table.FreeMemory(_device, device->printf.memory, NULL);
92
93 util_dynarray_foreach (&device->printf.formats, struct radv_printf_format, format)
94 free(format->string);
95
96 util_dynarray_fini(&device->printf.formats);
97 }
98
99 void
radv_build_printf(nir_builder * b,nir_def * cond,const char * format_string,...)100 radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
101 {
102 if (!device_ht)
103 return;
104
105 struct radv_device *device = _mesa_hash_table_search(device_ht, b->shader)->data;
106 if (!device->printf.buffer_addr)
107 return;
108
109 struct radv_printf_format format = {0};
110 format.string = strdup(format_string);
111 if (!format.string)
112 return;
113
114 uint32_t format_index = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);
115
116 if (cond)
117 nir_push_if(b, cond);
118
119 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
120 nir_push_if(b, nir_inot(b, nir_is_helper_invocation(b, 1)));
121
122 nir_def *size = nir_imm_int(b, 4);
123
124 va_list arg_list;
125 va_start(arg_list, format_string);
126
127 uint32_t num_args = 0;
128 for (uint32_t i = 0; i < strlen(format_string); i++)
129 if (format_string[i] == '%')
130 num_args++;
131
132 nir_def **args = malloc(num_args * sizeof(nir_def *));
133 nir_def **strides = malloc(num_args * sizeof(nir_def *));
134
135 nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
136 nir_def *active_invocation_count = nir_bit_count(b, ballot);
137
138 for (uint32_t i = 0; i < num_args; i++) {
139 nir_def *arg = va_arg(arg_list, nir_def *);
140 bool divergent = arg->divergent;
141
142 if (arg->bit_size == 1)
143 arg = nir_b2i32(b, arg);
144
145 args[i] = arg;
146
147 uint32_t arg_size = arg->bit_size == 1 ? 32 : arg->bit_size / 8;
148 format.element_sizes[i] = arg_size;
149
150 if (divergent) {
151 strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
152 format.divergence_mask |= BITFIELD_BIT(i);
153 } else {
154 strides[i] = nir_imm_int(b, arg_size);
155 }
156
157 size = nir_iadd(b, size, strides[i]);
158 }
159
160 va_end(arg_list);
161
162 nir_def *offset;
163 nir_def *undef;
164
165 nir_push_if(b, nir_elect(b, 1));
166 {
167 offset = nir_global_atomic(
168 b, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
169 .atomic_op = nir_atomic_op_iadd);
170 }
171 nir_push_else(b, NULL);
172 {
173 undef = nir_undef(b, 1, 32);
174 }
175 nir_pop_if(b, NULL);
176
177 offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));
178
179 nir_def *buffer_size = nir_load_global(
180 b, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)), 4, 1, 32);
181
182 nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
183 {
184 nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), device->printf.buffer_addr);
185
186 /* header */
187 nir_store_global(b, addr, 4, nir_ior_imm(b, active_invocation_count, format_index << 16), 1);
188 addr = nir_iadd_imm(b, addr, 4);
189
190 for (uint32_t i = 0; i < num_args; i++) {
191 nir_def *arg = args[i];
192
193 if (arg->divergent) {
194 nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
195 nir_store_global(
196 b, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))), 4, arg,
197 1);
198 } else {
199 nir_store_global(b, addr, 4, arg, 1);
200 }
201
202 addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
203 }
204 }
205 nir_pop_if(b, NULL);
206
207 if (cond)
208 nir_pop_if(b, NULL);
209
210 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
211 nir_pop_if(b, NULL);
212
213 free(args);
214 free(strides);
215
216 util_dynarray_append(&device->printf.formats, struct radv_printf_format, format);
217 }
218
219 void
radv_dump_printf_data(struct radv_device * device,FILE * out)220 radv_dump_printf_data(struct radv_device *device, FILE *out)
221 {
222 if (!device->printf.data)
223 return;
224
225 device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));
226
227 struct radv_printf_buffer_header *header = device->printf.data;
228 uint8_t *data = device->printf.data;
229
230 for (uint32_t offset = sizeof(struct radv_printf_buffer_header); offset < header->offset;) {
231 uint32_t printf_header = *(uint32_t *)&data[offset];
232 offset += sizeof(uint32_t);
233
234 uint32_t format_index = printf_header >> 16;
235 struct radv_printf_format *printf_format =
236 util_dynarray_element(&device->printf.formats, struct radv_printf_format, format_index);
237
238 uint32_t invocation_count = printf_header & 0xFFFF;
239
240 uint32_t num_args = 0;
241 for (uint32_t i = 0; i < strlen(printf_format->string); i++)
242 if (printf_format->string[i] == '%')
243 num_args++;
244
245 char *format = printf_format->string;
246
247 for (uint32_t i = 0; i <= num_args; i++) {
248 size_t spec_pos = util_printf_next_spec_pos(format, 0);
249
250 if (spec_pos == -1) {
251 fprintf(out, "%s", format);
252 continue;
253 }
254
255 const char *token = util_printf_prev_tok(&format[spec_pos]);
256 char *next_format = &format[spec_pos + 1];
257
258 /* print the part before the format token */
259 if (token != format)
260 fwrite(format, token - format, 1, out);
261
262 char *print_str = strndup(token, next_format - token);
263 /* rebase spec_pos so we can use it with print_str */
264 spec_pos += format - token;
265
266 size_t element_size = printf_format->element_sizes[i];
267 bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;
268
269 uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
270 for (uint32_t lane = 0; lane < lane_count; lane++) {
271 switch (element_size) {
272 case 1: {
273 uint8_t v;
274 memcpy(&v, &data[offset], element_size);
275 fprintf(out, print_str, v);
276 break;
277 }
278 case 2: {
279 uint16_t v;
280 memcpy(&v, &data[offset], element_size);
281 fprintf(out, print_str, v);
282 break;
283 }
284 case 4: {
285 if (is_float) {
286 float v;
287 memcpy(&v, &data[offset], element_size);
288 fprintf(out, print_str, v);
289 } else {
290 uint32_t v;
291 memcpy(&v, &data[offset], element_size);
292 fprintf(out, print_str, v);
293 }
294 break;
295 }
296 case 8: {
297 if (is_float) {
298 double v;
299 memcpy(&v, &data[offset], element_size);
300 fprintf(out, print_str, v);
301 } else {
302 uint64_t v;
303 memcpy(&v, &data[offset], element_size);
304 fprintf(out, print_str, v);
305 }
306 break;
307 }
308 default:
309 unreachable("Unsupported data type");
310 }
311
312 if (lane != lane_count - 1)
313 fprintf(out, " ");
314
315 offset += element_size;
316 }
317
318 /* rebase format */
319 format = next_format;
320 free(print_str);
321 }
322 }
323
324 fflush(out);
325
326 header->offset = sizeof(struct radv_printf_buffer_header);
327 }
328
329 void
radv_device_associate_nir(struct radv_device * device,nir_shader * nir)330 radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
331 {
332 if (!device->printf.buffer_addr)
333 return;
334
335 if (!device_ht)
336 device_ht = _mesa_pointer_hash_table_create(NULL);
337
338 _mesa_hash_table_insert(device_ht, nir, device);
339 }
340