1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/blob.h"
25 #include "util/hash_table.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "nir/nir_serialize.h"
30 #include "anv_private.h"
31 #include "nir/nir_xfb_info.h"
32 #include "vk_util.h"
33 #include "compiler/spirv/nir_spirv.h"
34 #include "shaders/float64_spv.h"
35 #include "util/u_printf.h"
36
37 /**
38 * Embedded sampler management.
39 */
40
41 static unsigned
embedded_sampler_key_hash(const void * key)42 embedded_sampler_key_hash(const void *key)
43 {
44 return _mesa_hash_data(key, sizeof(struct anv_embedded_sampler_key));
45 }
46
47 static bool
embedded_sampler_key_equal(const void * a,const void * b)48 embedded_sampler_key_equal(const void *a, const void *b)
49 {
50 return memcmp(a, b, sizeof(struct anv_embedded_sampler_key)) == 0;
51 }
52
53 static void
anv_embedded_sampler_free(struct anv_device * device,struct anv_embedded_sampler * sampler)54 anv_embedded_sampler_free(struct anv_device *device,
55 struct anv_embedded_sampler *sampler)
56 {
57 anv_state_pool_free(&device->dynamic_state_pool, sampler->sampler_state);
58 anv_state_pool_free(&device->dynamic_state_pool, sampler->border_color_state);
59 vk_free(&device->vk.alloc, sampler);
60 }
61
62 static struct anv_embedded_sampler *
anv_embedded_sampler_ref(struct anv_embedded_sampler * sampler)63 anv_embedded_sampler_ref(struct anv_embedded_sampler *sampler)
64 {
65 sampler->ref_cnt++;
66 return sampler;
67 }
68
/* Drop a reference on an embedded sampler. When the last reference goes
 * away, the sampler is removed from the device-wide map and freed. This
 * takes the embedded-sampler mutex itself, so it must not be called with
 * that mutex already held. */
static void
anv_embedded_sampler_unref(struct anv_device *device,
                           struct anv_embedded_sampler *sampler)
{
   simple_mtx_lock(&device->embedded_samplers.mutex);
   if (--sampler->ref_cnt == 0) {
      /* Remove the entry while still under the lock so a concurrent
       * lookup cannot observe a sampler that is about to be freed. */
      _mesa_hash_table_remove_key(device->embedded_samplers.map,
                                  &sampler->key);
      anv_embedded_sampler_free(device, sampler);
   }
   simple_mtx_unlock(&device->embedded_samplers.mutex);
}
81
82 void
anv_device_init_embedded_samplers(struct anv_device * device)83 anv_device_init_embedded_samplers(struct anv_device *device)
84 {
85 simple_mtx_init(&device->embedded_samplers.mutex, mtx_plain);
86 device->embedded_samplers.map =
87 _mesa_hash_table_create(NULL,
88 embedded_sampler_key_hash,
89 embedded_sampler_key_equal);
90 }
91
/* Tear down the device-wide embedded sampler cache. Any samplers still in
 * the map are freed directly — presumably their remaining references
 * belong to shaders being destroyed along with the device (NOTE(review):
 * confirm no shader outlives this call). */
void
anv_device_finish_embedded_samplers(struct anv_device *device)
{
   hash_table_foreach(device->embedded_samplers.map, entry) {
      anv_embedded_sampler_free(device, entry->data);
   }
   ralloc_free(device->embedded_samplers.map);
   simple_mtx_destroy(&device->embedded_samplers.mutex);
}
101
102 static VkResult
anv_shader_bin_get_embedded_samplers(struct anv_device * device,struct anv_shader_bin * shader,const struct anv_pipeline_bind_map * bind_map)103 anv_shader_bin_get_embedded_samplers(struct anv_device *device,
104 struct anv_shader_bin *shader,
105 const struct anv_pipeline_bind_map *bind_map)
106 {
107 VkResult result = VK_SUCCESS;
108
109 simple_mtx_lock(&device->embedded_samplers.mutex);
110
111 for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
112 struct hash_entry *entry =
113 _mesa_hash_table_search(device->embedded_samplers.map,
114 &bind_map->embedded_sampler_to_binding[i].key);
115 if (entry == NULL) {
116 shader->embedded_samplers[i] =
117 vk_zalloc(&device->vk.alloc,
118 sizeof(struct anv_embedded_sampler), 8,
119 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
120 if (shader->embedded_samplers[i] == NULL) {
121 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
122 goto err;
123 }
124
125 anv_genX(device->info, emit_embedded_sampler)(
126 device, shader->embedded_samplers[i],
127 &bind_map->embedded_sampler_to_binding[i]);
128 _mesa_hash_table_insert(device->embedded_samplers.map,
129 &shader->embedded_samplers[i]->key,
130 shader->embedded_samplers[i]);
131 } else {
132 shader->embedded_samplers[i] = anv_embedded_sampler_ref(entry->data);
133 }
134 }
135
136 err:
137 simple_mtx_unlock(&device->embedded_samplers.mutex);
138 return result;
139 }
140
141 /**
142 *
143 */
144
/* Serialization callbacks referenced by anv_shader_bin_ops below;
 * definitions follow later in this file. */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob);

struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob);
153
/* vk_pipeline_cache_object destroy callback: releases everything a shader
 * binary owns — its embedded sampler references, its kernel allocation in
 * the instruction state pool, and finally the shader's single multialloc
 * block (all trailing arrays live inside it, see anv_shader_bin_create). */
static void
anv_shader_bin_destroy(struct vk_device *_device,
                       struct vk_pipeline_cache_object *object)
{
   struct anv_device *device =
      container_of(_device, struct anv_device, vk);

   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   /* Drop sampler references before freeing the shader that holds the
    * pointer array. */
   for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++)
      anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);

   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_pipeline_cache_object_finish(&shader->base);
   vk_free(&device->vk.alloc, shader);
}
171
/* vk_pipeline_cache object vtable for anv_shader_bin. */
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
   .serialize = anv_shader_bin_serialize,
   .deserialize = anv_shader_bin_deserialize,
   .destroy = anv_shader_bin_destroy,
};

/* NULL-terminated list of object ops accepted when importing pipeline
 * cache data from the application. */
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
   &anv_shader_bin_ops,
   NULL
};
182
183 static void
anv_shader_bin_rewrite_embedded_samplers(struct anv_device * device,struct anv_shader_bin * shader,const struct anv_pipeline_bind_map * bind_map,const struct brw_stage_prog_data * prog_data_in)184 anv_shader_bin_rewrite_embedded_samplers(struct anv_device *device,
185 struct anv_shader_bin *shader,
186 const struct anv_pipeline_bind_map *bind_map,
187 const struct brw_stage_prog_data *prog_data_in)
188 {
189 int rv_count = 0;
190 struct brw_shader_reloc_value reloc_values[BRW_MAX_EMBEDDED_SAMPLERS];
191
192 for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
193 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
194 .id = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + i,
195 .value = shader->embedded_samplers[i]->sampler_state.offset,
196 };
197 }
198
199 brw_write_shader_relocs(&device->physical->compiler->isa,
200 shader->kernel.map, prog_data_in,
201 reloc_values, rv_count);
202 }
203
204 static uint32_t
brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data * prog_data)205 brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data *prog_data)
206 {
207 uint32_t count = 0;
208 for (unsigned i = 0; i < prog_data->printf_info_count; i++)
209 count += prog_data->printf_info[i].num_args;
210 return count;
211 }
212
213 static uint32_t
brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data * prog_data)214 brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data *prog_data)
215 {
216 uint32_t size = 0;
217 for (unsigned i = 0; i < prog_data->printf_info_count; i++)
218 size += prog_data->printf_info[i].string_size;
219 return size;
220 }
221
222 static void
copy_uprintf(u_printf_info * out_infos,unsigned * out_arg_sizes,char * out_strings,const struct brw_stage_prog_data * prog_data)223 copy_uprintf(u_printf_info *out_infos,
224 unsigned *out_arg_sizes,
225 char *out_strings,
226 const struct brw_stage_prog_data *prog_data)
227 {
228 for (unsigned i = 0; i < prog_data->printf_info_count; i++) {
229 out_infos[i] = prog_data->printf_info[i];
230 out_infos[i].arg_sizes = out_arg_sizes;
231 memcpy(out_infos[i].arg_sizes,
232 prog_data->printf_info[i].arg_sizes,
233 sizeof(out_infos[i].arg_sizes[0]) * prog_data->printf_info[i].num_args);
234 out_infos[i].strings = out_strings;
235 memcpy(out_infos[i].strings,
236 prog_data->printf_info[i].strings,
237 prog_data->printf_info[i].string_size);
238
239 out_arg_sizes += prog_data->printf_info[i].num_args;
240 out_strings += prog_data->printf_info[i].string_size;
241 }
242 }
243
244 static struct anv_shader_bin *
anv_shader_bin_create(struct anv_device * device,gl_shader_stage stage,const void * key_data,uint32_t key_size,const void * kernel_data,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data_in,uint32_t prog_data_size,const struct brw_compile_stats * stats,uint32_t num_stats,const nir_xfb_info * xfb_info_in,const struct anv_pipeline_bind_map * bind_map,const struct anv_push_descriptor_info * push_desc_info,enum anv_dynamic_push_bits dynamic_push_values)245 anv_shader_bin_create(struct anv_device *device,
246 gl_shader_stage stage,
247 const void *key_data, uint32_t key_size,
248 const void *kernel_data, uint32_t kernel_size,
249 const struct brw_stage_prog_data *prog_data_in,
250 uint32_t prog_data_size,
251 const struct brw_compile_stats *stats, uint32_t num_stats,
252 const nir_xfb_info *xfb_info_in,
253 const struct anv_pipeline_bind_map *bind_map,
254 const struct anv_push_descriptor_info *push_desc_info,
255 enum anv_dynamic_push_bits dynamic_push_values)
256 {
257 VK_MULTIALLOC(ma);
258 VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
259 VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
260 VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
261 prog_data_size);
262 VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
263 prog_data_in->num_relocs);
264 VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
265
266 VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
267 xfb_info_in == NULL ? 0 :
268 nir_xfb_info_size(xfb_info_in->output_count));
269
270 VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
271 bind_map->surface_count);
272 VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
273 bind_map->sampler_count);
274 VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding,
275 embedded_sampler_to_binding,
276 bind_map->embedded_sampler_count);
277 VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args,
278 bind_map->kernel_arg_count);
279 VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler *, embedded_samplers,
280 bind_map->embedded_sampler_count);
281 VK_MULTIALLOC_DECL(&ma, u_printf_info, printf_infos,
282 INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
283 prog_data_in->printf_info_count : 0);
284 VK_MULTIALLOC_DECL(&ma, unsigned, arg_sizes,
285 INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
286 brw_stage_prog_data_printf_num_args(prog_data_in) : 0);
287 VK_MULTIALLOC_DECL(&ma, char, strings,
288 INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
289 brw_stage_prog_data_printf_string_size(prog_data_in) : 0);
290
291 if (!vk_multialloc_zalloc(&ma, &device->vk.alloc,
292 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
293 return NULL;
294
295 memcpy(obj_key_data, key_data, key_size);
296 vk_pipeline_cache_object_init(&device->vk, &shader->base,
297 &anv_shader_bin_ops, obj_key_data, key_size);
298
299 shader->stage = stage;
300
301 shader->kernel =
302 anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
303 memcpy(shader->kernel.map, kernel_data, kernel_size);
304 shader->kernel_size = kernel_size;
305
306 if (bind_map->embedded_sampler_count > 0) {
307 shader->embedded_samplers = embedded_samplers;
308 if (anv_shader_bin_get_embedded_samplers(device, shader, bind_map) != VK_SUCCESS) {
309 anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
310 vk_free(&device->vk.alloc, shader);
311 return NULL;
312 }
313 }
314
315 uint64_t shader_data_addr =
316 device->physical->va.instruction_state_pool.addr +
317 shader->kernel.offset +
318 prog_data_in->const_data_offset;
319
320 int rv_count = 0;
321 struct brw_shader_reloc_value reloc_values[10];
322 assert((device->physical->va.dynamic_visible_pool.addr & 0xffffffff) == 0);
323 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
324 .id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
325 .value = device->physical->va.dynamic_visible_pool.addr >> 32,
326 };
327 assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
328 assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
329 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
330 .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
331 .value = device->physical->indirect_descriptors ?
332 (device->physical->va.indirect_descriptor_pool.addr >> 32) :
333 (device->physical->va.internal_surface_state_pool.addr >> 32),
334 };
335 assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
336 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
337 .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
338 .value = shader_data_addr,
339 };
340 assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
341 assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
342 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
343 .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
344 .value = device->physical->va.instruction_state_pool.addr >> 32,
345 };
346 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
347 .id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
348 .value = shader->kernel.offset,
349 };
350 if (brw_shader_stage_is_bindless(stage)) {
351 const struct brw_bs_prog_data *bs_prog_data =
352 brw_bs_prog_data_const(prog_data_in);
353 uint64_t resume_sbt_addr =
354 device->physical->va.instruction_state_pool.addr +
355 shader->kernel.offset +
356 bs_prog_data->resume_sbt_offset;
357 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
358 .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
359 .value = resume_sbt_addr,
360 };
361 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
362 .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
363 .value = resume_sbt_addr >> 32,
364 };
365 }
366
367 if (INTEL_DEBUG(DEBUG_SHADER_PRINT) && prog_data_in->printf_info_count > 0) {
368 assert(device->printf.bo != NULL);
369
370 copy_uprintf(printf_infos, arg_sizes, strings, prog_data_in);
371
372 simple_mtx_lock(&device->printf.mutex);
373
374 uint32_t base_printf_idx =
375 util_dynarray_num_elements(&device->printf.prints, u_printf_info*);
376 for (uint32_t i = 0; i < prog_data_in->printf_info_count; i++) {
377 util_dynarray_append(&device->printf.prints, u_printf_info *,
378 &printf_infos[i]);
379 }
380
381 simple_mtx_unlock(&device->printf.mutex);
382
383 /* u_printf expects the string IDs to start at 1. */
384 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
385 .id = BRW_SHADER_RELOC_PRINTF_BASE_IDENTIFIER,
386 .value = base_printf_idx,
387 };
388 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
389 .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW,
390 .value = device->printf.bo->offset & 0xffffffff,
391 };
392 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
393 .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH,
394 .value = device->printf.bo->offset >> 32,
395 };
396 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
397 .id = BRW_SHADER_RELOC_PRINTF_BUFFER_SIZE,
398 .value = anv_printf_buffer_size(),
399 };
400 } else if (prog_data_in->printf_info_count > 0) {
401 unreachable("shader with printf intrinsics requires INTEL_DEBUG=shader-print");
402 }
403
404 brw_write_shader_relocs(&device->physical->compiler->isa,
405 shader->kernel.map, prog_data_in,
406 reloc_values, rv_count);
407
408 anv_shader_bin_rewrite_embedded_samplers(device, shader, bind_map, prog_data_in);
409
410 memcpy(prog_data, prog_data_in, prog_data_size);
411 typed_memcpy(prog_data_relocs, prog_data_in->relocs,
412 prog_data_in->num_relocs);
413 prog_data->relocs = prog_data_relocs;
414 prog_data->param = prog_data_param;
415 prog_data->printf_info = printf_infos;
416 shader->prog_data = prog_data;
417 shader->prog_data_size = prog_data_size;
418
419 assert(num_stats <= ARRAY_SIZE(shader->stats));
420 assert((stats != NULL) || (num_stats == 0));
421 typed_memcpy(shader->stats, stats, num_stats);
422 shader->num_stats = num_stats;
423
424 if (xfb_info_in) {
425 *xfb_info = *xfb_info_in;
426 typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
427 xfb_info_in->output_count);
428 shader->xfb_info = xfb_info;
429 } else {
430 shader->xfb_info = NULL;
431 }
432
433 shader->dynamic_push_values = dynamic_push_values;
434
435 typed_memcpy(&shader->push_desc_info, push_desc_info, 1);
436
437 shader->bind_map = *bind_map;
438
439 typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
440 bind_map->surface_count);
441 shader->bind_map.surface_to_descriptor = surface_to_descriptor;
442
443 typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
444 bind_map->sampler_count);
445 shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
446
447 typed_memcpy(embedded_sampler_to_binding, bind_map->embedded_sampler_to_binding,
448 bind_map->embedded_sampler_count);
449 shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding;
450
451 typed_memcpy(kernel_args, bind_map->kernel_args,
452 bind_map->kernel_arg_count);
453 shader->bind_map.kernel_args = kernel_args;
454
455 return shader;
456 }
457
/* Serialize a shader binary into a pipeline-cache blob. The field order
 * written here must stay in exact sync with anv_shader_bin_deserialize().
 * Returns false if the blob ran out of memory. */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob)
{
   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   blob_write_uint32(blob, shader->stage);

   blob_write_uint32(blob, shader->kernel_size);
   blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   blob_write_uint32(blob, shader->prog_data_size);

   /* Write prog_data with its device-local pointers cleared so the cached
    * bytes are position-independent; the pointed-to data follows. */
   union brw_any_prog_data prog_data;
   assert(shader->prog_data_size <= sizeof(prog_data));
   memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
   prog_data.base.relocs = NULL;
   prog_data.base.param = NULL;
   blob_write_bytes(blob, &prog_data, shader->prog_data_size);

   blob_write_bytes(blob, shader->prog_data->relocs,
                    shader->prog_data->num_relocs *
                    sizeof(shader->prog_data->relocs[0]));
   u_printf_serialize_info(blob, shader->prog_data->printf_info,
                           shader->prog_data->printf_info_count);

   blob_write_uint32(blob, shader->num_stats);
   blob_write_bytes(blob, shader->stats,
                    shader->num_stats * sizeof(shader->stats[0]));

   /* xfb info is optional: a zero size means "none". */
   if (shader->xfb_info) {
      uint32_t xfb_info_size =
         nir_xfb_info_size(shader->xfb_info->output_count);
      blob_write_uint32(blob, xfb_info_size);
      blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
   } else {
      blob_write_uint32(blob, 0);
   }

   blob_write_uint32(blob, shader->dynamic_push_values);

   blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
   blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
   blob_write_uint8(blob, shader->push_desc_info.used_set_buffer);

   blob_write_bytes(blob, shader->bind_map.surface_sha1,
                    sizeof(shader->bind_map.surface_sha1));
   blob_write_bytes(blob, shader->bind_map.sampler_sha1,
                    sizeof(shader->bind_map.sampler_sha1));
   blob_write_bytes(blob, shader->bind_map.push_sha1,
                    sizeof(shader->bind_map.push_sha1));
   blob_write_uint32(blob, shader->bind_map.surface_count);
   blob_write_uint32(blob, shader->bind_map.sampler_count);
   blob_write_uint32(blob, shader->bind_map.embedded_sampler_count);
   if (shader->stage == MESA_SHADER_KERNEL) {
      /* Pack the two 16-bit kernel-arg fields into a single dword;
       * only compute kernels carry this data. */
      uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 |
                        (uint32_t)shader->bind_map.kernel_arg_count;
      blob_write_uint32(blob, packed);
   }
   blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                    shader->bind_map.surface_count *
                    sizeof(*shader->bind_map.surface_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                    shader->bind_map.sampler_count *
                    sizeof(*shader->bind_map.sampler_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding,
                    shader->bind_map.embedded_sampler_count *
                    sizeof(*shader->bind_map.embedded_sampler_to_binding));
   blob_write_bytes(blob, shader->bind_map.kernel_args,
                    shader->bind_map.kernel_arg_count *
                    sizeof(*shader->bind_map.kernel_args));
   blob_write_bytes(blob, shader->bind_map.push_ranges,
                    sizeof(shader->bind_map.push_ranges));

   return !blob->out_of_memory;
}
535
/* Deserialize a shader binary from a pipeline-cache blob. Reads fields in
 * the exact order anv_shader_bin_serialize() wrote them, then rebuilds the
 * object through anv_shader_bin_create() (which deep-copies everything, so
 * the blob-backed pointers used here need not outlive this call). Returns
 * NULL on blob overrun or allocation failure. */
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob)
{
   struct anv_device *device =
      container_of(cache->base.device, struct anv_device, vk);

   gl_shader_stage stage = blob_read_uint32(blob);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   /* Copy into a union large enough for any stage's prog_data, then
    * restore the pointers that were cleared at serialize time. */
   union brw_any_prog_data prog_data;
   memcpy(&prog_data, prog_data_bytes,
          MIN2(sizeof(prog_data), prog_data_size));
   prog_data.base.relocs =
      blob_read_bytes(blob, prog_data.base.num_relocs *
                            sizeof(prog_data.base.relocs[0]));

   /* printf info is heap-allocated by the deserializer; keep it in a
    * temporary ralloc context freed once anv_shader_bin_create has made
    * its own copy. */
   void *mem_ctx = ralloc_context(NULL);
   prog_data.base.printf_info =
      u_printf_deserialize_info(mem_ctx, blob,
                                &prog_data.base.printf_info_count);

   uint32_t num_stats = blob_read_uint32(blob);
   const struct brw_compile_stats *stats =
      blob_read_bytes(blob, num_stats * sizeof(stats[0]));

   /* A zero size means the shader had no xfb info. */
   const nir_xfb_info *xfb_info = NULL;
   uint32_t xfb_size = blob_read_uint32(blob);
   if (xfb_size)
      xfb_info = blob_read_bytes(blob, xfb_size);

   enum anv_dynamic_push_bits dynamic_push_values = blob_read_uint32(blob);

   struct anv_push_descriptor_info push_desc_info = {};
   push_desc_info.used_descriptors = blob_read_uint32(blob);
   push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
   push_desc_info.used_set_buffer = blob_read_uint8(blob);

   struct anv_pipeline_bind_map bind_map = {};
   blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
   blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
   blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.embedded_sampler_count = blob_read_uint32(blob);
   if (stage == MESA_SHADER_KERNEL) {
      /* Unpack the dword written by serialize: size in the high 16 bits,
       * count in the low 16. */
      uint32_t packed = blob_read_uint32(blob);
      bind_map.kernel_args_size = (uint16_t)(packed >> 16);
      bind_map.kernel_arg_count = (uint16_t)packed;
   }
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));
   bind_map.embedded_sampler_to_binding = (void *)
      blob_read_bytes(blob, bind_map.embedded_sampler_count *
                            sizeof(*bind_map.embedded_sampler_to_binding));
   bind_map.kernel_args = (void *)
      blob_read_bytes(blob, bind_map.kernel_arg_count *
                            sizeof(*bind_map.kernel_args));
   blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));

   if (blob->overrun) {
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct anv_shader_bin *shader =
      anv_shader_bin_create(device, stage,
                            key_data, key_size,
                            kernel_data, kernel_size,
                            &prog_data.base, prog_data_size,
                            stats, num_stats, xfb_info, &bind_map,
                            &push_desc_info,
                            dynamic_push_values);

   ralloc_free(mem_ctx);

   if (shader == NULL)
      return NULL;

   return &shader->base;
}
629
630 struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const void * key_data,uint32_t key_size,bool * user_cache_hit)631 anv_device_search_for_kernel(struct anv_device *device,
632 struct vk_pipeline_cache *cache,
633 const void *key_data, uint32_t key_size,
634 bool *user_cache_hit)
635 {
636 /* Use the default pipeline cache if none is specified */
637 if (cache == NULL)
638 cache = device->vk.mem_cache;
639
640 bool cache_hit = false;
641 struct vk_pipeline_cache_object *object =
642 vk_pipeline_cache_lookup_object(cache, key_data, key_size,
643 &anv_shader_bin_ops, &cache_hit);
644 if (user_cache_hit != NULL) {
645 *user_cache_hit = object != NULL && cache_hit &&
646 cache != device->vk.mem_cache;
647 }
648
649 if (object == NULL)
650 return NULL;
651
652 return container_of(object, struct anv_shader_bin, base);
653 }
654
655 struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const struct anv_shader_upload_params * params)656 anv_device_upload_kernel(struct anv_device *device,
657 struct vk_pipeline_cache *cache,
658 const struct anv_shader_upload_params *params)
659 {
660 /* Use the default pipeline cache if none is specified */
661 if (cache == NULL)
662 cache = device->vk.mem_cache;
663
664 struct anv_shader_bin *shader =
665 anv_shader_bin_create(device,
666 params->stage,
667 params->key_data,
668 params->key_size,
669 params->kernel_data,
670 params->kernel_size,
671 params->prog_data,
672 params->prog_data_size,
673 params->stats,
674 params->num_stats,
675 params->xfb_info,
676 params->bind_map,
677 params->push_desc_info,
678 params->dynamic_push_values);
679 if (shader == NULL)
680 return NULL;
681
682 struct vk_pipeline_cache_object *cached =
683 vk_pipeline_cache_add_object(cache, &shader->base);
684
685 return container_of(cached, struct anv_shader_bin, base);
686 }
687
688 #define SHA1_KEY_SIZE 20
689
690 struct nir_shader *
anv_device_search_for_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const nir_shader_compiler_options * nir_options,unsigned char sha1_key[SHA1_KEY_SIZE],void * mem_ctx)691 anv_device_search_for_nir(struct anv_device *device,
692 struct vk_pipeline_cache *cache,
693 const nir_shader_compiler_options *nir_options,
694 unsigned char sha1_key[SHA1_KEY_SIZE],
695 void *mem_ctx)
696 {
697 if (cache == NULL)
698 cache = device->vk.mem_cache;
699
700 return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
701 nir_options, NULL, mem_ctx);
702 }
703
704 void
anv_device_upload_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const struct nir_shader * nir,unsigned char sha1_key[SHA1_KEY_SIZE])705 anv_device_upload_nir(struct anv_device *device,
706 struct vk_pipeline_cache *cache,
707 const struct nir_shader *nir,
708 unsigned char sha1_key[SHA1_KEY_SIZE])
709 {
710 if (cache == NULL)
711 cache = device->vk.mem_cache;
712
713 vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
714 }
715
/* Load (or build) the soft-fp64 SPIR-V library used to emulate double
 * precision, storing the resulting NIR in device->fp64_nir. The shader is
 * cached in the device's internal cache under a fixed name-derived SHA-1;
 * on a cache hit the SPIR-V translation is skipped entirely. */
void
anv_load_fp64_shader(struct anv_device *device)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[MESA_SHADER_VERTEX];

   /* The cache key is derived only from this fixed name, so the cached
    * entry is shared by everything that loads the fp64 library. */
   const char* shader_name = "float64_spv_lib";
   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, shader_name, strlen(shader_name));
   _mesa_sha1_final(&sha1_ctx, sha1);

   device->fp64_nir =
      anv_device_search_for_nir(device, device->internal_cache,
                                nir_options, sha1, NULL);

   /* The shader found, no need to call spirv_to_nir() again. */
   if (device->fp64_nir)
      return;

   const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float64 = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
   };

   /* create_library keeps all exported functions instead of a single
    * entry point, since this SPIR-V is a function library. */
   struct spirv_to_nir_options spirv_options = {
      .capabilities = &spirv_caps,
      .environment = NIR_SPIRV_VULKAN,
      .create_library = true
   };

   /* float64_spv_source is a byte array; spirv_to_nir takes a word
    * count, hence the division by 4. */
   nir_shader* nir =
      spirv_to_nir(float64_spv_source, sizeof(float64_spv_source) / 4,
                   NULL, 0, MESA_SHADER_VERTEX, "main",
                   &spirv_options, nir_options);

   assert(nir != NULL);

   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");

   /* Lower and clean up the library once here so every consumer gets
    * pre-optimized functions. The pass order is deliberate: inlining
    * requires returns to be lowered first, and the later optimization
    * passes operate on the inlined, SSA-form code. */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
              nir_address_format_62bit_generic);

   /* Publish to the internal cache so subsequent devices hit the
    * fast path above. */
   anv_device_upload_nir(device, device->internal_cache,
                         nir, sha1);

   device->fp64_nir = nir;
}
782