• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/blob.h"
25 #include "util/hash_table.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "nir/nir_serialize.h"
30 #include "anv_private.h"
31 #include "nir/nir_xfb_info.h"
32 #include "vk_util.h"
33 #include "compiler/spirv/nir_spirv.h"
34 #include "shaders/float64_spv.h"
35 #include "util/u_printf.h"
36 
37 /**
38  * Embedded sampler management.
39  */
40 
41 static unsigned
embedded_sampler_key_hash(const void * key)42 embedded_sampler_key_hash(const void *key)
43 {
44    return _mesa_hash_data(key, sizeof(struct anv_embedded_sampler_key));
45 }
46 
47 static bool
embedded_sampler_key_equal(const void * a,const void * b)48 embedded_sampler_key_equal(const void *a, const void *b)
49 {
50    return memcmp(a, b, sizeof(struct anv_embedded_sampler_key)) == 0;
51 }
52 
53 static void
anv_embedded_sampler_free(struct anv_device * device,struct anv_embedded_sampler * sampler)54 anv_embedded_sampler_free(struct anv_device *device,
55                           struct anv_embedded_sampler *sampler)
56 {
57    anv_state_pool_free(&device->dynamic_state_pool, sampler->sampler_state);
58    anv_state_pool_free(&device->dynamic_state_pool, sampler->border_color_state);
59    vk_free(&device->vk.alloc, sampler);
60 }
61 
/* Take a reference on an embedded sampler and return it.
 *
 * The increment is not atomic, so the caller must hold
 * device->embedded_samplers.mutex (the only caller in this file,
 * anv_shader_bin_get_embedded_samplers(), does).
 */
static struct anv_embedded_sampler *
anv_embedded_sampler_ref(struct anv_embedded_sampler *sampler)
{
   sampler->ref_cnt++;
   return sampler;
}
68 
/* Drop a reference on an embedded sampler.  When the last reference goes
 * away, the sampler is removed from the device-wide map and freed.
 *
 * Takes device->embedded_samplers.mutex internally, so the caller must NOT
 * already hold it.
 */
static void
anv_embedded_sampler_unref(struct anv_device *device,
                           struct anv_embedded_sampler *sampler)
{
   simple_mtx_lock(&device->embedded_samplers.mutex);
   if (--sampler->ref_cnt == 0) {
      /* Remove by key first so concurrent lookups under the mutex can never
       * observe a freed sampler.
       */
      _mesa_hash_table_remove_key(device->embedded_samplers.map,
                                  &sampler->key);
      anv_embedded_sampler_free(device, sampler);
   }
   simple_mtx_unlock(&device->embedded_samplers.mutex);
}
81 
82 void
anv_device_init_embedded_samplers(struct anv_device * device)83 anv_device_init_embedded_samplers(struct anv_device *device)
84 {
85    simple_mtx_init(&device->embedded_samplers.mutex, mtx_plain);
86    device->embedded_samplers.map =
87       _mesa_hash_table_create(NULL,
88                               embedded_sampler_key_hash,
89                               embedded_sampler_key_equal);
90 }
91 
92 void
anv_device_finish_embedded_samplers(struct anv_device * device)93 anv_device_finish_embedded_samplers(struct anv_device *device)
94 {
95    hash_table_foreach(device->embedded_samplers.map, entry) {
96       anv_embedded_sampler_free(device, entry->data);
97    }
98    ralloc_free(device->embedded_samplers.map);
99    simple_mtx_destroy(&device->embedded_samplers.mutex);
100 }
101 
/* Resolve every embedded sampler referenced by @bind_map into
 * shader->embedded_samplers[], sharing device-wide samplers through the
 * embedded_samplers.map hash table whenever an identical key already exists.
 *
 * Called with the map mutex NOT held; takes it for the whole loop.
 *
 * NOTE(review): on a mid-loop allocation failure, samplers created or
 * referenced by earlier iterations remain in shader->embedded_samplers[]
 * and in the map — presumably the caller's error handling accounts for
 * those references; verify against anv_shader_bin_create().
 */
static VkResult
anv_shader_bin_get_embedded_samplers(struct anv_device *device,
                                     struct anv_shader_bin *shader,
                                     const struct anv_pipeline_bind_map *bind_map)
{
   VkResult result = VK_SUCCESS;

   simple_mtx_lock(&device->embedded_samplers.mutex);

   for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
      struct hash_entry *entry =
         _mesa_hash_table_search(device->embedded_samplers.map,
                                 &bind_map->embedded_sampler_to_binding[i].key);
      if (entry == NULL) {
         /* No sampler with this key yet: allocate and build a new one. */
         shader->embedded_samplers[i] =
            vk_zalloc(&device->vk.alloc,
                      sizeof(struct anv_embedded_sampler), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
         if (shader->embedded_samplers[i] == NULL) {
            result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            goto err;
         }

         /* Generation-specific code fills in the sampler/border-color
          * state for the new sampler.
          */
         anv_genX(device->info, emit_embedded_sampler)(
            device, shader->embedded_samplers[i],
            &bind_map->embedded_sampler_to_binding[i]);
         _mesa_hash_table_insert(device->embedded_samplers.map,
                                 &shader->embedded_samplers[i]->key,
                                 shader->embedded_samplers[i]);
      } else {
         /* Reuse the existing sampler; ref is safe since we hold the mutex. */
         shader->embedded_samplers[i] = anv_embedded_sampler_ref(entry->data);
      }
   }

 err:
   simple_mtx_unlock(&device->embedded_samplers.mutex);
   return result;
}
140 
141 /**
142  *
143  */
144 
/* Forward declarations for the vk_pipeline_cache_object_ops table below. */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob);

struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob);
153 
154 static void
anv_shader_bin_destroy(struct vk_device * _device,struct vk_pipeline_cache_object * object)155 anv_shader_bin_destroy(struct vk_device *_device,
156                        struct vk_pipeline_cache_object *object)
157 {
158    struct anv_device *device =
159       container_of(_device, struct anv_device, vk);
160 
161    struct anv_shader_bin *shader =
162       container_of(object, struct anv_shader_bin, base);
163 
164    for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++)
165       anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);
166 
167    anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
168    vk_pipeline_cache_object_finish(&shader->base);
169    vk_free(&device->vk.alloc, shader);
170 }
171 
/* vtable plugged into the common Vulkan runtime pipeline-cache code. */
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
   .serialize = anv_shader_bin_serialize,
   .deserialize = anv_shader_bin_deserialize,
   .destroy = anv_shader_bin_destroy,
};
177 
/* NULL-terminated list of object ops accepted when importing cache data. */
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
   &anv_shader_bin_ops,
   NULL
};
182 
183 static void
anv_shader_bin_rewrite_embedded_samplers(struct anv_device * device,struct anv_shader_bin * shader,const struct anv_pipeline_bind_map * bind_map,const struct brw_stage_prog_data * prog_data_in)184 anv_shader_bin_rewrite_embedded_samplers(struct anv_device *device,
185                                          struct anv_shader_bin *shader,
186                                          const struct anv_pipeline_bind_map *bind_map,
187                                          const struct brw_stage_prog_data *prog_data_in)
188 {
189    int rv_count = 0;
190    struct brw_shader_reloc_value reloc_values[BRW_MAX_EMBEDDED_SAMPLERS];
191 
192    for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
193       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
194          .id = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + i,
195          .value = shader->embedded_samplers[i]->sampler_state.offset,
196       };
197    }
198 
199    brw_write_shader_relocs(&device->physical->compiler->isa,
200                            shader->kernel.map, prog_data_in,
201                            reloc_values, rv_count);
202 }
203 
204 static uint32_t
brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data * prog_data)205 brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data *prog_data)
206 {
207    uint32_t count = 0;
208    for (unsigned i = 0; i < prog_data->printf_info_count; i++)
209       count += prog_data->printf_info[i].num_args;
210    return count;
211 }
212 
213 static uint32_t
brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data * prog_data)214 brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data *prog_data)
215 {
216    uint32_t size = 0;
217    for (unsigned i = 0; i < prog_data->printf_info_count; i++)
218       size += prog_data->printf_info[i].string_size;
219    return size;
220 }
221 
222 static void
copy_uprintf(u_printf_info * out_infos,unsigned * out_arg_sizes,char * out_strings,const struct brw_stage_prog_data * prog_data)223 copy_uprintf(u_printf_info *out_infos,
224              unsigned *out_arg_sizes,
225              char  *out_strings,
226              const struct brw_stage_prog_data *prog_data)
227 {
228    for (unsigned i = 0; i < prog_data->printf_info_count; i++) {
229       out_infos[i] = prog_data->printf_info[i];
230       out_infos[i].arg_sizes = out_arg_sizes;
231       memcpy(out_infos[i].arg_sizes,
232              prog_data->printf_info[i].arg_sizes,
233              sizeof(out_infos[i].arg_sizes[0]) * prog_data->printf_info[i].num_args);
234       out_infos[i].strings = out_strings;
235       memcpy(out_infos[i].strings,
236              prog_data->printf_info[i].strings,
237              prog_data->printf_info[i].string_size);
238 
239       out_arg_sizes += prog_data->printf_info[i].num_args;
240       out_strings += prog_data->printf_info[i].string_size;
241    }
242 }
243 
244 static struct anv_shader_bin *
anv_shader_bin_create(struct anv_device * device,gl_shader_stage stage,const void * key_data,uint32_t key_size,const void * kernel_data,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data_in,uint32_t prog_data_size,const struct brw_compile_stats * stats,uint32_t num_stats,const nir_xfb_info * xfb_info_in,const struct anv_pipeline_bind_map * bind_map,const struct anv_push_descriptor_info * push_desc_info,enum anv_dynamic_push_bits dynamic_push_values)245 anv_shader_bin_create(struct anv_device *device,
246                       gl_shader_stage stage,
247                       const void *key_data, uint32_t key_size,
248                       const void *kernel_data, uint32_t kernel_size,
249                       const struct brw_stage_prog_data *prog_data_in,
250                       uint32_t prog_data_size,
251                       const struct brw_compile_stats *stats, uint32_t num_stats,
252                       const nir_xfb_info *xfb_info_in,
253                       const struct anv_pipeline_bind_map *bind_map,
254                       const struct anv_push_descriptor_info *push_desc_info,
255                       enum anv_dynamic_push_bits dynamic_push_values)
256 {
257    VK_MULTIALLOC(ma);
258    VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
259    VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
260    VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
261                                 prog_data_size);
262    VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
263                            prog_data_in->num_relocs);
264    VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
265 
266    VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
267                                 xfb_info_in == NULL ? 0 :
268                                 nir_xfb_info_size(xfb_info_in->output_count));
269 
270    VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
271                            bind_map->surface_count);
272    VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
273                       bind_map->sampler_count);
274    VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding,
275                       embedded_sampler_to_binding,
276                       bind_map->embedded_sampler_count);
277    VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args,
278                       bind_map->kernel_arg_count);
279    VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler *, embedded_samplers,
280                       bind_map->embedded_sampler_count);
281    VK_MULTIALLOC_DECL(&ma, u_printf_info, printf_infos,
282                       INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
283                       prog_data_in->printf_info_count : 0);
284    VK_MULTIALLOC_DECL(&ma, unsigned, arg_sizes,
285                       INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
286                       brw_stage_prog_data_printf_num_args(prog_data_in) : 0);
287    VK_MULTIALLOC_DECL(&ma, char, strings,
288                       INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
289                       brw_stage_prog_data_printf_string_size(prog_data_in) : 0);
290 
291    if (!vk_multialloc_zalloc(&ma, &device->vk.alloc,
292                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
293       return NULL;
294 
295    memcpy(obj_key_data, key_data, key_size);
296    vk_pipeline_cache_object_init(&device->vk, &shader->base,
297                                  &anv_shader_bin_ops, obj_key_data, key_size);
298 
299    shader->stage = stage;
300 
301    shader->kernel =
302       anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
303    memcpy(shader->kernel.map, kernel_data, kernel_size);
304    shader->kernel_size = kernel_size;
305 
306    if (bind_map->embedded_sampler_count > 0) {
307       shader->embedded_samplers = embedded_samplers;
308       if (anv_shader_bin_get_embedded_samplers(device, shader, bind_map) != VK_SUCCESS) {
309          anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
310          vk_free(&device->vk.alloc, shader);
311          return NULL;
312       }
313    }
314 
315    uint64_t shader_data_addr =
316       device->physical->va.instruction_state_pool.addr +
317       shader->kernel.offset +
318       prog_data_in->const_data_offset;
319 
320    int rv_count = 0;
321    struct brw_shader_reloc_value reloc_values[10];
322    assert((device->physical->va.dynamic_visible_pool.addr & 0xffffffff) == 0);
323    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
324       .id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
325       .value = device->physical->va.dynamic_visible_pool.addr >> 32,
326    };
327    assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
328    assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
329    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
330       .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
331       .value = device->physical->indirect_descriptors ?
332                (device->physical->va.indirect_descriptor_pool.addr >> 32) :
333                (device->physical->va.internal_surface_state_pool.addr >> 32),
334    };
335    assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
336    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
337       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
338       .value = shader_data_addr,
339    };
340    assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
341    assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
342    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
343       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
344       .value = device->physical->va.instruction_state_pool.addr >> 32,
345    };
346    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
347       .id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
348       .value = shader->kernel.offset,
349    };
350    if (brw_shader_stage_is_bindless(stage)) {
351       const struct brw_bs_prog_data *bs_prog_data =
352          brw_bs_prog_data_const(prog_data_in);
353       uint64_t resume_sbt_addr =
354          device->physical->va.instruction_state_pool.addr +
355          shader->kernel.offset +
356          bs_prog_data->resume_sbt_offset;
357       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
358          .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
359          .value = resume_sbt_addr,
360       };
361       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
362          .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
363          .value = resume_sbt_addr >> 32,
364       };
365    }
366 
367    if (INTEL_DEBUG(DEBUG_SHADER_PRINT) && prog_data_in->printf_info_count > 0) {
368       assert(device->printf.bo != NULL);
369 
370       copy_uprintf(printf_infos, arg_sizes, strings, prog_data_in);
371 
372       simple_mtx_lock(&device->printf.mutex);
373 
374       uint32_t base_printf_idx =
375          util_dynarray_num_elements(&device->printf.prints, u_printf_info*);
376       for (uint32_t i = 0; i < prog_data_in->printf_info_count; i++) {
377          util_dynarray_append(&device->printf.prints, u_printf_info *,
378                               &printf_infos[i]);
379       }
380 
381       simple_mtx_unlock(&device->printf.mutex);
382 
383       /* u_printf expects the string IDs to start at 1. */
384       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
385          .id = BRW_SHADER_RELOC_PRINTF_BASE_IDENTIFIER,
386          .value = base_printf_idx,
387       };
388       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
389          .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW,
390          .value = device->printf.bo->offset & 0xffffffff,
391       };
392       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
393          .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH,
394          .value = device->printf.bo->offset >> 32,
395       };
396       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
397          .id = BRW_SHADER_RELOC_PRINTF_BUFFER_SIZE,
398          .value = anv_printf_buffer_size(),
399       };
400    } else if (prog_data_in->printf_info_count > 0) {
401       unreachable("shader with printf intrinsics requires INTEL_DEBUG=shader-print");
402    }
403 
404    brw_write_shader_relocs(&device->physical->compiler->isa,
405                            shader->kernel.map, prog_data_in,
406                            reloc_values, rv_count);
407 
408    anv_shader_bin_rewrite_embedded_samplers(device, shader, bind_map, prog_data_in);
409 
410    memcpy(prog_data, prog_data_in, prog_data_size);
411    typed_memcpy(prog_data_relocs, prog_data_in->relocs,
412                 prog_data_in->num_relocs);
413    prog_data->relocs = prog_data_relocs;
414    prog_data->param = prog_data_param;
415    prog_data->printf_info = printf_infos;
416    shader->prog_data = prog_data;
417    shader->prog_data_size = prog_data_size;
418 
419    assert(num_stats <= ARRAY_SIZE(shader->stats));
420    assert((stats != NULL) || (num_stats == 0));
421    typed_memcpy(shader->stats, stats, num_stats);
422    shader->num_stats = num_stats;
423 
424    if (xfb_info_in) {
425       *xfb_info = *xfb_info_in;
426       typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
427                    xfb_info_in->output_count);
428       shader->xfb_info = xfb_info;
429    } else {
430       shader->xfb_info = NULL;
431    }
432 
433    shader->dynamic_push_values = dynamic_push_values;
434 
435    typed_memcpy(&shader->push_desc_info, push_desc_info, 1);
436 
437    shader->bind_map = *bind_map;
438 
439    typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
440                 bind_map->surface_count);
441    shader->bind_map.surface_to_descriptor = surface_to_descriptor;
442 
443    typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
444                 bind_map->sampler_count);
445    shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
446 
447    typed_memcpy(embedded_sampler_to_binding, bind_map->embedded_sampler_to_binding,
448                 bind_map->embedded_sampler_count);
449    shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding;
450 
451    typed_memcpy(kernel_args, bind_map->kernel_args,
452                 bind_map->kernel_arg_count);
453    shader->bind_map.kernel_args = kernel_args;
454 
455    return shader;
456 }
457 
/* Serialize a shader binary into @blob for the pipeline cache.
 *
 * The field order here must stay in exact lockstep with
 * anv_shader_bin_deserialize() below.
 *
 * Returns false if the blob ran out of memory.
 */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob)
{
   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   blob_write_uint32(blob, shader->stage);

   /* Raw kernel binary. */
   blob_write_uint32(blob, shader->kernel_size);
   blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   blob_write_uint32(blob, shader->prog_data_size);

   /* Write prog_data through a scratch copy with its pointer members
    * cleared: relocs and printf_info are serialized out-of-line right
    * after, and raw pointers would be meaningless (and non-deterministic)
    * in cache data.
    */
   union brw_any_prog_data prog_data;
   assert(shader->prog_data_size <= sizeof(prog_data));
   memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
   prog_data.base.relocs = NULL;
   prog_data.base.param = NULL;
   blob_write_bytes(blob, &prog_data, shader->prog_data_size);

   blob_write_bytes(blob, shader->prog_data->relocs,
                    shader->prog_data->num_relocs *
                    sizeof(shader->prog_data->relocs[0]));
   u_printf_serialize_info(blob, shader->prog_data->printf_info,
                           shader->prog_data->printf_info_count);

   blob_write_uint32(blob, shader->num_stats);
   blob_write_bytes(blob, shader->stats,
                    shader->num_stats * sizeof(shader->stats[0]));

   /* Transform feedback info is optional; a zero size means "none". */
   if (shader->xfb_info) {
      uint32_t xfb_info_size =
         nir_xfb_info_size(shader->xfb_info->output_count);
      blob_write_uint32(blob, xfb_info_size);
      blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
   } else {
      blob_write_uint32(blob, 0);
   }

   blob_write_uint32(blob, shader->dynamic_push_values);

   blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
   blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
   blob_write_uint8(blob, shader->push_desc_info.used_set_buffer);

   /* Bind map: hashes and counts first, then the variable-length tables. */
   blob_write_bytes(blob, shader->bind_map.surface_sha1,
                    sizeof(shader->bind_map.surface_sha1));
   blob_write_bytes(blob, shader->bind_map.sampler_sha1,
                    sizeof(shader->bind_map.sampler_sha1));
   blob_write_bytes(blob, shader->bind_map.push_sha1,
                    sizeof(shader->bind_map.push_sha1));
   blob_write_uint32(blob, shader->bind_map.surface_count);
   blob_write_uint32(blob, shader->bind_map.sampler_count);
   blob_write_uint32(blob, shader->bind_map.embedded_sampler_count);
   if (shader->stage == MESA_SHADER_KERNEL) {
      /* Pack the two 16-bit kernel-arg fields into one dword; only compute
       * kernels carry them.
       */
      uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 |
                        (uint32_t)shader->bind_map.kernel_arg_count;
      blob_write_uint32(blob, packed);
   }
   blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                    shader->bind_map.surface_count *
                    sizeof(*shader->bind_map.surface_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                    shader->bind_map.sampler_count *
                    sizeof(*shader->bind_map.sampler_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding,
                    shader->bind_map.embedded_sampler_count *
                    sizeof(*shader->bind_map.embedded_sampler_to_binding));
   blob_write_bytes(blob, shader->bind_map.kernel_args,
                    shader->bind_map.kernel_arg_count *
                    sizeof(*shader->bind_map.kernel_args));
   blob_write_bytes(blob, shader->bind_map.push_ranges,
                    sizeof(shader->bind_map.push_ranges));

   return !blob->out_of_memory;
}
535 
/* Inverse of anv_shader_bin_serialize(): rebuild an anv_shader_bin from
 * cached bytes.  The read order must match the serializer exactly.
 *
 * Returns NULL if the blob is truncated/corrupt or allocation fails.
 */
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob)
{
   struct anv_device *device =
      container_of(cache->base.device, struct anv_device, vk);

   gl_shader_stage stage = blob_read_uint32(blob);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
   /* Bail early: the memcpy below would read garbage on overrun. */
   if (blob->overrun)
      return NULL;

   union brw_any_prog_data prog_data;
   memcpy(&prog_data, prog_data_bytes,
          MIN2(sizeof(prog_data), prog_data_size));
   /* Pointer members were serialized out-of-line; patch them back in. */
   prog_data.base.relocs =
      blob_read_bytes(blob, prog_data.base.num_relocs *
                            sizeof(prog_data.base.relocs[0]));

   /* printf info needs real allocations; mem_ctx scopes them to this
    * function — anv_shader_bin_create() deep-copies what it keeps.
    */
   void *mem_ctx = ralloc_context(NULL);
   prog_data.base.printf_info =
      u_printf_deserialize_info(mem_ctx, blob,
                                &prog_data.base.printf_info_count);

   uint32_t num_stats = blob_read_uint32(blob);
   const struct brw_compile_stats *stats =
      blob_read_bytes(blob, num_stats * sizeof(stats[0]));

   /* Zero size means the shader had no transform feedback info. */
   const nir_xfb_info *xfb_info = NULL;
   uint32_t xfb_size = blob_read_uint32(blob);
   if (xfb_size)
      xfb_info = blob_read_bytes(blob, xfb_size);

   enum anv_dynamic_push_bits dynamic_push_values = blob_read_uint32(blob);

   struct anv_push_descriptor_info push_desc_info = {};
   push_desc_info.used_descriptors = blob_read_uint32(blob);
   push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
   push_desc_info.used_set_buffer = blob_read_uint8(blob);

   struct anv_pipeline_bind_map bind_map = {};
   blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
   blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
   blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.embedded_sampler_count = blob_read_uint32(blob);
   if (stage == MESA_SHADER_KERNEL) {
      /* Unpack the two 16-bit kernel-arg fields (see serializer). */
      uint32_t packed = blob_read_uint32(blob);
      bind_map.kernel_args_size = (uint16_t)(packed >> 16);
      bind_map.kernel_arg_count = (uint16_t)packed;
   }
   /* These tables point into the blob; anv_shader_bin_create() copies
    * everything it keeps, so the blob may be discarded afterwards.
    */
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));
   bind_map.embedded_sampler_to_binding = (void *)
      blob_read_bytes(blob, bind_map.embedded_sampler_count *
                            sizeof(*bind_map.embedded_sampler_to_binding));
   bind_map.kernel_args = (void *)
      blob_read_bytes(blob, bind_map.kernel_arg_count *
                            sizeof(*bind_map.kernel_args));
   blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));

   if (blob->overrun) {
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct anv_shader_bin *shader =
      anv_shader_bin_create(device, stage,
                            key_data, key_size,
                            kernel_data, kernel_size,
                            &prog_data.base, prog_data_size,
                            stats, num_stats, xfb_info, &bind_map,
                            &push_desc_info,
                            dynamic_push_values);

   ralloc_free(mem_ctx);

   if (shader == NULL)
      return NULL;

   return &shader->base;
}
629 
630 struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const void * key_data,uint32_t key_size,bool * user_cache_hit)631 anv_device_search_for_kernel(struct anv_device *device,
632                              struct vk_pipeline_cache *cache,
633                              const void *key_data, uint32_t key_size,
634                              bool *user_cache_hit)
635 {
636    /* Use the default pipeline cache if none is specified */
637    if (cache == NULL)
638       cache = device->vk.mem_cache;
639 
640    bool cache_hit = false;
641    struct vk_pipeline_cache_object *object =
642       vk_pipeline_cache_lookup_object(cache, key_data, key_size,
643                                       &anv_shader_bin_ops, &cache_hit);
644    if (user_cache_hit != NULL) {
645       *user_cache_hit = object != NULL && cache_hit &&
646                         cache != device->vk.mem_cache;
647    }
648 
649    if (object == NULL)
650       return NULL;
651 
652    return container_of(object, struct anv_shader_bin, base);
653 }
654 
655 struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const struct anv_shader_upload_params * params)656 anv_device_upload_kernel(struct anv_device *device,
657                          struct vk_pipeline_cache *cache,
658                          const struct anv_shader_upload_params *params)
659 {
660    /* Use the default pipeline cache if none is specified */
661    if (cache == NULL)
662       cache = device->vk.mem_cache;
663 
664    struct anv_shader_bin *shader =
665       anv_shader_bin_create(device,
666                             params->stage,
667                             params->key_data,
668                             params->key_size,
669                             params->kernel_data,
670                             params->kernel_size,
671                             params->prog_data,
672                             params->prog_data_size,
673                             params->stats,
674                             params->num_stats,
675                             params->xfb_info,
676                             params->bind_map,
677                             params->push_desc_info,
678                             params->dynamic_push_values);
679    if (shader == NULL)
680       return NULL;
681 
682    struct vk_pipeline_cache_object *cached =
683       vk_pipeline_cache_add_object(cache, &shader->base);
684 
685    return container_of(cached, struct anv_shader_bin, base);
686 }
687 
688 #define SHA1_KEY_SIZE 20
689 
690 struct nir_shader *
anv_device_search_for_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const nir_shader_compiler_options * nir_options,unsigned char sha1_key[SHA1_KEY_SIZE],void * mem_ctx)691 anv_device_search_for_nir(struct anv_device *device,
692                           struct vk_pipeline_cache *cache,
693                           const nir_shader_compiler_options *nir_options,
694                           unsigned char sha1_key[SHA1_KEY_SIZE],
695                           void *mem_ctx)
696 {
697    if (cache == NULL)
698       cache = device->vk.mem_cache;
699 
700    return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
701                                        nir_options, NULL, mem_ctx);
702 }
703 
704 void
anv_device_upload_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const struct nir_shader * nir,unsigned char sha1_key[SHA1_KEY_SIZE])705 anv_device_upload_nir(struct anv_device *device,
706                       struct vk_pipeline_cache *cache,
707                       const struct nir_shader *nir,
708                       unsigned char sha1_key[SHA1_KEY_SIZE])
709 {
710    if (cache == NULL)
711       cache = device->vk.mem_cache;
712 
713    vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
714 }
715 
/* Build (or fetch from the internal cache) the fp64 SPIR-V emulation
 * library, compiled as a NIR library linked into shaders that use float64.
 * The resulting nir_shader is owned by the device (device->fp64_nir).
 */
void
anv_load_fp64_shader(struct anv_device *device)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[MESA_SHADER_VERTEX];

   /* The cache key is the SHA1 of a fixed name — the library contents only
    * change with the driver build, which invalidates the cache anyway.
    */
   const char* shader_name = "float64_spv_lib";
   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, shader_name, strlen(shader_name));
   _mesa_sha1_final(&sha1_ctx, sha1);

   device->fp64_nir =
      anv_device_search_for_nir(device, device->internal_cache,
                                   nir_options, sha1, NULL);

   /* The shader found, no need to call spirv_to_nir() again. */
   if (device->fp64_nir)
      return;

   const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float64 = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
   };

   struct spirv_to_nir_options spirv_options = {
      .capabilities = &spirv_caps,
      .environment = NIR_SPIRV_VULKAN,
      .create_library = true
   };

   /* float64_spv_source is a byte array of SPIR-V words, hence the /4. */
   nir_shader* nir =
      spirv_to_nir(float64_spv_source, sizeof(float64_spv_source) / 4,
                   NULL, 0, MESA_SHADER_VERTEX, "main",
                   &spirv_options, nir_options);

   assert(nir != NULL);

   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");

   /* Lower and pre-optimize the library once here so every pipeline that
    * links against it doesn't repeat the work.  Returns must be lowered
    * and functions inlined before the SSA/copy-prop cleanup below.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
              nir_address_format_62bit_generic);

   /* Cache the lowered library so subsequent device creations skip all of
    * the above.
    */
   anv_device_upload_nir(device, device->internal_cache,
                         nir, sha1);

   device->fp64_nir = nir;
}
782