• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "vulkan/util/vk_util.h"
26 #include "util/blob.h"
27 #include "nir/nir_serialize.h"
28 
/* Compile-time debug switches: dump_stats logs every cache insert/lookup to
 * stderr; dump_stats_verbose additionally prints counter summaries via
 * cache_dump_stats().
 */
static const bool dump_stats = false;
static const bool dump_stats_verbose = false;

/* Shared for nir/variants */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096
34 
/* Hash-table hash callback: keys are raw 20-byte SHA-1 digests. */
static uint32_t
sha1_hash_func(const void *key)
{
   /* SHA-1 digests are always 20 bytes long. */
   return _mesa_hash_data(key, 20);
}
40 
/* Hash-table equality callback: compares two raw 20-byte SHA-1 digests. */
static bool
sha1_compare_func(const void *key_a, const void *key_b)
{
   return memcmp(key_a, key_b, 20) == 0;
}
46 
/* A NIR shader serialized to a byte stream, stored as the value in the
 * nir_cache hash table. The table key points at sha1_key inside the entry,
 * so the entry owns its own key storage.
 *
 * FIX: data was declared as the GNU zero-length array `data[0]`; use a
 * standard C99 flexible array member instead. sizeof(*snir) and all the
 * `ralloc_size(sizeof(*snir) + size)` call sites are unaffected.
 */
struct serialized_nir {
   unsigned char sha1_key[20]; /* SHA-1 digest used as the hash key */
   size_t size;                /* Byte size of the serialized NIR below */
   char data[];                /* Trailing serialized NIR bytes */
};
52 
/* Prints the cache's entry/hit/miss counters for both the NIR and the
 * variant tables to stderr. Gated on the compile-time dump_stats_verbose
 * flag; callers invoke it after most cache operations.
 */
static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   if (!dump_stats_verbose)
      return;

   fprintf(stderr, "  NIR cache entries:      %d\n", cache->nir_stats.count);
   fprintf(stderr, "  NIR cache miss count:   %d\n", cache->nir_stats.miss);
   fprintf(stderr, "  NIR cache hit  count:   %d\n", cache->nir_stats.hit);

   fprintf(stderr, "  variant cache entries:      %d\n", cache->variant_stats.count);
   fprintf(stderr, "  variant cache miss count:   %d\n", cache->variant_stats.miss);
   fprintf(stderr, "  variant cache hit  count:   %d\n", cache->variant_stats.hit);
}
67 
68 void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,nir_shader * nir,unsigned char sha1_key[20])69 v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
70                                struct v3dv_pipeline_cache *cache,
71                                nir_shader *nir,
72                                unsigned char sha1_key[20])
73 {
74    if (!cache || !cache->nir_cache)
75       return;
76 
77    if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
78       return;
79 
80    pthread_mutex_lock(&cache->mutex);
81    struct hash_entry *entry =
82       _mesa_hash_table_search(cache->nir_cache, sha1_key);
83    pthread_mutex_unlock(&cache->mutex);
84    if (entry)
85       return;
86 
87    struct blob blob;
88    blob_init(&blob);
89 
90    nir_serialize(&blob, nir, false);
91    if (blob.out_of_memory) {
92       blob_finish(&blob);
93       return;
94    }
95 
96    pthread_mutex_lock(&cache->mutex);
97    /* Because ralloc isn't thread-safe, we have to do all this inside the
98     * lock.  We could unlock for the big memcpy but it's probably not worth
99     * the hassle.
100     */
101    entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
102    if (entry) {
103       blob_finish(&blob);
104       pthread_mutex_unlock(&cache->mutex);
105       return;
106    }
107 
108    struct serialized_nir *snir =
109       ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
110    memcpy(snir->sha1_key, sha1_key, 20);
111    snir->size = blob.size;
112    memcpy(snir->data, blob.data, blob.size);
113 
114    blob_finish(&blob);
115 
116    cache->nir_stats.count++;
117    if (unlikely(dump_stats)) {
118       char sha1buf[41];
119       _mesa_sha1_format(sha1buf, snir->sha1_key);
120       fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
121       cache_dump_stats(cache);
122    }
123 
124    _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
125 
126    pthread_mutex_unlock(&cache->mutex);
127 }
128 
129 nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,const nir_shader_compiler_options * nir_options,unsigned char sha1_key[20])130 v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
131                                    struct v3dv_pipeline_cache *cache,
132                                    const nir_shader_compiler_options *nir_options,
133                                    unsigned char sha1_key[20])
134 {
135    if (!cache || !cache->nir_cache)
136       return NULL;
137 
138    if (unlikely(dump_stats)) {
139       char sha1buf[41];
140       _mesa_sha1_format(sha1buf, sha1_key);
141 
142       fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
143    }
144 
145    const struct serialized_nir *snir = NULL;
146 
147    pthread_mutex_lock(&cache->mutex);
148    struct hash_entry *entry =
149       _mesa_hash_table_search(cache->nir_cache, sha1_key);
150    if (entry)
151       snir = entry->data;
152    pthread_mutex_unlock(&cache->mutex);
153 
154    if (snir) {
155       struct blob_reader blob;
156       blob_reader_init(&blob, snir->data, snir->size);
157 
158       /* We use context NULL as we want the p_stage to keep the reference to
159        * nir, as we keep open the possibility of provide a shader variant
160        * after cache creation
161        */
162       nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
163       if (blob.overrun) {
164          ralloc_free(nir);
165       } else {
166          cache->nir_stats.hit++;
167          cache_dump_stats(cache);
168          return nir;
169       }
170    }
171 
172    cache->nir_stats.miss++;
173    cache_dump_stats(cache);
174 
175    return NULL;
176 }
177 
178 void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache * cache,struct v3dv_device * device,bool cache_enabled)179 v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
180                          struct v3dv_device *device,
181                          bool cache_enabled)
182 {
183    cache->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
184 
185    cache->device = device;
186    pthread_mutex_init(&cache->mutex, NULL);
187 
188    if (cache_enabled) {
189       cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
190                                                  sha1_compare_func);
191       cache->nir_stats.miss = 0;
192       cache->nir_stats.hit = 0;
193       cache->nir_stats.count = 0;
194 
195       cache->variant_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
196                                                      sha1_compare_func);
197       cache->variant_stats.miss = 0;
198       cache->variant_stats.hit = 0;
199       cache->variant_stats.count = 0;
200    } else {
201       cache->nir_cache = NULL;
202       cache->variant_cache = NULL;
203    }
204 
205 }
206 
207 struct v3dv_shader_variant*
v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,unsigned char sha1_key[20])208 v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline,
209                                        struct v3dv_pipeline_cache *cache,
210                                        unsigned char sha1_key[20])
211 {
212    if (!cache || !cache->variant_cache)
213       return NULL;
214 
215    if (unlikely(dump_stats)) {
216       char sha1buf[41];
217       _mesa_sha1_format(sha1buf, sha1_key);
218 
219       fprintf(stderr, "pipeline cache %p, search variant with key %s\n", cache, sha1buf);
220    }
221 
222    pthread_mutex_lock(&cache->mutex);
223 
224    struct hash_entry *entry =
225       _mesa_hash_table_search(cache->variant_cache, sha1_key);
226 
227    if (entry) {
228       struct v3dv_shader_variant *variant =
229          (struct v3dv_shader_variant *) entry->data;
230 
231       cache->variant_stats.hit++;
232       if (unlikely(dump_stats)) {
233          fprintf(stderr, "\tcache hit: %p\n", variant);
234          cache_dump_stats(cache);
235       }
236 
237       if (variant)
238          v3dv_shader_variant_ref(variant);
239 
240       pthread_mutex_unlock(&cache->mutex);
241       return variant;
242    }
243 
244    cache->variant_stats.miss++;
245    if (unlikely(dump_stats)) {
246       fprintf(stderr, "\tcache miss\n");
247       cache_dump_stats(cache);
248    }
249 
250    pthread_mutex_unlock(&cache->mutex);
251    return NULL;
252 }
253 
254 void
v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,struct v3dv_shader_variant * variant)255 v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline,
256                                    struct v3dv_pipeline_cache *cache,
257                                    struct v3dv_shader_variant  *variant)
258 {
259    if (!cache || !cache->variant_cache)
260       return;
261 
262    if (cache->variant_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
263       return;
264 
265    pthread_mutex_lock(&cache->mutex);
266    struct hash_entry *entry =
267       _mesa_hash_table_search(cache->variant_cache, variant->variant_sha1);
268 
269    if (entry) {
270       pthread_mutex_unlock(&cache->mutex);
271       return;
272    }
273 
274    v3dv_shader_variant_ref(variant);
275    _mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant);
276    cache->variant_stats.count++;
277    if (unlikely(dump_stats)) {
278       char sha1buf[41];
279       _mesa_sha1_format(sha1buf, variant->variant_sha1);
280 
281       fprintf(stderr, "pipeline cache %p, new variant entry with key %s\n\t%p\n",
282               cache, sha1buf, variant);
283       cache_dump_stats(cache);
284    }
285 
286    pthread_mutex_unlock(&cache->mutex);
287 }
288 
289 static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache * cache,struct blob_reader * blob)290 serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
291                                 struct blob_reader *blob)
292 {
293    const unsigned char *sha1_key = blob_read_bytes(blob, 20);
294    uint32_t snir_size = blob_read_uint32(blob);
295    const char* snir_data = blob_read_bytes(blob, snir_size);
296    if (blob->overrun)
297       return NULL;
298 
299    struct serialized_nir *snir =
300       ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
301    memcpy(snir->sha1_key, sha1_key, 20);
302    snir->size = snir_size;
303    memcpy(snir->data, snir_data, snir_size);
304 
305    return snir;
306 }
307 
308 static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device * device,struct blob_reader * blob)309 shader_variant_create_from_blob(struct v3dv_device *device,
310                                 struct blob_reader *blob)
311 {
312    VkResult result;
313 
314    gl_shader_stage stage = blob_read_uint32(blob);
315    bool is_coord = blob_read_uint8(blob);
316 
317    uint32_t v3d_key_size = blob_read_uint32(blob);
318    const struct v3d_key *v3d_key = blob_read_bytes(blob, v3d_key_size);
319 
320    const unsigned char *variant_sha1 = blob_read_bytes(blob, 20);
321 
322    uint32_t prog_data_size = blob_read_uint32(blob);
323    /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
324    assert(prog_data_size == v3d_prog_data_size(stage));
325 
326    const void *prog_data = blob_read_bytes(blob, prog_data_size);
327    if (blob->overrun)
328       return NULL;
329 
330    uint32_t ulist_count = blob_read_uint32(blob);
331    uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
332    const void *contents_data = blob_read_bytes(blob, contents_size);
333    if (blob->overrun)
334       return NULL;
335 
336    uint ulist_data_size = sizeof(uint32_t) * ulist_count;
337    const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
338    if (blob->overrun)
339       return NULL;
340 
341    uint32_t qpu_insts_size = blob_read_uint32(blob);
342    const uint64_t *qpu_insts = blob_read_bytes(blob, qpu_insts_size);
343    if (blob->overrun)
344       return NULL;
345 
346    /* shader_variant_create expects a newly created prog_data for their own,
347     * as it is what the v3d compiler returns. So we are also allocating one
348     * (including the uniform list) and filled it up with the data that we read
349     * from the blob
350     */
351    struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
352    memcpy(new_prog_data, prog_data, prog_data_size);
353    struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
354    ulist->count = ulist_count;
355    ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
356    memcpy(ulist->contents, contents_data, contents_size);
357    ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
358    memcpy(ulist->data, ulist_data_data, ulist_data_size);
359 
360    return v3dv_shader_variant_create(device, stage, is_coord,
361                                      variant_sha1,
362                                      v3d_key, v3d_key_size,
363                                      new_prog_data, prog_data_size,
364                                      qpu_insts, qpu_insts_size,
365                                      &result);
366 }
367 
368 static void
pipeline_cache_load(struct v3dv_pipeline_cache * cache,size_t size,const void * data)369 pipeline_cache_load(struct v3dv_pipeline_cache *cache,
370                     size_t size,
371                     const void *data)
372 {
373    struct v3dv_device *device = cache->device;
374    struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
375    struct vk_pipeline_cache_header header;
376 
377    if (cache->variant_cache == NULL)
378       return;
379 
380    struct blob_reader blob;
381    blob_reader_init(&blob, data, size);
382 
383    blob_copy_bytes(&blob, &header, sizeof(header));
384    if (size < sizeof(header))
385       return;
386    memcpy(&header, data, sizeof(header));
387    if (header.header_size < sizeof(header))
388       return;
389    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
390       return;
391    if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
392       return;
393    if (header.device_id != v3dv_physical_device_device_id(pdevice))
394       return;
395    if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
396       return;
397 
398    uint32_t nir_count = blob_read_uint32(&blob);
399    if (blob.overrun)
400       return;
401 
402    for (uint32_t i = 0; i < nir_count; i++) {
403       struct serialized_nir *snir =
404          serialized_nir_create_from_blob(cache, &blob);
405 
406       if (!snir)
407          break;
408 
409       _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
410       cache->nir_stats.count++;
411    }
412 
413    uint32_t count = blob_read_uint32(&blob);
414    if (blob.overrun)
415       return;
416 
417    for (uint32_t i = 0; i < count; i++) {
418       struct v3dv_shader_variant *variant =
419          shader_variant_create_from_blob(device, &blob);
420       if (!variant)
421          break;
422       _mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant);
423       cache->variant_stats.count++;
424    }
425 
426    if (unlikely(dump_stats)) {
427       fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
428               "%i variant entries\n", cache, nir_count, count);
429       cache_dump_stats(cache);
430    }
431 }
432 
433 VkResult
v3dv_CreatePipelineCache(VkDevice _device,const VkPipelineCacheCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineCache * pPipelineCache)434 v3dv_CreatePipelineCache(VkDevice _device,
435                          const VkPipelineCacheCreateInfo *pCreateInfo,
436                          const VkAllocationCallbacks *pAllocator,
437                          VkPipelineCache *pPipelineCache)
438 {
439    V3DV_FROM_HANDLE(v3dv_device, device, _device);
440    struct v3dv_pipeline_cache *cache;
441 
442    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
443    assert(pCreateInfo->flags == 0);
444 
445    cache = vk_alloc2(&device->alloc, pAllocator,
446                      sizeof(*cache), 8,
447                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
448 
449    if (cache == NULL)
450       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
451 
452    v3dv_pipeline_cache_init(cache, device,
453                             device->instance->pipeline_cache_enabled);
454 
455    if (pCreateInfo->initialDataSize > 0) {
456       pipeline_cache_load(cache,
457                           pCreateInfo->initialDataSize,
458                           pCreateInfo->pInitialData);
459    }
460 
461    *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);
462 
463    return VK_SUCCESS;
464 }
465 
466 void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache * cache)467 v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
468 {
469    pthread_mutex_destroy(&cache->mutex);
470 
471    if (cache->nir_cache) {
472       hash_table_foreach(cache->nir_cache, entry)
473          ralloc_free(entry->data);
474 
475       _mesa_hash_table_destroy(cache->nir_cache, NULL);
476    }
477 
478    if (cache->variant_cache) {
479       hash_table_foreach(cache->variant_cache, entry) {
480          struct v3dv_shader_variant *variant = entry->data;
481          if (variant)
482             v3dv_shader_variant_unref(cache->device, variant);
483       }
484 
485       _mesa_hash_table_destroy(cache->variant_cache, NULL);
486 
487    }
488 }
489 
490 void
v3dv_DestroyPipelineCache(VkDevice _device,VkPipelineCache _cache,const VkAllocationCallbacks * pAllocator)491 v3dv_DestroyPipelineCache(VkDevice _device,
492                           VkPipelineCache _cache,
493                           const VkAllocationCallbacks *pAllocator)
494 {
495    V3DV_FROM_HANDLE(v3dv_device, device, _device);
496    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
497 
498    if (!cache)
499       return;
500 
501    v3dv_pipeline_cache_finish(cache);
502 
503    vk_free2(&device->alloc, pAllocator, cache);
504 }
505 
506 VkResult
v3dv_MergePipelineCaches(VkDevice device,VkPipelineCache dstCache,uint32_t srcCacheCount,const VkPipelineCache * pSrcCaches)507 v3dv_MergePipelineCaches(VkDevice device,
508                          VkPipelineCache dstCache,
509                          uint32_t srcCacheCount,
510                          const VkPipelineCache *pSrcCaches)
511 {
512    V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);
513 
514    if (!dst->variant_cache || !dst->nir_cache)
515       return VK_SUCCESS;
516 
517    for (uint32_t i = 0; i < srcCacheCount; i++) {
518       V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
519       if (!src->variant_cache || !src->nir_cache)
520          continue;
521 
522       hash_table_foreach(src->nir_cache, entry) {
523          struct serialized_nir *src_snir = entry->data;
524          assert(src_snir);
525 
526          if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
527             continue;
528 
529          /* FIXME: we are using serialized nir shaders because they are
530           * convenient to create and store on the cache, but requires to do a
531           * copy here (and some other places) of the serialized NIR. Perhaps
532           * it would make sense to move to handle the NIR shaders with shared
533           * structures with ref counts, as the variants.
534           */
535          struct serialized_nir *snir_dst =
536             ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
537          memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
538          snir_dst->size = src_snir->size;
539          memcpy(snir_dst->data, src_snir->data, src_snir->size);
540 
541          _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
542          dst->nir_stats.count++;
543          if (unlikely(dump_stats)) {
544             char sha1buf[41];
545             _mesa_sha1_format(sha1buf, snir_dst->sha1_key);
546 
547             fprintf(stderr, "pipeline cache %p, added nir entry %s "
548                     "from pipeline cache %p\n",
549                     dst, sha1buf, src);
550             cache_dump_stats(dst);
551          }
552       }
553 
554       hash_table_foreach(src->variant_cache, entry) {
555          struct v3dv_shader_variant *variant = entry->data;
556          assert(variant);
557 
558          if (_mesa_hash_table_search(dst->variant_cache, variant->variant_sha1))
559             continue;
560 
561          v3dv_shader_variant_ref(variant);
562          _mesa_hash_table_insert(dst->variant_cache, variant->variant_sha1, variant);
563 
564          dst->variant_stats.count++;
565          if (unlikely(dump_stats)) {
566             char sha1buf[41];
567             _mesa_sha1_format(sha1buf, variant->variant_sha1);
568 
569             fprintf(stderr, "pipeline cache %p, added variant entry %s "
570                     "from pipeline cache %p\n",
571                     dst, sha1buf, src);
572             cache_dump_stats(dst);
573          }
574       }
575    }
576 
577    return VK_SUCCESS;
578 }
579 
/* Serializes one shader variant into the blob, in the exact field order
 * that shader_variant_create_from_blob reads back:
 *   stage, is_coord, key size + key bytes, sha1, prog_data size + bytes,
 *   uniform list (count, contents[], data[]), qpu insts size + code.
 * Returns false when the blob ran out of space (blob->out_of_memory).
 */
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);
   blob_write_uint8(blob, variant->is_coord);

   blob_write_uint32(blob, variant->v3d_key_size);
   blob_write_bytes(blob, &variant->key, variant->v3d_key_size);

   blob_write_bytes(blob, variant->variant_sha1, sizeof(variant->variant_sha1));

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   /* The uniform list lives in side arrays hanging off prog_data, so it is
    * serialized explicitly after the prog_data struct itself.
    */
   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->qpu_insts_size);
   /* The QPU code is read back out of the mapped assembly BO. */
   assert(variant->assembly_bo->map);
   blob_write_bytes(blob, variant->assembly_bo->map, variant->qpu_insts_size);

   return !blob->out_of_memory;
}
606 
607 VkResult
v3dv_GetPipelineCacheData(VkDevice _device,VkPipelineCache _cache,size_t * pDataSize,void * pData)608 v3dv_GetPipelineCacheData(VkDevice _device,
609                           VkPipelineCache _cache,
610                           size_t *pDataSize,
611                           void *pData)
612 {
613    V3DV_FROM_HANDLE(v3dv_device, device, _device);
614    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
615 
616    struct blob blob;
617    if (pData) {
618       blob_init_fixed(&blob, pData, *pDataSize);
619    } else {
620       blob_init_fixed(&blob, NULL, SIZE_MAX);
621    }
622 
623    struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
624    VkResult result = VK_SUCCESS;
625 
626    pthread_mutex_lock(&cache->mutex);
627 
628    struct vk_pipeline_cache_header header = {
629       .header_size = sizeof(struct vk_pipeline_cache_header),
630       .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
631       .vendor_id = v3dv_physical_device_vendor_id(pdevice),
632       .device_id = v3dv_physical_device_device_id(pdevice),
633    };
634    memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
635    blob_write_bytes(&blob, &header, sizeof(header));
636 
637    uint32_t nir_count = 0;
638    intptr_t nir_count_offset = blob_reserve_uint32(&blob);
639    if (nir_count_offset < 0) {
640       *pDataSize = 0;
641       blob_finish(&blob);
642       pthread_mutex_unlock(&cache->mutex);
643       return VK_INCOMPLETE;
644    }
645 
646    if (cache->nir_cache) {
647       hash_table_foreach(cache->nir_cache, entry) {
648          const struct serialized_nir *snir = entry->data;
649 
650          size_t save_size = blob.size;
651 
652          blob_write_bytes(&blob, snir->sha1_key, 20);
653          blob_write_uint32(&blob, snir->size);
654          blob_write_bytes(&blob, snir->data, snir->size);
655 
656          if (blob.out_of_memory) {
657             blob.size = save_size;
658             pthread_mutex_unlock(&cache->mutex);
659             result = VK_INCOMPLETE;
660             break;
661          }
662 
663          nir_count++;
664       }
665    }
666    blob_overwrite_uint32(&blob, nir_count_offset, nir_count);
667 
668    uint32_t count = 0;
669    intptr_t count_offset = blob_reserve_uint32(&blob);
670    if (count_offset < 0) {
671       *pDataSize = 0;
672       blob_finish(&blob);
673       pthread_mutex_unlock(&cache->mutex);
674       return VK_INCOMPLETE;
675    }
676 
677    if (cache->variant_cache) {
678       hash_table_foreach(cache->variant_cache, entry) {
679          struct v3dv_shader_variant *variant = entry->data;
680 
681          size_t save_size = blob.size;
682          if (!shader_variant_write_to_blob(variant, &blob)) {
683             /* If it fails reset to the previous size and bail */
684             blob.size = save_size;
685             pthread_mutex_unlock(&cache->mutex);
686             result = VK_INCOMPLETE;
687             break;
688          }
689 
690          count++;
691       }
692    }
693 
694    blob_overwrite_uint32(&blob, count_offset, count);
695 
696    *pDataSize = blob.size;
697 
698    blob_finish(&blob);
699 
700    if (unlikely(dump_stats)) {
701       assert(count <= cache->variant_stats.count);
702       fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
703               "%i nir shader entries "
704               "%i variant entries, %u DataSize\n",
705               cache, nir_count, count, (uint32_t) *pDataSize);
706    }
707 
708    pthread_mutex_unlock(&cache->mutex);
709 
710    return result;
711 }
712