• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "vulkan/util/vk_util.h"
26 #include "util/blob.h"
27 #include "nir/nir_serialize.h"
28 
29 static const bool debug_cache = false;
30 static const bool dump_stats = false;
31 static const bool dump_stats_on_destroy = false;
32 
33 /* Shared for nir/variants */
34 #define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096
35 
static uint32_t
sha1_hash_func(const void *sha1)
{
   /* Hash-table hash callback for SHA-1 keys: 20 is the SHA-1 digest size
    * in bytes. The digest bytes are already uniformly distributed, so
    * hashing the raw bytes is sufficient.
    */
   return _mesa_hash_data(sha1, 20);
}
41 
/* Hash-table key-equality callback for SHA-1 keys: two keys match when
 * their 20-byte digests are bytewise identical.
 */
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return !memcmp(sha1_a, sha1_b, 20);
}
47 
/* A NIR shader serialized with nir_serialize(), stored in the nir_cache hash
 * table keyed by sha1_key. The serialized bytes follow the header inline;
 * entries are allocated as sizeof(struct serialized_nir) + size.
 */
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;   /* number of bytes in data[] */
   char data[];   /* C99 flexible array member (was data[0], a GNU extension) */
};
53 
54 static void
cache_dump_stats(struct v3dv_pipeline_cache * cache)55 cache_dump_stats(struct v3dv_pipeline_cache *cache)
56 {
57    fprintf(stderr, "  NIR cache entries:      %d\n", cache->nir_stats.count);
58    fprintf(stderr, "  NIR cache miss count:   %d\n", cache->nir_stats.miss);
59    fprintf(stderr, "  NIR cache hit  count:   %d\n", cache->nir_stats.hit);
60 
61    fprintf(stderr, "  cache entries:      %d\n", cache->stats.count);
62    fprintf(stderr, "  cache miss count:   %d\n", cache->stats.miss);
63    fprintf(stderr, "  cache hit  count:   %d\n", cache->stats.hit);
64 
65    fprintf(stderr, "  on-disk cache hit  count:   %d\n", cache->stats.on_disk_hit);
66 }
67 
68 static void
pipeline_cache_lock(struct v3dv_pipeline_cache * cache)69 pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
70 {
71    if (!cache->externally_synchronized)
72       mtx_lock(&cache->mutex);
73 }
74 
75 static void
pipeline_cache_unlock(struct v3dv_pipeline_cache * cache)76 pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
77 {
78    if (!cache->externally_synchronized)
79       mtx_unlock(&cache->mutex);
80 }
81 
82 void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,nir_shader * nir,unsigned char sha1_key[20])83 v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
84                                struct v3dv_pipeline_cache *cache,
85                                nir_shader *nir,
86                                unsigned char sha1_key[20])
87 {
88    if (!cache || !cache->nir_cache)
89       return;
90 
91    if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
92       return;
93 
94    pipeline_cache_lock(cache);
95    struct hash_entry *entry =
96       _mesa_hash_table_search(cache->nir_cache, sha1_key);
97    pipeline_cache_unlock(cache);
98    if (entry)
99       return;
100 
101    struct blob blob;
102    blob_init(&blob);
103 
104    nir_serialize(&blob, nir, false);
105    if (blob.out_of_memory) {
106       blob_finish(&blob);
107       return;
108    }
109 
110    pipeline_cache_lock(cache);
111    /* Because ralloc isn't thread-safe, we have to do all this inside the
112     * lock.  We could unlock for the big memcpy but it's probably not worth
113     * the hassle.
114     */
115    entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
116    if (entry) {
117       blob_finish(&blob);
118       pipeline_cache_unlock(cache);
119       return;
120    }
121 
122    struct serialized_nir *snir =
123       ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
124    memcpy(snir->sha1_key, sha1_key, 20);
125    snir->size = blob.size;
126    memcpy(snir->data, blob.data, blob.size);
127 
128    blob_finish(&blob);
129 
130    cache->nir_stats.count++;
131    if (debug_cache) {
132       char sha1buf[41];
133       _mesa_sha1_format(sha1buf, snir->sha1_key);
134       fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
135       if (dump_stats)
136          cache_dump_stats(cache);
137    }
138 
139    _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
140 
141    pipeline_cache_unlock(cache);
142 }
143 
/**
 * Looks up a serialized NIR shader in the cache by sha1_key and, on a hit,
 * deserializes and returns a freshly allocated nir_shader (NULL ralloc
 * context, so the caller owns it). Returns NULL on a miss, a disabled cache,
 * or a corrupt (overrun) serialized blob.
 */
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   /* Only the hash-table lookup is done under the lock; deserialization
    * below runs unlocked on the entry we found.
    */
   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use context NULL as we want the p_stage to keep the reference to
       * nir, as we keep open the possibility of provide a shader variant
       * after cache creation
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         /* Corrupt entry: drop the partially deserialized shader. */
         ralloc_free(nir);
      } else {
         /* NOTE(review): hit/miss counters are updated outside the lock, so
          * they can race and be slightly inaccurate; they are debug-only.
          */
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "[v3dv nir cache] hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   /* Miss, or the cached blob was corrupt. */
   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv nir cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}
200 
201 void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache * cache,struct v3dv_device * device,VkPipelineCacheCreateFlags flags,bool cache_enabled)202 v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
203                          struct v3dv_device *device,
204                          VkPipelineCacheCreateFlags flags,
205                          bool cache_enabled)
206 {
207    cache->device = device;
208    mtx_init(&cache->mutex, mtx_plain);
209 
210    if (cache_enabled) {
211       cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
212                                                  sha1_compare_func);
213       cache->nir_stats.miss = 0;
214       cache->nir_stats.hit = 0;
215       cache->nir_stats.count = 0;
216 
217       cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
218                                              sha1_compare_func);
219       cache->stats.miss = 0;
220       cache->stats.hit = 0;
221       cache->stats.count = 0;
222 
223       cache->externally_synchronized = flags &
224          VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
225    } else {
226       cache->nir_cache = NULL;
227       cache->cache = NULL;
228    }
229 
230 }
231 
232 static struct v3dv_pipeline_shared_data *
233 v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
234                                            struct blob_reader *blob);
235 
236 static void
237 pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
238                                   struct v3dv_pipeline_shared_data *shared_data,
239                                   bool from_disk_cache);
240 
241 static bool
242 v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
243                                         struct blob *blob);
244 
/**
 * It searches for pipeline cached data, and returns a v3dv_pipeline_shared_data
 * with it, or NULL if it isn't cached. In the former case it will increase the
 * ref_count, so the caller is responsible for unreffing it.
 */
250 struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache * cache,unsigned char sha1_key[20],bool * cache_hit)251 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
252                                         unsigned char sha1_key[20],
253                                         bool *cache_hit)
254 {
255    if (!cache || !cache->cache)
256       return NULL;
257 
258    if (debug_cache) {
259       char sha1buf[41];
260       _mesa_sha1_format(sha1buf, sha1_key);
261 
262       fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
263    }
264 
265    pipeline_cache_lock(cache);
266 
267    struct hash_entry *entry =
268       _mesa_hash_table_search(cache->cache, sha1_key);
269 
270    if (entry) {
271       struct v3dv_pipeline_shared_data *cache_entry =
272          (struct v3dv_pipeline_shared_data *) entry->data;
273       assert(cache_entry);
274 
275       cache->stats.hit++;
276       *cache_hit = true;
277       if (debug_cache) {
278          fprintf(stderr, "[v3dv cache] hit: %p\n", cache_entry);
279          if (dump_stats)
280             cache_dump_stats(cache);
281       }
282 
283 
284       v3dv_pipeline_shared_data_ref(cache_entry);
285 
286       pipeline_cache_unlock(cache);
287 
288       return cache_entry;
289    }
290 
291    cache->stats.miss++;
292    if (debug_cache) {
293       fprintf(stderr, "[v3dv cache] miss\n");
294       if (dump_stats)
295          cache_dump_stats(cache);
296    }
297 
298    pipeline_cache_unlock(cache);
299 
300 #ifdef ENABLE_SHADER_CACHE
301    struct v3dv_device *device = cache->device;
302    struct disk_cache *disk_cache = device->pdevice->disk_cache;
303    /* Note that the on-disk-cache can be independently disabled, while keeping
304     * the pipeline cache working, by using the environment variable
305     * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
306     * will not do anything.
307     */
308    if (disk_cache && device->instance->pipeline_cache_enabled) {
309       cache_key cache_key;
310       disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);
311 
312       size_t buffer_size;
313       uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
314       if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
315          char sha1buf[41];
316          _mesa_sha1_format(sha1buf, cache_key);
317          fprintf(stderr, "[v3dv on-disk cache] %s %s\n",
318                  buffer ? "hit" : "miss",
319                  sha1buf);
320       }
321 
322       if (buffer) {
323          struct blob_reader blob;
324          struct v3dv_pipeline_shared_data *shared_data;
325 
326          blob_reader_init(&blob, buffer, buffer_size);
327          shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
328          free(buffer);
329 
330          if (shared_data) {
331             /* Technically we could increase on_disk_hit as soon as we have a
332              * buffer, but we are more interested on hits that got a valid
333              * shared_data
334              */
335             cache->stats.on_disk_hit++;
336             if (cache)
337                pipeline_cache_upload_shared_data(cache, shared_data, true);
338             return shared_data;
339          }
340       }
341    }
342 #endif
343 
344    return NULL;
345 }
346 
/**
 * Frees a shared-data object and everything it owns: per-stage shader
 * variants, per-stage descriptor maps (except binning aliases), and the
 * assembly BO. Must only be called once the refcount has dropped to zero
 * (normally via v3dv_pipeline_shared_data_unref).
 */
void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   /* assembly_bo may be NULL if v3dv_pipeline_shared_data_new failed before
    * allocating it.
    */
   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}
371 
/**
 * Creates a new shared-data object (refcount 1) from already-built maps,
 * variants and assembled shader code, allocating and uploading a BO for the
 * assembly.
 *
 * Ownership: the maps and variants arrays are adopted by the new object.
 * On failure the object is unreffed, which destroys the adopted maps and
 * variants too — NOTE(review): callers must not free them again after a
 * NULL return; verify call sites honor this.
 */
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is ref
    * and unref by both the pipeline and the pipeline cache, so we can't
    * ensure that the cache or pipeline alloc will be available on the last
    * unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   /* Adopt the per-stage descriptor maps and compiled variants. */
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shaders assembly\n");
      goto fail;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      goto fail;
   }

   /* Upload the concatenated QPU assembly for all stages into the BO. */
   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;

fail:
   /* Unref (count was 1) destroys the entry together with the adopted maps
    * and variants; see ownership note above.
    */
   v3dv_pipeline_shared_data_unref(cache->device, new_entry);
   return NULL;
}
424 
/**
 * Inserts shared_data into the in-memory pipeline cache (taking a reference)
 * and, unless it just came from the on-disk cache, also serializes it into
 * the on-disk cache. Best-effort: silently returns if the cache is disabled,
 * full, or already holds the key.
 */
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   /* NOTE(review): cap check is unlocked, so the cap can be slightly
    * overshot under contention; harmless.
    */
   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry = NULL;

   /* If this is being called from the disk cache, we already know that the
    * entry is not on the hash table
    */
   if (!from_disk_cache)
      entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   /* The cache keeps its own reference on top of the caller's. */
   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from a on-disk-cache hit, we can skip writing to
    * the disk cache
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      /* Serialization failure (e.g. OOM in the blob) just skips the disk
       * write; the in-memory entry above stays valid.
       */
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
            fprintf(stderr, "[v3dv on-disk cache] storing %s\n", sha1buf);
         }
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
      }

      blob_finish(&binary);
   }
#endif
}
495 
/* Uploads all the "cacheable" or shared data from the pipeline */
/**
 * Public entry point: caches the pipeline's shared data in both the
 * in-memory cache and (when enabled) the on-disk cache. Passing
 * from_disk_cache=false requests the on-disk write.
 */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}
503 
504 static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache * cache,struct blob_reader * blob)505 serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
506                                 struct blob_reader *blob)
507 {
508    const unsigned char *sha1_key = blob_read_bytes(blob, 20);
509    uint32_t snir_size = blob_read_uint32(blob);
510    const char* snir_data = blob_read_bytes(blob, snir_size);
511    if (blob->overrun)
512       return NULL;
513 
514    struct serialized_nir *snir =
515       ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
516    memcpy(snir->sha1_key, sha1_key, 20);
517    snir->size = snir_size;
518    memcpy(snir->data, snir_data, snir_size);
519 
520    return snir;
521 }
522 
523 static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device * device,struct blob_reader * blob)524 shader_variant_create_from_blob(struct v3dv_device *device,
525                                 struct blob_reader *blob)
526 {
527    VkResult result;
528 
529    enum broadcom_shader_stage stage = blob_read_uint32(blob);
530 
531    uint32_t prog_data_size = blob_read_uint32(blob);
532    /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
533    assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));
534 
535    const void *prog_data = blob_read_bytes(blob, prog_data_size);
536    if (blob->overrun)
537       return NULL;
538 
539    uint32_t ulist_count = blob_read_uint32(blob);
540    uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
541    const void *contents_data = blob_read_bytes(blob, contents_size);
542    if (blob->overrun)
543       return NULL;
544 
545    uint ulist_data_size = sizeof(uint32_t) * ulist_count;
546    const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
547    if (blob->overrun)
548       return NULL;
549 
550    uint32_t assembly_offset = blob_read_uint32(blob);
551    uint32_t qpu_insts_size = blob_read_uint32(blob);
552 
553    /* shader_variant_create expects a newly created prog_data for their own,
554     * as it is what the v3d compiler returns. So we are also allocating one
555     * (including the uniform list) and filled it up with the data that we read
556     * from the blob
557     */
558    struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
559    memcpy(new_prog_data, prog_data, prog_data_size);
560    struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
561    ulist->count = ulist_count;
562    ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
563    memcpy(ulist->contents, contents_data, contents_size);
564    ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
565    memcpy(ulist->data, ulist_data_data, ulist_data_size);
566 
567    return v3dv_shader_variant_create(device, stage,
568                                      new_prog_data, prog_data_size,
569                                      assembly_offset,
570                                      NULL, qpu_insts_size,
571                                      &result);
572 }
573 
574 static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache * cache,struct blob_reader * blob)575 v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
576                                            struct blob_reader *blob)
577 {
578    const unsigned char *sha1_key = blob_read_bytes(blob, 20);
579 
580    struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
581    struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };
582 
583    uint8_t descriptor_maps_count = blob_read_uint8(blob);
584    for (uint8_t count = 0; count < descriptor_maps_count; count++) {
585       uint8_t stage = blob_read_uint8(blob);
586 
587       const struct v3dv_descriptor_maps *current_maps =
588          blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));
589 
590       if (blob->overrun)
591          goto fail;
592 
593       maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
594                                sizeof(struct v3dv_descriptor_maps), 8,
595                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
596 
597       if (maps[stage] == NULL)
598          goto fail;
599 
600       memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
601       if (broadcom_shader_stage_is_render_with_binning(stage)) {
602          enum broadcom_shader_stage bin_stage =
603             broadcom_binning_shader_stage_for_render_stage(stage);
604             maps[bin_stage] = maps[stage];
605       }
606    }
607 
608    uint8_t variant_count = blob_read_uint8(blob);
609 
610    for (uint8_t count = 0; count < variant_count; count++) {
611       uint8_t stage = blob_read_uint8(blob);
612       struct v3dv_shader_variant *variant =
613          shader_variant_create_from_blob(cache->device, blob);
614       variants[stage] = variant;
615    }
616 
617    uint32_t total_assembly_size = blob_read_uint32(blob);
618    const uint64_t *total_assembly =
619       blob_read_bytes(blob, total_assembly_size);
620 
621    if (blob->overrun)
622       goto fail;
623 
624    struct v3dv_pipeline_shared_data *data =
625       v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
626                                     total_assembly, total_assembly_size);
627 
628    if (!data)
629       goto fail;
630 
631    return data;
632 
633 fail:
634    for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) {
635       if (maps[i])
636          vk_free2(&cache->device->vk.alloc, NULL, maps[i]);
637       if (variants[i])
638          v3dv_shader_variant_destroy(cache->device, variants[i]);
639    }
640    return NULL;
641 }
642 
643 static void
pipeline_cache_load(struct v3dv_pipeline_cache * cache,size_t size,const void * data)644 pipeline_cache_load(struct v3dv_pipeline_cache *cache,
645                     size_t size,
646                     const void *data)
647 {
648    struct v3dv_device *device = cache->device;
649    struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
650    struct vk_pipeline_cache_header header;
651 
652    if (cache->cache == NULL || cache->nir_cache == NULL)
653       return;
654 
655    struct blob_reader blob;
656    blob_reader_init(&blob, data, size);
657 
658    blob_copy_bytes(&blob, &header, sizeof(header));
659    if (size < sizeof(header))
660       return;
661    memcpy(&header, data, sizeof(header));
662    if (header.header_size < sizeof(header))
663       return;
664    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
665       return;
666    if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
667       return;
668    if (header.device_id != v3dv_physical_device_device_id(pdevice))
669       return;
670    if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
671       return;
672 
673    uint32_t nir_count = blob_read_uint32(&blob);
674    if (blob.overrun)
675       return;
676 
677    for (uint32_t i = 0; i < nir_count; i++) {
678       struct serialized_nir *snir =
679          serialized_nir_create_from_blob(cache, &blob);
680 
681       if (!snir)
682          break;
683 
684       _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
685       cache->nir_stats.count++;
686    }
687 
688    uint32_t count = blob_read_uint32(&blob);
689    if (blob.overrun)
690       return;
691 
692    for (uint32_t i = 0; i < count; i++) {
693       struct v3dv_pipeline_shared_data *cache_entry =
694          v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
695       if (!cache_entry)
696          break;
697 
698       _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
699       cache->stats.count++;
700    }
701 
702    if (debug_cache) {
703       fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
704               "%i entries\n", cache, nir_count, count);
705       if (dump_stats)
706          cache_dump_stats(cache);
707    }
708 }
709 
710 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,const VkPipelineCacheCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineCache * pPipelineCache)711 v3dv_CreatePipelineCache(VkDevice _device,
712                          const VkPipelineCacheCreateInfo *pCreateInfo,
713                          const VkAllocationCallbacks *pAllocator,
714                          VkPipelineCache *pPipelineCache)
715 {
716    V3DV_FROM_HANDLE(v3dv_device, device, _device);
717    struct v3dv_pipeline_cache *cache;
718 
719    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
720 
721    cache = vk_object_zalloc(&device->vk, pAllocator,
722                             sizeof(*cache),
723                             VK_OBJECT_TYPE_PIPELINE_CACHE);
724 
725    if (cache == NULL)
726       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
727 
728    v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
729                             device->instance->pipeline_cache_enabled);
730 
731    if (pCreateInfo->initialDataSize > 0) {
732       pipeline_cache_load(cache,
733                           pCreateInfo->initialDataSize,
734                           pCreateInfo->pInitialData);
735    }
736 
737    *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);
738 
739    return VK_SUCCESS;
740 }
741 
/**
 * Tears down a pipeline cache: destroys the mutex, frees every cached NIR
 * entry, and drops the cache's reference on every shared-data entry.
 * NOTE(review): the mutex is destroyed up front, so this assumes no other
 * thread can still be using the cache at this point.
 */
void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   mtx_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      /* Each serialized_nir was ralloc'ed off nir_cache; free them before
       * destroying the table.
       */
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      /* Unref rather than destroy: a pipeline may still hold a reference to
       * the shared data.
       */
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}
767 
768 VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,VkPipelineCache _cache,const VkAllocationCallbacks * pAllocator)769 v3dv_DestroyPipelineCache(VkDevice _device,
770                           VkPipelineCache _cache,
771                           const VkAllocationCallbacks *pAllocator)
772 {
773    V3DV_FROM_HANDLE(v3dv_device, device, _device);
774    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
775 
776    if (!cache)
777       return;
778 
779    v3dv_pipeline_cache_finish(cache);
780 
781    vk_object_free(&device->vk, pAllocator, cache);
782 }
783 
784 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,VkPipelineCache dstCache,uint32_t srcCacheCount,const VkPipelineCache * pSrcCaches)785 v3dv_MergePipelineCaches(VkDevice device,
786                          VkPipelineCache dstCache,
787                          uint32_t srcCacheCount,
788                          const VkPipelineCache *pSrcCaches)
789 {
790    V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);
791 
792    if (!dst->cache || !dst->nir_cache)
793       return VK_SUCCESS;
794 
795    for (uint32_t i = 0; i < srcCacheCount; i++) {
796       V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
797       if (!src->cache || !src->nir_cache)
798          continue;
799 
800       hash_table_foreach(src->nir_cache, entry) {
801          struct serialized_nir *src_snir = entry->data;
802          assert(src_snir);
803 
804          if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
805             continue;
806 
807          /* FIXME: we are using serialized nir shaders because they are
808           * convenient to create and store on the cache, but requires to do a
809           * copy here (and some other places) of the serialized NIR. Perhaps
810           * it would make sense to move to handle the NIR shaders with shared
811           * structures with ref counts, as the variants.
812           */
813          struct serialized_nir *snir_dst =
814             ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
815          memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
816          snir_dst->size = src_snir->size;
817          memcpy(snir_dst->data, src_snir->data, src_snir->size);
818 
819          _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
820          dst->nir_stats.count++;
821          if (debug_cache) {
822             char sha1buf[41];
823             _mesa_sha1_format(sha1buf, snir_dst->sha1_key);
824 
825             fprintf(stderr, "pipeline cache %p, added nir entry %s "
826                     "from pipeline cache %p\n",
827                     dst, sha1buf, src);
828             if (dump_stats)
829                cache_dump_stats(dst);
830          }
831       }
832 
833       hash_table_foreach(src->cache, entry) {
834          struct v3dv_pipeline_shared_data *cache_entry = entry->data;
835          assert(cache_entry);
836 
837          if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
838             continue;
839 
840          v3dv_pipeline_shared_data_ref(cache_entry);
841          _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);
842 
843          dst->stats.count++;
844          if (debug_cache) {
845             char sha1buf[41];
846             _mesa_sha1_format(sha1buf, cache_entry->sha1_key);
847 
848             fprintf(stderr, "pipeline cache %p, added entry %s "
849                     "from pipeline cache %p\n",
850                     dst, sha1buf, src);
851             if (dump_stats)
852                cache_dump_stats(dst);
853          }
854       }
855    }
856 
857    return VK_SUCCESS;
858 }
859 
860 static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant * variant,struct blob * blob)861 shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
862                              struct blob *blob)
863 {
864    blob_write_uint32(blob, variant->stage);
865 
866    blob_write_uint32(blob, variant->prog_data_size);
867    blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);
868 
869    struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
870    blob_write_uint32(blob, ulist->count);
871    blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
872    blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);
873 
874    blob_write_uint32(blob, variant->assembly_offset);
875    blob_write_uint32(blob, variant->qpu_insts_size);
876 
877    return !blob->out_of_memory;
878 }
879 
880 static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data * cache_entry,struct blob * blob)881 v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
882                                         struct blob *blob)
883 {
884    blob_write_bytes(blob, cache_entry->sha1_key, 20);
885 
886    uint8_t descriptor_maps_count = 0;
887    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
888       if (broadcom_shader_stage_is_binning(stage))
889          continue;
890       if (cache_entry->maps[stage] == NULL)
891          continue;
892       descriptor_maps_count++;
893    }
894 
895    /* Compute pipelines only have one descriptor map,
896     * graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning
897     * stages take the descriptor map from the render stage.
898     */
899    assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
900           (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
901    blob_write_uint8(blob, descriptor_maps_count);
902 
903    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
904       if (cache_entry->maps[stage] == NULL)
905          continue;
906       if (broadcom_shader_stage_is_binning(stage))
907          continue;
908 
909       blob_write_uint8(blob, stage);
910       blob_write_bytes(blob, cache_entry->maps[stage],
911                        sizeof(struct v3dv_descriptor_maps));
912    }
913 
914    uint8_t variant_count = 0;
915    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
916       if (cache_entry->variants[stage] == NULL)
917          continue;
918       variant_count++;
919    }
920 
921    /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and
922     * compute pipelines only have 1.
923     */
924    assert((variant_count == 5  || variant_count == 3) ||
925           (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
926    blob_write_uint8(blob, variant_count);
927 
928    uint32_t total_assembly_size = 0;
929    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
930       if (cache_entry->variants[stage] == NULL)
931          continue;
932 
933       blob_write_uint8(blob, stage);
934       if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
935          return false;
936 
937       total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
938    }
939    blob_write_uint32(blob, total_assembly_size);
940 
941    assert(cache_entry->assembly_bo->map);
942    assert(cache_entry->assembly_bo->size >= total_assembly_size);
943    blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);
944 
945    return !blob->out_of_memory;
946 }
947 
948 
/**
 * vkGetPipelineCacheData implementation.
 *
 * Serialized layout: vk_pipeline_cache_header, then a uint32 NIR-entry count
 * followed by that many (sha1, size, data) records, then a uint32
 * shared-data count followed by that many pipeline_shared_data records.
 *
 * With pData == NULL this is a size query: *pDataSize receives the required
 * size. With pData != NULL, as much data as fits in *pDataSize is written;
 * on overflow *pDataSize is updated and VK_INCOMPLETE is returned.
 */
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   /* A fixed blob over a NULL buffer with SIZE_MAX capacity never copies
    * bytes, it only accumulates blob.size — exactly what the size query
    * needs.
    */
   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   /* Pessimistic default: any early bail-out below reports VK_INCOMPLETE;
    * only a fully serialized cache flips this to VK_SUCCESS.
    */
   VkResult result = VK_INCOMPLETE;

   /* Hold the cache lock for the whole walk so entries cannot be added or
    * removed while we serialize.
    */
   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   /* The entry count is not known until we see how many fit, so reserve a
    * uint32 slot now and overwrite it once the loop finishes.
    */
   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         /* Remember the size so a partially-written entry can be rolled
          * back: an incomplete record would corrupt the stream.
          */
         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   /* Same reserve/overwrite scheme for the pipeline shared-data entries. */
   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   /* Report the bytes actually produced (or required, for a size query). */
   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

 done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}
1049