• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "vk_util.h"
26 #include "util/blob.h"
27 #include "nir/nir_serialize.h"
28 
29 static const bool debug_cache = false;
30 static const bool dump_stats = false;
31 static const bool dump_stats_on_destroy = false;
32 
33 /* Shared for nir/variants */
34 #define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096
35 
/* Hash callback for SHA1-keyed hash tables: hashes the raw 20-byte digest. */
static uint32_t
sha1_hash_func(const void *sha1)
{
   const size_t sha1_len = 20;
   return _mesa_hash_data(sha1, sha1_len);
}
41 
/* Equality callback for SHA1-keyed hash tables: byte-compares two 20-byte
 * digests.
 */
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return !memcmp(sha1_a, sha1_b, 20);
}
47 
/* A NIR shader serialized with nir_serialize(), stored in the nir_cache hash
 * table keyed by sha1_key. The serialized bytes follow the header inline.
 */
struct serialized_nir {
   unsigned char sha1_key[20];  /* hash-table key (entries point at this) */
   size_t size;                 /* number of bytes in data[] */
   char data[];                 /* C99 flexible array member (was GNU `data[0]`) */
};
53 
/* Logs the hit/miss/entry counters of both the NIR cache and the pipeline
 * (shared data) cache. Only called when the debug_cache/dump_stats flags
 * above are enabled.
 */
static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   mesa_logi("  NIR cache entries:      %d\n", cache->nir_stats.count);
   mesa_logi("  NIR cache miss count:   %d\n", cache->nir_stats.miss);
   mesa_logi("  NIR cache hit  count:   %d\n", cache->nir_stats.hit);

   mesa_logi("  cache entries:      %d\n", cache->stats.count);
   mesa_logi("  cache miss count:   %d\n", cache->stats.miss);
   mesa_logi("  cache hit  count:   %d\n", cache->stats.hit);

   mesa_logi("  on-disk cache hit  count:   %d\n", cache->stats.on_disk_hit);
}
67 
/* Takes the cache mutex unless the application created the cache with
 * VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT (see
 * v3dv_pipeline_cache_init), in which case synchronization is the
 * application's responsibility.
 */
static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_lock(&cache->mutex);
}
74 
/* Releases the cache mutex; no-op for externally synchronized caches
 * (mirror of pipeline_cache_lock).
 */
static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_unlock(&cache->mutex);
}
81 
82 void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,nir_shader * nir,unsigned char sha1_key[20])83 v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
84                                struct v3dv_pipeline_cache *cache,
85                                nir_shader *nir,
86                                unsigned char sha1_key[20])
87 {
88    if (!cache || !cache->nir_cache)
89       return;
90 
91    if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
92       return;
93 
94    pipeline_cache_lock(cache);
95    struct hash_entry *entry =
96       _mesa_hash_table_search(cache->nir_cache, sha1_key);
97    pipeline_cache_unlock(cache);
98    if (entry)
99       return;
100 
101    struct blob blob;
102    blob_init(&blob);
103 
104    nir_serialize(&blob, nir, false);
105    if (blob.out_of_memory) {
106       blob_finish(&blob);
107       return;
108    }
109 
110    pipeline_cache_lock(cache);
111    /* Because ralloc isn't thread-safe, we have to do all this inside the
112     * lock.  We could unlock for the big memcpy but it's probably not worth
113     * the hassle.
114     */
115    entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
116    if (entry) {
117       blob_finish(&blob);
118       pipeline_cache_unlock(cache);
119       return;
120    }
121 
122    struct serialized_nir *snir =
123       ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
124    memcpy(snir->sha1_key, sha1_key, 20);
125    snir->size = blob.size;
126    memcpy(snir->data, blob.data, blob.size);
127 
128    blob_finish(&blob);
129 
130    cache->nir_stats.count++;
131    if (debug_cache) {
132       char sha1buf[41];
133       _mesa_sha1_format(sha1buf, snir->sha1_key);
134       mesa_logi("pipeline cache %p, new nir entry %s\n", cache, sha1buf);
135       if (dump_stats)
136          cache_dump_stats(cache);
137    }
138 
139    _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
140 
141    pipeline_cache_unlock(cache);
142 }
143 
/**
 * Looks up a serialized NIR shader by sha1_key and, on a hit, deserializes
 * and returns a fresh nir_shader. Returns NULL on a miss, if the cache is
 * disabled, or if the stored blob is corrupt. The returned shader is owned
 * by the caller (deserialized with a NULL ralloc context).
 */
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      mesa_logi("pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   /* Only the table lookup is under the lock; snir itself is never removed
    * until the cache is destroyed, so reading it afterwards is safe.
    */
   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use context NULL as we want the p_stage to keep the reference to
       * nir, as we keep open the possibility of provide a shader variant
       * after cache creation
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         /* Corrupt entry: treat as a miss (falls through to miss++ below). */
         ralloc_free(nir);
      } else {
         /* NOTE(review): hit/miss counters are updated outside the lock;
          * presumably an imprecise count is acceptable here — confirm.
          */
         cache->nir_stats.hit++;
         if (debug_cache) {
            mesa_logi("[v3dv nir cache] hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      mesa_logi("[v3dv nir cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}
200 
/**
 * Initializes an (already allocated) pipeline cache object. When
 * cache_enabled is false both hash tables are left NULL, which every other
 * entry point checks to turn itself into a no-op.
 */
void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);

   if (cache_enabled) {
      /* Both tables are keyed by 20-byte SHA1 digests. */
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      /* Externally synchronized caches skip the mutex in
       * pipeline_cache_lock/unlock.
       */
      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }

}
231 
232 static struct v3dv_pipeline_shared_data *
233 v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
234                                            struct blob_reader *blob);
235 
236 static void
237 pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
238                                   struct v3dv_pipeline_shared_data *shared_data,
239                                   bool from_disk_cache);
240 
241 static bool
242 v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
243                                         struct blob *blob);
244 
/**
 * Searches the cache for pipeline shared data and returns a
 * v3dv_pipeline_shared_data with it, or NULL if it is not cached. On a hit
 * it increases the ref_count, so the caller is responsible for unref'ing it.
 */
250 struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache * cache,unsigned char sha1_key[20],bool * cache_hit)251 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
252                                         unsigned char sha1_key[20],
253                                         bool *cache_hit)
254 {
255    if (!cache || !cache->cache)
256       return NULL;
257 
258    if (debug_cache) {
259       char sha1buf[41];
260       _mesa_sha1_format(sha1buf, sha1_key);
261 
262       mesa_logi("pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
263    }
264 
265    pipeline_cache_lock(cache);
266 
267    struct hash_entry *entry =
268       _mesa_hash_table_search(cache->cache, sha1_key);
269 
270    if (entry) {
271       struct v3dv_pipeline_shared_data *cache_entry =
272          (struct v3dv_pipeline_shared_data *) entry->data;
273       assert(cache_entry);
274 
275       cache->stats.hit++;
276       *cache_hit = true;
277       if (debug_cache) {
278          mesa_logi("[v3dv cache] hit: %p\n", cache_entry);
279          if (dump_stats)
280             cache_dump_stats(cache);
281       }
282 
283 
284       v3dv_pipeline_shared_data_ref(cache_entry);
285 
286       pipeline_cache_unlock(cache);
287 
288       return cache_entry;
289    }
290 
291    cache->stats.miss++;
292    if (debug_cache) {
293       mesa_logi("[v3dv cache] miss\n");
294       if (dump_stats)
295          cache_dump_stats(cache);
296    }
297 
298    pipeline_cache_unlock(cache);
299 
300 #ifdef ENABLE_SHADER_CACHE
301    struct v3dv_device *device = cache->device;
302    struct disk_cache *disk_cache = device->pdevice->disk_cache;
303    /* Note that the on-disk-cache can be independently disabled, while keeping
304     * the pipeline cache working, by using the environment variable
305     * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
306     * will not do anything.
307     */
308    if (disk_cache && device->instance->pipeline_cache_enabled) {
309       cache_key cache_key;
310       disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);
311 
312       size_t buffer_size;
313       uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
314       if (V3D_DBG(CACHE)) {
315          char sha1buf[41];
316          _mesa_sha1_format(sha1buf, cache_key);
317          mesa_logi("[v3dv on-disk cache] %s %s\n",
318                    buffer ? "hit" : "miss", sha1buf);
319       }
320 
321       if (buffer) {
322          struct blob_reader blob;
323          struct v3dv_pipeline_shared_data *shared_data;
324 
325          blob_reader_init(&blob, buffer, buffer_size);
326          shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
327          free(buffer);
328 
329          if (shared_data) {
330             /* Technically we could increase on_disk_hit as soon as we have a
331              * buffer, but we are more interested on hits that got a valid
332              * shared_data
333              */
334             cache->stats.on_disk_hit++;
335             if (cache)
336                pipeline_cache_upload_shared_data(cache, shared_data, true);
337             return shared_data;
338          }
339       }
340    }
341 #endif
342 
343    return NULL;
344 }
345 
346 void
v3dv_pipeline_shared_data_destroy(struct v3dv_device * device,struct v3dv_pipeline_shared_data * shared_data)347 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
348                                   struct v3dv_pipeline_shared_data *shared_data)
349 {
350    assert(shared_data->ref_cnt == 0);
351 
352    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
353       if (shared_data->variants[stage] != NULL)
354          v3dv_shader_variant_destroy(device, shared_data->variants[stage]);
355 
356       /* We don't free binning descriptor maps as we are sharing them
357        * with the render shaders.
358        */
359       if (shared_data->maps[stage] != NULL &&
360           !broadcom_shader_stage_is_binning(stage)) {
361          vk_free(&device->vk.alloc, shared_data->maps[stage]);
362       }
363    }
364 
365    if (shared_data->assembly_bo)
366       v3dv_bo_free(device, shared_data->assembly_bo);
367 
368    vk_free(&device->vk.alloc, shared_data);
369 }
370 
/**
 * Allocates a new shared-data object (ref_cnt = 1) taking ownership of the
 * given maps and variants, and copies total_assembly into a freshly
 * allocated and mapped BO. On failure the object is unref'ed, which also
 * destroys the maps/variants already stored in it, so ownership transfers
 * to this function even on the error path. Returns NULL on failure.
 */
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is ref
    * and unref by both the pipeline and the pipeline cache, so we can't
    * ensure that the cache or pipeline alloc will be available on the last
    * unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   /* Shallow-copy the per-stage pointers; from here on they belong to
    * new_entry.
    */
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      mesa_loge("failed to allocate memory for shaders assembly\n");
      goto fail;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      mesa_loge("failed to map source shader buffer\n");
      goto fail;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;

fail:
   /* Drops our reference to 0, destroying new_entry and everything stored
    * in it (including the maps/variants the caller handed over).
    */
   v3dv_pipeline_shared_data_unref(cache->device, new_entry);
   return NULL;
}
423 
/**
 * Inserts shared_data in the in-memory pipeline cache (taking a reference)
 * and, unless it came from the on-disk cache in the first place, also
 * serializes it into the on-disk cache when that is enabled.
 */
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   /* Unlocked read: a slightly stale count is fine, it only bounds the
    * cache size.
    */
   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry = NULL;

   /* If this is being called from the disk cache, we already know that the
    * entry is not on the hash table
    */
   if (!from_disk_cache)
      entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   /* The cache holds its own reference; the sha1_key embedded in the entry
    * doubles as the table key.
    */
   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      mesa_logi("pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
                cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from a on-disk-cache hit, we can skip writing to
    * the disk cache
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         if (V3D_DBG(CACHE)) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
            mesa_logi("[v3dv on-disk cache] storing %s\n", sha1buf);
         }
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
      }

      blob_finish(&binary);
   }
#endif
}
494 
/* Uploads all the "cacheable" or shared data from the pipeline to the given
 * pipeline cache (and, via pipeline_cache_upload_shared_data, to the
 * on-disk cache when enabled).
 */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}
502 
503 static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache * cache,struct blob_reader * blob)504 serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
505                                 struct blob_reader *blob)
506 {
507    const unsigned char *sha1_key = blob_read_bytes(blob, 20);
508    uint32_t snir_size = blob_read_uint32(blob);
509    const char* snir_data = blob_read_bytes(blob, snir_size);
510    if (blob->overrun)
511       return NULL;
512 
513    struct serialized_nir *snir =
514       ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
515    memcpy(snir->sha1_key, sha1_key, 20);
516    snir->size = snir_size;
517    memcpy(snir->data, snir_data, snir_size);
518 
519    return snir;
520 }
521 
522 static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device * device,struct blob_reader * blob)523 shader_variant_create_from_blob(struct v3dv_device *device,
524                                 struct blob_reader *blob)
525 {
526    VkResult result;
527 
528    enum broadcom_shader_stage stage = blob_read_uint32(blob);
529 
530    uint32_t prog_data_size = blob_read_uint32(blob);
531    /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
532    assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));
533 
534    const void *prog_data = blob_read_bytes(blob, prog_data_size);
535    if (blob->overrun)
536       return NULL;
537 
538    uint32_t ulist_count = blob_read_uint32(blob);
539    uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
540    const void *contents_data = blob_read_bytes(blob, contents_size);
541    if (blob->overrun)
542       return NULL;
543 
544    size_t ulist_data_size = sizeof(uint32_t) * ulist_count;
545    const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
546    if (blob->overrun)
547       return NULL;
548 
549    uint32_t assembly_offset = blob_read_uint32(blob);
550    uint32_t qpu_insts_size = blob_read_uint32(blob);
551 
552    /* shader_variant_create expects a newly created prog_data for their own,
553     * as it is what the v3d compiler returns. So we are also allocating one
554     * (including the uniform list) and filled it up with the data that we read
555     * from the blob
556     */
557    struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
558    memcpy(new_prog_data, prog_data, prog_data_size);
559    struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
560    ulist->count = ulist_count;
561    ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
562    memcpy(ulist->contents, contents_data, contents_size);
563    ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
564    memcpy(ulist->data, ulist_data_data, ulist_data_size);
565 
566    return v3dv_shader_variant_create(device, stage,
567                                      new_prog_data, prog_data_size,
568                                      assembly_offset,
569                                      NULL, qpu_insts_size,
570                                      &result);
571 }
572 
573 static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache * cache,struct blob_reader * blob)574 v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
575                                            struct blob_reader *blob)
576 {
577    const unsigned char *sha1_key = blob_read_bytes(blob, 20);
578 
579    struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
580    struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };
581 
582    uint8_t descriptor_maps_count = blob_read_uint8(blob);
583    for (uint8_t count = 0; count < descriptor_maps_count; count++) {
584       uint8_t stage = blob_read_uint8(blob);
585 
586       const struct v3dv_descriptor_maps *current_maps =
587          blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));
588 
589       if (blob->overrun)
590          goto fail;
591 
592       maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
593                                sizeof(struct v3dv_descriptor_maps), 8,
594                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
595 
596       if (maps[stage] == NULL)
597          goto fail;
598 
599       memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
600       if (broadcom_shader_stage_is_render_with_binning(stage)) {
601          enum broadcom_shader_stage bin_stage =
602             broadcom_binning_shader_stage_for_render_stage(stage);
603             maps[bin_stage] = maps[stage];
604       }
605    }
606 
607    uint8_t variant_count = blob_read_uint8(blob);
608 
609    for (uint8_t count = 0; count < variant_count; count++) {
610       uint8_t stage = blob_read_uint8(blob);
611       struct v3dv_shader_variant *variant =
612          shader_variant_create_from_blob(cache->device, blob);
613       variants[stage] = variant;
614    }
615 
616    uint32_t total_assembly_size = blob_read_uint32(blob);
617    const uint64_t *total_assembly =
618       blob_read_bytes(blob, total_assembly_size);
619 
620    if (blob->overrun)
621       goto fail;
622 
623    struct v3dv_pipeline_shared_data *data =
624       v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
625                                     total_assembly, total_assembly_size);
626 
627    if (!data)
628       goto fail;
629 
630    return data;
631 
632 fail:
633    for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) {
634       if (maps[i])
635          vk_free2(&cache->device->vk.alloc, NULL, maps[i]);
636       if (variants[i])
637          v3dv_shader_variant_destroy(cache->device, variants[i]);
638    }
639    return NULL;
640 }
641 
642 static void
pipeline_cache_load(struct v3dv_pipeline_cache * cache,size_t size,const void * data)643 pipeline_cache_load(struct v3dv_pipeline_cache *cache,
644                     size_t size,
645                     const void *data)
646 {
647    struct v3dv_device *device = cache->device;
648    struct v3dv_physical_device *pdevice = device->pdevice;
649    struct vk_pipeline_cache_header header;
650 
651    if (cache->cache == NULL || cache->nir_cache == NULL)
652       return;
653 
654    struct blob_reader blob;
655    blob_reader_init(&blob, data, size);
656 
657    blob_copy_bytes(&blob, &header, sizeof(header));
658    if (size < sizeof(header))
659       return;
660    memcpy(&header, data, sizeof(header));
661    if (header.header_size < sizeof(header))
662       return;
663    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
664       return;
665    if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
666       return;
667    if (header.device_id != v3dv_physical_device_device_id(pdevice))
668       return;
669    if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
670       return;
671 
672    uint32_t nir_count = blob_read_uint32(&blob);
673    if (blob.overrun)
674       return;
675 
676    for (uint32_t i = 0; i < nir_count; i++) {
677       struct serialized_nir *snir =
678          serialized_nir_create_from_blob(cache, &blob);
679 
680       if (!snir)
681          break;
682 
683       _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
684       cache->nir_stats.count++;
685    }
686 
687    uint32_t count = blob_read_uint32(&blob);
688    if (blob.overrun)
689       return;
690 
691    for (uint32_t i = 0; i < count; i++) {
692       struct v3dv_pipeline_shared_data *cache_entry =
693          v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
694       if (!cache_entry)
695          break;
696 
697       _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
698       cache->stats.count++;
699    }
700 
701    if (debug_cache) {
702       mesa_logi("pipeline cache %p, loaded %i nir shaders and "
703                 "%i entries\n", cache, nir_count, count);
704       if (dump_stats)
705          cache_dump_stats(cache);
706    }
707 }
708 
709 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,const VkPipelineCacheCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineCache * pPipelineCache)710 v3dv_CreatePipelineCache(VkDevice _device,
711                          const VkPipelineCacheCreateInfo *pCreateInfo,
712                          const VkAllocationCallbacks *pAllocator,
713                          VkPipelineCache *pPipelineCache)
714 {
715    V3DV_FROM_HANDLE(v3dv_device, device, _device);
716    struct v3dv_pipeline_cache *cache;
717 
718    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
719 
720    cache = vk_object_zalloc(&device->vk, pAllocator,
721                             sizeof(*cache),
722                             VK_OBJECT_TYPE_PIPELINE_CACHE);
723 
724    if (cache == NULL)
725       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
726 
727    v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
728                             device->instance->pipeline_cache_enabled);
729 
730    if (pCreateInfo->initialDataSize > 0) {
731       pipeline_cache_load(cache,
732                           pCreateInfo->initialDataSize,
733                           pCreateInfo->pInitialData);
734    }
735 
736    *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);
737 
738    return VK_SUCCESS;
739 }
740 
/**
 * Tears down a pipeline cache: frees all serialized NIR entries (owned via
 * ralloc on nir_cache), drops the cache's reference on every shared-data
 * entry, destroys both hash tables and the mutex. Does not free the cache
 * struct itself.
 */
void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   mtx_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      /* Entries were ralloc'ed with nir_cache as context; free each one
       * explicitly before destroying the table.
       */
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         /* Unref (possibly destroying) each entry; pipelines may still hold
          * their own references.
          */
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}
766 
767 VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,VkPipelineCache _cache,const VkAllocationCallbacks * pAllocator)768 v3dv_DestroyPipelineCache(VkDevice _device,
769                           VkPipelineCache _cache,
770                           const VkAllocationCallbacks *pAllocator)
771 {
772    V3DV_FROM_HANDLE(v3dv_device, device, _device);
773    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
774 
775    if (!cache)
776       return;
777 
778    v3dv_pipeline_cache_finish(cache);
779 
780    vk_object_free(&device->vk, pAllocator, cache);
781 }
782 
783 VKAPI_ATTR VkResult VKAPI_CALL
/* vkMergePipelineCaches: imports into dst every NIR entry and every pipeline
 * shared-data entry of the source caches that dst does not already contain
 * (deduplicated by SHA1 key).
 *
 * Note: dst is not locked here; per the Vulkan spec, dstCache access must be
 * externally synchronized by the application.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if copying a serialized
 * NIR entry fails to allocate.
 */
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   /* A destination cache without hash tables cannot receive entries. */
   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      /* Likewise, skip sources that have nothing to contribute. */
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         /* Already present in dst: nothing to do. */
         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we are using serialized nir shaders because they are
          * convenient to create and store on the cache, but requires to do a
          * copy here (and some other places) of the serialized NIR. Perhaps
          * it would make sense to move to handle the NIR shaders with shared
          * structures with ref counts, as the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         /* ralloc_size returns NULL on allocation failure; the previous code
          * dereferenced the result unconditionally.
          */
         if (snir_dst == NULL)
            return VK_ERROR_OUT_OF_HOST_MEMORY;
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            mesa_logi("pipeline cache %p, added nir entry %s "
                      "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         /* Shared-data entries are refcounted, so no deep copy is needed:
          * take a reference and insert the same object into dst.
          */
         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            mesa_logi("pipeline cache %p, added entry %s "
                      "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}
858 
859 static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant * variant,struct blob * blob)860 shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
861                              struct blob *blob)
862 {
863    blob_write_uint32(blob, variant->stage);
864 
865    blob_write_uint32(blob, variant->prog_data_size);
866    blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);
867 
868    struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
869    blob_write_uint32(blob, ulist->count);
870    blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
871    blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);
872 
873    blob_write_uint32(blob, variant->assembly_offset);
874    blob_write_uint32(blob, variant->qpu_insts_size);
875 
876    return !blob->out_of_memory;
877 }
878 
879 static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data * cache_entry,struct blob * blob)880 v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
881                                         struct blob *blob)
882 {
883    blob_write_bytes(blob, cache_entry->sha1_key, 20);
884 
885    uint8_t descriptor_maps_count = 0;
886    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
887       if (broadcom_shader_stage_is_binning(stage))
888          continue;
889       if (cache_entry->maps[stage] == NULL)
890          continue;
891       descriptor_maps_count++;
892    }
893 
894    /* Compute pipelines only have one descriptor map,
895     * graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning
896     * stages take the descriptor map from the render stage.
897     */
898    assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
899           (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
900    blob_write_uint8(blob, descriptor_maps_count);
901 
902    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
903       if (cache_entry->maps[stage] == NULL)
904          continue;
905       if (broadcom_shader_stage_is_binning(stage))
906          continue;
907 
908       blob_write_uint8(blob, stage);
909       blob_write_bytes(blob, cache_entry->maps[stage],
910                        sizeof(struct v3dv_descriptor_maps));
911    }
912 
913    uint8_t variant_count = 0;
914    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
915       if (cache_entry->variants[stage] == NULL)
916          continue;
917       variant_count++;
918    }
919 
920    /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and
921     * compute pipelines only have 1.
922     */
923    assert((variant_count == 5  || variant_count == 3) ||
924           (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
925    blob_write_uint8(blob, variant_count);
926 
927    uint32_t total_assembly_size = 0;
928    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
929       if (cache_entry->variants[stage] == NULL)
930          continue;
931 
932       blob_write_uint8(blob, stage);
933       if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
934          return false;
935 
936       total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
937    }
938    blob_write_uint32(blob, total_assembly_size);
939 
940    assert(cache_entry->assembly_bo->map);
941    assert(cache_entry->assembly_bo->size >= total_assembly_size);
942    blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);
943 
944    return !blob->out_of_memory;
945 }
946 
947 
/* vkGetPipelineCacheData: serializes the cache into pData, or reports the
 * required size when pData is NULL. Serialized layout:
 *
 *   vk_pipeline_cache_header
 *   uint32_t nir_count, followed by nir_count NIR entries
 *     (20-byte sha1 key, uint32 size, serialized NIR bytes)
 *   uint32_t count, followed by count pipeline shared-data entries
 *
 * Returns VK_SUCCESS on a complete write, VK_INCOMPLETE if the provided
 * buffer could not hold every entry (partial entries are rolled back so the
 * output only ever contains whole entries).
 */
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      /* Write into the caller's buffer, bounded by its reported size. */
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      /* Size-query mode: no data is stored, but blob.size still advances,
       * giving us the required size at the end.
       */
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = device->pdevice;
   /* Assume failure; flipped to VK_SUCCESS only after everything fits. */
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   /* Standard Vulkan pipeline-cache header, so loaders can validate
    * vendor/device/uuid before handing the data back to us.
    */
   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   /* Reserve a placeholder for the NIR entry count; it is patched in after
    * we know how many entries actually fit.
    */
   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         /* Remember the size before this entry so a partial write can be
          * rolled back, leaving only whole entries in the output.
          */
         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   /* Same reserve/patch scheme for the pipeline shared-data entry count. */
   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   /* Everything fit: report the total serialized size. */
   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      mesa_logi("GetPipelineCacheData: serializing cache %p, "
                "%i nir shader entries "
                "%i entries, %u DataSize\n",
                cache, nir_count, count, (uint32_t) *pDataSize);
   }

 done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}
1048