/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;

/* Shared for nir/variants */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

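/* Both caches are keyed by raw 20-byte SHA-1 digests, so the hash table
 * callbacks below hash and compare those bytes directly.
 */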
static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

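/* NIR shaders are cached in serialized form; data is a flexible array
 * member holding the nir_serialize() output.
 */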
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[0];
};

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
   fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
   fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);

   fprintf(stderr, " cache entries: %d\n", cache->stats.count);
   fprintf(stderr, " cache miss count: %d\n", cache->stats.miss);
   fprintf(stderr, " cache hit count: %d\n", cache->stats.hit);
}

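/* VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT means the
 * application guarantees it will serialize access to the cache itself, so
 * we can skip taking the mutex.
 */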
static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      pthread_mutex_lock(&cache->mutex);
}

static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      pthread_mutex_unlock(&cache->mutex);
}

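/* Serializes the given NIR shader and stores it in the NIR cache keyed by
 * sha1_key. This is a no-op if the cache is disabled or full, or if it
 * already has an entry for that key.
 */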
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pipeline_cache_unlock(cache);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pipeline_cache_lock(cache);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock. We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pipeline_cache_unlock(cache);
}

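/* Looks up sha1_key in the NIR cache. On a hit this returns a freshly
 * deserialized nir_shader that the caller owns; on a miss it returns NULL.
 */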
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use a NULL context because we want the p_stage to keep the
       * reference to the NIR, as we keep open the possibility of providing
       * a shader variant after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "\tnir cache hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "\tnir cache miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}

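/* When cache_enabled is false both hash tables are left NULL, which turns
 * all the cache lookups and uploads in this file into no-ops.
 */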
void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches the pipeline cache and returns a v3dv_pipeline_shared_data with
 * the cached data, or NULL on a miss. On a hit, it increases the ref_count,
 * so the caller is responsible for unrefing it.
 */
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit)
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pipeline_cache_lock(cache);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      *cache_hit = true;
      if (debug_cache) {
         fprintf(stderr, "\tcache hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pipeline_cache_unlock(cache);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "\tcache miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk cache can be disabled independently, while
    * keeping the pipeline cache working, with the environment variable
    * MESA_GLSL_CACHE_DISABLE. In that case the calls to disk_cache_put/get
    * will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         if (debug_cache)
            fprintf(stderr, "\ton-disk-cache hit\n");

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            if (cache)
               pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      } else {
         if (debug_cache)
            fprintf(stderr, "\ton-disk-cache miss\n");
      }
   }
#endif

   return NULL;
}

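/* This is called from v3dv_pipeline_shared_data_unref once the refcount
 * drops to zero, so it frees everything the entry owns, including the
 * assembly BO.
 */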
void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

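/* Builds a new shared_data entry from the given per-stage descriptor maps
 * and variants, and uploads the combined shader assembly into a fresh BO.
 * Returns NULL on allocation failure.
 */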
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is
    * ref'd and unref'd by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will be available on the
    * last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader assembly\n");
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
      return NULL;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
      return NULL;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;
}

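/* Adds shared_data to the in-memory cache, taking a reference. Unless we
 * got here from an on-disk cache hit, the entry is also serialized into
 * the on-disk cache.
 */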
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing
    * to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);

            fprintf(stderr, "on-disk-cache, new cache entry with sha1 key %s:%p\n\n",
                    sha1buf, shared_data);
         }
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

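/* Reads back one NIR cache entry from a blob: the 20-byte SHA-1 key, a
 * uint32 size, and the serialized NIR bytes.
 */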
static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char *snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

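/* Reads back one shader variant from a blob, rebuilding the v3d_prog_data
 * (including its uniform list) that v3dv_shader_variant_create takes
 * ownership of.
 */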
static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* v3dv_shader_variant_create expects a newly created prog_data of its
    * own, as that is what the v3d compiler returns, so we allocate one here
    * (including the uniform list) and fill it with the data read from the
    * blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

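/* Reads back a full pipeline entry from a blob. The layout mirrors
 * v3dv_pipeline_shared_data_write_to_blob below: SHA-1 key, descriptor
 * maps (render stages only, binning stages share them), shader variants,
 * and finally the combined shader assembly.
 */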
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         return NULL;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         return NULL;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      return NULL;

   return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                        total_assembly, total_assembly_size);
}

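/* Loads the initial data passed to vkCreatePipelineCache. The expected
 * layout is the standard Vulkan pipeline cache header, followed by a
 * uint32 NIR entry count and the NIR entries, then a uint32 pipeline entry
 * count and the pipeline entries.
 */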
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   if (size < sizeof(header))
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   blob_copy_bytes(&blob, &header, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
              "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we use serialized NIR shaders because they are convenient
          * to create and store in the cache, but that requires copying the
          * serialized NIR here (and in some other places). Perhaps it would
          * make sense to handle the NIR shaders with shared ref-counted
          * structures, like the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            fprintf(stderr, "pipeline cache %p, added nir entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

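/* Serializes one shader variant: stage, prog_data, its uniform list, and
 * the offset and size of its assembly within the shared assembly BO.
 */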
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines only have one descriptor map, while graphics
    * pipelines have 2 (VS+FS) or 3 (VS+GS+FS), since the binning stages
    * take the descriptor map from the corresponding render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS have 5, and
    * compute pipelines only have 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

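/* vkGetPipelineCacheData follows the usual two-call idiom: with pData NULL
 * we only measure the required size (blob_init_fixed with a NULL buffer
 * just tracks sizes), and with pData set we serialize as many whole
 * entries as fit, returning VK_INCOMPLETE if we had to stop early.
 */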
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If the write fails, reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

 done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}