/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_program.h"

#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_helpers.h"
#include "zink_pipeline.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_inlines.h"

#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "nir_serialize.h"
#include "nir/nir_draw_helpers.h"

/* for pipeline cache */
#define XXH_INLINE_ALL
#include "util/xxhash.h"

static void
precompile_job(void *data, void *gdata, int thread_index);
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);

void
debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
{
   sprintf(buf, "zink_gfx_program");
}

void
debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
{
   sprintf(buf, "zink_compute_program");
}

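/* Shader variant keys are stored in a single allocation directly after the
 * zink_shader_module struct, laid out as:
 *
 *   [base shader key][nonseamless cube mask][inlined uniform values][zs swizzle data]
 *
 * where each segment is present only if the corresponding feature is in use.
 * The match helpers below compare the segments accordingly.
 */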
ALWAYS_INLINE static bool
shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
{
   if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
       zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
          (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
                                    key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
}

ALWAYS_INLINE static bool
shader_key_matches(const struct zink_shader_module *zm,
                   const struct zink_shader_key *key, unsigned num_uniforms,
                   bool has_inline, bool has_nonseamless)
{
   const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
   if (has_inline) {
      if (zm->num_uniforms != num_uniforms ||
          (num_uniforms &&
           memcmp(zm->key + zm->key_size + nonseamless_size,
                  key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
         return false;
   }
   if (!has_nonseamless) {
      if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
          (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
         return false;
   }
   if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   return !memcmp(zm->key, key, zm->key_size);
}

static uint32_t
shader_module_hash(const struct zink_shader_module *zm)
{
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
   return _mesa_hash_data(zm->key, key_size);
}

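/* Compute the sizes of the optional key segments (inlined uniforms and the
 * nonseamless cube mask) for this stage; uniform inlining is disabled once the
 * per-stage variant limit is reached on non-CPU drivers.
 */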
ALWAYS_INLINE static void
gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
                          struct zink_shader *zs, struct zink_gfx_program *prog,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless, //is nonseamless ext present?
                          unsigned *inline_size, unsigned *nonseamless_size)
{
   gl_shader_stage stage = zs->info.stage;
   struct zink_shader_key *key = &state->shader_keys.key[stage];
   if (has_inline && ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
      if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
         *inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (!has_nonseamless && key->base.nonseamless_cube_mask)
      *nonseamless_size = sizeof(uint32_t);
}

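/* Compile a new shader variant for this stage from the program's serialized NIR,
 * copy the key data into the module's trailing storage, and append the module to
 * the program's variant cache.
 */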
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                               struct zink_shader *zs, struct zink_gfx_program *prog,
                               gl_shader_stage stage,
                               struct zink_gfx_pipeline_state *state,
                               unsigned inline_size, unsigned nonseamless_size,
                               bool has_inline, //is inlining enabled?
                               bool has_nonseamless) //is nonseamless ext present?
{
   struct zink_shader_module *zm;
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
                                            (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
   zm = malloc(sizeof(struct zink_shader_module) + key->size +
               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
               (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   zm->num_uniforms = inline_size;
   if (!is_nongenerated_tcs) {
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
   } else {
      zm->key_size = 0;
      memset(zm->key, 0, key->size);
   }
   if (!has_nonseamless && nonseamless_size) {
      /* nonseamless mask gets added to base key if it exists */
      memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
   }
   zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
   zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
   if (inline_size)
      memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zm->hash = patch_vertices;
   else
      zm->hash = shader_module_hash(zm);
   if (unlikely(shadow_needs_shader_swizzle)) {
      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
      zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   if (inline_size)
      prog->inlined_variant_count[stage]++;
   util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
   return zm;
}

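/* Look up an existing variant in the program's cache for this stage; on a hit,
 * the module is swapped to the front of the array as a most-recently-used
 * heuristic so repeated lookups stay cheap.
 */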
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                            struct zink_shader *zs, struct zink_gfx_program *prog,
                            gl_shader_stage stage,
                            struct zink_gfx_pipeline_state *state,
                            unsigned inline_size, unsigned nonseamless_size,
                            bool has_inline, //is inlining enabled?
                            bool has_nonseamless) //is nonseamless ext present?
{
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
                                            (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));

   struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
            continue;
      } else {
         if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
            continue;
         if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

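/* "Optimal key" variants encode the whole per-stage key in at most 16 bits,
 * so only a single uint16_t is stored instead of a full zink_shader_key;
 * fragment keys use all 16 bits, the other stages use the low 8.
 */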
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                       struct zink_shader *zs, struct zink_gfx_program *prog,
                                       gl_shader_stage stage,
                                       struct zink_gfx_pipeline_state *state)
{
   struct zink_shader_module *zm;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   bool shadow_needs_shader_swizzle = false;
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&state->shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   size_t key_size = sizeof(uint16_t);
   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints);
      unsigned patch_vertices = 3;
      if (ctx) {
         struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
         patch_vertices = tcs->patch_vertices;
      }
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   if (key && !is_nongenerated_tcs) {
      zm->key_size = key_size;
      uint16_t *data = (uint16_t*)zm->key;
      /* sanitize actual key bits */
      *data = (*key) & mask;
      if (unlikely(shadow_needs_shader_swizzle))
         memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
   return zm;
}

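/* Optimal-key counterpart of get_shader_module_for_stage(): compare only the
 * masked 16-bit key (plus zs swizzle data when needed) and MRU-swap hits to
 * the front of the cache.
 */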
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                    struct zink_shader *zs, struct zink_gfx_program *prog,
                                    gl_shader_stage stage,
                                    struct zink_gfx_pipeline_state *state)
{
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   bool shadow_needs_shader_swizzle = false;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         /* always match */
      } else if (key) {
         uint16_t val = (*key) & mask;
         /* no key is bigger than uint16_t */
         if (memcmp(iter->key, &val, sizeof(uint16_t)))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

static void
zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
{
   if (zm->shobj)
      VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL);
   else
      VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL);
   ralloc_free(zm->obj.spirv);
   free(zm);
}

static void
destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
{
   while (util_dynarray_contains(sc, void*)) {
      struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
      zink_destroy_shader_module(screen, zm);
   }
}

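/* Refresh the shader modules for every dirty stage in 'mask': look up (or
 * compile) the variant matching the current keys, then fold each changed
 * module's hash into the program's variant hash by XORing the old hash out
 * and the new one in.
 */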
ALWAYS_INLINE static void
update_gfx_shader_modules(struct zink_context *ctx,
                      struct zink_screen *screen,
                      struct zink_gfx_program *prog, uint32_t mask,
                      struct zink_gfx_pipeline_state *state,
                      bool has_inline, //is inlining enabled?
                      bool has_nonseamless) //is nonseamless ext present?
{
   bool hash_changed = false;
   bool default_variants = true;
   assert(prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = prog->last_variant_hash;
   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(mask & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                  inline_size, nonseamless_size, has_inline, has_nonseamless);
      if (!zm)
         zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                             inline_size, nonseamless_size, has_inline, has_nonseamless);
      state->modules[i] = zm->obj.mod;
      if (prog->objs[i].mod == zm->obj.mod)
         continue;
      prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
      variant_hash ^= prog->module_hash[i];
      hash_changed = true;
      default_variants &= zm->default_variant;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (has_inline) {
         if (zm->num_uniforms)
            prog->inline_variants |= BITFIELD_BIT(i);
         else
            prog->inline_variants &= ~BITFIELD_BIT(i);
      }
      variant_hash ^= prog->module_hash[i];
   }

   if (hash_changed && state) {
      if (default_variants)
         prog->last_variant_hash = prog->default_variant_hash;
      else
         prog->last_variant_hash = variant_hash;

      state->modules_changed = true;
   }
}

static void
generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = 0;
   bool default_variants = true;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
                                screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
                                &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                     inline_size, nonseamless_size,
                                                                     screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
      state->modules[i] = zm->obj.mod;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (zm->num_uniforms)
         prog->inline_variants |= BITFIELD_BIT(i);
      default_variants &= zm->default_variant;
      variant_hash ^= prog->module_hash[i];
   }

   state->modules_changed = true;

   prog->last_variant_hash = variant_hash;
   if (default_variants)
      prog->default_variant_hash = prog->last_variant_hash;
}

static void
generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
   }

   state->modules_changed = true;
   prog->last_variant_hash = state->optimal_key;
}

static uint32_t
hash_pipeline_lib_generated_tcs(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   return gkey->optimal_key;
}

static bool
equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(uint32_t));
}

static uint32_t
hash_pipeline_lib(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   /* remove generated tcs bits */
   return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
}

static bool
equals_pipeline_lib(const void *a, const void *b)
{
   const struct zink_gfx_library_key *ak = a;
   const struct zink_gfx_library_key *bk = b;
   /* remove generated tcs bits */
   uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
   uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
   return val_a == val_b;
}

uint32_t
hash_gfx_input_dynamic(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   return ikey->idx;
}

static bool
equals_gfx_input_dynamic(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   return ikey_a->idx == ikey_b->idx;
}

uint32_t
hash_gfx_input(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   if (ikey->uses_dynamic_stride)
      return ikey->input;
   return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
}

static bool
equals_gfx_input(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   if (ikey_a->uses_dynamic_stride)
      return ikey_a->element_state == ikey_b->element_state &&
             !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
   return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
}

uint32_t
hash_gfx_output_ds3(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, sizeof(uint32_t));
}

static bool
equals_gfx_output_ds3(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, sizeof(uint32_t));
}

uint32_t
hash_gfx_output(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
}

static bool
equals_gfx_output(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
}

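/* has_inline/has_nonseamless are passed as compile-time constants at each call
 * site below so the ALWAYS_INLINE variant-selection helpers above can
 * specialize into branchless code for each driver configuration.
 */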
ALWAYS_INLINE static void
update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driconf.inline_uniforms || prog->needs_inlining)
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                true, has_nonseamless);
   else
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                false, has_nonseamless);
}

static void
update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_non_seamless_cube_map)
      update_gfx_program_nonseamless(ctx, prog, true);
   else
      update_gfx_program_nonseamless(ctx, prog, false);
}

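/* Draw-time entrypoint for the non-optimal-keys path: find or create the gfx
 * program for the currently bound shaders and refresh any dirty variants.
 * The pipeline state's final_hash is kept current by XORing out the previous
 * program's variant hash and XORing in the new one.
 */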
void
zink_gfx_program_update(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      gl_shader_stage pstage = ctx->last_vertex_stage->info.stage;
      ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      /* this must be done before prog is updated */
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
            if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
               ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod;
         }
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_gfx_stages |= prog->stages_present;
         update_gfx_program(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
         zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         prog->base.removed = false;
         generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
}

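/* Swap in the variant matching the current optimal key for one stage,
 * compiling it if necessary; returns whether the bound module changed.
 */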
ALWAYS_INLINE static bool
update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library)
      util_queue_fence_wait(&prog->base.cache_fence);
   struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
   if (!zm) {
      zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
      perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage));
   }

   bool changed = prog->objs[pstage].mod != zm->obj.mod;
   prog->objs[pstage] = zm->obj;
   prog->objects[pstage] = zm->obj.obj;
   return changed;
}

static void
update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   const union zink_shader_key_optimal *optimal_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_bits != optimal_key->vs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
       /* always recheck shadow swizzles since they aren't directly part of the key */
       unlikely(shadow_needs_shader_swizzle)) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
      ctx->gfx_pipeline_state.modules_changed |= changed;
      if (unlikely(shadow_needs_shader_swizzle)) {
         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
         /* the swizzle data lives directly after the uint16_t key; parenthesize
          * so the offset is applied in bytes, not in key-struct strides
          */
         ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)(pzm[0]->key + sizeof(uint16_t));
      }
   }
   if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
       ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
}

static struct zink_gfx_program *
replace_separable_prog(struct zink_screen *screen, struct hash_entry *entry, struct zink_gfx_program *prog)
{
   struct zink_gfx_program *real = prog->full_prog;
   entry->data = real;
   entry->key = real->shaders;
   real->base.removed = false;
   zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   prog->base.removed = true;
   return real;
}

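/* Draw-time entrypoint for the optimal-keys path. Separable (fast-linked)
 * programs can only service the default shader key; any non-default variant
 * forces a sync with the background job that links the real program, which is
 * then swapped into the cache via replace_separable_prog().
 */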
void
zink_gfx_program_update_optimal(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);

      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         if (prog->is_separable && !(zink_debug & ZINK_DEBUG_NOOPT)) {
            /* shader variants can't be handled by separable programs: sync and compile */
            if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))
               util_queue_fence_wait(&prog->base.cache_fence);
            /* If the optimized linked pipeline is done compiling, swap it into place. */
            if (util_queue_fence_is_signalled(&prog->base.cache_fence)) {
               prog = replace_separable_prog(screen, entry, prog);
            }
         }
         update_gfx_program_optimal(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         prog->base.removed = false;
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         if (!prog->is_separable) {
            zink_screen_get_pipeline_cache(screen, &prog->base, false);
            perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n");
            generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
         }
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (ctx->curr_program->is_separable && !(zink_debug & ZINK_DEBUG_NOOPT)) {
         struct zink_gfx_program *prog = ctx->curr_program;
         if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key)) {
            util_queue_fence_wait(&prog->base.cache_fence);
            /* shader variants can't be handled by separable programs: sync and compile */
            perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n");
            struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
            const uint32_t hash = ctx->gfx_hash;
            simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
            struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
            ctx->curr_program = replace_separable_prog(screen, entry, prog);
            simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
         }
      }
      update_gfx_program_optimal(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
   ctx->gfx_dirty = false;
   ctx->last_vertex_stage_dirty = false;
}

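/* Background job: compile a fully optimized pipeline for a cache entry that is
 * currently served by a fast-linked or unoptimized pipeline; on success the new
 * pipeline is swapped in and the old one is stashed as unoptimized_pipeline.
 */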
static void
optimized_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;
   VkPipeline pipeline;
   if (pc_entry->gpl.gkey)
      pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false);
   else
      pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL);
   if (pipeline) {
      pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline;
      pc_entry->pipeline = pipeline;
   }
}

static void
optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;

   struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      objs[i].mod = VK_NULL_HANDLE;
      objs[i].spirv = pc_entry->shobjs[i].spirv;
   }
   pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true, NULL);
   /* no unoptimized_pipeline dance */
}

void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driver_workarounds.disable_optimized_compile)
      return;
   if (zink_debug & ZINK_DEBUG_NOBGC) {
      if (pc_entry->prog->base.uses_shobj)
         optimized_shobj_compile_job(pc_entry, screen, 0);
      else
         optimized_compile_job(pc_entry, screen, 0);
   } else {
      util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
                         pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
   }
}

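/* Compute counterpart of the gfx variant update: the default variant lives in
 * comp->module, and only variants needing extra key data (inlined uniforms,
 * nonseamless cubes, robustness, zs swizzle) are stored in the shader cache.
 */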
static void
update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_shader *zs = comp->shader;
   struct zink_shader_module *zm = NULL;
   unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
   struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
   ASSERTED bool check_robustness = screen->driver_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
   assert(zink_cs_key(key)->robust_access == check_robustness);

   if (ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
      if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
         inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (key->base.nonseamless_cube_mask)
      nonseamless_size = sizeof(uint32_t);
   if (key->base.needs_zs_shader_swizzle)
      zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);

   if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
      struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
      unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
      struct zink_shader_module **pzm = shader_cache->data;
      for (unsigned i = 0; i < count; i++) {
         struct zink_shader_module *iter = pzm[i];
         if (!shader_key_matches(iter, key, inline_size,
                                 screen->driconf.inline_uniforms,
                                 screen->info.have_EXT_non_seamless_cube_map))
            continue;
         if (unlikely(zs_swizzle_size)) {
            /* zs swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
               continue;
         }
         if (i > 0) {
            struct zink_shader_module *zero = pzm[0];
            pzm[0] = iter;
            pzm[i] = zero;
         }
         zm = iter;
      }
   } else {
      zm = comp->module;
   }

   if (!zm) {
      zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
      if (!zm) {
         return;
      }
      zm->shobj = false;
      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
      if (!zm->obj.spirv) {
         FREE(zm);
         return;
      }
      zm->num_uniforms = inline_size;
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
      zm->has_nonseamless = !!nonseamless_size;
      zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
      assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
      if (nonseamless_size)
         memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
      if (inline_size)
         memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
      if (zs_swizzle_size)
         memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);

      zm->hash = shader_module_hash(zm);
      zm->default_variant = false;
      if (inline_size)
         comp->inlined_variant_count++;

      /* this is otherwise the default variant, which is stored as comp->module */
      if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
         util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
   }
   if (comp->curr == zm)
      return;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   comp->curr = zm;
   ctx->compute_pipeline_state.module_hash = zm->hash;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   ctx->compute_pipeline_state.module_changed = true;
}

void
zink_update_compute_program(struct zink_context *ctx)
{
   util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
   update_cs_shader_module(ctx, ctx->curr_compute);
}

VkPipelineLayout
zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
{
   VkPipelineLayoutCreateInfo plci = {0};
   plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   plci.flags = flags;

   plci.pSetLayouts = dsl;
   plci.setLayoutCount = num_dsl;

   VkPushConstantRange pcr;
   if (!is_compute) {
      pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
      pcr.offset = 0;
      pcr.size = sizeof(struct zink_gfx_push_constant);
      plci.pushConstantRangeCount = 1;
      plci.pPushConstantRanges = &pcr;
   }

   VkPipelineLayout layout;
   VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
   if (result != VK_SUCCESS) {
      mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
      return VK_NULL_HANDLE;
   }

   return layout;
}

static void *
create_program(struct zink_context *ctx, bool is_compute)
{
   struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
   if (!pg)
      return NULL;

   pipe_reference_init(&pg->reference, 1);
   u_rwlock_init(&pg->pipeline_cache_lock);
   util_queue_fence_init(&pg->cache_fence);
   pg->is_compute = is_compute;
   pg->ctx = ctx;
   return (void*)pg;
}

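/* Pair each shader stage with the next enabled stage in the pipeline and let
 * the compiler match up their IO slots; the outer index jumps straight to the
 * consumer once a pair has been processed.
 */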
static void
assign_io(struct zink_screen *screen,
          nir_shader *shaders[ZINK_GFX_SHADER_COUNT])
{
   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
      nir_shader *producer = shaders[i];
      for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
         nir_shader *consumer = shaders[j];
         if (!consumer)
            continue;
         zink_compiler_assign_io(screen, producer, consumer);
         i = j;
         break;
      }
   }
}

void
zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
{
   if (!p_atomic_dec_zero(&libs->refcount))
      return;

   simple_mtx_destroy(&libs->lock);
   set_foreach_remove(&libs->libs, he) {
      struct zink_gfx_library_key *gkey = (void*)he->key;
      VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
      FREE(gkey);
   }
   ralloc_free(libs->libs.table);
   FREE(libs);
}

static struct zink_gfx_lib_cache *
create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
{
   struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
   libs->stages_present = prog->stages_present;
   if (generated_tcs)
      libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   simple_mtx_init(&libs->lock, mtx_plain);
   if (generated_tcs)
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
   else
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib);
   return libs;
}

static struct zink_gfx_lib_cache *
find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
{
   unsigned stages_present = prog->stages_present;
   bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
   if (generated_tcs)
      stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   unsigned idx = zink_program_cache_stages(stages_present);
   struct set *ht = &screen->pipeline_libs[idx];
   const uint32_t hash = prog->gfx_hash;

   simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
   struct zink_gfx_lib_cache *libs;
   if (found) {
      libs = (void*)entry->key;
   } else {
      libs = create_lib_cache(prog, generated_tcs);
      memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
      entry->key = libs;
      unsigned refs = 0;
      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
         if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
            simple_mtx_lock(&prog->shaders[i]->lock);
            util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
            simple_mtx_unlock(&prog->shaders[i]->lock);
            refs++;
         }
      }
      p_atomic_set(&libs->refcount, refs);
   }
   simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
   return libs;
}

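/* Create a full gfx program for a set of shaders: serialize the NIR, generate
 * a tcs if tessellation is used without one, set up per-stage variant caches
 * and pipeline hash tables, and register the program with each shader.
 */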
struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
                        struct zink_shader **stages,
                        unsigned vertices_per_patch,
                        uint32_t gfx_hash)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->ctx = ctx;
   prog->gfx_hash = gfx_hash;
   prog->base.removed = true;
   prog->optimal_keys = screen->optimal_keys;

   nir_shader *nir[ZINK_GFX_SHADER_COUNT];

   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
                         prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      util_dynarray_init(&prog->shader_cache[i][0][0], prog);
      util_dynarray_init(&prog->shader_cache[i][0][1], prog);
      util_dynarray_init(&prog->shader_cache[i][1][0], prog);
      util_dynarray_init(&prog->shader_cache[i][1][1], prog);
      if (stages[i]) {
         prog->shaders[i] = stages[i];
         prog->stages_present |= BITFIELD_BIT(i);
         if (i != MESA_SHADER_FRAGMENT)
            prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
         prog->needs_inlining |= prog->shaders[i]->needs_inlining;
         nir[i] = zink_shader_deserialize(screen, stages[i]);
      } else {
         nir[i] = NULL;
      }
   }
   if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
      prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
      prog->shaders[MESA_SHADER_TESS_CTRL] =
        zink_shader_tcs_create(screen, nir[MESA_SHADER_TESS_EVAL], vertices_per_patch, &nir[MESA_SHADER_TESS_CTRL]);
      prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   }
   prog->stages_remaining = prog->stages_present;

   assign_io(screen, nir);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (nir[i])
         zink_shader_serialize_blob(nir[i], &prog->blobs[i]);
      ralloc_free(nir[i]);
   }

   if (stages[MESA_SHADER_GEOMETRY])
      prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
   else if (stages[MESA_SHADER_TESS_EVAL])
      prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
   else
      prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }

   if (screen->optimal_keys)
      prog->libs = find_or_create_lib_cache(screen, prog);
   if (prog->libs)
      p_atomic_inc(&prog->libs->refcount);

   struct mesa_sha1 sctx;
   _mesa_sha1_init(&sctx);
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         zink_gfx_program_reference(screen, NULL, prog);
         _mesa_sha1_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
      }
   }
   _mesa_sha1_final(&sctx, prog->base.sha1);
   p_atomic_dec(&prog->base.reference.count);

   if (!zink_descriptor_program_init(ctx, &prog->base))
      goto fail;

   return prog;

fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

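/* Layout note (editorial sketch): prog->pipelines is indexed as
 * [renderpass/dynamic-rendering variant][primitive-type bucket]. With
 * EXT_extended_dynamic_state the topology is dynamic, so only the reduced
 * primitive classes need tables, e.g. (hypothetical index names):
 *
 *    enum { IDX_POINT, IDX_LINE, IDX_TRI, IDX_PATCH };
 *    unsigned idx = has_tess ? IDX_PATCH : reduced_prim(topology);
 *    struct hash_table *ht = &prog->pipelines[rp_idx][idx];
 *
 * hence the early break above once the first 3 (4 with tessellation)
 * buckets have been initialized.
 */
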
/* Creates a replacement, optimized zink_gfx_program for this set of separate shaders,
 * which will be swapped in to replace the fast-linked separable program once it
 * finishes compiling.
 */
static void
create_linked_separable_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_program *prog = data;
   prog->full_prog = zink_create_gfx_program(prog->ctx, prog->shaders, 0, prog->gfx_hash);
   /* add an ownership ref */
   zink_gfx_program_reference(zink_screen(prog->ctx->base.screen), NULL, prog->full_prog);
   precompile_job(prog->full_prog, gdata, thread_index);
}

struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool is_separate = true;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
      is_separate &= !stages[i] || stages[i]->info.separate_shader;
   /* filter cases that need real pipelines */
   if (!is_separate ||
       /* TODO: maybe try variants? grimace */
       !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
       !zink_can_use_pipeline_libs(ctx))
      return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      /* ensure async shader creation is done */
      if (stages[i]) {
         util_queue_fence_wait(&stages[i]->precompile.fence);
         if (!stages[i]->precompile.obj.mod)
            return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
      }
   }

   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->ctx = ctx;
   prog->is_separable = true;
   prog->gfx_hash = ctx->gfx_hash;
   prog->base.uses_shobj = screen->info.have_EXT_shader_object;

   prog->stages_remaining = prog->stages_present = ctx->shader_stages;
   memcpy(prog->shaders, stages, sizeof(prog->shaders));
   prog->last_vertex_stage = ctx->last_vertex_stage;

   if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
      prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs;
      prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   }

   if (!screen->info.have_EXT_shader_object) {
      prog->libs = create_lib_cache(prog, false);
      /* this libs cache is owned by the program */
      p_atomic_set(&prog->libs->refcount, 1);
   }

   unsigned refs = 0;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         if (screen->info.have_EXT_shader_object) {
            if (!prog->objects[i])
               prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
         }
         refs++;
      }
   }
   /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
    * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here.
    */
   p_atomic_add(&prog->base.reference.count, refs - 1);

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }

   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl)
         continue;
      int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1;
      prog->base.dd.binding_usage |= BITFIELD_BIT(idx);
      prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl;
      /* guarantee a null dsl if previous stages don't have descriptors */
      if (prog->shaders[i]->precompile.dsl)
         prog->base.num_dsl = idx + 1;
      prog->base.dd.bindless |= prog->shaders[i]->bindless;
   }
   if (prog->base.dd.bindless) {
      prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
      prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
   }
   prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);

   prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;

   if (!screen->info.have_EXT_shader_object) {
      VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
      struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
      if (!gkey) {
         mesa_loge("ZINK: failed to allocate gkey!");
         goto fail;
      }
      gkey->optimal_key = prog->last_variant_hash;
      assert(gkey->optimal_key);
      gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false);
      _mesa_set_add(&prog->libs->libs, gkey);
   }

   if (!(zink_debug & ZINK_DEBUG_NOOPT))
      util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);

   return prog;
fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

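/* Flow summary (editorial sketch): a separable program is fast-linked from the
 * per-shader precompiles so the first draw doesn't stall, then an optimized
 * full link is produced in the background:
 *
 *    prog = create_gfx_program_separable(ctx, stages, patch_vertices);
 *    // draws proceed with prog (GPL fast-link or EXT_shader_object)
 *    // ... meanwhile create_linked_separable_job() builds prog->full_prog,
 *    // which is swapped in once its compile fence signals
 */
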
static uint32_t
hash_compute_pipeline_state_local_size(const void *key)
{
   const struct zink_compute_pipeline_state *state = key;
   uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
   hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
   return hash;
}

static uint32_t
hash_compute_pipeline_state(const void *key)
{
   const struct zink_compute_pipeline_state *state = key;
   return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
}

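/* Hashing convention (editorial sketch): only the bytes preceding the `hash`
 * member participate in the generic hash, so the struct layout itself is the
 * hash descriptor. A hypothetical layout illustrating the rule:
 *
 *    struct zink_compute_pipeline_state {
 *       ...hashed key fields...     // everything before `hash` is hashed
 *       uint32_t hash;              // cached result; never hashed itself
 *       ...derived/cached fields...
 *       uint32_t local_size[3];     // mixed in separately when use_local_size
 *    };
 *
 * Keeping the key fields packed ahead of `hash` is also what makes the
 * offsetof()-bounded memcmp in the equals callbacks below valid.
 */
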
void
zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info)
{
   if (comp->use_local_size) {
      for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
         if (ctx->compute_pipeline_state.local_size[i] != info->block[i])
            ctx->compute_pipeline_state.dirty = true;
         ctx->compute_pipeline_state.local_size[i] = info->block[i];
      }
   }
   if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) {
      ctx->compute_pipeline_state.dirty = true;
      ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem;
   }
}

static bool
equals_compute_pipeline_state(const void *a, const void *b)
{
   const struct zink_compute_pipeline_state *sa = a;
   const struct zink_compute_pipeline_state *sb = b;
   return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
          sa->module == sb->module;
}

static bool
equals_compute_pipeline_state_local_size(const void *a, const void *b)
{
   const struct zink_compute_pipeline_state *sa = a;
   const struct zink_compute_pipeline_state *sb = b;
   return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
          !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
          sa->module == sb->module;
}

static void
precompile_compute_job(void *data, void *gdata, int thread_index)
{
   struct zink_compute_program *comp = data;
   struct zink_screen *screen = gdata;

   comp->shader = zink_shader_create(screen, comp->nir);
   comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
   assert(comp->module);
   comp->module->shobj = false;
   comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
   /* comp->nir will be freed by zink_shader_compile */
   comp->nir = NULL;
   assert(comp->module->obj.spirv);
   util_dynarray_init(&comp->shader_cache[0], comp);
   util_dynarray_init(&comp->shader_cache[1], comp);

   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, comp->shader->blob.data, comp->shader->blob.size);
   _mesa_sha1_final(&sha1_ctx, comp->base.sha1);

   zink_descriptor_program_init(comp->base.ctx, &comp->base);

   zink_screen_get_pipeline_cache(screen, &comp->base, true);
   if (comp->base.can_precompile)
      comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
   if (comp->base_pipeline)
      zink_screen_update_pipeline_cache(screen, &comp->base, true);
}

static struct zink_compute_program *
create_compute_program(struct zink_context *ctx, nir_shader *nir)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_compute_program *comp = create_program(ctx, true);
   if (!comp)
      return NULL;
   simple_mtx_init(&comp->cache_lock, mtx_plain);
   comp->scratch_size = nir->scratch_size;
   comp->nir = nir;
   comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms;

   comp->use_local_size = !(nir->info.workgroup_size[0] ||
                            nir->info.workgroup_size[1] ||
                            nir->info.workgroup_size[2]);
   comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem;
   comp->base.can_precompile = !comp->use_local_size &&
                               (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
                               (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
   _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
                                                       equals_compute_pipeline_state_local_size :
                                                       equals_compute_pipeline_state);
   if (zink_debug & ZINK_DEBUG_NOBGC)
      precompile_compute_job(comp, screen, 0);
   else
      util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
                         precompile_compute_job, NULL, 0);
   return comp;
}

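/* Example (editorial): a kernel declared with an explicit local size, e.g.
 *
 *    layout(local_size_x = 8, local_size_y = 8) in;
 *
 * reports a nonzero nir->info.workgroup_size and gets use_local_size = false,
 * so one pipeline covers every dispatch. A variable-size kernel reports
 * {0,0,0} and must instead bake info->block[] from each launch into the
 * pipeline key, which is also why use_local_size disables precompilation.
 */
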
uint32_t
zink_program_get_descriptor_usage(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type)
{
   struct zink_shader *zs = NULL;
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_FRAGMENT:
      zs = ctx->gfx_stages[stage];
      break;
   case MESA_SHADER_COMPUTE: {
      zs = ctx->curr_compute->shader;
      break;
   }
   default:
      unreachable("unknown shader type");
   }
   if (!zs)
      return 0;
   switch (type) {
   case ZINK_DESCRIPTOR_TYPE_UBO:
      return zs->ubos_used;
   case ZINK_DESCRIPTOR_TYPE_SSBO:
      return zs->ssbos_used;
   case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
      return BITSET_TEST_RANGE(zs->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1);
   case ZINK_DESCRIPTOR_TYPE_IMAGE:
      return BITSET_TEST_RANGE(zs->info.images_used, 0, PIPE_MAX_SAMPLERS - 1);
   default:
      unreachable("unknown descriptor type!");
   }
   return 0;
}

bool
zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
{
   struct zink_shader *zs = NULL;
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_FRAGMENT:
      zs = ctx->gfx_stages[stage];
      break;
   case MESA_SHADER_COMPUTE: {
      zs = ctx->curr_compute->shader;
      break;
   }
   default:
      unreachable("unknown shader type");
   }
   if (!zs)
      return false;
   return zink_shader_descriptor_is_buffer(zs, type, i);
}

static unsigned
get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
{
   switch (type) {
   case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
      return !!zs->has_uniforms;
   case ZINK_DESCRIPTOR_TYPE_UBO:
   case ZINK_DESCRIPTOR_TYPE_SSBO:
      return zs->num_bindings[type];
   default:
      break;
   }
   unsigned num_bindings = 0;
   for (int i = 0; i < zs->num_bindings[type]; i++)
      num_bindings += zs->bindings[type][i].size;
   return num_bindings;
}

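/* Counting example (editorial): for sampler views and images each binding's
 * `size` is its array length, so a shader with `uniform sampler2D tex[4];`
 * plus one standalone sampler counts as 4 + 1 = 5 bindings here, while the
 * UBO/SSBO cases count one entry per binding slot without summing sizes.
 */
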
unsigned
zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
{
   unsigned num_bindings = 0;
   if (pg->is_compute) {
      struct zink_compute_program *comp = (void*)pg;
      return get_num_bindings(comp->shader, type);
   }
   struct zink_gfx_program *prog = (void*)pg;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (prog->shaders[i])
         num_bindings += get_num_bindings(prog->shaders[i], type);
   }
   return num_bindings;
}

unsigned
zink_program_num_bindings(const struct zink_program *pg)
{
   unsigned num_bindings = 0;
   for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
      num_bindings += zink_program_num_bindings_typed(pg, i);
   return num_bindings;
}

static void
deinit_program(struct zink_screen *screen, struct zink_program *pg)
{
   util_queue_fence_wait(&pg->cache_fence);
   if (pg->layout)
      VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);

   if (pg->pipeline_cache)
      VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
   u_rwlock_destroy(&pg->pipeline_cache_lock);
   zink_descriptor_program_deinit(screen, pg);
}

void
zink_destroy_gfx_program(struct zink_screen *screen,
                         struct zink_gfx_program *prog)
{
   unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
   if (screen->info.have_EXT_extended_dynamic_state) {
      /* only need first 3/4 for point/line/tri/patch */
      if ((prog->stages_present &
          (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
          BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
         max_idx = 4;
      else
         max_idx = 3;
      max_idx++;
   }

   if (prog->is_separable)
      zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
      for (int i = 0; i < max_idx; ++i) {
         hash_table_foreach(&prog->pipelines[r][i], entry) {
            struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;

            util_queue_fence_wait(&pc_entry->fence);
            VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
            VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL);
            free(pc_entry);
         }
      }
   }

   deinit_program(screen, &prog->base);

   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         _mesa_set_remove_key(prog->shaders[i]->programs, prog);
         prog->shaders[i] = NULL;
      }
      if (!prog->is_separable) {
         destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
         destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
         destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
         destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
         blob_finish(&prog->blobs[i]);
      }
   }
   if (prog->libs)
      zink_gfx_lib_cache_unref(screen, prog->libs);

   ralloc_free(prog);
}

void
zink_destroy_compute_program(struct zink_screen *screen,
                             struct zink_compute_program *comp)
{
   deinit_program(screen, &comp->base);

   assert(comp->shader);
   assert(!comp->shader->spirv);

   zink_shader_free(screen, comp->shader);

   destroy_shader_cache(screen, &comp->shader_cache[0]);
   destroy_shader_cache(screen, &comp->shader_cache[1]);

   hash_table_foreach(&comp->pipelines, entry) {
      struct compute_pipeline_cache_entry *pc_entry = entry->data;

      VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
      free(pc_entry);
   }
   VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
   zink_destroy_shader_module(screen, comp->module);

   ralloc_free(comp);
}

ALWAYS_INLINE static bool
compute_can_shortcut(const struct zink_compute_program *comp)
{
   return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
}

VkPipeline
zink_get_compute_pipeline(struct zink_screen *screen,
                          struct zink_compute_program *comp,
                          struct zink_compute_pipeline_state *state)
{
   struct hash_entry *entry = NULL;
   struct compute_pipeline_cache_entry *cache_entry;

   if (!state->dirty && !state->module_changed)
      return state->pipeline;
   if (state->dirty) {
      if (state->pipeline) // avoid on first hash
         state->final_hash ^= state->hash;
      if (comp->use_local_size)
         state->hash = hash_compute_pipeline_state_local_size(state);
      else
         state->hash = hash_compute_pipeline_state(state);
      state->dirty = false;
      state->final_hash ^= state->hash;
   }

   util_queue_fence_wait(&comp->base.cache_fence);
   if (comp->base_pipeline && compute_can_shortcut(comp)) {
      state->pipeline = comp->base_pipeline;
      return state->pipeline;
   }
   entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);

   if (!entry) {
      simple_mtx_lock(&comp->cache_lock);
      entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
      if (entry) {
         simple_mtx_unlock(&comp->cache_lock);
         goto out;
      }
      VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);

      if (pipeline == VK_NULL_HANDLE) {
         simple_mtx_unlock(&comp->cache_lock);
         return VK_NULL_HANDLE;
      }

      zink_screen_update_pipeline_cache(screen, &comp->base, false);
      if (compute_can_shortcut(comp)) {
         simple_mtx_unlock(&comp->cache_lock);
         /* don't add base pipeline to cache */
         state->pipeline = comp->base_pipeline = pipeline;
         return state->pipeline;
      }

      struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
      if (!pc_entry) {
         simple_mtx_unlock(&comp->cache_lock);
         return VK_NULL_HANDLE;
      }

      memcpy(&pc_entry->state, state, sizeof(*state));
      pc_entry->pipeline = pipeline;

      entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
      assert(entry);
      simple_mtx_unlock(&comp->cache_lock);
   }
out:
   cache_entry = entry->data;
   state->pipeline = cache_entry->pipeline;
   return state->pipeline;
}

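/* Concurrency note (editorial sketch): the lookup above is the classic
 * double-checked pattern: probe the table without the lock, and only on a
 * miss take cache_lock and probe again before inserting, so two threads
 * racing on the same state can't create duplicate pipelines:
 *
 *    entry = search(ht, key);             // unlocked fast path
 *    if (!entry) {
 *       lock();
 *       entry = search(ht, key);          // re-check under the lock
 *       if (!entry)
 *          entry = insert(ht, key, new);  // only one thread gets here
 *       unlock();
 *    }
 */
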
static void
bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
{
   /* RADV doesn't support binding pipelines in DGC */
   if (zink_screen(ctx->base.screen)->info.nv_dgc_props.maxGraphicsShaderGroupCount == 0)
      zink_flush_dgc_if_enabled(ctx);
   if (shader && shader->info.num_inlinable_uniforms)
      ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
   else
      ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);

   if (ctx->gfx_stages[stage])
      ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;

   if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) {
      ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
      ctx->is_generated_gs_bound = false;
   }

   ctx->gfx_stages[stage] = shader;
   ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
   ctx->gfx_pipeline_state.modules_changed = true;
   if (shader) {
      ctx->shader_stages |= BITFIELD_BIT(stage);
      ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
   } else {
      ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = NULL;
      ctx->shader_stages &= ~BITFIELD_BIT(stage);
   }
}

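/* Hash bookkeeping (editorial): ctx->gfx_hash folds every bound shader in by
 * XOR, so the same self-inverse operation both removes the outgoing shader
 * and adds the incoming one, as bind_gfx_stage does above:
 *
 *    ctx->gfx_hash ^= old_shader->hash;  // unbind: XOR removes it
 *    ctx->gfx_hash ^= new_shader->hash;  // bind:   XOR adds it
 *
 * The compute path uses the same trick for final_hash/module_hash.
 */
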
static enum mesa_prim
gs_output_to_reduced_prim_type(struct shader_info *info)
{
   switch (info->gs.output_primitive) {
   case MESA_PRIM_POINTS:
      return MESA_PRIM_POINTS;

   case MESA_PRIM_LINES:
   case MESA_PRIM_LINE_LOOP:
   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
      return MESA_PRIM_LINES;

   case MESA_PRIM_TRIANGLES:
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_TRIANGLE_FAN:
   case MESA_PRIM_TRIANGLES_ADJACENCY:
   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
      return MESA_PRIM_TRIANGLES;

   default:
      unreachable("unexpected output primitive type");
   }
}

static enum mesa_prim
update_rast_prim(struct zink_shader *shader)
{
   struct shader_info *info = &shader->info;
   if (info->stage == MESA_SHADER_GEOMETRY)
      return gs_output_to_reduced_prim_type(info);
   else if (info->stage == MESA_SHADER_TESS_EVAL) {
      if (info->tess.point_mode)
         return MESA_PRIM_POINTS;
      else {
         switch (info->tess._primitive_mode) {
         case TESS_PRIMITIVE_ISOLINES:
            return MESA_PRIM_LINES;
         case TESS_PRIMITIVE_TRIANGLES:
         case TESS_PRIMITIVE_QUADS:
            return MESA_PRIM_TRIANGLES;
         default:
            return MESA_PRIM_COUNT;
         }
      }
   }
   return MESA_PRIM_COUNT;
}

static void
unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
{
   if (prev_shader->non_fs.is_generated)
      ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);

   if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
       ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent ==
       prev_shader) {
      bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
   }
}

static void
bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
{
   if (prev_shader && stage < MESA_SHADER_GEOMETRY)
      unbind_generated_gs(ctx, stage, prev_shader);

   gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES;
   if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
   else
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
   gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX;

   /* update rast_prim */
   ctx->gfx_pipeline_state.shader_rast_prim =
      ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
                               MESA_PRIM_COUNT;

   if (old != current) {
      if (!zink_screen(ctx->base.screen)->optimal_keys) {
         if (old != MESA_SHADER_STAGES) {
            memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
            ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
         } else {
            /* always unset vertex shader values when changing to a non-vs last stage */
            memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
         }
      }

      unsigned num_viewports = ctx->vp_state.num_viewports;
      struct zink_screen *screen = zink_screen(ctx->base.screen);
      /* number of enabled viewports is based on whether last vertex stage writes viewport index */
      if (ctx->last_vertex_stage) {
         if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
            ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
         else
            ctx->vp_state.num_viewports = 1;
      } else {
         ctx->vp_state.num_viewports = 1;
      }
      ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
      if (!screen->info.have_EXT_extended_dynamic_state) {
         if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
            ctx->gfx_pipeline_state.dirty = true;
         ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
      }
      ctx->last_vertex_stage_dirty = true;
   }
}

static void
zink_bind_vs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
      return;
   struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX];
   bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
   bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader);
   if (cso) {
      struct zink_shader *zs = cso;
      ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
      ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
   } else {
      ctx->shader_reads_drawid = false;
      ctx->shader_reads_basevertex = false;
   }
}

/* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
 * in GL, samples==1 means ignore gl_SampleMask[]
 * in VK, gl_SampleMask[] is never ignored
 */
void
zink_update_fs_key_samples(struct zink_context *ctx)
{
   if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
      return;
   shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
   if (info->outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK)) {
      bool samples = zink_get_fs_base_key(ctx)->samples;
      if (samples != (ctx->fb_state.samples > 1))
         zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
   }
}

void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
{
   bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
   if (line_rectangular != ctx->rast_state->base.line_rectangular)
      zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
}

static void
zink_bind_fs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
      return;
   if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) {
      ctx->saved_fs = cso;
      zink_set_null_fs(ctx);
      return;
   }
   bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true;
   unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
   bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
   ctx->fbfetch_outputs = 0;
   if (cso) {
      shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
      bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0;
      if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
          writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) {
         ctx->blend_state_changed = true;
         ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C);
      }
      if (info->fs.uses_fbfetch_output) {
         if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL)))
            ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
         ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0;
      }
      zink_update_fs_key_samples(ctx);
      if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
         if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output)
            ctx->gfx_pipeline_state.dirty = true;
         ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output;
      }
      zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
      if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
          !zink_screen(pctx->screen)->driver_workarounds.needs_zs_shader_swizzle)
         zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
      if (!ctx->track_renderpasses && !ctx->blitting)
         ctx->rp_tc_info_updated = true;
   }
   zink_update_fbfetch(ctx);
}

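/* Bit mapping (editorial): fbfetch_outputs packs which outputs are read back
 * via framebuffer fetch: bits [0..PIPE_MAX_COLOR_BUFS-1] come from shifting
 * outputs_read down by FRAG_RESULT_DATA0 (one bit per color attachment), and
 * bit PIPE_MAX_COLOR_BUFS flags depth/stencil reads, e.g.:
 *
 *    reads cbuf0 and cbuf2     -> fbfetch_outputs = 0b101
 *    additionally reads depth  -> fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)
 */
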
static void
zink_bind_gs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
      return;
   bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
   bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
}

static void
zink_bind_tcs_state(struct pipe_context *pctx,
                    void *cso)
{
   bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
}

static void
zink_bind_tes_state(struct pipe_context *pctx,
                    void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
      return;
   if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
      if (!cso) {
         /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
         if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
            ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
      }
   }
   struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
   bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
   bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader);
}

static void *
zink_create_cs_state(struct pipe_context *pctx,
                     const struct pipe_compute_state *shader)
{
   struct nir_shader *nir;
   if (shader->ir_type != PIPE_SHADER_IR_NIR)
      nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
   else
      nir = (struct nir_shader *)shader->prog;

   if (nir->info.uses_bindless)
      zink_descriptors_init_bindless(zink_context(pctx));

   return create_compute_program(zink_context(pctx), nir);
}

static void
zink_bind_cs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_compute_program *comp = cso;
   if (comp && comp->num_inlinable_uniforms)
      ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
   else
      ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);

   if (ctx->curr_compute) {
      zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
      ctx->compute_pipeline_state.module_hash = 0;
   }
   ctx->compute_pipeline_state.dirty = true;
   ctx->curr_compute = comp;
   if (comp && comp != ctx->curr_compute) {
      ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
      if (util_queue_fence_is_signalled(&comp->base.cache_fence))
         ctx->compute_pipeline_state.module = ctx->curr_compute->curr->obj.mod;
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
         ctx->compute_dirty = true;
   }
   zink_select_launch_grid(ctx);
}

static void
zink_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info)
{
   struct zink_compute_program *comp = cso;
   struct zink_screen *screen = zink_screen(pctx->screen);

   info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations;
   info->private_memory = comp->scratch_size;
   if (screen->info.props11.subgroupSize) {
      info->preferred_simd_size = screen->info.props11.subgroupSize;
      info->simd_sizes = info->preferred_simd_size;
   } else {
      // just guess it
      info->preferred_simd_size = 64;
      // only used for actual subgroup support
      info->simd_sizes = 0;
   }
}

static void
zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
{
   struct zink_compute_program *comp = cso;
   zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
}

/* caller must lock prog->libs->lock */
struct zink_gfx_library_key *
zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
   if (!gkey) {
      mesa_loge("ZINK: failed to allocate gkey!");
      return NULL;
   }

   gkey->optimal_key = state->optimal_key;
   assert(gkey->optimal_key);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
      gkey->modules[i] = prog->objs[i].mod;
   gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
   _mesa_set_add(&prog->libs->libs, gkey);
   return gkey;
}

static const char *
print_exe_stages(VkShaderStageFlags stages)
{
   if (stages == VK_SHADER_STAGE_VERTEX_BIT)
      return "VS";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
      return "VS+GS";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
      return "VS+TCS+TES";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
      return "VS+TCS+TES+GS";
   if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
      return "TCS";
   if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      return "TES";
   if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
      return "GS";
   if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
      return "FS";
   if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
      return "CS";
   unreachable("unhandled combination of stages!");
}

static void
print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline)
{
   VkPipelineInfoKHR pinfo = {
     VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
     NULL,
     pipeline
   };
   unsigned exe_count = 0;
   VkPipelineExecutablePropertiesKHR props[10] = {0};
   for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
      props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
      props[i].pNext = NULL;
   }
   VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
   /* don't overflow the fixed-size props array */
   exe_count = MIN2(exe_count, ARRAY_SIZE(props));
   VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
   printf("PIPELINE STATISTICS:");
   for (unsigned e = 0; e < exe_count; e++) {
      VkPipelineExecutableInfoKHR info = {
         VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
         NULL,
         pipeline,
         e
      };
      unsigned count = 0;
      printf("\n\t%s (%s): ", print_exe_stages(props[e].stages), props[e].name);
      VkPipelineExecutableStatisticKHR *stats = NULL;
      VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
      stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
      if (!stats) {
         mesa_loge("ZINK: failed to allocate stats!");
         return;
      }

      for (unsigned i = 0; i < count; i++)
         stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
      VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);

      for (unsigned i = 0; i < count; i++) {
         if (i)
            printf(", ");
         switch (stats[i].format) {
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
            printf("%s: %u", stats[i].name, stats[i].value.b32);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
            printf("%s: %" PRIi64, stats[i].name, stats[i].value.i64);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
            printf("%s: %" PRIu64, stats[i].name, stats[i].value.u64);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
            printf("%s: %g", stats[i].name, stats[i].value.f64);
            break;
         default:
            unreachable("unknown statistic");
         }
      }
      /* avoid leaking the per-executable stats array */
      free(stats);
   }
   printf("\n");
}

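/* API note (editorial): the NULL-then-fill call pairs above are the standard
 * Vulkan enumeration idiom, e.g. for any vkGetPipelineExecutable* query:
 *
 *    uint32_t count = 0;
 *    vkGetPipelineExecutablePropertiesKHR(dev, &pinfo, &count, NULL);  // query size
 *    VkPipelineExecutablePropertiesKHR *arr = calloc(count, sizeof(*arr));
 *    for (uint32_t i = 0; i < count; i++)
 *       arr[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
 *    vkGetPipelineExecutablePropertiesKHR(dev, &pinfo, &count, arr);   // fill
 *
 * sType must be pre-initialized because these are structure-typed outputs.
 */
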
static void
precompile_job(void *data, void *gdata, int thread_index)
{
   struct zink_screen *screen = gdata;
   struct zink_gfx_program *prog = data;

   struct zink_gfx_pipeline_state state = {0};
   state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
   state.shader_keys_optimal.key.tcs.patch_vertices = 3; // random guess, generated tcs precompile is hard
   state.optimal_key = state.shader_keys_optimal.key.val;
   generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
   zink_screen_get_pipeline_cache(screen, &prog->base, true);
   if (!screen->info.have_EXT_shader_object) {
      simple_mtx_lock(&prog->libs->lock);
      zink_create_pipeline_lib(screen, prog, &state);
      simple_mtx_unlock(&prog->libs->lock);
   }
   zink_screen_update_pipeline_cache(screen, &prog->base, true);
}

static void
precompile_separate_shader_job(void *data, void *gdata, int thread_index)
{
   struct zink_screen *screen = gdata;
   struct zink_shader *zs = data;

   zs->precompile.obj = zink_shader_compile_separate(screen, zs);
   if (!screen->info.have_EXT_shader_object) {
      struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0};
      objs[zs->info.stage].mod = zs->precompile.obj.mod;
      zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage);
   }
}

static void
zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_shader **zshaders = (struct zink_shader **)shaders;
   if (shaders[MESA_SHADER_COMPUTE])
      return;
   /* explicitly block sample shading: this needs full pipelines always */
   if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
      return;
   /* can't precompile fixedfunc */
   if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
      /* handled directly from shader create */
      return;
   }
   unsigned hash = 0;
   unsigned shader_stages = 0;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (zshaders[i]) {
         hash ^= zshaders[i]->hash;
         shader_stages |= BITFIELD_BIT(i);
      }
   }
   unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
   unsigned tess = shader_stages & tess_stages;
   /* can't do fixedfunc tes either */
   if (tess && !shaders[MESA_SHADER_TESS_EVAL])
      return;
   struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
   simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
   /* link can be called repeatedly with the same shaders: ignore */
   if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
      return;
   }
   struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3, hash);
   u_foreach_bit(i, shader_stages)
      assert(prog->shaders[i]);
   _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
   prog->base.removed = false;
   simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
   if (zink_debug & ZINK_DEBUG_SHADERDB) {
      struct zink_screen *screen = zink_screen(pctx->screen);
      if (screen->optimal_keys)
         generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
      else
         generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state);
      VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state,
                                                     ctx->gfx_pipeline_state.element_state->binding_map,
                                                     shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true, NULL);
      print_pipeline_stats(screen, pipeline);
   } else {
      if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
         prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
      if (zink_debug & ZINK_DEBUG_NOBGC)
         precompile_job(prog, pctx->screen, 0);
      else
         util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
   }
}

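/* Hook usage (editorial sketch): link_shader is gallium's optional hint that a
 * frontend (e.g. GL at glLinkProgram time) has a complete stage set, letting
 * the driver precompile ahead of the first draw. The program cache consulted
 * above is keyed by the stage mask plus the XOR of the shaders' hashes, so
 * repeated links of the same shader set are deduplicated rather than
 * recompiled.
 */
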
void
zink_delete_shader_state(struct pipe_context *pctx, void *cso)
{
   zink_gfx_shader_free(zink_screen(pctx->screen), cso);
}

void *
zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   nir_shader *nir;
   if (shader->type != PIPE_SHADER_IR_NIR)
      nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
   else
      nir = (struct nir_shader *)shader->ir.nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
      zink_descriptor_util_init_fbfetch(zink_context(pctx));
   if (nir->info.uses_bindless)
      zink_descriptors_init_bindless(zink_context(pctx));

   void *ret = zink_shader_create(zink_screen(pctx->screen), nir);

   if (nir->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
       (screen->info.have_EXT_shader_object ||
       (screen->info.have_EXT_graphics_pipeline_library && (nir->info.stage == MESA_SHADER_FRAGMENT || nir->info.stage == MESA_SHADER_VERTEX)))) {
      struct zink_shader *zs = ret;
      /* sample shading can't precompile */
      if (nir->info.stage != MESA_SHADER_FRAGMENT || !nir->info.fs.uses_sample_shading) {
         if (zink_debug & ZINK_DEBUG_NOBGC)
            precompile_separate_shader_job(zs, screen, 0);
         else
            util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, precompile_separate_shader_job, NULL, 0);
      }
   }
   ralloc_free(nir);

   return ret;
}

static void
zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   util_shader_reference(pctx, &screen->shaders, &cso, NULL);
}

static void *
zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
   bool cache_hit;
   struct zink_screen *screen = zink_screen(pctx->screen);
   return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
}

void
zink_program_init(struct zink_context *ctx)
{
   ctx->base.create_vs_state = zink_create_cached_shader_state;
   ctx->base.bind_vs_state = zink_bind_vs_state;
   ctx->base.delete_vs_state = zink_delete_cached_shader_state;

   ctx->base.create_fs_state = zink_create_cached_shader_state;
   ctx->base.bind_fs_state = zink_bind_fs_state;
   ctx->base.delete_fs_state = zink_delete_cached_shader_state;

   ctx->base.create_gs_state = zink_create_cached_shader_state;
   ctx->base.bind_gs_state = zink_bind_gs_state;
   ctx->base.delete_gs_state = zink_delete_cached_shader_state;

   ctx->base.create_tcs_state = zink_create_cached_shader_state;
   ctx->base.bind_tcs_state = zink_bind_tcs_state;
   ctx->base.delete_tcs_state = zink_delete_cached_shader_state;

   ctx->base.create_tes_state = zink_create_cached_shader_state;
   ctx->base.bind_tes_state = zink_bind_tes_state;
   ctx->base.delete_tes_state = zink_delete_cached_shader_state;

   ctx->base.create_compute_state = zink_create_cs_state;
   ctx->base.bind_compute_state = zink_bind_cs_state;
   ctx->base.get_compute_state_info = zink_get_compute_state_info;
   ctx->base.delete_compute_state = zink_delete_cs_shader_state;

   if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
      _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
   else
      _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
   if (zink_screen(ctx->base.screen)->have_full_ds3)
      _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
   else
      _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
   /* validate struct packing */
   STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));

   STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
      ctx->base.link_shader = zink_link_gfx_shader;
}

bool
zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
{
   bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
   bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
   ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
   if (!changed)
      return false;
   if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
      ctx->gfx_pipeline_state.dirty |= true;
   ctx->rasterizer_discard_changed = true;
   return true;
}

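/* Generic helper for running driver work asynchronously: queues the job on
 * the screen's cache thread.
 */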
void
zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
                           struct util_queue_fence *fence,
                           pipe_driver_thread_func execute,
                           pipe_driver_thread_func cleanup,
                           const size_t job_size)
{
   struct zink_screen *screen = zink_screen(pscreen);
   util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
}

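/* Edge flags are only relevant for list-style filled topologies (triangles,
 * quads, polygons) that rasterize as lines or points, and only when the
 * vertex shader actually writes them; strips, fans, and patches never do.
 */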
static bool
has_edge_flags(struct zink_context *ctx)
{
   switch(ctx->gfx_pipeline_state.gfx_prim_mode) {
   case MESA_PRIM_POINTS:
   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
   case MESA_PRIM_LINES:
   case MESA_PRIM_LINE_LOOP:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_TRIANGLE_FAN:
   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
   case MESA_PRIM_QUAD_STRIP:
   case MESA_PRIM_PATCHES:
      return false;
   case MESA_PRIM_TRIANGLES:
   case MESA_PRIM_TRIANGLES_ADJACENCY:
   case MESA_PRIM_QUADS:
   case MESA_PRIM_POLYGON:
   case MESA_PRIM_COUNT:
   default:
      break;
   }
   return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES ||
           ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) &&
          ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
}

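/* Collapse a rasterization primitive into the coarse point/line/triangle
 * buckets used to index generated geometry shaders.
 */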
static enum zink_rast_prim
zink_rast_prim_for_pipe(enum mesa_prim prim)
{
   switch (prim) {
   case MESA_PRIM_POINTS:
      return ZINK_PRIM_POINTS;
   case MESA_PRIM_LINES:
      return ZINK_PRIM_LINES;
   case MESA_PRIM_TRIANGLES:
   default:
      return ZINK_PRIM_TRIANGLES;
   }
}

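/* Determine the primitive type produced by a tessellation evaluation shader:
 * points in point mode, lines for isolines, and triangles otherwise (quads
 * are tessellated into triangles).
 */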
static enum mesa_prim
zink_tess_prim_type(struct zink_shader *tess)
{
   if (tess->info.tess.point_mode)
      return MESA_PRIM_POINTS;
   else {
      switch (tess->info.tess._primitive_mode) {
      case TESS_PRIMITIVE_ISOLINES:
         return MESA_PRIM_LINES;
      case TESS_PRIMITIVE_TRIANGLES:
      case TESS_PRIMITIVE_QUADS:
         return MESA_PRIM_TRIANGLES;
      default:
         return MESA_PRIM_COUNT;
      }
   }
}

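/* Record a dword offset as an inlinable uniform so its value can later be
 * replaced with a compile-time constant.
 */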
static inline void
zink_add_inline_uniform(nir_shader *shader, int offset)
{
   shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset;
   ++shader->info.num_inlinable_uniforms;
}

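/* Select the provoking-vertex lowering mode for a generated geometry shader;
 * strips and fans need topology-aware vertex remapping, everything else can
 * use the simple path.
 */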
static unsigned
encode_lower_pv_mode(enum mesa_prim prim_type)
{
   switch (prim_type) {
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_QUAD_STRIP:
      return ZINK_PVE_PRIMITIVE_TRISTRIP;
   case MESA_PRIM_TRIANGLE_FAN:
      return ZINK_PVE_PRIMITIVE_FAN;
   default:
      return ZINK_PVE_PRIMITIVE_SIMPLE;
   }
}

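/* Update the shader key bits that drive primitive emulation (line stipple,
 * line/point smoothing, provoking-vertex lowering, edge flags, quads) and,
 * if any geometry-stage lowering is needed, bind a generated passthrough
 * geometry shader derived from the last pre-rasterization stage.
 */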
void
zink_set_primitive_emulation_keys(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool lower_line_stipple = false, lower_line_smooth = false;
   unsigned lower_pv_mode = 0;
   if (!screen->optimal_keys) {
      lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
                                screen->driver_workarounds.no_linestipple &&
                                ctx->rast_state->base.line_stipple_enable &&
                                !ctx->num_so_targets;

      bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS &&
                                screen->driconf.emulate_point_smooth &&
                                ctx->rast_state->base.point_smooth;
      if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
         assert(zink_get_gs_key(ctx)->lower_line_stipple ==
                zink_get_fs_key(ctx)->lower_line_stipple);
         zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
         zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
      }

      lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
                          screen->driver_workarounds.no_linesmooth &&
                          ctx->rast_state->base.line_smooth &&
                          !ctx->num_so_targets;

      if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
         assert(zink_get_gs_key(ctx)->lower_line_smooth ==
                zink_get_fs_key(ctx)->lower_line_smooth);
         zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
         zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
      }

      if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
         zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
      }

      lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last &&
                      !screen->info.have_EXT_provoking_vertex;
      if (lower_pv_mode)
         lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode);

      if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode)
         zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode;
   }

   bool lower_edge_flags = has_edge_flags(ctx);

   bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS;

   bool lower_filled_quad = lower_quad_prim &&
      ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES;

   if (lower_line_stipple || lower_line_smooth ||
       lower_edge_flags || lower_quad_prim ||
       lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) {
      enum pipe_shader_type prev_vertex_stage =
         ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
            MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
      enum zink_rast_prim zink_prim_type =
         zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim);

      //when using transform feedback, primitives must be tessellated
      lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings;

      if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated &&
          ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {

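         /* Generated geometry shaders are cached on the preceding stage,
          * keyed by input topology and coarse rasterization primitive, so
          * each variant is only created once.
          */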
         if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) {
            nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]);
            nir_shader *nir;
            if (lower_filled_quad) {
               nir = zink_create_quads_emulation_gs(
                  &screen->nir_options,
                  prev_stage);
            } else {
               enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode;
               if (prev_vertex_stage == MESA_SHADER_TESS_EVAL)
                  prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
               nir = nir_create_passthrough_gs(
                  &screen->nir_options,
                  prev_stage,
                  prim,
                  ctx->gfx_pipeline_state.rast_prim,
                  lower_edge_flags,
                  lower_line_stipple || lower_quad_prim);
            }
            zink_lower_system_values_to_inlined_uniforms(nir);

            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1);
            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
            ralloc_free(prev_stage);
            struct zink_shader *shader = zink_shader_create(screen, nir);
            shader->needs_inlining = true;
            ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader;
            shader->non_fs.is_generated = true;
            shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage];
            shader->can_inline = true;
            memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride));
         }

         ctx->base.bind_gs_state(&ctx->base,
                                 ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]);
         ctx->is_generated_gs_bound = true;
      }

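      /* Pass the generated GS its inlined uniform values: the low and high
       * dwords of the fragment shader's flat-input mask, then the
       * provoking-vertex mode, matching the ZINK_INLINE_VAL_* offsets
       * registered above.
       */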
      ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3,
                                        (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
                                                      ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32,
                                                      ctx->gfx_pipeline_state.dyn_state3.pv_last});
   } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
              ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
         ctx->base.bind_gs_state(&ctx->base, NULL);
}
2542