/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_program.h"

#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_helpers.h"
#include "zink_pipeline.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_inlines.h"

#include "util/memstream.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "nir_serialize.h"
#include "nir/nir_draw_helpers.h"

/* for pipeline cache */
#define XXH_INLINE_ALL
#include "util/xxhash.h"

static void
gfx_program_precompile_job(void *data, void *gdata, int thread_index);
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);

void
debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
{
   sprintf(buf, "zink_gfx_program");
}

void
debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
{
   sprintf(buf, "zink_compute_program");
}

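/* Shader module keys are stored as a variable-length blob directly after the
 * zink_shader_module struct: the base shader key, then (optionally) the
 * nonseamless cube mask, then any inlined uniform values, then (optionally)
 * zs swizzle data. The matchers and hash below operate on that layout.
 */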
ALWAYS_INLINE static bool
shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
{
   if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
       zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
          (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
                                    key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
}

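/* full shader key comparison; has_inline/has_nonseamless are passed as compile-time
 * constants by the monomorphized callers, so unused checks are compiled away
 */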
ALWAYS_INLINE static bool
shader_key_matches(const struct zink_shader_module *zm,
                   const struct zink_shader_key *key, unsigned num_uniforms,
                   bool has_inline, bool has_nonseamless)
{
   const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
   if (has_inline) {
      if (zm->num_uniforms != num_uniforms ||
          (num_uniforms &&
           memcmp(zm->key + zm->key_size + nonseamless_size,
                  key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
         return false;
   }
   if (!has_nonseamless) {
      if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
          (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
         return false;
   }
   if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   return !memcmp(zm->key, key, zm->key_size);
}

static uint32_t
shader_module_hash(const struct zink_shader_module *zm)
{
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
   return _mesa_hash_data(zm->key, key_size);
}

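/* determine how much extra (inlined uniform / nonseamless mask) data this stage's
 * key needs; inlining is given up once too many variants exist for the stage
 * (unless running on a CPU driver)
 */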
ALWAYS_INLINE static void
gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
                          struct zink_shader *zs, struct zink_gfx_program *prog,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless, //is nonseamless ext present?
                          unsigned *inline_size, unsigned *nonseamless_size)
{
   gl_shader_stage stage = zs->info.stage;
   struct zink_shader_key *key = &state->shader_keys.key[stage];
   if (has_inline && ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
      if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
         *inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (!has_nonseamless && key->base.nonseamless_cube_mask)
      *nonseamless_size = sizeof(uint32_t);
}

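/* compile a new shader variant for this stage and append it to the program's
 * shader cache, copying the key blob after the struct in the layout described above
 */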
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                               struct zink_shader *zs, struct zink_gfx_program *prog,
                               gl_shader_stage stage,
                               struct zink_gfx_pipeline_state *state,
                               unsigned inline_size, unsigned nonseamless_size,
                               bool has_inline, //is inlining enabled?
                               bool has_nonseamless) //is nonseamless ext present?
{
   struct zink_shader_module *zm;
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
                                            (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
   zm = malloc(sizeof(struct zink_shader_module) + key->size +
               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
               (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   zm->num_uniforms = inline_size;
   if (!is_nongenerated_tcs) {
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
   } else {
      zm->key_size = 0;
      memset(zm->key, 0, key->size);
   }
   if (!has_nonseamless && nonseamless_size) {
      /* nonseamless mask gets added to base key if it exists */
      memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
   }
   zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
   zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
   if (inline_size)
      memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zm->hash = patch_vertices;
   else
      zm->hash = shader_module_hash(zm);
   if (unlikely(shadow_needs_shader_swizzle)) {
      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
      zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   if (inline_size)
      prog->inlined_variant_count[stage]++;
   util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
   return zm;
}

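/* look up an existing variant matching the current key; hits are moved to the
 * front of the cache to speed up subsequent lookups
 */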
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                            struct zink_shader *zs, struct zink_gfx_program *prog,
                            gl_shader_stage stage,
                            struct zink_gfx_pipeline_state *state,
                            unsigned inline_size, unsigned nonseamless_size,
                            bool has_inline, //is inlining enabled?
                            bool has_nonseamless) //is nonseamless ext present?
{
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
                                            (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));

   struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
            continue;
      } else {
         if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
            continue;
         if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

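/* variant creation for drivers with optimal_keys, where every per-stage key fits
 * in a single uint16_t (plus optional zs swizzle data)
 */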
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                       struct zink_shader *zs, struct zink_gfx_program *prog,
                                       gl_shader_stage stage,
                                       struct zink_gfx_pipeline_state *state)
{
   struct zink_shader_module *zm;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   bool shadow_needs_shader_swizzle = false;
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&state->shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   size_t key_size = sizeof(uint16_t);
   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints);
      unsigned patch_vertices = 3;
      if (ctx) {
         struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
         patch_vertices = tcs->patch_vertices;
      }
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   if (key && !is_nongenerated_tcs) {
      zm->key_size = key_size;
      uint16_t *data = (uint16_t*)zm->key;
      /* sanitize actual key bits */
      *data = (*key) & mask;
      if (unlikely(shadow_needs_shader_swizzle))
         memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
   return zm;
}

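/* cache lookup counterpart for optimal_keys; same move-to-front behavior as the
 * non-optimal path above
 */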
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                    struct zink_shader *zs, struct zink_gfx_program *prog,
                                    gl_shader_stage stage,
                                    struct zink_gfx_pipeline_state *state)
{
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   bool shadow_needs_shader_swizzle = false;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         /* always match */
      } else if (key) {
         uint16_t val = (*key) & mask;
         /* no key is bigger than uint16_t */
         if (memcmp(iter->key, &val, sizeof(uint16_t)))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

static void
zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
{
   if (zm->shobj)
      VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL);
   else
      VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL);
   ralloc_free(zm->obj.spirv);
   free(zm);
}

static void
destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
{
   while (util_dynarray_contains(sc, void*)) {
      struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
      zink_destroy_shader_module(screen, zm);
   }
}

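/* swap in the shader variants for all stages in 'mask' and keep the program's
 * variant hash in sync by XORing each changed module hash out and back in
 */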
ALWAYS_INLINE static void
update_gfx_shader_modules(struct zink_context *ctx,
                          struct zink_screen *screen,
                          struct zink_gfx_program *prog, uint32_t mask,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless) //is nonseamless ext present?
{
   bool hash_changed = false;
   bool default_variants = true;
   assert(prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = prog->last_variant_hash;
   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(mask & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                  inline_size, nonseamless_size, has_inline, has_nonseamless);
      if (!zm)
         zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                             inline_size, nonseamless_size, has_inline, has_nonseamless);
      state->modules[i] = zm->obj.mod;
      if (prog->objs[i].mod == zm->obj.mod)
         continue;
      prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
      variant_hash ^= prog->module_hash[i];
      hash_changed = true;
      default_variants &= zm->default_variant;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (has_inline) {
         if (zm->num_uniforms)
            prog->inline_variants |= BITFIELD_BIT(i);
         else
            prog->inline_variants &= ~BITFIELD_BIT(i);
      }
      variant_hash ^= prog->module_hash[i];
   }

   if (hash_changed && state) {
      if (default_variants)
         prog->last_variant_hash = prog->default_variant_hash;
      else
         prog->last_variant_hash = variant_hash;

      state->modules_changed = true;
   }
}

static void
generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = 0;
   bool default_variants = true;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
                                screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
                                &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                     inline_size, nonseamless_size,
                                                                     screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
      state->modules[i] = zm->obj.mod;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (zm->num_uniforms)
         prog->inline_variants |= BITFIELD_BIT(i);
      default_variants &= zm->default_variant;
      variant_hash ^= prog->module_hash[i];
   }

   state->modules_changed = true;

   prog->last_variant_hash = variant_hash;
   if (default_variants)
      prog->default_variant_hash = prog->last_variant_hash;
}

static void
generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
   }

   state->modules_changed = true;
   prog->last_variant_hash = state->optimal_key;
}

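/* hash/equals callbacks for the pipeline library, vertex input, and fragment
 * output state caches
 */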
static uint32_t
hash_pipeline_lib_generated_tcs(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   return gkey->optimal_key;
}


static bool
equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(uint32_t));
}

static uint32_t
hash_pipeline_lib(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   /* remove generated tcs bits */
   return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
}

static bool
equals_pipeline_lib(const void *a, const void *b)
{
   const struct zink_gfx_library_key *ak = a;
   const struct zink_gfx_library_key *bk = b;
   /* remove generated tcs bits */
   uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
   uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
   return val_a == val_b;
}

uint32_t
hash_gfx_input_dynamic(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   return ikey->idx;
}

static bool
equals_gfx_input_dynamic(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   return ikey_a->idx == ikey_b->idx;
}

uint32_t
hash_gfx_input(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   if (ikey->uses_dynamic_stride)
      return ikey->input;
   return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
}

static bool
equals_gfx_input(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   if (ikey_a->uses_dynamic_stride)
      return ikey_a->element_state == ikey_b->element_state &&
             !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
   return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
}

uint32_t
hash_gfx_output_ds3(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, sizeof(uint32_t));
}

static bool
equals_gfx_output_ds3(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, sizeof(uint32_t));
}

uint32_t
hash_gfx_output(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
}

static bool
equals_gfx_output(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
}

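/* monomorphize update_gfx_shader_modules() on the inlining path so the constant
 * bools propagate into the inlined key matchers
 */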
ALWAYS_INLINE static void
update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driconf.inline_uniforms || prog->needs_inlining)
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                true, has_nonseamless);
   else
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                false, has_nonseamless);
}

static void
update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_non_seamless_cube_map)
      update_gfx_program_nonseamless(ctx, prog, true);
   else
      update_gfx_program_nonseamless(ctx, prog, false);
}

void
zink_gfx_program_update(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      gl_shader_stage pstage = ctx->last_vertex_stage->info.stage;
      ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      /* this must be done before prog is updated */
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
            if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
               ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod;
         }
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_gfx_stages |= prog->stages_present;
         update_gfx_program(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
         zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         prog->base.removed = false;
         generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(ctx, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
}

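/* fetch (or compile) the variant for one stage under optimal_keys and report
 * whether the bound module actually changed
 */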
ALWAYS_INLINE static bool
update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library)
      util_queue_fence_wait(&prog->base.cache_fence);
   struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
   if (!zm) {
      zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
      perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage));
   }

   bool changed = prog->objs[pstage].mod != zm->obj.mod;
   prog->objs[pstage] = zm->obj;
   prog->objects[pstage] = zm->obj.obj;
   return changed;
}

static void
update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key;
   const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
   if (key->vs_bits != last_prog_key->vs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
   if (key->fs_bits != last_prog_key->fs_bits ||
       /* always recheck shadow swizzles since they aren't directly part of the key */
       unlikely(shadow_needs_shader_swizzle)) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
      ctx->gfx_pipeline_state.modules_changed |= changed;
      if (unlikely(shadow_needs_shader_swizzle)) {
         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
         ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t);
      }
   }
   if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
       key->tcs_bits != last_prog_key->tcs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
}

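/* swap a fast-linked separable program out of the cache in favor of its optimized
 * full program; with ZINK_DEBUG=noopt no background link exists, so the full
 * program is created here
 */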
static struct zink_gfx_program *
replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *real = prog->full_prog ?
                                   prog->full_prog :
                                   /* this will be NULL with ZINK_DEBUG_NOOPT */
                                   zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash);
   entry->data = real;
   entry->key = real->shaders;
   real->base.removed = false;
   zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   prog->base.removed = true;
   return real;
}

void
zink_gfx_program_update_optimal(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);

      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx));
         if (prog->is_separable) {
            /* shader variants can't be handled by separable programs: sync and compile */
            if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)
               util_queue_fence_wait(&prog->base.cache_fence);
            /* If the optimized linked pipeline is done compiling, swap it into place. */
            if (util_queue_fence_is_signalled(&prog->base.cache_fence) &&
                /* but only if needed for ZINK_DEBUG=noopt */
                (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) {
               prog = replace_separable_prog(ctx, entry, prog);
            }
         } else if (must_replace) {
            /* this is a non-separable, incompatible prog which needs replacement */
            struct zink_gfx_program *real = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash);
            generate_gfx_program_modules_optimal(ctx, screen, real, &ctx->gfx_pipeline_state);
            entry->data = real;
            entry->key = real->shaders;
            real->base.removed = false;
            prog->base.removed = true;
            zink_gfx_program_reference(screen, &prog, NULL);
            prog = real;
         }
         update_gfx_program_optimal(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         prog->base.removed = false;
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         if (!prog->is_separable) {
            zink_screen_get_pipeline_cache(screen, &prog->base, false);
            perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n");
            generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
         }
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(ctx, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;

      bool must_replace = ctx->curr_program->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx));
      if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) {
         struct zink_gfx_program *prog = ctx->curr_program;

         util_queue_fence_wait(&prog->base.cache_fence);
         /* shader variants can't be handled by separable programs: sync and compile */
         perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n");
         struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
         const uint32_t hash = ctx->gfx_hash;
         simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
         struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
         ctx->curr_program = replace_separable_prog(ctx, entry, prog);
         simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      }
      update_gfx_program_optimal(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
   ctx->gfx_dirty = false;
   ctx->last_vertex_stage_dirty = false;
}

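/* background job: compile the optimized pipeline and stash the previous one as
 * the unoptimized fallback
 */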
static void
optimized_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;
   VkPipeline pipeline;
   if (pc_entry->gpl.gkey)
      pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false);
   else
      pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   if (pipeline) {
      pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline;
      pc_entry->pipeline = pipeline;
   }
}

static void
optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;

   struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      objs[i].mod = VK_NULL_HANDLE;
      objs[i].spirv = pc_entry->shobjs[i].spirv;
   }
   pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   /* no unoptimized_pipeline dance */
}

void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driver_workarounds.disable_optimized_compile)
      return;
   if (zink_debug & ZINK_DEBUG_NOBGC) {
      if (pc_entry->prog->base.uses_shobj)
         optimized_shobj_compile_job(pc_entry, screen, 0);
      else
         optimized_compile_job(pc_entry, screen, 0);
   } else {
      util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
                         pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
   }
}

void
zink_program_finish(struct zink_context *ctx, struct zink_program *pg)
{
   util_queue_fence_wait(&pg->cache_fence);
   if (pg->is_compute)
      return;
   struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         hash_table_foreach(&prog->pipelines[r][i], entry) {
            struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
            util_queue_fence_wait(&pc_entry->fence);
         }
      }
   }
}

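/* compute shader variant selection: the default variant lives in comp->module,
 * while variants with inlined uniforms, nonseamless masks, robustness, or zs
 * swizzles go through the per-program shader cache
 */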
static void
update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_shader *zs = comp->shader;
   struct zink_shader_module *zm = NULL;
   unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
   struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
   ASSERTED bool check_robustness = screen->driver_compiler_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
   assert(zink_cs_key(key)->robust_access == check_robustness);

   if (ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
      if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
         inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (key->base.nonseamless_cube_mask)
      nonseamless_size = sizeof(uint32_t);
   if (key->base.needs_zs_shader_swizzle)
      zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);

   if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
      struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
      unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
      struct zink_shader_module **pzm = shader_cache->data;
      for (unsigned i = 0; i < count; i++) {
         struct zink_shader_module *iter = pzm[i];
         if (!shader_key_matches(iter, key, inline_size,
                                 screen->driconf.inline_uniforms,
                                 screen->info.have_EXT_non_seamless_cube_map))
            continue;
         if (unlikely(zs_swizzle_size)) {
            /* zs swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
               continue;
         }
         if (i > 0) {
            struct zink_shader_module *zero = pzm[0];
            pzm[0] = iter;
            pzm[i] = zero;
         }
         zm = iter;
      }
   } else {
      zm = comp->module;
   }

   if (!zm) {
      zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
      if (!zm) {
         return;
      }
      zm->shobj = false;
      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
      if (!zm->obj.spirv) {
         FREE(zm);
         return;
      }
      zm->num_uniforms = inline_size;
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
      zm->has_nonseamless = !!nonseamless_size;
      zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
      assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
      if (nonseamless_size)
         memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
      if (inline_size)
         memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
      if (zs_swizzle_size)
         memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);

      zm->hash = shader_module_hash(zm);
      zm->default_variant = false;
      if (inline_size)
         comp->inlined_variant_count++;

      /* this is otherwise the default variant, which is stored as comp->module */
      if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
         util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
   }
   if (comp->curr == zm)
      return;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   comp->curr = zm;
   ctx->compute_pipeline_state.module_hash = zm->hash;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   ctx->compute_pipeline_state.module_changed = true;
}

void
zink_update_compute_program(struct zink_context *ctx)
{
   util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
   update_cs_shader_module(ctx, ctx->curr_compute);
}

VkPipelineLayout
zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
{
   VkPipelineLayoutCreateInfo plci = {0};
   plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   plci.flags = flags;

   plci.pSetLayouts = dsl;
   plci.setLayoutCount = num_dsl;

   VkPushConstantRange pcr;
   if (!is_compute) {
      pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
      pcr.offset = 0;
      pcr.size = sizeof(struct zink_gfx_push_constant);
      plci.pushConstantRangeCount = 1;
      plci.pPushConstantRanges = &pcr;
   }

   VkPipelineLayout layout;
   VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
   if (result != VK_SUCCESS) {
      mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
      return VK_NULL_HANDLE;
   }

   return layout;
}

static void *
create_program(struct zink_context *ctx, bool is_compute)
{
   struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
   if (!pg)
      return NULL;

   pipe_reference_init(&pg->reference, 1);
   u_rwlock_init(&pg->pipeline_cache_lock);
   util_queue_fence_init(&pg->cache_fence);
   pg->is_compute = is_compute;
   pg->ctx = ctx;
   return (void*)pg;
}

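/* assign IO between each producer/consumer pair of adjacent active stages */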
static void
assign_io(struct zink_screen *screen,
          nir_shader *shaders[ZINK_GFX_SHADER_COUNT])
{
   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
      nir_shader *producer = shaders[i];
      for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
         nir_shader *consumer = shaders[j];
         if (!consumer)
            continue;
         zink_compiler_assign_io(screen, producer, consumer);
         i = j;
         break;
      }
   }
}

void
zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
{
   if (!p_atomic_dec_zero(&libs->refcount))
      return;

   simple_mtx_destroy(&libs->lock);
   set_foreach_remove(&libs->libs, he) {
      struct zink_gfx_library_key *gkey = (void*)he->key;
      VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
      FREE(gkey);
   }
   ralloc_free(libs->libs.table);
   FREE(libs);
}

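/* pipeline library caches are keyed on the optimal key: programs with a generated
 * TCS hash the full key, all others mask out the TCS bits
 */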
static struct zink_gfx_lib_cache *
create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
{
   struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
   libs->stages_present = prog->stages_present;
   if (generated_tcs)
      libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   simple_mtx_init(&libs->lock, mtx_plain);
   if (generated_tcs)
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
   else
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib);
   return libs;
}

static struct zink_gfx_lib_cache *
find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
{
   unsigned stages_present = prog->stages_present;
   bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
   if (generated_tcs)
      stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   unsigned idx = zink_program_cache_stages(stages_present);
   struct set *ht = &screen->pipeline_libs[idx];
   const uint32_t hash = prog->gfx_hash;

   simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
   struct zink_gfx_lib_cache *libs;
   if (found) {
      libs = (void*)entry->key;
   } else {
      libs = create_lib_cache(prog, generated_tcs);
      memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
      entry->key = libs;
      unsigned refs = 0;
      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
         if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
            simple_mtx_lock(&prog->shaders[i]->lock);
            util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
            simple_mtx_unlock(&prog->shaders[i]->lock);
            refs++;
         }
      }
      p_atomic_set(&libs->refcount, refs);
   }
   simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
   return libs;
}

static struct zink_gfx_program *
gfx_program_create(struct zink_context *ctx,
                   struct zink_shader **stages,
                   unsigned vertices_per_patch,
                   uint32_t gfx_hash)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->gfx_hash = gfx_hash;
   prog->base.removed = true;
   prog->optimal_keys = screen->optimal_keys;

   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
                         prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      util_dynarray_init(&prog->shader_cache[i][0][0], prog);
      util_dynarray_init(&prog->shader_cache[i][0][1], prog);
      util_dynarray_init(&prog->shader_cache[i][1][0], prog);
      util_dynarray_init(&prog->shader_cache[i][1][1], prog);
      if (stages[i]) {
         prog->shaders[i] = stages[i];
         prog->stages_present |= BITFIELD_BIT(i);
         if (i != MESA_SHADER_FRAGMENT)
            prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
         prog->needs_inlining |= prog->shaders[i]->needs_inlining;
      }
   }
   if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
      util_queue_fence_wait(&stages[MESA_SHADER_TESS_EVAL]->precompile.fence);
      if (!prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
         prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs = zink_shader_tcs_create(screen, vertices_per_patch);
      prog->shaders[MESA_SHADER_TESS_CTRL] = prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs;
      prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   }
   prog->stages_remaining = prog->stages_present;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         zink_gfx_program_reference(screen, NULL, prog);
      }
   }
   p_atomic_dec(&prog->base.reference.count);

   if (stages[MESA_SHADER_GEOMETRY])
      prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
   else if (stages[MESA_SHADER_TESS_EVAL])
      prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
   else
      prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }
   return prog;

fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

/* NO THREAD-UNSAFE ctx USAGE! */
static struct zink_gfx_program *
gfx_program_init(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   nir_shader *nir[ZINK_GFX_SHADER_COUNT];

   /* iterate in reverse order to create TES before generated TCS */
   for (int i = MESA_SHADER_FRAGMENT; i >= MESA_SHADER_VERTEX; i--) {
      if (prog->shaders[i]) {
         util_queue_fence_wait(&prog->shaders[i]->precompile.fence);
         /* this may have already been precompiled for separate shader */
         if (i == MESA_SHADER_TESS_CTRL && prog->shaders[i]->non_fs.is_generated && prog->shaders[MESA_SHADER_TESS_CTRL]->nir)
            zink_shader_tcs_init(screen, prog->shaders[MESA_SHADER_TESS_CTRL], nir[MESA_SHADER_TESS_EVAL], &nir[i]);
         else
            nir[i] = zink_shader_deserialize(screen, prog->shaders[i]);
      } else {
         nir[i] = NULL;
      }
   }
   assign_io(screen, nir);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (nir[i])
         zink_shader_serialize_blob(nir[i], &prog->blobs[i]);
      ralloc_free(nir[i]);
   }

   if (screen->optimal_keys)
      prog->libs = find_or_create_lib_cache(screen, prog);
   if (prog->libs)
      p_atomic_inc(&prog->libs->refcount);

   struct mesa_blake3 sctx;
   _mesa_blake3_init(&sctx);
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i])
         _mesa_blake3_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
   }
   _mesa_blake3_final(&sctx, prog->base.blake3);

   if (!zink_descriptor_program_init(ctx, &prog->base))
      goto fail;

   return prog;

fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
                        struct zink_shader **stages,
                        unsigned vertices_per_patch,
                        uint32_t gfx_hash)
{
   struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash);
   if (prog)
      prog = gfx_program_init(ctx, prog);
   return prog;
}

/* Creates a replacement, optimized zink_gfx_program for this set of separate shaders,
 * which will be swapped in to replace the fast-linked separable program once it's
 * done compiling.
 */
static void
create_linked_separable_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_program *prog = data;
   /* this is a dead program */
   if (prog->base.removed)
      return;
   prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash);
   /* block gfx_shader_prune in the main thread */
   util_queue_fence_reset(&prog->full_prog->base.cache_fence);
   /* add an ownership ref */
   zink_gfx_program_reference(zink_screen(prog->base.ctx->base.screen), NULL, prog->full_prog);
   /* this is otherwise a dead program */
   if (prog->full_prog->stages_present == prog->full_prog->stages_remaining)
      gfx_program_precompile_job(prog->full_prog, gdata, thread_index);
   util_queue_fence_signal(&prog->full_prog->base.cache_fence);
}

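/* create a fast-linked separable program when the shaders and current state allow
 * it, falling back to a full linked program otherwise; an optimized full program
 * is linked separately (create_linked_separable_job) and swapped in later
 */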
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool is_separate = true;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
      is_separate &= !stages[i] || stages[i]->info.separate_shader;
   /* filter cases that need real pipelines */
   if (!is_separate ||
       /* TODO: maybe try variants? grimace */
       !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
       !zink_can_use_pipeline_libs(ctx))
      return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      /* ensure async shader creation is done */
      if (stages[i]) {
         util_queue_fence_wait(&stages[i]->precompile.fence);
         if (!stages[i]->precompile.obj.mod)
            return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
      }
   }

   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->is_separable = true;
   prog->gfx_hash = ctx->gfx_hash;
   prog->base.uses_shobj = screen->info.have_EXT_shader_object && !stages[MESA_SHADER_VERTEX]->info.view_mask && !BITSET_TEST(stages[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);

   prog->stages_remaining = prog->stages_present = ctx->shader_stages;
   memcpy(prog->shaders, stages, sizeof(prog->shaders));
   prog->last_vertex_stage = ctx->last_vertex_stage;

   if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
      prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs;
      prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   }

   if (!screen->info.have_EXT_shader_object) {
      prog->libs = create_lib_cache(prog, false);
      /* this libs cache is owned by the program */
      p_atomic_set(&prog->libs->refcount, 1);
   }

   unsigned refs = 0;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         if (screen->info.have_EXT_shader_object) {
            if (!prog->objects[i])
               prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
         }
         refs++;
      }
   }
   /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
    * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here.
    */
   p_atomic_add(&prog->base.reference.count, refs - 1);

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }

   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl)
         continue;
      int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1;
      prog->base.dd.binding_usage |= BITFIELD_BIT(idx);
      prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl;
      /* guarantee a null dsl if previous stages don't have descriptors */
      if (prog->shaders[i]->precompile.dsl)
         prog->base.num_dsl = idx + 1;
1330 prog->base.dd.bindless |= prog->shaders[i]->bindless;
1331 }
1332 if (prog->base.dd.bindless) {
1333 prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
1334 prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
1335 }
1336 prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
1337
1338 prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
1339
1340 if (!screen->info.have_EXT_shader_object) {
1341 VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
1342 struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1343 if (!gkey) {
1344 mesa_loge("ZINK: failed to allocate gkey!");
1345 goto fail;
1346 }
1347 gkey->optimal_key = prog->last_variant_hash;
1348 assert(gkey->optimal_key);
1349 gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false);
1350 _mesa_set_add(&prog->libs->libs, gkey);
1351 }
1352
1353 if (!(zink_debug & ZINK_DEBUG_NOOPT))
1354 util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
1355
1356 return prog;
1357 fail:
1358 if (prog)
1359 zink_destroy_gfx_program(screen, prog);
1360 return NULL;
1361 }
1362
1363 static void
print_pipeline_stats(struct zink_screen * screen,VkPipeline pipeline,struct util_debug_callback * debug)1364 print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline, struct util_debug_callback *debug)
1365 {
1366 VkPipelineInfoKHR pinfo = {
1367 VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
1368 NULL,
1369 pipeline
1370 };
1371 unsigned exe_count = 0;
1372 VkPipelineExecutablePropertiesKHR props[10] = {0};
1373 for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
1374 props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
1375 props[i].pNext = NULL;
1376 }
   VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
   /* clamp to the array's capacity in case the driver reports more executables */
   exe_count = MIN2(exe_count, ARRAY_SIZE(props));
   VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
   for (unsigned e = 0; e < exe_count; e++) {
      VkPipelineExecutableInfoKHR info = {
         VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
         NULL,
         pipeline,
         e
      };
      unsigned count = 0;

      struct u_memstream stream;
      char *print_buf;
      size_t print_buf_sz;

      if (!u_memstream_open(&stream, &print_buf, &print_buf_sz)) {
         mesa_loge("ZINK: failed to open memstream!");
         return;
      }

      FILE *f = u_memstream_get(&stream);
      fprintf(f, "%s shader: ", props[e].name);
      VkPipelineExecutableStatisticKHR *stats = NULL;
      VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
      stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
      if (!stats) {
         mesa_loge("ZINK: failed to allocate stats!");
         /* don't leak the memstream on the error path */
         u_memstream_close(&stream);
         free(print_buf);
         return;
      }

      for (unsigned i = 0; i < count; i++)
         stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
      VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);

      for (unsigned i = 0; i < count; i++) {
         if (i)
            fprintf(f, ", ");

         switch (stats[i].format) {
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
            fprintf(f, "%u %s", stats[i].value.b32, stats[i].name);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
            fprintf(f, "%" PRIi64 " %s", stats[i].value.i64, stats[i].name);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
            fprintf(f, "%" PRIu64 " %s", stats[i].value.u64, stats[i].name);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
            fprintf(f, "%g %s", stats[i].value.f64, stats[i].name);
            break;
         default:
            unreachable("unknown statistic");
         }
      }

      /* print_buf is only valid after flushing. */
      fflush(f);
      util_debug_message(debug, SHADER_INFO, "%s", print_buf);

      u_memstream_close(&stream);
      free(print_buf);
      free(stats);
   }
}

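/* both hash functions below cover the state struct only up to the 'hash' member,
 * so any field that must affect pipeline lookup has to come before it; the
 * local-size variant additionally mixes in the workgroup dimensions to match
 * equals_compute_pipeline_state_local_size()
 */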
static uint32_t
hash_compute_pipeline_state_local_size(const void *key)
{
   const struct zink_compute_pipeline_state *state = key;
   uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
   hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
   return hash;
}

static uint32_t
hash_compute_pipeline_state(const void *key)
{
   const struct zink_compute_pipeline_state *state = key;
   return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
}

void
zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info)
{
   if (comp->use_local_size) {
      for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
         if (ctx->compute_pipeline_state.local_size[i] != info->block[i])
            ctx->compute_pipeline_state.dirty = true;
         ctx->compute_pipeline_state.local_size[i] = info->block[i];
      }
   }
   if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) {
      ctx->compute_pipeline_state.dirty = true;
      ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem;
   }
}

static bool
equals_compute_pipeline_state(const void *a, const void *b)
{
   const struct zink_compute_pipeline_state *sa = a;
   const struct zink_compute_pipeline_state *sb = b;
   return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
          sa->module == sb->module;
}

static bool
equals_compute_pipeline_state_local_size(const void *a, const void *b)
{
   const struct zink_compute_pipeline_state *sa = a;
   const struct zink_compute_pipeline_state *sb = b;
   return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
          !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
          sa->module == sb->module;
}

static void
precompile_compute_job(void *data, void *gdata, int thread_index)
{
   struct zink_compute_program *comp = data;
   struct zink_screen *screen = gdata;

   comp->shader = zink_shader_create(screen, comp->nir);
   zink_shader_init(screen, comp->shader);
   comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
   assert(comp->module);
   comp->module->shobj = false;
   comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
   /* comp->nir will be freed by zink_shader_compile */
   comp->nir = NULL;
   assert(comp->module->obj.spirv);
   util_dynarray_init(&comp->shader_cache[0], comp);
   util_dynarray_init(&comp->shader_cache[1], comp);

   struct mesa_blake3 blake3_ctx;
   _mesa_blake3_init(&blake3_ctx);
   _mesa_blake3_update(&blake3_ctx, comp->shader->blob.data, comp->shader->blob.size);
   _mesa_blake3_final(&blake3_ctx, comp->base.blake3);

   zink_descriptor_program_init(comp->base.ctx, &comp->base);

   zink_screen_get_pipeline_cache(screen, &comp->base, true);
   if (comp->base.can_precompile)
      comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
   if (comp->base_pipeline)
      zink_screen_update_pipeline_cache(screen, &comp->base, true);
}

static struct zink_compute_program *
create_compute_program(struct zink_context *ctx, nir_shader *nir)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_compute_program *comp = create_program(ctx, true);
   if (!comp)
      return NULL;
   simple_mtx_init(&comp->cache_lock, mtx_plain);
   comp->scratch_size = nir->scratch_size;
   comp->nir = nir;
   comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms;

   comp->use_local_size = !(nir->info.workgroup_size[0] ||
                            nir->info.workgroup_size[1] ||
                            nir->info.workgroup_size[2]);
   comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem;
   comp->base.can_precompile = !comp->use_local_size &&
                               (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
                               (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
   _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
                                                       equals_compute_pipeline_state_local_size :
                                                       equals_compute_pipeline_state);

   if (zink_debug & (ZINK_DEBUG_NOBGC|ZINK_DEBUG_SHADERDB))
      precompile_compute_job(comp, screen, 0);
   else
      util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
                         precompile_compute_job, NULL, 0);

   if (zink_debug & ZINK_DEBUG_SHADERDB) {
      print_pipeline_stats(screen, comp->base_pipeline, &ctx->dbg);
   }

   return comp;
}

bool
zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
{
   struct zink_shader *zs = NULL;
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_FRAGMENT:
      zs = ctx->gfx_stages[stage];
      break;
   case MESA_SHADER_COMPUTE: {
      zs = ctx->curr_compute->shader;
      break;
   }
   default:
      unreachable("unknown shader type");
   }
   if (!zs)
      return false;
   return zink_shader_descriptor_is_buffer(zs, type, i);
}

static unsigned
get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
{
   switch (type) {
   case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
      return !!zs->has_uniforms;
   case ZINK_DESCRIPTOR_TYPE_UBO:
   case ZINK_DESCRIPTOR_TYPE_SSBO:
      return zs->num_bindings[type];
   default:
      break;
   }
   unsigned num_bindings = 0;
   for (int i = 0; i < zs->num_bindings[type]; i++)
      num_bindings += zs->bindings[type][i].size;
   return num_bindings;
}

unsigned
zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
{
   unsigned num_bindings = 0;
   if (pg->is_compute) {
      struct zink_compute_program *comp = (void*)pg;
      return get_num_bindings(comp->shader, type);
   }
   struct zink_gfx_program *prog = (void*)pg;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (prog->shaders[i])
         num_bindings += get_num_bindings(prog->shaders[i], type);
   }
   return num_bindings;
}

unsigned
zink_program_num_bindings(const struct zink_program *pg)
{
   unsigned num_bindings = 0;
   for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
      num_bindings += zink_program_num_bindings_typed(pg, i);
   return num_bindings;
}

static void
deinit_program(struct zink_screen *screen, struct zink_program *pg)
{
   util_queue_fence_wait(&pg->cache_fence);
   if (pg->layout)
      VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);

   if (pg->pipeline_cache)
      VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
   u_rwlock_destroy(&pg->pipeline_cache_lock);
   zink_descriptor_program_deinit(screen, pg);
}

void
zink_destroy_gfx_program(struct zink_screen *screen,
                         struct zink_gfx_program *prog)
{
   unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
   if (screen->info.have_EXT_extended_dynamic_state) {
      /* only need first 3/4 for point/line/tri/patch */
      if ((prog->stages_present &
           (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
          BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
         max_idx = 4;
      else
         max_idx = 3;
      max_idx++;
   }

   if (prog->is_separable)
      zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
      for (int i = 0; i < max_idx; ++i) {
         hash_table_foreach(&prog->pipelines[r][i], entry) {
            struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;

            util_queue_fence_wait(&pc_entry->fence);
            VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
            VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL);
            free(pc_entry);
         }
      }
   }

   deinit_program(screen, &prog->base);

   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         _mesa_set_remove_key(prog->shaders[i]->programs, prog);
         prog->shaders[i] = NULL;
      }
      if (!prog->is_separable) {
         destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
         destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
         destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
         destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
         blob_finish(&prog->blobs[i]);
      }
   }
   if (prog->libs)
      zink_gfx_lib_cache_unref(screen, prog->libs);

   ralloc_free(prog);
}

void
zink_destroy_compute_program(struct zink_screen *screen,
                             struct zink_compute_program *comp)
{
   deinit_program(screen, &comp->base);

   assert(comp->shader);
   assert(!comp->shader->spirv);

   zink_shader_free(screen, comp->shader);

   destroy_shader_cache(screen, &comp->shader_cache[0]);
   destroy_shader_cache(screen, &comp->shader_cache[1]);

   hash_table_foreach(&comp->pipelines, entry) {
      struct compute_pipeline_cache_entry *pc_entry = entry->data;

      VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
      free(pc_entry);
   }
   VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
   zink_destroy_shader_module(screen, comp->module);

   ralloc_free(comp);
}

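/* the precompiled base_pipeline can be reused for any dispatch as long as no
 * per-draw variant state applies: fixed workgroup size, no inlined uniforms,
 * and no nonseamless-cubemap workaround
 */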
ALWAYS_INLINE static bool
compute_can_shortcut(const struct zink_compute_program *comp)
{
   return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
}

VkPipeline
zink_get_compute_pipeline(struct zink_screen *screen,
                          struct zink_compute_program *comp,
                          struct zink_compute_pipeline_state *state)
{
   struct hash_entry *entry = NULL;
   struct compute_pipeline_cache_entry *cache_entry;

   if (!state->dirty && !state->module_changed)
      return state->pipeline;
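
   /* final_hash is maintained incrementally as module_hash ^ state hash:
    * XOR the stale state hash out, recompute it, then XOR the new value back in
    */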
   if (state->dirty) {
      if (state->pipeline) //avoid on first hash
         state->final_hash ^= state->hash;
      if (comp->use_local_size)
         state->hash = hash_compute_pipeline_state_local_size(state);
      else
         state->hash = hash_compute_pipeline_state(state);
      state->dirty = false;
      state->final_hash ^= state->hash;
   }

   util_queue_fence_wait(&comp->base.cache_fence);
   if (comp->base_pipeline && compute_can_shortcut(comp)) {
      state->pipeline = comp->base_pipeline;
      return state->pipeline;
   }
   entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);

   if (!entry) {
      simple_mtx_lock(&comp->cache_lock);
      entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
      if (entry) {
         simple_mtx_unlock(&comp->cache_lock);
         goto out;
      }
      VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);

      if (pipeline == VK_NULL_HANDLE) {
         simple_mtx_unlock(&comp->cache_lock);
         return VK_NULL_HANDLE;
      }

      zink_screen_update_pipeline_cache(screen, &comp->base, false);
      if (compute_can_shortcut(comp)) {
         simple_mtx_unlock(&comp->cache_lock);
         /* don't add base pipeline to cache */
         state->pipeline = comp->base_pipeline = pipeline;
         return state->pipeline;
      }

      struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
      if (!pc_entry) {
         simple_mtx_unlock(&comp->cache_lock);
         return VK_NULL_HANDLE;
      }

      memcpy(&pc_entry->state, state, sizeof(*state));
      pc_entry->pipeline = pipeline;

      entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
      assert(entry);
      simple_mtx_unlock(&comp->cache_lock);
   }
out:
   cache_entry = entry->data;
   state->pipeline = cache_entry->pipeline;
   return state->pipeline;
}

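/* ctx->gfx_hash is an XOR accumulation of the bound shaders' hashes: unbinding
 * XORs a stage's hash back out, binding XORs the new one in, so the hash always
 * identifies the current stage combination
 */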
static void
bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
{
   if (shader && shader->info.num_inlinable_uniforms)
      ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
   else
      ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);

   if (ctx->gfx_stages[stage])
      ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;

   if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) {
      ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
      ctx->is_generated_gs_bound = false;
   }

   ctx->gfx_stages[stage] = shader;
   ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
   ctx->gfx_pipeline_state.modules_changed = true;
   if (shader) {
      ctx->shader_stages |= BITFIELD_BIT(stage);
      ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
   } else {
      ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = NULL;
      ctx->shader_stages &= ~BITFIELD_BIT(stage);
   }
}

static enum mesa_prim
gs_output_to_reduced_prim_type(struct shader_info *info)
{
   switch (info->gs.output_primitive) {
   case MESA_PRIM_POINTS:
      return MESA_PRIM_POINTS;

   case MESA_PRIM_LINES:
   case MESA_PRIM_LINE_LOOP:
   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
      return MESA_PRIM_LINES;

   case MESA_PRIM_TRIANGLES:
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_TRIANGLE_FAN:
   case MESA_PRIM_TRIANGLES_ADJACENCY:
   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
      return MESA_PRIM_TRIANGLES;

   default:
      unreachable("unexpected output primitive type");
   }
}

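/* reduce the last pre-raster stage's output topology to its point/line/triangle
 * class so pipeline state can track the rasterized primitive type
 */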
static enum mesa_prim
update_rast_prim(struct zink_shader *shader)
{
   struct shader_info *info = &shader->info;
   if (info->stage == MESA_SHADER_GEOMETRY)
      return gs_output_to_reduced_prim_type(info);
   else if (info->stage == MESA_SHADER_TESS_EVAL) {
      if (info->tess.point_mode)
         return MESA_PRIM_POINTS;
      else {
         switch (info->tess._primitive_mode) {
         case TESS_PRIMITIVE_ISOLINES:
            return MESA_PRIM_LINES;
         case TESS_PRIMITIVE_TRIANGLES:
         case TESS_PRIMITIVE_QUADS:
            return MESA_PRIM_TRIANGLES;
         default:
            return MESA_PRIM_COUNT;
         }
      }
   }
   return MESA_PRIM_COUNT;
}

static void
unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
{
   if (prev_shader->non_fs.is_generated)
      ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);

   if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
       ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent ==
       prev_shader) {
      bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
   }
}

static void
bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
{
   if (prev_shader && stage < MESA_SHADER_GEOMETRY)
      unbind_generated_gs(ctx, stage, prev_shader);

   gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES;
   if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
   else
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
   gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX;

   /* update rast_prim */
   ctx->gfx_pipeline_state.shader_rast_prim =
      ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
                               MESA_PRIM_COUNT;

   if (old != current) {
      if (!zink_screen(ctx->base.screen)->optimal_keys) {
         if (old != MESA_SHADER_STAGES) {
            memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
            ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
         } else {
            /* always unset vertex shader values when changing to a non-vs last stage */
            memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
         }
      }
      ctx->last_vertex_stage_dirty = true;
   }

   if (ctx->last_vertex_stage != prev_shader) {
      unsigned num_viewports = ctx->vp_state.num_viewports;
      struct zink_screen *screen = zink_screen(ctx->base.screen);
      /* number of enabled viewports is based on whether last vertex stage writes viewport index */
      if (ctx->last_vertex_stage) {
         if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
            ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
         else
            ctx->vp_state.num_viewports = 1;
      } else {
         ctx->vp_state.num_viewports = 1;
      }
      ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
      if (!screen->info.have_EXT_extended_dynamic_state) {
         if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
            ctx->gfx_pipeline_state.dirty = true;
         ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
      }
   }
}

static void
zink_bind_vs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
      return;
   struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX];
   bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
   bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader);
   if (cso) {
      struct zink_shader *zs = cso;
      ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
      ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
   } else {
      ctx->shader_reads_drawid = false;
      ctx->shader_reads_basevertex = false;
   }
}

/* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
 * in GL, samples==1 means ignore gl_SampleMask[]
 * in VK, gl_SampleMask[] is never ignored
 */
void
zink_update_fs_key_samples(struct zink_context *ctx)
{
   if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
      return;
   if (zink_shader_uses_samples(ctx->gfx_stages[MESA_SHADER_FRAGMENT])) {
      bool samples = zink_get_fs_base_key(ctx)->samples;
      if (samples != (ctx->fb_state.samples > 1))
         zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
   }
}

void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
{
   bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
   if (line_rectangular != ctx->rast_state->base.line_rectangular)
      zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
}

static void
zink_bind_fs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
      return;
   if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) {
      ctx->saved_fs = cso;
      zink_set_null_fs(ctx);
      return;
   }
   bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true;
   unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
   bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
   ctx->fbfetch_outputs = 0;
   if (cso) {
      shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
      bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0;
      if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
          writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) {
         ctx->blend_state_changed = true;
         ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C);
      }
      if (info->fs.uses_fbfetch_output) {
         if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL)))
            ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
         ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0;
      }
      zink_update_fs_key_samples(ctx);
      if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
         if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output)
            ctx->gfx_pipeline_state.dirty = true;
         ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output;
      }
      zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
      if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
          !zink_screen(pctx->screen)->driver_compiler_workarounds.needs_zs_shader_swizzle)
         zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
      if (!ctx->track_renderpasses && !ctx->blitting)
         ctx->rp_tc_info_updated = true;
   }
   zink_update_fbfetch(ctx);
}

static void
zink_bind_gs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
      return;
   bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
   bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
}

static void
zink_bind_tcs_state(struct pipe_context *pctx,
                    void *cso)
{
   bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
}

static void
zink_bind_tes_state(struct pipe_context *pctx,
                    void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
      return;
   if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
      if (!cso) {
         /* if unsetting a TES that uses a generated TCS, ensure the TCS is unset */
         if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
            ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
      }
   }
   struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
   bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
   bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader);
}

static void *
zink_create_cs_state(struct pipe_context *pctx,
                     const struct pipe_compute_state *shader)
{
   struct nir_shader *nir;
   if (shader->ir_type != PIPE_SHADER_IR_NIR)
      nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
   else
      nir = (struct nir_shader *)shader->prog;

   if (nir->info.uses_bindless)
      zink_descriptors_init_bindless(zink_context(pctx));

   return create_compute_program(zink_context(pctx), nir);
}

static void
zink_bind_cs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_compute_program *comp = cso;
   if (comp && comp->num_inlinable_uniforms)
      ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
   else
      ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);

   if (ctx->curr_compute) {
      zink_batch_reference_program(ctx, &ctx->curr_compute->base);
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
      ctx->compute_pipeline_state.module_hash = 0;
   }
   ctx->compute_pipeline_state.dirty = true;
   /* compare against the previous program before updating curr_compute,
    * otherwise this check can never pass
    */
   bool changed = comp != ctx->curr_compute;
   ctx->curr_compute = comp;
   if (comp && changed) {
      ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
      if (util_queue_fence_is_signalled(&comp->base.cache_fence))
         ctx->compute_pipeline_state.module = ctx->curr_compute->curr->obj.mod;
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
         ctx->compute_dirty = true;
   }
   zink_select_launch_grid(ctx);
}

static void
zink_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info)
{
   struct zink_compute_program *comp = cso;
   struct zink_screen *screen = zink_screen(pctx->screen);

   info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations;
   info->private_memory = comp->scratch_size;
   if (screen->info.props11.subgroupSize) {
      info->preferred_simd_size = screen->info.props11.subgroupSize;
      info->simd_sizes = info->preferred_simd_size;
   } else {
      // just guess it
      info->preferred_simd_size = 64;
      // only used for actual subgroup support
      info->simd_sizes = 0;
   }
}

static void
zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
{
   struct zink_compute_program *comp = cso;
   zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
}

/* caller must lock prog->libs->lock */
struct zink_gfx_library_key *
zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
   if (!gkey) {
      mesa_loge("ZINK: failed to allocate gkey!");
      return NULL;
   }

   gkey->optimal_key = state->optimal_key;
   assert(gkey->optimal_key);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
      gkey->modules[i] = prog->objs[i].mod;
   gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
   _mesa_set_add(&prog->libs->libs, gkey);
   return gkey;
}

static const char *
print_exe_stages(VkShaderStageFlags stages)
{
   if (stages == VK_SHADER_STAGE_VERTEX_BIT)
      return "VS";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
      return "VS+GS";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
      return "VS+TCS+TES";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
      return "VS+TCS+TES+GS";
   if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
      return "TCS";
   if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      return "TES";
   if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
      return "GS";
   if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
      return "FS";
   if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
      return "CS";
   unreachable("unhandled combination of stages!");
}

static void
gfx_program_precompile_job(void *data, void *gdata, int thread_index)
{
   struct zink_screen *screen = gdata;
   struct zink_gfx_program *prog = data;

   /* this is threadsafe */
   gfx_program_init(prog->base.ctx, prog);

   struct zink_gfx_pipeline_state state = {0};
   state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
   state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
   state.optimal_key = state.shader_keys_optimal.key.val;
   generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
   zink_screen_get_pipeline_cache(screen, &prog->base, true);
   if (!screen->info.have_EXT_shader_object) {
      simple_mtx_lock(&prog->libs->lock);
      zink_create_pipeline_lib(screen, prog, &state);
      simple_mtx_unlock(&prog->libs->lock);
   }
   zink_screen_update_pipeline_cache(screen, &prog->base, true);
}

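/* pipe_context::link_shader hook: dedupe through the program cache (keyed by
 * the XOR of the shaders' hashes), then precompile the program, either
 * synchronously for shader-db stats or on the background cache thread
 */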
static void
zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_shader **zshaders = (struct zink_shader **)shaders;
   if (shaders[MESA_SHADER_COMPUTE])
      return;
   /* explicitly block sample shading: this needs full pipelines always */
   if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
      return;
   /* can't precompile fixedfunc */
   if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
      /* handled directly from shader create */
      return;
   }
   unsigned hash = 0;
   unsigned shader_stages = 0;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (zshaders[i]) {
         hash ^= zshaders[i]->hash;
         shader_stages |= BITFIELD_BIT(i);
      }
   }
   unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
   unsigned tess = shader_stages & tess_stages;
   /* can't do fixedfunc tes either */
   if (tess && !shaders[MESA_SHADER_TESS_EVAL])
      return;
   struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
   simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
   /* link can be called repeatedly with the same shaders: ignore */
   if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
      return;
   }
   struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash);
   u_foreach_bit(i, shader_stages)
      assert(prog->shaders[i]);
   _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
   prog->base.removed = false;
   simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
   if (zink_debug & ZINK_DEBUG_SHADERDB) {
      struct zink_screen *screen = zink_screen(pctx->screen);
      gfx_program_init(ctx, prog);
      if (screen->optimal_keys)
         generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
      else
         generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state);
      VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state,
                                                     ctx->gfx_pipeline_state.element_state->binding_map,
                                                     shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
      print_pipeline_stats(screen, pipeline, &ctx->dbg);
      VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL);
   } else {
      if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
         prog->base.uses_shobj = !zshaders[MESA_SHADER_VERTEX]->info.view_mask && !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
      if (zink_debug & ZINK_DEBUG_NOBGC)
         gfx_program_precompile_job(prog, pctx->screen, 0);
      else
         util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, gfx_program_precompile_job, NULL, 0);
   }
}

void
zink_delete_shader_state(struct pipe_context *pctx, void *cso)
{
   zink_gfx_shader_free(zink_screen(pctx->screen), cso);
}

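/* with EXT_graphics_pipeline_library the separate shader gets wrapped in a
 * single-stage pipeline library; with EXT_shader_object the compiled object
 * is usable directly and no library is needed
 */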
static void
precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen)
{
   zs->precompile.obj = zink_shader_compile_separate(screen, zs);
   if (!screen->info.have_EXT_shader_object) {
      struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0};
      objs[zs->info.stage].mod = zs->precompile.obj.mod;
      zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage);
   }
}

static void
gfx_shader_init_job(void *data, void *gdata, int thread_index)
{
   struct zink_screen *screen = gdata;
   struct zink_shader *zs = data;

   zink_shader_init(screen, zs);

   if (zink_debug & ZINK_DEBUG_NOPC) {
      ralloc_free(zs->nir);
      zs->nir = NULL;
      return;
   }
   if (zs->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
       (screen->info.have_EXT_shader_object ||
        (screen->info.have_EXT_graphics_pipeline_library && (zs->info.stage == MESA_SHADER_FRAGMENT || zs->info.stage == MESA_SHADER_VERTEX)))) {
      /* sample shading can't precompile */
      if (zs->info.stage != MESA_SHADER_FRAGMENT || !zs->info.fs.uses_sample_shading)
         precompile_separate_shader(zs, screen);
   }
   ralloc_free(zs->nir);
   zs->nir = NULL;
}

void *
zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   nir_shader *nir;
   if (shader->type != PIPE_SHADER_IR_NIR)
      nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
   else
      nir = (struct nir_shader *)shader->ir.nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
      zink_descriptor_util_init_fbfetch(zink_context(pctx));
   if (nir->info.uses_bindless)
      zink_descriptors_init_bindless(zink_context(pctx));

   struct zink_shader *zs = zink_shader_create(zink_screen(pctx->screen), nir);
   if (zink_debug & ZINK_DEBUG_NOBGC)
      gfx_shader_init_job(zs, screen, 0);
   else
      util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, gfx_shader_init_job, NULL, 0);

   return zs;
}

static void
zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   util_shader_reference(pctx, &screen->shaders, &cso, NULL);
}

static void *
zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
   bool cache_hit;
   struct zink_screen *screen = zink_screen(pctx->screen);
   return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
}


void
zink_program_init(struct zink_context *ctx)
{
   ctx->base.create_vs_state = zink_create_cached_shader_state;
   ctx->base.bind_vs_state = zink_bind_vs_state;
   ctx->base.delete_vs_state = zink_delete_cached_shader_state;

   ctx->base.create_fs_state = zink_create_cached_shader_state;
   ctx->base.bind_fs_state = zink_bind_fs_state;
   ctx->base.delete_fs_state = zink_delete_cached_shader_state;

   ctx->base.create_gs_state = zink_create_cached_shader_state;
   ctx->base.bind_gs_state = zink_bind_gs_state;
   ctx->base.delete_gs_state = zink_delete_cached_shader_state;

   ctx->base.create_tcs_state = zink_create_cached_shader_state;
   ctx->base.bind_tcs_state = zink_bind_tcs_state;
   ctx->base.delete_tcs_state = zink_delete_cached_shader_state;

   ctx->base.create_tes_state = zink_create_cached_shader_state;
   ctx->base.bind_tes_state = zink_bind_tes_state;
   ctx->base.delete_tes_state = zink_delete_cached_shader_state;

   ctx->base.create_compute_state = zink_create_cs_state;
   ctx->base.bind_compute_state = zink_bind_cs_state;
   ctx->base.get_compute_state_info = zink_get_compute_state_info;
   ctx->base.delete_compute_state = zink_delete_cs_shader_state;

   if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
      _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
   else
      _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
   if (zink_screen(ctx->base.screen)->have_full_ds3)
      _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
   else
      _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
   /* validate struct packing */
   STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));

   STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));

   /* no precompile at all */
   if (zink_debug & ZINK_DEBUG_NOPC)
      return;

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
      ctx->base.link_shader = zink_link_gfx_shader;
}

bool
zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
{
   bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
   bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
   ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
   if (!changed)
      return false;
   if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
      ctx->gfx_pipeline_state.dirty |= true;
   ctx->rasterizer_discard_changed = true;
   return true;
}

void
zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
                           struct util_queue_fence *fence,
                           pipe_driver_thread_func execute,
                           pipe_driver_thread_func cleanup,
                           const size_t job_size)
{
   struct zink_screen *screen = zink_screen(pscreen);
   util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
}

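/* edge flags only matter when a filled primitive type is rasterized as lines or
 * points (i.e., unfilled polygon modes) and the vertex shader actually writes
 * gl_EdgeFlag; every other topology can skip the emulation entirely
 */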
static bool
has_edge_flags(struct zink_context *ctx)
{
   switch(ctx->gfx_pipeline_state.gfx_prim_mode) {
   case MESA_PRIM_POINTS:
   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
   case MESA_PRIM_LINES:
   case MESA_PRIM_LINE_LOOP:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_TRIANGLE_FAN:
   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
   case MESA_PRIM_QUAD_STRIP:
   case MESA_PRIM_PATCHES:
      return false;
   case MESA_PRIM_TRIANGLES:
   case MESA_PRIM_TRIANGLES_ADJACENCY:
   case MESA_PRIM_QUADS:
   case MESA_PRIM_POLYGON:
   case MESA_PRIM_COUNT:
   default:
      break;
   }
   return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES ||
           ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) &&
          ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
}

static enum zink_rast_prim
zink_rast_prim_for_pipe(enum mesa_prim prim)
{
   switch (prim) {
   case MESA_PRIM_POINTS:
      return ZINK_PRIM_POINTS;
   case MESA_PRIM_LINES:
      return ZINK_PRIM_LINES;
   case MESA_PRIM_TRIANGLES:
   default:
      return ZINK_PRIM_TRIANGLES;
   }
}

static enum mesa_prim
zink_tess_prim_type(struct zink_shader *tess)
{
   if (tess->info.tess.point_mode)
      return MESA_PRIM_POINTS;
   else {
      switch (tess->info.tess._primitive_mode) {
      case TESS_PRIMITIVE_ISOLINES:
         return MESA_PRIM_LINES;
      case TESS_PRIMITIVE_TRIANGLES:
      case TESS_PRIMITIVE_QUADS:
         return MESA_PRIM_TRIANGLES;
      default:
         return MESA_PRIM_COUNT;
      }
   }
}

static inline void
zink_add_inline_uniform(nir_shader *shader, int offset)
{
   shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset;
   ++shader->info.num_inlinable_uniforms;
}

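/* map the draw topology onto the generated GS's provoking-vertex lowering mode:
 * strips and fans need special vertex-index math, everything else can use the
 * simple path
 */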
static unsigned
encode_lower_pv_mode(enum mesa_prim prim_type)
{
   switch (prim_type) {
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_QUAD_STRIP:
      return ZINK_PVE_PRIMITIVE_TRISTRIP;
   case MESA_PRIM_TRIANGLE_FAN:
      return ZINK_PVE_PRIMITIVE_FAN;
   default:
      return ZINK_PVE_PRIMITIVE_SIMPLE;
   }
}

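/* decides whether a passthrough GS must be generated and bound to emulate
 * features the driver lacks: line stipple/smooth, point smooth, provoking-vertex
 * lowering, edge flags, and quad primitives
 */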
void
zink_set_primitive_emulation_keys(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (!screen->info.feats.features.geometryShader)
      return;

   bool lower_line_stipple = false, lower_line_smooth = false;
   unsigned lower_pv_mode = 0;
   if (!screen->optimal_keys) {
      lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
                           screen->driver_workarounds.no_linestipple &&
                           ctx->rast_state->base.line_stipple_enable &&
                           !ctx->num_so_targets;

      bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS &&
                                screen->driconf.emulate_point_smooth &&
                                ctx->rast_state->base.point_smooth;
      if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
         assert(zink_get_gs_key(ctx)->lower_line_stipple ==
                zink_get_fs_key(ctx)->lower_line_stipple);
         zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
         zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
      }

      lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
                          screen->driver_workarounds.no_linesmooth &&
                          ctx->rast_state->base.line_smooth &&
                          !ctx->num_so_targets;

      if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
         assert(zink_get_gs_key(ctx)->lower_line_smooth ==
                zink_get_fs_key(ctx)->lower_line_smooth);
         zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
         zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
      }

      if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
         zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
      }

      lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last &&
                      !screen->info.have_EXT_provoking_vertex;
      if (lower_pv_mode)
         lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode);

      if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode)
         zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode;
   }

   bool lower_edge_flags = has_edge_flags(ctx);

   bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS;

   bool lower_filled_quad = lower_quad_prim &&
                            ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES;

   if (lower_line_stipple || lower_line_smooth ||
       lower_edge_flags || lower_quad_prim ||
       lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) {
      enum pipe_shader_type prev_vertex_stage =
         ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
            MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
      enum zink_rast_prim zink_prim_type =
         zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim);

      //when using transform feedback, primitives must be tessellated
      lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings;

      if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated &&
          ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {

         if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) {
            util_queue_fence_wait(&ctx->gfx_stages[prev_vertex_stage]->precompile.fence);
            nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]);
            nir_shader *nir;
            if (lower_filled_quad) {
               nir = zink_create_quads_emulation_gs(
                  &screen->nir_options,
                  prev_stage);
            } else {
               enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode;
               if (prev_vertex_stage == MESA_SHADER_TESS_EVAL)
                  prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
               nir = nir_create_passthrough_gs(
                  &screen->nir_options,
                  prev_stage,
                  prim,
                  ctx->gfx_pipeline_state.rast_prim,
                  lower_edge_flags,
                  lower_line_stipple || lower_quad_prim,
                  true);
            }
            zink_lower_system_values_to_inlined_uniforms(nir);

            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1);
            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
            ralloc_free(prev_stage);
            struct zink_shader *shader = zink_shader_create(screen, nir);
            zink_shader_init(screen, shader);
            shader->needs_inlining = true;
            ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader;
            shader->non_fs.is_generated = true;
            shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage];
            shader->can_inline = true;
            memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride));
         }

         ctx->base.bind_gs_state(&ctx->base,
                                 ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]);
         ctx->is_generated_gs_bound = true;
      }

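      /* feed the generated GS its three inlined values: the fragment shader's
       * flat-varying mask (low and high dwords) and the provoking-vertex mode
       */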
      ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3,
                                        (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
                                                      ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32,
                                                      ctx->gfx_pipeline_state.dyn_state3.pv_last});
   } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
              ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
      ctx->base.bind_gs_state(&ctx->base, NULL);
}