1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "compiler/nir/nir.h"
42 #include "compiler/nir/nir_builder.h"
43 #include "compiler/nir/nir_serialize.h"
44 #include "intel/compiler/brw_compiler.h"
45 #include "intel/compiler/brw_nir.h"
46 #include "iris_context.h"
47 #include "nir/tgsi_to_nir.h"
48
49 #define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
50 #define BRW_KEY_INIT(gen, prog_id) \
51 .base.program_string_id = prog_id, \
52 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
53 .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
54 .base.tex.compressed_multisample_layout_mask = ~0, \
55 .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
56
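/**
 * Return a fresh, unique program ID by atomically incrementing the
 * per-screen counter.
 */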
57 static unsigned
58 get_new_program_id(struct iris_screen *screen)
59 {
60 return p_atomic_inc_return(&screen->program_id);
61 }
62
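/**
 * Translate an iris_vs_prog_key into the brw_vs_prog_key expected by the
 * backend compiler. The iris_to_brw_*_key() helpers below follow the same
 * pattern for the other shader stages.
 */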
63 static struct brw_vs_prog_key
64 iris_to_brw_vs_key(const struct gen_device_info *devinfo,
65 const struct iris_vs_prog_key *key)
66 {
67 return (struct brw_vs_prog_key) {
68 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
69
70 /* Don't tell the backend about our clip plane constants, we've
71 * already lowered them in NIR and don't want it doing it again.
72 */
73 .nr_userclip_plane_consts = 0,
74 };
75 }
76
77 static struct brw_tcs_prog_key
78 iris_to_brw_tcs_key(const struct gen_device_info *devinfo,
79 const struct iris_tcs_prog_key *key)
80 {
81 return (struct brw_tcs_prog_key) {
82 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
83 .tes_primitive_mode = key->tes_primitive_mode,
84 .input_vertices = key->input_vertices,
85 .patch_outputs_written = key->patch_outputs_written,
86 .outputs_written = key->outputs_written,
87 .quads_workaround = key->quads_workaround,
88 };
89 }
90
91 static struct brw_tes_prog_key
92 iris_to_brw_tes_key(const struct gen_device_info *devinfo,
93 const struct iris_tes_prog_key *key)
94 {
95 return (struct brw_tes_prog_key) {
96 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
97 .patch_inputs_read = key->patch_inputs_read,
98 .inputs_read = key->inputs_read,
99 };
100 }
101
102 static struct brw_gs_prog_key
103 iris_to_brw_gs_key(const struct gen_device_info *devinfo,
104 const struct iris_gs_prog_key *key)
105 {
106 return (struct brw_gs_prog_key) {
107 BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
108 };
109 }
110
111 static struct brw_wm_prog_key
112 iris_to_brw_fs_key(const struct gen_device_info *devinfo,
113 const struct iris_fs_prog_key *key)
114 {
115 return (struct brw_wm_prog_key) {
116 BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
117 .nr_color_regions = key->nr_color_regions,
118 .flat_shade = key->flat_shade,
119 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
120 .alpha_to_coverage = key->alpha_to_coverage,
121 .clamp_fragment_color = key->clamp_fragment_color,
122 .persample_interp = key->persample_interp,
123 .multisample_fbo = key->multisample_fbo,
124 .force_dual_color_blend = key->force_dual_color_blend,
125 .coherent_fb_fetch = key->coherent_fb_fetch,
126 .color_outputs_valid = key->color_outputs_valid,
127 .input_slots_valid = key->input_slots_valid,
128 .ignore_sample_mask_out = !key->multisample_fbo,
129 };
130 }
131
132 static struct brw_cs_prog_key
133 iris_to_brw_cs_key(const struct gen_device_info *devinfo,
134 const struct iris_cs_prog_key *key)
135 {
136 return (struct brw_cs_prog_key) {
137 BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
138 };
139 }
140
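/**
 * Allocate "size" bytes of GPU-visible state with the given alignment,
 * recording the resulting resource and offset in "ref". Returns a CPU
 * mapping of the allocation, or NULL on failure.
 */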
141 static void *
142 upload_state(struct u_upload_mgr *uploader,
143 struct iris_state_ref *ref,
144 unsigned size,
145 unsigned alignment)
146 {
147 void *p = NULL;
148 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
149 return p;
150 }
151
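/**
 * Fill out a SURFACE_STATE for a UBO or SSBO binding and upload it via
 * the surface uploader, recording its location in "surf_state".
 */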
152 void
153 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
154 struct pipe_shader_buffer *buf,
155 struct iris_state_ref *surf_state,
156 isl_surf_usage_flags_t usage)
157 {
158 struct pipe_context *ctx = &ice->ctx;
159 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
160 bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
161
162 void *map =
163 upload_state(ice->state.surface_uploader, surf_state,
164 screen->isl_dev.ss.size, 64);
165 if (unlikely(!map)) {
166 surf_state->res = NULL;
167 return;
168 }
169
170 struct iris_resource *res = (void *) buf->buffer;
171 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
172 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
173
174 const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;
175
176 isl_buffer_fill_state(&screen->isl_dev, map,
177 .address = res->bo->gtt_offset + res->offset +
178 buf->buffer_offset,
179 .size_B = buf->buffer_size - res->offset,
180 .format = dataport ? ISL_FORMAT_RAW
181 : ISL_FORMAT_R32G32B32A32_FLOAT,
182 .swizzle = ISL_SWIZZLE_IDENTITY,
183 .stride_B = 1,
184 .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
185 }
186
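/**
 * Walk an array-of-arrays deref chain and compute a flattened offset in
 * units of "elem_size", clamping it to the array bounds so an out-of-range
 * index can't select an invalid surface.
 */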
187 static nir_ssa_def *
188 get_aoa_deref_offset(nir_builder *b,
189 nir_deref_instr *deref,
190 unsigned elem_size)
191 {
192 unsigned array_size = elem_size;
193 nir_ssa_def *offset = nir_imm_int(b, 0);
194
195 while (deref->deref_type != nir_deref_type_var) {
196 assert(deref->deref_type == nir_deref_type_array);
197
198 /* This level's element size is the previous level's array size */
199 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
200 assert(deref->arr.index.ssa);
201 offset = nir_iadd(b, offset,
202 nir_imul(b, index, nir_imm_int(b, array_size)));
203
204 deref = nir_deref_instr_parent(deref);
205 assert(glsl_type_is_array(deref->type));
206 array_size *= glsl_get_length(deref->type);
207 }
208
209 /* Accessing an invalid surface index with the dataport can result in a
210 * hang. According to the spec "if the index used to select an individual
211 * element is negative or greater than or equal to the size of the array,
212 * the results of the operation are undefined but may not lead to
213 * termination" -- which is one of the possible outcomes of the hang.
214 * Clamp the index to prevent access outside of the array bounds.
215 */
216 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
217 }
218
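/**
 * Rewrite storage image deref intrinsics to take a flat image index
 * (the variable's driver_location plus the array-of-arrays offset)
 * instead of a variable dereference.
 */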
219 static void
220 iris_lower_storage_image_derefs(nir_shader *nir)
221 {
222 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
223
224 nir_builder b;
225 nir_builder_init(&b, impl);
226
227 nir_foreach_block(block, impl) {
228 nir_foreach_instr_safe(instr, block) {
229 if (instr->type != nir_instr_type_intrinsic)
230 continue;
231
232 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
233 switch (intrin->intrinsic) {
234 case nir_intrinsic_image_deref_load:
235 case nir_intrinsic_image_deref_store:
236 case nir_intrinsic_image_deref_atomic_add:
237 case nir_intrinsic_image_deref_atomic_imin:
238 case nir_intrinsic_image_deref_atomic_umin:
239 case nir_intrinsic_image_deref_atomic_imax:
240 case nir_intrinsic_image_deref_atomic_umax:
241 case nir_intrinsic_image_deref_atomic_and:
242 case nir_intrinsic_image_deref_atomic_or:
243 case nir_intrinsic_image_deref_atomic_xor:
244 case nir_intrinsic_image_deref_atomic_exchange:
245 case nir_intrinsic_image_deref_atomic_comp_swap:
246 case nir_intrinsic_image_deref_size:
247 case nir_intrinsic_image_deref_samples:
248 case nir_intrinsic_image_deref_load_raw_intel:
249 case nir_intrinsic_image_deref_store_raw_intel: {
250 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
251 nir_variable *var = nir_deref_instr_get_variable(deref);
252
253 b.cursor = nir_before_instr(&intrin->instr);
254 nir_ssa_def *index =
255 nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
256 get_aoa_deref_offset(&b, deref, 1));
257 nir_rewrite_image_intrinsic(intrin, index, false);
258 break;
259 }
260
261 default:
262 break;
263 }
264 }
265 }
266 }
267
268 /**
269 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
270 */
271 static bool
272 iris_fix_edge_flags(nir_shader *nir)
273 {
274 if (nir->info.stage != MESA_SHADER_VERTEX) {
275 nir_shader_preserve_all_metadata(nir);
276 return false;
277 }
278
279 nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
280 VARYING_SLOT_EDGE);
281 if (!var) {
282 nir_shader_preserve_all_metadata(nir);
283 return false;
284 }
285
286 var->data.mode = nir_var_shader_temp;
287 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
288 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
289 nir_fixup_deref_modes(nir);
290
291 nir_foreach_function(f, nir) {
292 if (f->impl) {
293 nir_metadata_preserve(f->impl, nir_metadata_block_index |
294 nir_metadata_dominance |
295 nir_metadata_live_ssa_defs |
296 nir_metadata_loop_analysis);
297 } else {
298 nir_metadata_preserve(f->impl, nir_metadata_all);
299 }
300 }
301
302 return true;
303 }
304
305 /**
306 * Fix an uncompiled shader's stream output info.
307 *
308 * Core Gallium stores output->register_index as a "slot" number, where
309 * slots are assigned consecutively to all outputs in info->outputs_written.
310 * This naive packing of outputs doesn't work for us - we too have slots,
311 * but the layout is defined by the VUE map, which we won't have until we
312 * compile a specific shader variant. So, we remap these and simply store
313 * VARYING_SLOT_* in our copy's output->register_index fields.
314 *
315 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
316 * components of our VUE header. See brw_vue_map.c for the layout.
317 */
318 static void
319 update_so_info(struct pipe_stream_output_info *so_info,
320 uint64_t outputs_written)
321 {
322 uint8_t reverse_map[64] = {};
323 unsigned slot = 0;
324 while (outputs_written) {
325 reverse_map[slot++] = u_bit_scan64(&outputs_written);
326 }
327
328 for (unsigned i = 0; i < so_info->num_outputs; i++) {
329 struct pipe_stream_output *output = &so_info->output[i];
330
331 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
332 output->register_index = reverse_map[output->register_index];
333
334 /* The VUE header contains three scalar fields packed together:
335 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
336 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
337 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
338 */
339 switch (output->register_index) {
340 case VARYING_SLOT_LAYER:
341 assert(output->num_components == 1);
342 output->register_index = VARYING_SLOT_PSIZ;
343 output->start_component = 1;
344 break;
345 case VARYING_SLOT_VIEWPORT:
346 assert(output->num_components == 1);
347 output->register_index = VARYING_SLOT_PSIZ;
348 output->start_component = 2;
349 break;
350 case VARYING_SLOT_PSIZ:
351 assert(output->num_components == 1);
352 output->start_component = 3;
353 break;
354 }
355
356 //info->outputs_written |= 1ull << output->register_index;
357 }
358 }
359
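/**
 * Fill a vec4's worth of system value slots with BRW_PARAM_IMAGE entries
 * for "n" dwords of a brw_image_param field, zero-padding the remainder.
 */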
360 static void
361 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
362 unsigned offset, unsigned n)
363 {
364 assert(offset % sizeof(uint32_t) == 0);
365
366 for (unsigned i = 0; i < n; ++i)
367 sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
368
369 for (unsigned i = n; i < 4; ++i)
370 sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
371 }
372
373 /**
374 * Associate NIR uniform variables with the prog_data->param[] mechanism
375 * used by the backend. Also, decide which UBOs we'd like to push in an
376 * ideal situation (though the backend can reduce this).
377 */
378 static void
379 iris_setup_uniforms(const struct brw_compiler *compiler,
380 void *mem_ctx,
381 nir_shader *nir,
382 struct brw_stage_prog_data *prog_data,
383 unsigned kernel_input_size,
384 enum brw_param_builtin **out_system_values,
385 unsigned *out_num_system_values,
386 unsigned *out_num_cbufs)
387 {
388 UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
389
390 unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
391
392 const unsigned IRIS_MAX_SYSTEM_VALUES =
393 PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
394 enum brw_param_builtin *system_values =
395 rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
396 unsigned num_system_values = 0;
397
398 unsigned patch_vert_idx = -1;
399 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
400 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
401 unsigned variable_group_size_idx = -1;
402 unsigned work_dim_idx = -1;
403 memset(ucp_idx, -1, sizeof(ucp_idx));
404 memset(img_idx, -1, sizeof(img_idx));
405
406 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
407
408 nir_builder b;
409 nir_builder_init(&b, impl);
410
411 b.cursor = nir_before_block(nir_start_block(impl));
412 nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
413
414 /* Turn system value intrinsics into uniforms */
415 nir_foreach_block(block, impl) {
416 nir_foreach_instr_safe(instr, block) {
417 if (instr->type != nir_instr_type_intrinsic)
418 continue;
419
420 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
421 nir_ssa_def *offset;
422
423 switch (intrin->intrinsic) {
424 case nir_intrinsic_load_constant: {
425 unsigned load_size = intrin->dest.ssa.num_components *
426 intrin->dest.ssa.bit_size / 8;
427 unsigned load_align = intrin->dest.ssa.bit_size / 8;
428
429 /* This one is special because it reads from the shader constant
430 * data and not cbuf0 which gallium uploads for us.
431 */
432 b.cursor = nir_instr_remove(&intrin->instr);
433
434 nir_ssa_def *offset =
435 nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
436 nir_intrinsic_base(intrin));
437
438 assert(load_size < b.shader->constant_data_size);
439 unsigned max_offset = b.shader->constant_data_size - load_size;
440 offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
441
442 nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
443 nir_load_reloc_const_intel(&b, IRIS_SHADER_RELOC_CONST_DATA_ADDR_LOW),
444 nir_load_reloc_const_intel(&b, IRIS_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
445
446 nir_ssa_def *data =
447 nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
448 nir_u2u64(&b, offset)),
449 load_align,
450 intrin->dest.ssa.num_components,
451 intrin->dest.ssa.bit_size);
452
453 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
454 nir_src_for_ssa(data));
455 continue;
456 }
457 case nir_intrinsic_load_user_clip_plane: {
458 unsigned ucp = nir_intrinsic_ucp_id(intrin);
459
460 if (ucp_idx[ucp] == -1) {
461 ucp_idx[ucp] = num_system_values;
462 num_system_values += 4;
463 }
464
465 for (int i = 0; i < 4; i++) {
466 system_values[ucp_idx[ucp] + i] =
467 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
468 }
469
470 b.cursor = nir_before_instr(instr);
471 offset = nir_imm_int(&b, system_values_start +
472 ucp_idx[ucp] * sizeof(uint32_t));
473 break;
474 }
475 case nir_intrinsic_load_patch_vertices_in:
476 if (patch_vert_idx == -1)
477 patch_vert_idx = num_system_values++;
478
479 system_values[patch_vert_idx] =
480 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
481
482 b.cursor = nir_before_instr(instr);
483 offset = nir_imm_int(&b, system_values_start +
484 patch_vert_idx * sizeof(uint32_t));
485 break;
486 case nir_intrinsic_image_deref_load_param_intel: {
487 assert(devinfo->gen < 9);
488 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
489 nir_variable *var = nir_deref_instr_get_variable(deref);
490
491 if (img_idx[var->data.binding] == -1) {
492 /* GL only allows arrays of arrays of images. */
493 assert(glsl_type_is_image(glsl_without_array(var->type)));
494 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
495
496 for (int i = 0; i < num_images; i++) {
497 const unsigned img = var->data.binding + i;
498
499 img_idx[img] = num_system_values;
500 num_system_values += BRW_IMAGE_PARAM_SIZE;
501
502 uint32_t *img_sv = &system_values[img_idx[img]];
503
504 setup_vec4_image_sysval(
505 img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
506 offsetof(struct brw_image_param, offset), 2);
507 setup_vec4_image_sysval(
508 img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
509 offsetof(struct brw_image_param, size), 3);
510 setup_vec4_image_sysval(
511 img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
512 offsetof(struct brw_image_param, stride), 4);
513 setup_vec4_image_sysval(
514 img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
515 offsetof(struct brw_image_param, tiling), 3);
516 setup_vec4_image_sysval(
517 img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
518 offsetof(struct brw_image_param, swizzling), 2);
519 }
520 }
521
522 b.cursor = nir_before_instr(instr);
523 offset = nir_iadd(&b,
524 get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
525 nir_imm_int(&b, system_values_start +
526 img_idx[var->data.binding] * 4 +
527 nir_intrinsic_base(intrin) * 16));
528 break;
529 }
530 case nir_intrinsic_load_local_group_size: {
531 assert(nir->info.cs.local_size_variable);
532 if (variable_group_size_idx == -1) {
533 variable_group_size_idx = num_system_values;
534 num_system_values += 3;
535 for (int i = 0; i < 3; i++) {
536 system_values[variable_group_size_idx + i] =
537 BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
538 }
539 }
540
541 b.cursor = nir_before_instr(instr);
542 offset = nir_imm_int(&b, system_values_start +
543 variable_group_size_idx * sizeof(uint32_t));
544 break;
545 }
546 case nir_intrinsic_load_work_dim: {
547 if (work_dim_idx == -1) {
548 work_dim_idx = num_system_values++;
549 system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
550 }
551 b.cursor = nir_before_instr(instr);
552 offset = nir_imm_int(&b, system_values_start +
553 work_dim_idx * sizeof(uint32_t));
554 break;
555 }
556 case nir_intrinsic_load_kernel_input: {
557 assert(nir_intrinsic_base(intrin) +
558 nir_intrinsic_range(intrin) <= kernel_input_size);
559 b.cursor = nir_before_instr(instr);
560 offset = nir_iadd_imm(&b, intrin->src[0].ssa,
561 nir_intrinsic_base(intrin));
562 break;
563 }
564 default:
565 continue;
566 }
567
568 nir_intrinsic_instr *load =
569 nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
570 load->num_components = intrin->dest.ssa.num_components;
571 load->src[0] = nir_src_for_ssa(temp_ubo_name);
572 load->src[1] = nir_src_for_ssa(offset);
573 nir_intrinsic_set_align(load, 4, 0);
574 nir_intrinsic_set_range_base(load, 0);
575 nir_intrinsic_set_range(load, ~0);
576 nir_ssa_dest_init(&load->instr, &load->dest,
577 intrin->dest.ssa.num_components,
578 intrin->dest.ssa.bit_size, NULL);
579 nir_builder_instr_insert(&b, &load->instr);
580 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
581 nir_src_for_ssa(&load->dest.ssa));
582 nir_instr_remove(instr);
583 }
584 }
585
586 nir_validate_shader(nir, "before remapping");
587
588 /* Uniforms are stored in constant buffer 0, so the user-facing UBOs
589 * are shifted up by one index. If any constant buffer is needed,
590 * then constant buffer 0 will be needed as well, so account for it.
591 */
592 unsigned num_cbufs = nir->info.num_ubos;
593 if (num_cbufs || nir->num_uniforms)
594 num_cbufs++;
595
596 /* Place the new params in a new cbuf. */
597 if (num_system_values > 0 || kernel_input_size > 0) {
598 unsigned sysval_cbuf_index = num_cbufs;
599 num_cbufs++;
600
601 system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
602 num_system_values);
603
604 nir_foreach_block(block, impl) {
605 nir_foreach_instr_safe(instr, block) {
606 if (instr->type != nir_instr_type_intrinsic)
607 continue;
608
609 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
610
611 if (load->intrinsic != nir_intrinsic_load_ubo)
612 continue;
613
614 b.cursor = nir_before_instr(instr);
615
616 assert(load->src[0].is_ssa);
617
618 if (load->src[0].ssa == temp_ubo_name) {
619 nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
620 nir_instr_rewrite_src(instr, &load->src[0],
621 nir_src_for_ssa(imm));
622 }
623 }
624 }
625
626 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
627 nir_opt_constant_folding(nir);
628 } else {
629 ralloc_free(system_values);
630 system_values = NULL;
631 }
632
633 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
634 nir_validate_shader(nir, "after remap");
635
636 /* We don't use params[], but gallium leaves num_uniforms set. We used
637 * it earlier to detect whether cbuf0 exists, but it's no longer needed
638 * once we get here. Zero it out so that the back-end doesn't get
639 * confused when nr_params * 4 != num_uniforms.
640 */
641 nir->num_uniforms = 0;
642
643 *out_system_values = system_values;
644 *out_num_system_values = num_system_values;
645 *out_num_cbufs = num_cbufs;
646 }
647
648 static const char *surface_group_names[] = {
649 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
650 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
651 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
652 [IRIS_SURFACE_GROUP_TEXTURE] = "texture",
653 [IRIS_SURFACE_GROUP_UBO] = "ubo",
654 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
655 [IRIS_SURFACE_GROUP_IMAGE] = "image",
656 };
657
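/**
 * Print a human-readable summary of a binding table, used when the
 * DEBUG_BT flag is set in INTEL_DEBUG.
 */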
658 static void
659 iris_print_binding_table(FILE *fp, const char *name,
660 const struct iris_binding_table *bt)
661 {
662 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
663
664 uint32_t total = 0;
665 uint32_t compacted = 0;
666
667 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
668 uint32_t size = bt->sizes[i];
669 total += size;
670 if (size)
671 compacted += util_bitcount64(bt->used_mask[i]);
672 }
673
674 if (total == 0) {
675 fprintf(fp, "Binding table for %s is empty\n\n", name);
676 return;
677 }
678
679 if (total != compacted) {
680 fprintf(fp, "Binding table for %s "
681 "(compacted to %u entries from %u entries)\n",
682 name, compacted, total);
683 } else {
684 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
685 }
686
687 uint32_t entry = 0;
688 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
689 uint64_t mask = bt->used_mask[i];
690 while (mask) {
691 int index = u_bit_scan64(&mask);
692 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
693 }
694 }
695 fprintf(fp, "\n");
696 }
697
698 enum {
699 /* Max elements in a surface group. */
700 SURFACE_GROUP_MAX_ELEMENTS = 64,
701 };
702
703 /**
704 * Map a <group, index> pair to a binding table index.
705 *
706 * For example: <UBO, 5> => binding table index 12
707 */
708 uint32_t
709 iris_group_index_to_bti(const struct iris_binding_table *bt,
710 enum iris_surface_group group, uint32_t index)
711 {
712 assert(index < bt->sizes[group]);
713 uint64_t mask = bt->used_mask[group];
714 uint64_t bit = 1ull << index;
715 if (bit & mask) {
716 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
717 } else {
718 return IRIS_SURFACE_NOT_USED;
719 }
720 }
721
722 /**
723 * Map a binding table index back to a <group, index> pair.
724 *
725 * For example: binding table index 12 => <UBO, 5>
726 */
727 uint32_t
728 iris_bti_to_group_index(const struct iris_binding_table *bt,
729 enum iris_surface_group group, uint32_t bti)
730 {
731 uint64_t used_mask = bt->used_mask[group];
732 assert(bti >= bt->offsets[group]);
733
734 uint32_t c = bti - bt->offsets[group];
735 while (used_mask) {
736 int i = u_bit_scan64(&used_mask);
737 if (c == 0)
738 return i;
739 c--;
740 }
741
742 return IRIS_SURFACE_NOT_USED;
743 }
744
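/**
 * Rewrite a surface index source from a group-relative index to its final
 * binding table index. Constant indices are remapped directly; indirect
 * indices just get the group's base offset added.
 */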
745 static void
746 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
747 nir_instr *instr, nir_src *src,
748 enum iris_surface_group group)
749 {
750 assert(bt->sizes[group] > 0);
751
752 b->cursor = nir_before_instr(instr);
753 nir_ssa_def *bti;
754 if (nir_src_is_const(*src)) {
755 uint32_t index = nir_src_as_uint(*src);
756 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
757 src->ssa->bit_size);
758 } else {
759 /* Indirect usage makes all the surfaces of the group available,
760 * so we can just add the base.
761 */
762 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
763 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
764 }
765 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
766 }
767
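/**
 * Mark the surface(s) referenced by "src" as used in the given group.
 * An indirect index forces the entire group to be considered used.
 */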
768 static void
769 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
770 enum iris_surface_group group)
771 {
772 assert(bt->sizes[group] > 0);
773
774 if (nir_src_is_const(*src)) {
775 uint64_t index = nir_src_as_uint(*src);
776 assert(index < bt->sizes[group]);
777 bt->used_mask[group] |= 1ull << index;
778 } else {
779 /* There's an indirect usage, we need all the surfaces. */
780 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
781 }
782 }
783
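/**
 * Check (and cache) the INTEL_DISABLE_COMPACT_BINDING_TABLE environment
 * variable, which disables binding table compaction for debugging.
 */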
784 static bool
785 skip_compacting_binding_tables(void)
786 {
787 static int skip = -1;
788 if (skip < 0)
789 skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
790 return skip;
791 }
792
793 /**
794 * Set up the binding table indices and apply to the shader.
795 */
796 static void
797 iris_setup_binding_table(const struct gen_device_info *devinfo,
798 struct nir_shader *nir,
799 struct iris_binding_table *bt,
800 unsigned num_render_targets,
801 unsigned num_system_values,
802 unsigned num_cbufs)
803 {
804 const struct shader_info *info = &nir->info;
805
806 memset(bt, 0, sizeof(*bt));
807
808 /* Set the sizes for each surface group. For some groups, we already know
809 * upfront how many will be used, so mark them.
810 */
811 if (info->stage == MESA_SHADER_FRAGMENT) {
812 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
813 /* All render targets used. */
814 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
815 BITFIELD64_MASK(num_render_targets);
816
817 /* Set up the render target read surface group in order to support
818 * non-coherent framebuffer fetch on Gen8.
819 */
820 if (devinfo->gen == 8 && info->outputs_read) {
821 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
822 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
823 BITFIELD64_MASK(num_render_targets);
824 }
825 } else if (info->stage == MESA_SHADER_COMPUTE) {
826 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
827 }
828
829 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
830 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;
831
832 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
833
834 /* Allocate an extra slot in the UBO section for NIR constants.
835 * Binding table compaction will remove it if unnecessary.
836 *
837 * We don't include them in iris_compiled_shader::num_cbufs because
838 * they are uploaded separately from shs->constbuf[], but from a shader
839 * point of view, they're another UBO (at the end of the section).
840 */
841 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
842
843 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
844
845 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
846 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
847
848 /* Mark surfaces used for the cases where we don't have the information
849 * available upfront.
850 */
851 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
852 nir_foreach_block (block, impl) {
853 nir_foreach_instr (instr, block) {
854 if (instr->type != nir_instr_type_intrinsic)
855 continue;
856
857 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
858 switch (intrin->intrinsic) {
859 case nir_intrinsic_load_num_work_groups:
860 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
861 break;
862
863 case nir_intrinsic_load_output:
864 if (devinfo->gen == 8) {
865 mark_used_with_src(bt, &intrin->src[0],
866 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
867 }
868 break;
869
870 case nir_intrinsic_image_size:
871 case nir_intrinsic_image_load:
872 case nir_intrinsic_image_store:
873 case nir_intrinsic_image_atomic_add:
874 case nir_intrinsic_image_atomic_imin:
875 case nir_intrinsic_image_atomic_umin:
876 case nir_intrinsic_image_atomic_imax:
877 case nir_intrinsic_image_atomic_umax:
878 case nir_intrinsic_image_atomic_and:
879 case nir_intrinsic_image_atomic_or:
880 case nir_intrinsic_image_atomic_xor:
881 case nir_intrinsic_image_atomic_exchange:
882 case nir_intrinsic_image_atomic_comp_swap:
883 case nir_intrinsic_image_load_raw_intel:
884 case nir_intrinsic_image_store_raw_intel:
885 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
886 break;
887
888 case nir_intrinsic_load_ubo:
889 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
890 break;
891
892 case nir_intrinsic_store_ssbo:
893 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
894 break;
895
896 case nir_intrinsic_get_ssbo_size:
897 case nir_intrinsic_ssbo_atomic_add:
898 case nir_intrinsic_ssbo_atomic_imin:
899 case nir_intrinsic_ssbo_atomic_umin:
900 case nir_intrinsic_ssbo_atomic_imax:
901 case nir_intrinsic_ssbo_atomic_umax:
902 case nir_intrinsic_ssbo_atomic_and:
903 case nir_intrinsic_ssbo_atomic_or:
904 case nir_intrinsic_ssbo_atomic_xor:
905 case nir_intrinsic_ssbo_atomic_exchange:
906 case nir_intrinsic_ssbo_atomic_comp_swap:
907 case nir_intrinsic_ssbo_atomic_fmin:
908 case nir_intrinsic_ssbo_atomic_fmax:
909 case nir_intrinsic_ssbo_atomic_fcomp_swap:
910 case nir_intrinsic_load_ssbo:
911 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
912 break;
913
914 default:
915 break;
916 }
917 }
918 }
919
920 /* When compaction is disabled, we just mark everything as used. */
921 if (unlikely(skip_compacting_binding_tables())) {
922 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
923 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
924 }
925
926 /* Calculate the offsets and the binding table size based on the used
927 * surfaces. After this point, the functions to go between "group indices"
928 * and binding table indices can be used.
929 */
930 uint32_t next = 0;
931 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
932 if (bt->used_mask[i] != 0) {
933 bt->offsets[i] = next;
934 next += util_bitcount64(bt->used_mask[i]);
935 }
936 }
937 bt->size_bytes = next * 4;
938
939 if (INTEL_DEBUG & DEBUG_BT) {
940 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
941 }
942
943 /* Apply the binding table indices. The backend compiler is not expected
944 * to change those, as we haven't set any of the *_start entries in brw
945 * binding_table.
946 */
947 nir_builder b;
948 nir_builder_init(&b, impl);
949
950 nir_foreach_block (block, impl) {
951 nir_foreach_instr (instr, block) {
952 if (instr->type == nir_instr_type_tex) {
953 nir_tex_instr *tex = nir_instr_as_tex(instr);
954 tex->texture_index =
955 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
956 tex->texture_index);
957 continue;
958 }
959
960 if (instr->type != nir_instr_type_intrinsic)
961 continue;
962
963 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
964 switch (intrin->intrinsic) {
965 case nir_intrinsic_image_size:
966 case nir_intrinsic_image_load:
967 case nir_intrinsic_image_store:
968 case nir_intrinsic_image_atomic_add:
969 case nir_intrinsic_image_atomic_imin:
970 case nir_intrinsic_image_atomic_umin:
971 case nir_intrinsic_image_atomic_imax:
972 case nir_intrinsic_image_atomic_umax:
973 case nir_intrinsic_image_atomic_and:
974 case nir_intrinsic_image_atomic_or:
975 case nir_intrinsic_image_atomic_xor:
976 case nir_intrinsic_image_atomic_exchange:
977 case nir_intrinsic_image_atomic_comp_swap:
978 case nir_intrinsic_image_load_raw_intel:
979 case nir_intrinsic_image_store_raw_intel:
980 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
981 IRIS_SURFACE_GROUP_IMAGE);
982 break;
983
984 case nir_intrinsic_load_ubo:
985 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
986 IRIS_SURFACE_GROUP_UBO);
987 break;
988
989 case nir_intrinsic_store_ssbo:
990 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
991 IRIS_SURFACE_GROUP_SSBO);
992 break;
993
994 case nir_intrinsic_load_output:
995 if (devinfo->gen == 8) {
996 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
997 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
998 }
999 break;
1000
1001 case nir_intrinsic_get_ssbo_size:
1002 case nir_intrinsic_ssbo_atomic_add:
1003 case nir_intrinsic_ssbo_atomic_imin:
1004 case nir_intrinsic_ssbo_atomic_umin:
1005 case nir_intrinsic_ssbo_atomic_imax:
1006 case nir_intrinsic_ssbo_atomic_umax:
1007 case nir_intrinsic_ssbo_atomic_and:
1008 case nir_intrinsic_ssbo_atomic_or:
1009 case nir_intrinsic_ssbo_atomic_xor:
1010 case nir_intrinsic_ssbo_atomic_exchange:
1011 case nir_intrinsic_ssbo_atomic_comp_swap:
1012 case nir_intrinsic_ssbo_atomic_fmin:
1013 case nir_intrinsic_ssbo_atomic_fmax:
1014 case nir_intrinsic_ssbo_atomic_fcomp_swap:
1015 case nir_intrinsic_load_ssbo:
1016 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1017 IRIS_SURFACE_GROUP_SSBO);
1018 break;
1019
1020 default:
1021 break;
1022 }
1023 }
1024 }
1025 }
1026
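/**
 * Log a shader recompile to the performance debug output, including how
 * the new key differs from the previously compiled variant's key.
 */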
1027 static void
1028 iris_debug_recompile(struct iris_context *ice,
1029 struct shader_info *info,
1030 const struct brw_base_prog_key *key)
1031 {
1032 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
1033 const struct gen_device_info *devinfo = &screen->devinfo;
1034 const struct brw_compiler *c = screen->compiler;
1035
1036 if (!info)
1037 return;
1038
1039 c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
1040 _mesa_shader_stage_to_string(info->stage),
1041 info->name ? info->name : "(no identifier)",
1042 info->label ? info->label : "");
1043
1044 const void *old_iris_key =
1045 iris_find_previous_compile(ice, info->stage, key->program_string_id);
1046
1047 union brw_any_prog_key old_key;
1048
1049 switch (info->stage) {
1050 case MESA_SHADER_VERTEX:
1051 old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
1052 break;
1053 case MESA_SHADER_TESS_CTRL:
1054 old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
1055 break;
1056 case MESA_SHADER_TESS_EVAL:
1057 old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
1058 break;
1059 case MESA_SHADER_GEOMETRY:
1060 old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
1061 break;
1062 case MESA_SHADER_FRAGMENT:
1063 old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
1064 break;
1065 case MESA_SHADER_COMPUTE:
1066 old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
1067 break;
1068 default:
1069 unreachable("invalid shader stage");
1070 }
1071
1072 brw_debug_key_recompile(c, &ice->dbg, info->stage, &old_key.base, key);
1073 }
1074
1075 /**
1076 * Get the shader for the last enabled geometry stage.
1077 *
1078 * This stage is the one which will feed stream output and the rasterizer.
1079 */
1080 static gl_shader_stage
1081 last_vue_stage(struct iris_context *ice)
1082 {
1083 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1084 return MESA_SHADER_GEOMETRY;
1085
1086 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1087 return MESA_SHADER_TESS_EVAL;
1088
1089 return MESA_SHADER_VERTEX;
1090 }
1091
1092 /**
1093 * Compile a vertex shader, and upload the assembly.
1094 */
1095 static struct iris_compiled_shader *
1096 iris_compile_vs(struct iris_context *ice,
1097 struct iris_uncompiled_shader *ish,
1098 const struct iris_vs_prog_key *key)
1099 {
1100 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1101 const struct brw_compiler *compiler = screen->compiler;
1102 const struct gen_device_info *devinfo = &screen->devinfo;
1103 void *mem_ctx = ralloc_context(NULL);
1104 struct brw_vs_prog_data *vs_prog_data =
1105 rzalloc(mem_ctx, struct brw_vs_prog_data);
1106 struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1107 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1108 enum brw_param_builtin *system_values;
1109 unsigned num_system_values;
1110 unsigned num_cbufs;
1111
1112 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1113
1114 if (key->vue.nr_userclip_plane_consts) {
1115 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1116 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1117 true, false, NULL);
1118 nir_lower_io_to_temporaries(nir, impl, true, false);
1119 nir_lower_global_vars_to_local(nir);
1120 nir_lower_vars_to_ssa(nir);
1121 nir_shader_gather_info(nir, impl);
1122 }
1123
1124 prog_data->use_alt_mode = ish->use_alt_mode;
1125
1126 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1127 &num_system_values, &num_cbufs);
1128
1129 struct iris_binding_table bt;
1130 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1131 num_system_values, num_cbufs);
1132
1133 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1134
1135 brw_compute_vue_map(devinfo,
1136 &vue_prog_data->vue_map, nir->info.outputs_written,
1137 nir->info.separate_shader, /* pos_slots */ 1);
1138
1139 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
1140
1141 char *error_str = NULL;
1142 const unsigned *program =
1143 brw_compile_vs(compiler, &ice->dbg, mem_ctx, &brw_key, vs_prog_data,
1144 nir, -1, NULL, &error_str);
1145 if (program == NULL) {
1146 dbg_printf("Failed to compile vertex shader: %s\n", error_str);
1147 ralloc_free(mem_ctx);
1148 return NULL;
1149 }
1150
1151 if (ish->compiled_once) {
1152 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1153 } else {
1154 ish->compiled_once = true;
1155 }
1156
1157 uint32_t *so_decls =
1158 screen->vtbl.create_so_decl_list(&ish->stream_output,
1159 &vue_prog_data->vue_map);
1160
1161 struct iris_compiled_shader *shader =
1162 iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
1163 prog_data, so_decls, system_values, num_system_values,
1164 0, num_cbufs, &bt);
1165
1166 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1167
1168 ralloc_free(mem_ctx);
1169 return shader;
1170 }
1171
1172 /**
1173 * Update the current vertex shader variant.
1174 *
1175 * Fill out the key, look in the cache, compile and bind if needed.
1176 */
1177 static void
1178 iris_update_compiled_vs(struct iris_context *ice)
1179 {
1180 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1181 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1182 struct iris_uncompiled_shader *ish =
1183 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1184
1185 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
1186 screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1187
1188 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1189 struct iris_compiled_shader *shader =
1190 iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);
1191
1192 if (!shader)
1193 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1194
1195 if (!shader)
1196 shader = iris_compile_vs(ice, ish, &key);
1197
1198 if (old != shader) {
1199 ice->shaders.prog[IRIS_CACHE_VS] = shader;
1200 ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1201 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1202 IRIS_STAGE_DIRTY_BINDINGS_VS |
1203 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1204 shs->sysvals_need_upload = true;
1205
1206 const struct brw_vs_prog_data *vs_prog_data =
1207 (void *) shader->prog_data;
1208 const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
1209 vs_prog_data->uses_baseinstance;
1210 const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
1211 vs_prog_data->uses_is_indexed_draw;
1212 const bool needs_sgvs_element = uses_draw_params ||
1213 vs_prog_data->uses_instanceid ||
1214 vs_prog_data->uses_vertexid;
1215
1216 if (ice->state.vs_uses_draw_params != uses_draw_params ||
1217 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
1218 ice->state.vs_needs_edge_flag != ish->needs_edge_flag) {
1219 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
1220 IRIS_DIRTY_VERTEX_ELEMENTS;
1221 }
1222 ice->state.vs_uses_draw_params = uses_draw_params;
1223 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
1224 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
1225 ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
1226 }
1227 }
1228
1229 /**
1230 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1231 */
1232 const struct shader_info *
1233 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1234 {
1235 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1236
1237 if (!ish)
1238 return NULL;
1239
1240 const nir_shader *nir = ish->nir;
1241 return &nir->info;
1242 }
1243
1244 /**
1245 * Get the union of TCS output and TES input slots.
1246 *
1247 * TCS and TES need to agree on a common URB entry layout. In particular,
1248 * the data for all patch vertices is stored in a single URB entry (unlike
1249 * GS which has one entry per input vertex). This means that per-vertex
1250 * array indexing needs a stride.
1251 *
1252 * SSO requires locations to match, but doesn't require the number of
1253 * outputs/inputs to match (in fact, the TCS often has extra outputs).
1254 * So, we need to take the extra step of unifying these on the fly.
1255 */
1256 static void
1257 get_unified_tess_slots(const struct iris_context *ice,
1258 uint64_t *per_vertex_slots,
1259 uint32_t *per_patch_slots)
1260 {
1261 const struct shader_info *tcs =
1262 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1263 const struct shader_info *tes =
1264 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1265
1266 *per_vertex_slots = tes->inputs_read;
1267 *per_patch_slots = tes->patch_inputs_read;
1268
1269 if (tcs) {
1270 *per_vertex_slots |= tcs->outputs_written;
1271 *per_patch_slots |= tcs->patch_outputs_written;
1272 }
1273 }
1274
1275 /**
1276 * Compile a tessellation control shader, and upload the assembly.
1277 */
1278 static struct iris_compiled_shader *
1279 iris_compile_tcs(struct iris_context *ice,
1280 struct iris_uncompiled_shader *ish,
1281 const struct iris_tcs_prog_key *key)
1282 {
1283 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1284 const struct brw_compiler *compiler = screen->compiler;
1285 const struct nir_shader_compiler_options *options =
1286 compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
1287 void *mem_ctx = ralloc_context(NULL);
1288 struct brw_tcs_prog_data *tcs_prog_data =
1289 rzalloc(mem_ctx, struct brw_tcs_prog_data);
1290 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1291 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1292 const struct gen_device_info *devinfo = &screen->devinfo;
1293 enum brw_param_builtin *system_values = NULL;
1294 unsigned num_system_values = 0;
1295 unsigned num_cbufs = 0;
1296
1297 nir_shader *nir;
1298
1299 struct iris_binding_table bt;
1300
1301 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
1302
1303 if (ish) {
1304 nir = nir_shader_clone(mem_ctx, ish->nir);
1305
1306 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1307 &num_system_values, &num_cbufs);
1308 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1309 num_system_values, num_cbufs);
1310 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1311 } else {
1312 nir =
1313 brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1314
1315 /* Reserve space for passing the default tess levels as constants. */
1316 num_cbufs = 1;
1317 num_system_values = 8;
1318 system_values =
1319 rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1320 prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1321 prog_data->nr_params = num_system_values;
1322
1323 if (key->tes_primitive_mode == GL_QUADS) {
1324 for (int i = 0; i < 4; i++)
1325 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1326
1327 system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1328 system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1329 } else if (key->tes_primitive_mode == GL_TRIANGLES) {
1330 for (int i = 0; i < 3; i++)
1331 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1332
1333 system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1334 } else {
1335 assert(key->tes_primitive_mode == GL_ISOLINES);
1336 system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1337 system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1338 }
1339
1340 /* Manually setup the TCS binding table. */
1341 memset(&bt, 0, sizeof(bt));
1342 bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1343 bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1344 bt.size_bytes = 4;
1345
1346 prog_data->ubo_ranges[0].length = 1;
1347 }
1348
1349 char *error_str = NULL;
1350 const unsigned *program =
1351 brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &brw_key, tcs_prog_data,
1352 nir, -1, NULL, &error_str);
1353 if (program == NULL) {
1354 dbg_printf("Failed to compile control shader: %s\n", error_str);
1355 ralloc_free(mem_ctx);
1356 return NULL;
1357 }
1358
1359 if (ish) {
1360 if (ish->compiled_once) {
1361 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1362 } else {
1363 ish->compiled_once = true;
1364 }
1365 }
1366
1367 struct iris_compiled_shader *shader =
1368 iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
1369 prog_data, NULL, system_values, num_system_values,
1370 0, num_cbufs, &bt);
1371
1372 if (ish)
1373 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1374
1375 ralloc_free(mem_ctx);
1376 return shader;
1377 }
1378
1379 /**
1380 * Update the current tessellation control shader variant.
1381 *
1382 * Fill out the key, look in the cache, compile and bind if needed.
1383 */
1384 static void
1385 iris_update_compiled_tcs(struct iris_context *ice)
1386 {
1387 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1388 struct iris_uncompiled_shader *tcs =
1389 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1390 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1391 const struct brw_compiler *compiler = screen->compiler;
1392 const struct gen_device_info *devinfo = &screen->devinfo;
1393
1394 const struct shader_info *tes_info =
1395 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1396 struct iris_tcs_prog_key key = {
1397 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1398 .tes_primitive_mode = tes_info->tess.primitive_mode,
1399 .input_vertices =
1400 !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1401 .quads_workaround = devinfo->gen < 9 &&
1402 tes_info->tess.primitive_mode == GL_QUADS &&
1403 tes_info->tess.spacing == TESS_SPACING_EQUAL,
1404 };
1405 get_unified_tess_slots(ice, &key.outputs_written,
1406 &key.patch_outputs_written);
1407 screen->vtbl.populate_tcs_key(ice, &key);
1408
1409 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1410 struct iris_compiled_shader *shader =
1411 iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1412
1413 if (tcs && !shader)
1414 shader = iris_disk_cache_retrieve(ice, tcs, &key, sizeof(key));
1415
1416 if (!shader)
1417 shader = iris_compile_tcs(ice, tcs, &key);
1418
1419 if (old != shader) {
1420 ice->shaders.prog[IRIS_CACHE_TCS] = shader;
1421 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
1422 IRIS_STAGE_DIRTY_BINDINGS_TCS |
1423 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
1424 shs->sysvals_need_upload = true;
1425 }
1426 }
1427
1428 /**
1429 * Compile a tessellation evaluation shader, and upload the assembly.
1430 */
1431 static struct iris_compiled_shader *
1432 iris_compile_tes(struct iris_context *ice,
1433 struct iris_uncompiled_shader *ish,
1434 const struct iris_tes_prog_key *key)
1435 {
1436 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1437 const struct brw_compiler *compiler = screen->compiler;
1438 void *mem_ctx = ralloc_context(NULL);
1439 struct brw_tes_prog_data *tes_prog_data =
1440 rzalloc(mem_ctx, struct brw_tes_prog_data);
1441 struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1442 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1443 enum brw_param_builtin *system_values;
1444 const struct gen_device_info *devinfo = &screen->devinfo;
1445 unsigned num_system_values;
1446 unsigned num_cbufs;
1447
1448 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1449
1450 if (key->vue.nr_userclip_plane_consts) {
1451 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1452 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1453 true, false, NULL);
1454 nir_lower_io_to_temporaries(nir, impl, true, false);
1455 nir_lower_global_vars_to_local(nir);
1456 nir_lower_vars_to_ssa(nir);
1457 nir_shader_gather_info(nir, impl);
1458 }
1459
1460 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1461 &num_system_values, &num_cbufs);
1462
1463 struct iris_binding_table bt;
1464 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1465 num_system_values, num_cbufs);
1466
1467 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1468
1469 struct brw_vue_map input_vue_map;
1470 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1471 key->patch_inputs_read);
1472
1473 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
1474
1475 char *error_str = NULL;
1476 const unsigned *program =
1477 brw_compile_tes(compiler, &ice->dbg, mem_ctx, &brw_key, &input_vue_map,
1478 tes_prog_data, nir, -1, NULL, &error_str);
1479 if (program == NULL) {
1480 dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
1481 ralloc_free(mem_ctx);
1482 return NULL;
1483 }
1484
1485 if (ish->compiled_once) {
1486 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1487 } else {
1488 ish->compiled_once = true;
1489 }
1490
1491 uint32_t *so_decls =
1492 screen->vtbl.create_so_decl_list(&ish->stream_output,
1493 &vue_prog_data->vue_map);
1494
1495
1496 struct iris_compiled_shader *shader =
1497 iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program,
1498 prog_data, so_decls, system_values, num_system_values,
1499 0, num_cbufs, &bt);
1500
1501 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1502
1503 ralloc_free(mem_ctx);
1504 return shader;
1505 }
1506
1507 /**
1508 * Update the current tessellation evaluation shader variant.
1509 *
1510 * Fill out the key, look in the cache, compile and bind if needed.
1511 */
1512 static void
1513 iris_update_compiled_tes(struct iris_context *ice)
1514 {
1515 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1516 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1517 struct iris_uncompiled_shader *ish =
1518 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1519
1520 struct iris_tes_prog_key key = { KEY_ID(vue.base) };
1521 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1522 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1523
1524 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1525 struct iris_compiled_shader *shader =
1526 iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key);
1527
1528 if (!shader)
1529 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1530
1531 if (!shader)
1532 shader = iris_compile_tes(ice, ish, &key);
1533
1534 if (old != shader) {
1535 ice->shaders.prog[IRIS_CACHE_TES] = shader;
1536 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
1537 IRIS_STAGE_DIRTY_BINDINGS_TES |
1538 IRIS_STAGE_DIRTY_CONSTANTS_TES;
1539 shs->sysvals_need_upload = true;
1540 }
1541
1542 /* TODO: Could compare and avoid flagging this. */
1543 const struct shader_info *tes_info = &ish->nir->info;
1544 if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
1545 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
1546 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1547 }
1548 }
1549
1550 /**
1551 * Compile a geometry shader, and upload the assembly.
1552 */
1553 static struct iris_compiled_shader *
1554 iris_compile_gs(struct iris_context *ice,
1555 struct iris_uncompiled_shader *ish,
1556 const struct iris_gs_prog_key *key)
1557 {
1558 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1559 const struct brw_compiler *compiler = screen->compiler;
1560 const struct gen_device_info *devinfo = &screen->devinfo;
1561 void *mem_ctx = ralloc_context(NULL);
1562 struct brw_gs_prog_data *gs_prog_data =
1563 rzalloc(mem_ctx, struct brw_gs_prog_data);
1564 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1565 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1566 enum brw_param_builtin *system_values;
1567 unsigned num_system_values;
1568 unsigned num_cbufs;
1569
1570 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1571
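/* If user clip planes are in use, lower them to clip-distance writes in
 * the GS.  nir_lower_clip_gs introduces new output stores, so re-run the
 * usual output/variable cleanup passes and regather shader info so that
 * later passes (and the VUE map computation below, which uses
 * nir->info.outputs_written) see the added clip distances.
 */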
1572 if (key->vue.nr_userclip_plane_consts) {
1573 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1574 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1575 false, NULL);
1576 nir_lower_io_to_temporaries(nir, impl, true, false);
1577 nir_lower_global_vars_to_local(nir);
1578 nir_lower_vars_to_ssa(nir);
1579 nir_shader_gather_info(nir, impl);
1580 }
1581
1582 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1583 &num_system_values, &num_cbufs);
1584
1585 struct iris_binding_table bt;
1586 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1587 num_system_values, num_cbufs);
1588
1589 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1590
1591 brw_compute_vue_map(devinfo,
1592 &vue_prog_data->vue_map, nir->info.outputs_written,
1593 nir->info.separate_shader, /* pos_slots */ 1);
1594
1595 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
1596
1597 char *error_str = NULL;
1598 const unsigned *program =
1599 brw_compile_gs(compiler, &ice->dbg, mem_ctx, &brw_key, gs_prog_data,
1600 nir, NULL, -1, NULL, &error_str);
1601 if (program == NULL) {
1602 dbg_printf("Failed to compile geometry shader: %s\n", error_str);
1603 ralloc_free(mem_ctx);
1604 return NULL;
1605 }
1606
1607 if (ish->compiled_once) {
1608 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1609 } else {
1610 ish->compiled_once = true;
1611 }
1612
1613 uint32_t *so_decls =
1614 screen->vtbl.create_so_decl_list(&ish->stream_output,
1615 &vue_prog_data->vue_map);
1616
1617 struct iris_compiled_shader *shader =
1618 iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program,
1619 prog_data, so_decls, system_values, num_system_values,
1620 0, num_cbufs, &bt);
1621
1622 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1623
1624 ralloc_free(mem_ctx);
1625 return shader;
1626 }
1627
1628 /**
1629 * Update the current geometry shader variant.
1630 *
1631 * Fill out the key, look in the cache, compile and bind if needed.
1632 */
1633 static void
1634 iris_update_compiled_gs(struct iris_context *ice)
1635 {
1636 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1637 struct iris_uncompiled_shader *ish =
1638 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1639 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1640 struct iris_compiled_shader *shader = NULL;
1641 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1642
1643 if (ish) {
1644 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
1645 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1646
1647 shader =
1648 iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key);
1649
1650 if (!shader)
1651 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1652
1653 if (!shader)
1654 shader = iris_compile_gs(ice, ish, &key);
1655 }
1656
1657 if (old != shader) {
1658 ice->shaders.prog[IRIS_CACHE_GS] = shader;
1659 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
1660 IRIS_STAGE_DIRTY_BINDINGS_GS |
1661 IRIS_STAGE_DIRTY_CONSTANTS_GS;
1662 shs->sysvals_need_upload = true;
1663 }
1664 }
1665
1666 /**
1667 * Compile a fragment (pixel) shader, and upload the assembly.
1668 */
1669 static struct iris_compiled_shader *
1670 iris_compile_fs(struct iris_context *ice,
1671 struct iris_uncompiled_shader *ish,
1672 const struct iris_fs_prog_key *key,
1673 struct brw_vue_map *vue_map)
1674 {
1675 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1676 const struct brw_compiler *compiler = screen->compiler;
1677 void *mem_ctx = ralloc_context(NULL);
1678 struct brw_wm_prog_data *fs_prog_data =
1679 rzalloc(mem_ctx, struct brw_wm_prog_data);
1680 struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1681 enum brw_param_builtin *system_values;
1682 const struct gen_device_info *devinfo = &screen->devinfo;
1683 unsigned num_system_values;
1684 unsigned num_cbufs;
1685
1686 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1687
1688 prog_data->use_alt_mode = ish->use_alt_mode;
1689
1690 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1691 &num_system_values, &num_cbufs);
1692
1693 /* Lower output variables to load_output intrinsics before setting up
1694 * binding tables, so iris_setup_binding_table can map any load_output
1695 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for
1696 * non-coherent framebuffer fetches.
1697 */
1698 brw_nir_lower_fs_outputs(nir);
1699
1700 /* On Gen11+, shader RT write messages have a "Null Render Target" bit
1701 * and do not need a binding table entry with a null surface. Earlier
1702 * generations need an entry for a null surface.
1703 */
1704 int null_rts = devinfo->gen < 11 ? 1 : 0;
1705
1706 struct iris_binding_table bt;
1707 iris_setup_binding_table(devinfo, nir, &bt,
1708 MAX2(key->nr_color_regions, null_rts),
1709 num_system_values, num_cbufs);
1710
1711 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1712
1713 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
1714
1715 char *error_str = NULL;
1716 const unsigned *program =
1717 brw_compile_fs(compiler, &ice->dbg, mem_ctx, &brw_key, fs_prog_data,
1718 nir, -1, -1, -1, true, false, vue_map,
1719 NULL, &error_str);
1720 if (program == NULL) {
1721 dbg_printf("Failed to compile fragment shader: %s\n", error_str);
1722 ralloc_free(mem_ctx);
1723 return NULL;
1724 }
1725
1726 if (ish->compiled_once) {
1727 iris_debug_recompile(ice, &nir->info, &brw_key.base);
1728 } else {
1729 ish->compiled_once = true;
1730 }
1731
1732 struct iris_compiled_shader *shader =
1733 iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program,
1734 prog_data, NULL, system_values, num_system_values,
1735 0, num_cbufs, &bt);
1736
1737 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1738
1739 ralloc_free(mem_ctx);
1740 return shader;
1741 }
1742
1743 /**
1744 * Update the current fragment shader variant.
1745 *
1746 * Fill out the key, look in the cache, compile and bind if needed.
1747 */
1748 static void
1749 iris_update_compiled_fs(struct iris_context *ice)
1750 {
1751 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
1752 struct iris_uncompiled_shader *ish =
1753 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1754 struct iris_fs_prog_key key = { KEY_ID(base) };
1755 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1756 screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
1757
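/* The FS key only needs the full VUE slot layout when the shader reads
 * more than 16 varyings (IRIS_NOS_LAST_VUE_MAP, set at create time);
 * otherwise input_slots_valid stays 0, which presumably lets one variant
 * be shared across different VUE layouts.
 */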
1758 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
1759 key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;
1760
1761 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
1762 struct iris_compiled_shader *shader =
1763 iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key);
1764
1765 if (!shader)
1766 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
1767
1768 if (!shader)
1769 shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);
1770
1771 if (old != shader) {
1772 // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
1773 // toggles. might be able to avoid flagging SBE too.
1774 ice->shaders.prog[IRIS_CACHE_FS] = shader;
1775 ice->state.dirty |= IRIS_DIRTY_WM |
1776 IRIS_DIRTY_CLIP |
1777 IRIS_DIRTY_SBE;
1778 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
1779 IRIS_STAGE_DIRTY_BINDINGS_FS |
1780 IRIS_STAGE_DIRTY_CONSTANTS_FS;
1781 shs->sysvals_need_upload = true;
1782 }
1783 }
1784
1785 /**
1786 * Update the last enabled stage's VUE map.
1787 *
1788 * When the shader feeding the rasterizer's output interface changes, we
1789 * need to re-emit various packets.
1790 */
1791 static void
1792 update_last_vue_map(struct iris_context *ice,
1793 struct brw_stage_prog_data *prog_data)
1794 {
1795 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1796 struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1797 struct brw_vue_map *old_map = ice->shaders.last_vue_map;
1798 const uint64_t changed_slots =
1799 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
1800
1801 if (changed_slots & VARYING_BIT_VIEWPORT) {
1802 ice->state.num_viewports =
1803 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
1804 ice->state.dirty |= IRIS_DIRTY_CLIP |
1805 IRIS_DIRTY_SF_CL_VIEWPORT |
1806 IRIS_DIRTY_CC_VIEWPORT |
1807 IRIS_DIRTY_SCISSOR_RECT;
1808 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
1809 ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
1810 }
1811
1812 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
1813 ice->state.dirty |= IRIS_DIRTY_SBE;
1814 }
1815
1816 ice->shaders.last_vue_map = &vue_prog_data->vue_map;
1817 }
1818
1819 static void
1820 iris_update_pull_constant_descriptors(struct iris_context *ice,
1821 gl_shader_stage stage)
1822 {
1823 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
1824
1825 if (!shader || !shader->prog_data->has_ubo_pull)
1826 return;
1827
1828 struct iris_shader_state *shs = &ice->state.shaders[stage];
1829 bool any_new_descriptors =
1830 shader->num_system_values > 0 && shs->sysvals_need_upload;
1831
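/* Walk the bound constant buffers and lazily create a surface state for
 * any buffer that does not have one yet; pulled constants go through
 * these descriptors, so any new one means the stage's binding table
 * needs to be re-uploaded.
 */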
1832 unsigned bound_cbufs = shs->bound_cbufs;
1833
1834 while (bound_cbufs) {
1835 const int i = u_bit_scan(&bound_cbufs);
1836 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
1837 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
1838 if (!surf_state->res && cbuf->buffer) {
1839 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
1840 ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
1841 any_new_descriptors = true;
1842 }
1843 }
1844
1845 if (any_new_descriptors)
1846 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
1847 }
1848
1849 /**
1850 * Get the prog_data for a given stage, or NULL if the stage is disabled.
1851 */
1852 static struct brw_vue_prog_data *
1853 get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage)
1854 {
1855 if (!ice->shaders.prog[stage])
1856 return NULL;
1857
1858 return (void *) ice->shaders.prog[stage]->prog_data;
1859 }
1860
1861 /**
1862 * Update the current shader variants for the given state.
1863 *
1864 * This should be called on every draw call to ensure that the correct
1865 * shaders are bound. It will also flag any dirty state triggered by
1866 * swapping out those shaders.
1867 */
1868 void
1869 iris_update_compiled_shaders(struct iris_context *ice)
1870 {
1871 const uint64_t dirty = ice->state.dirty;
1872 const uint64_t stage_dirty = ice->state.stage_dirty;
1873
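/* Snapshot the VUE stage prog_data pointers so we can detect URB entry
 * size changes after new variants are bound.  If the URB is already
 * flagged dirty, the later comparison is skipped as pointless.
 */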
1874 struct brw_vue_prog_data *old_prog_datas[4];
1875 if (!(dirty & IRIS_DIRTY_URB)) {
1876 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
1877 old_prog_datas[i] = get_vue_prog_data(ice, i);
1878 }
1879
1880 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
1881 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
1882 struct iris_uncompiled_shader *tes =
1883 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1884 if (tes) {
1885 iris_update_compiled_tcs(ice);
1886 iris_update_compiled_tes(ice);
1887 } else {
1888 ice->shaders.prog[IRIS_CACHE_TCS] = NULL;
1889 ice->shaders.prog[IRIS_CACHE_TES] = NULL;
1890 ice->state.stage_dirty |=
1891 IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
1892 IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
1893 IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
1894 }
1895 }
1896
1897 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
1898 iris_update_compiled_vs(ice);
1899 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
1900 iris_update_compiled_gs(ice);
1901
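/* Track whether the last pre-rasterization stage emits points or lines;
 * the CLIP packet is re-emitted when this changes, presumably because
 * viewport XY clipping is programmed differently for those topologies.
 */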
1902 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
1903 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
1904 const struct iris_compiled_shader *gs =
1905 ice->shaders.prog[MESA_SHADER_GEOMETRY];
1906 const struct iris_compiled_shader *tes =
1907 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
1908
1909 bool points_or_lines = false;
1910
1911 if (gs) {
1912 const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
1913 points_or_lines =
1914 gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
1915 gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
1916 } else if (tes) {
1917 const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
1918 points_or_lines =
1919 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
1920 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1921 }
1922
1923 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
1924 /* Outbound to XY Clip enables */
1925 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
1926 ice->state.dirty |= IRIS_DIRTY_CLIP;
1927 }
1928 }
1929
1930 gl_shader_stage last_stage = last_vue_stage(ice);
1931 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
1932 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
1933 update_last_vue_map(ice, shader->prog_data);
1934 if (ice->state.streamout != shader->streamout) {
1935 ice->state.streamout = shader->streamout;
1936 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
1937 }
1938
1939 if (ice->state.streamout_active) {
1940 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
1941 struct iris_stream_output_target *so =
1942 (void *) ice->state.so_target[i];
1943 if (so)
1944 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
1945 }
1946 }
1947
1948 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
1949 iris_update_compiled_fs(ice);
1950
1951 /* Changing shader interfaces may require a URB reconfiguration. */
1952 if (!(dirty & IRIS_DIRTY_URB)) {
1953 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1954 struct brw_vue_prog_data *old = old_prog_datas[i];
1955 struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
1956 if (!!old != !!new ||
1957 (new && new->urb_entry_size != old->urb_entry_size)) {
1958 ice->state.dirty |= IRIS_DIRTY_URB;
1959 break;
1960 }
1961 }
1962 }
1963
1964 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
1965 if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
1966 iris_update_pull_constant_descriptors(ice, i);
1967 }
1968 }
1969
1970 static struct iris_compiled_shader *
1971 iris_compile_cs(struct iris_context *ice,
1972 struct iris_uncompiled_shader *ish,
1973 const struct iris_cs_prog_key *key)
1974 {
1975 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1976 const struct brw_compiler *compiler = screen->compiler;
1977 void *mem_ctx = ralloc_context(NULL);
1978 struct brw_cs_prog_data *cs_prog_data =
1979 rzalloc(mem_ctx, struct brw_cs_prog_data);
1980 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1981 enum brw_param_builtin *system_values;
1982 const struct gen_device_info *devinfo = &screen->devinfo;
1983 unsigned num_system_values;
1984 unsigned num_cbufs;
1985
1986 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1987
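/* Lower compute system-value intrinsics (local invocation ID/index and
 * related values) into per-thread arithmetic the backend can consume.
 */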
1988 NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
1989
1990 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
1991 ish->kernel_input_size,
1992 &system_values, &num_system_values, &num_cbufs);
1993
1994 struct iris_binding_table bt;
1995 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1996 num_system_values, num_cbufs);
1997
1998 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
1999
2000 char *error_str = NULL;
2001 const unsigned *program =
2002 brw_compile_cs(compiler, &ice->dbg, mem_ctx, &brw_key, cs_prog_data,
2003 nir, -1, NULL, &error_str);
2004 if (program == NULL) {
2005 dbg_printf("Failed to compile compute shader: %s\n", error_str);
2006 ralloc_free(mem_ctx);
2007 return NULL;
2008 }
2009
2010 if (ish->compiled_once) {
2011 iris_debug_recompile(ice, &nir->info, &brw_key.base);
2012 } else {
2013 ish->compiled_once = true;
2014 }
2015
2016 struct iris_compiled_shader *shader =
2017 iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
2018 prog_data, NULL, system_values, num_system_values,
2019 ish->kernel_input_size, num_cbufs, &bt);
2020
2021 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2022
2023 ralloc_free(mem_ctx);
2024 return shader;
2025 }
2026
2027 static void
2028 iris_update_compiled_cs(struct iris_context *ice)
2029 {
2030 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
2031 struct iris_uncompiled_shader *ish =
2032 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
2033
2034 struct iris_cs_prog_key key = { KEY_ID(base) };
2035 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2036 screen->vtbl.populate_cs_key(ice, &key);
2037
2038 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
2039 struct iris_compiled_shader *shader =
2040 iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);
2041
2042 if (!shader)
2043 shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
2044
2045 if (!shader)
2046 shader = iris_compile_cs(ice, ish, &key);
2047
2048 if (old != shader) {
2049 ice->shaders.prog[IRIS_CACHE_CS] = shader;
2050 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
2051 IRIS_STAGE_DIRTY_BINDINGS_CS |
2052 IRIS_STAGE_DIRTY_CONSTANTS_CS;
2053 shs->sysvals_need_upload = true;
2054 }
2055 }
2056
2057 void
2058 iris_update_compiled_compute_shader(struct iris_context *ice)
2059 {
2060 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
2061 iris_update_compiled_cs(ice);
2062
2063 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
2064 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2065 }
2066
2067 void
2068 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2069 unsigned threads,
2070 uint32_t *dst)
2071 {
2072 assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2073 assert(cs_prog_data->push.cross_thread.size == 0);
2074 assert(cs_prog_data->push.per_thread.dwords == 1);
2075 assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
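/* Each thread's per-thread push constant block is padded out to a whole
 * register (8 dwords, i.e. 32 bytes), and only its first dword is used
 * for the subgroup ID, hence the stride of 8 below.  For example, with
 * threads == 4: dst[0] = 0, dst[8] = 1, dst[16] = 2, dst[24] = 3.
 */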
2076 for (unsigned t = 0; t < threads; t++)
2077 dst[8 * t] = t;
2078 }
2079
2080 /**
2081 * Allocate scratch BOs as needed for the given per-thread size and stage.
2082 */
2083 struct iris_bo *
2084 iris_get_scratch_space(struct iris_context *ice,
2085 unsigned per_thread_scratch,
2086 gl_shader_stage stage)
2087 {
2088 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2089 struct iris_bufmgr *bufmgr = screen->bufmgr;
2090 const struct gen_device_info *devinfo = &screen->devinfo;
2091
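/* Per-thread scratch sizes are power-of-two multiples of 1KB, so
 * ffs(size) - 11 gives a compact log2-style index: 1KB -> 0, 2KB -> 1,
 * 4KB -> 2, and so on.  This matches the hardware's per-thread scratch
 * space encoding and is also what the scratch BO cache is indexed by.
 */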
2092 unsigned encoded_size = ffs(per_thread_scratch) - 11;
2093 assert(encoded_size < (1 << 16));
2094
2095 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2096
2097 /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
2098 *
2099 * "Scratch Space per slice is computed based on 4 sub-slices. SW
2100 * must allocate scratch space enough so that each slice has 4
2101 * slices allowed."
2102 *
2103 * According to the other driver team, this applies to compute shaders
2104 * as well. This is not currently documented at all.
2105 *
2106 * This hack is no longer necessary on Gen11+.
2107 *
2108 * For Gen11+, scratch space allocation is based on the number of threads
2109 * in the base configuration.
2110 */
2111 unsigned subslice_total = screen->subslice_total;
2112 if (devinfo->gen == 12)
2113 subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
2114 else if (devinfo->gen == 11)
2115 subslice_total = 8;
2116 else if (devinfo->gen < 11)
2117 subslice_total = 4 * devinfo->num_slices;
2118 assert(subslice_total >= screen->subslice_total);
2119
2120 if (!*bop) {
2121 unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
2122
2123 if (devinfo->gen >= 12) {
2124 /* Same as ICL below, but with 16 EUs. */
2125 scratch_ids_per_subslice = 16 * 8;
2126 } else if (devinfo->gen == 11) {
2127 /* The MEDIA_VFE_STATE docs say:
2128 *
2129 * "Starting with this configuration, the Maximum Number of
2130 * Threads must be set to (#EU * 8) for GPGPU dispatches.
2131 *
2132 * Although there are only 7 threads per EU in the configuration,
2133 * the FFTID is calculated as if there are 8 threads per EU,
2134 * which in turn requires a larger amount of Scratch Space to be
2135 * allocated by the driver."
2136 */
2137 scratch_ids_per_subslice = 8 * 8;
2138 }
2139
2140 uint32_t max_threads[] = {
2141 [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
2142 [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
2143 [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
2144 [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
2145 [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
2146 [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
2147 };
2148
2149 uint32_t size = per_thread_scratch * max_threads[stage];
2150
2151 *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
2152 }
2153
2154 return *bop;
2155 }
2156
2157 /* ------------------------------------------------------------------- */
2158
2159 /**
2160 * The pipe->create_[stage]_state() driver hooks.
2161 *
2162 * Performs basic NIR preprocessing, records any state dependencies, and
2163 * returns an iris_uncompiled_shader as the Gallium CSO.
2164 *
2165 * Actual shader compilation to assembly happens later, at first use.
2166 */
2167 static void *
2168 iris_create_uncompiled_shader(struct pipe_context *ctx,
2169 nir_shader *nir,
2170 const struct pipe_stream_output_info *so_info)
2171 {
2172 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
2173 const struct gen_device_info *devinfo = &screen->devinfo;
2174
2175 struct iris_uncompiled_shader *ish =
2176 calloc(1, sizeof(struct iris_uncompiled_shader));
2177 if (!ish)
2178 return NULL;
2179
2180 NIR_PASS(ish->needs_edge_flag, nir, iris_fix_edge_flags);
2181
2182 brw_preprocess_nir(screen->compiler, nir, NULL);
2183
2184 NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo,
2185 &ish->uses_atomic_load_store);
2186 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
2187
2188 nir_sweep(nir);
2189
2190 ish->program_id = get_new_program_id(screen);
2191 ish->nir = nir;
2192 if (so_info) {
2193 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2194 update_so_info(&ish->stream_output, nir->info.outputs_written);
2195 }
2196
2197 /* Save this now before potentially dropping nir->info.name */
2198 if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
2199 ish->use_alt_mode = true;
2200
2201 if (screen->disk_cache) {
2202 /* Serialize the NIR to a binary blob that we can hash for the disk
2203 * cache. Drop unnecessary information (like variable names)
2204 * so the serialized NIR is smaller, and also to let us detect more
2205 * isomorphic shaders when hashing, increasing cache hits.
2206 */
2207 struct blob blob;
2208 blob_init(&blob);
2209 nir_serialize(&blob, nir, true);
2210 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2211 blob_finish(&blob);
2212 }
2213
2214 return ish;
2215 }
2216
2217 static struct iris_uncompiled_shader *
2218 iris_create_shader_state(struct pipe_context *ctx,
2219 const struct pipe_shader_state *state)
2220 {
2221 struct nir_shader *nir;
2222
2223 if (state->type == PIPE_SHADER_IR_TGSI)
2224 nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2225 else
2226 nir = state->ir.nir;
2227
2228 return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
2229 }
2230
2231 static void *
2232 iris_create_vs_state(struct pipe_context *ctx,
2233 const struct pipe_shader_state *state)
2234 {
2235 struct iris_context *ice = (void *) ctx;
2236 struct iris_screen *screen = (void *) ctx->screen;
2237 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2238
2239 /* User clip planes */
2240 if (ish->nir->info.clip_distance_array_size == 0)
2241 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2242
2243 if (screen->precompile) {
2244 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
2245
2246 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2247 iris_compile_vs(ice, ish, &key);
2248 }
2249
2250 return ish;
2251 }
2252
2253 static void *
2254 iris_create_tcs_state(struct pipe_context *ctx,
2255 const struct pipe_shader_state *state)
2256 {
2257 struct iris_context *ice = (void *) ctx;
2258 struct iris_screen *screen = (void *) ctx->screen;
2259 const struct brw_compiler *compiler = screen->compiler;
2260 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2261 struct shader_info *info = &ish->nir->info;
2262
2263 if (screen->precompile) {
2264 const unsigned _GL_TRIANGLES = 0x0004;
2265 struct iris_tcs_prog_key key = {
2266 KEY_ID(vue.base),
2267 // XXX: make sure the linker fills this out from the TES...
2268 .tes_primitive_mode =
2269 info->tess.primitive_mode ? info->tess.primitive_mode
2270 : _GL_TRIANGLES,
2271 .outputs_written = info->outputs_written,
2272 .patch_outputs_written = info->patch_outputs_written,
2273 };
2274
2275 /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2276 * We don't have that information, so we just guess that the input
2277 * and output patches are the same size. This is a bad guess, but we
2278 * can't do much better.
2279 */
2280 if (compiler->use_tcs_8_patch)
2281 key.input_vertices = info->tess.tcs_vertices_out;
2282
2283 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2284 iris_compile_tcs(ice, ish, &key);
2285 }
2286
2287 return ish;
2288 }
2289
2290 static void *
2291 iris_create_tes_state(struct pipe_context *ctx,
2292 const struct pipe_shader_state *state)
2293 {
2294 struct iris_context *ice = (void *) ctx;
2295 struct iris_screen *screen = (void *) ctx->screen;
2296 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2297 struct shader_info *info = &ish->nir->info;
2298
2299 /* User clip planes */
2300 if (ish->nir->info.clip_distance_array_size == 0)
2301 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2302
2303 if (screen->precompile) {
2304 struct iris_tes_prog_key key = {
2305 KEY_ID(vue.base),
2306 // XXX: not ideal, need TCS output/TES input unification
2307 .inputs_read = info->inputs_read,
2308 .patch_inputs_read = info->patch_inputs_read,
2309 };
2310
2311 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2312 iris_compile_tes(ice, ish, &key);
2313 }
2314
2315 return ish;
2316 }
2317
2318 static void *
2319 iris_create_gs_state(struct pipe_context *ctx,
2320 const struct pipe_shader_state *state)
2321 {
2322 struct iris_context *ice = (void *) ctx;
2323 struct iris_screen *screen = (void *) ctx->screen;
2324 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2325
2326 /* User clip planes */
2327 if (ish->nir->info.clip_distance_array_size == 0)
2328 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2329
2330 if (screen->precompile) {
2331 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
2332
2333 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2334 iris_compile_gs(ice, ish, &key);
2335 }
2336
2337 return ish;
2338 }
2339
2340 static void *
2341 iris_create_fs_state(struct pipe_context *ctx,
2342 const struct pipe_shader_state *state)
2343 {
2344 struct iris_context *ice = (void *) ctx;
2345 struct iris_screen *screen = (void *) ctx->screen;
2346 struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
2347 struct shader_info *info = &ish->nir->info;
2348
2349 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2350 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2351 (1ull << IRIS_NOS_RASTERIZER) |
2352 (1ull << IRIS_NOS_BLEND);
2353
2354 /* The program key needs the VUE map if there are > 16 inputs */
2355 if (util_bitcount64(ish->nir->info.inputs_read &
2356 BRW_FS_VARYING_INPUT_MASK) > 16) {
2357 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2358 }
2359
2360 if (screen->precompile) {
2361 const uint64_t color_outputs = info->outputs_written &
2362 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2363 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2364 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2365
2366 bool can_rearrange_varyings =
2367 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2368
2369 const struct gen_device_info *devinfo = &screen->devinfo;
2370 struct iris_fs_prog_key key = {
2371 KEY_ID(base),
2372 .nr_color_regions = util_bitcount(color_outputs),
2373 .coherent_fb_fetch = devinfo->gen >= 9,
2374 .input_slots_valid =
2375 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2376 };
2377
2378 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2379 iris_compile_fs(ice, ish, &key, NULL);
2380 }
2381
2382 return ish;
2383 }
2384
2385 static void *
2386 iris_create_compute_state(struct pipe_context *ctx,
2387 const struct pipe_compute_state *state)
2388 {
2389 struct iris_context *ice = (void *) ctx;
2390 struct iris_screen *screen = (void *) ctx->screen;
2391 const nir_shader_compiler_options *options =
2392 screen->compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions;
2393
2394 nir_shader *nir;
2395 switch (state->ir_type) {
2396 case PIPE_SHADER_IR_NIR:
2397 nir = (void *)state->prog;
2398 break;
2399
2400 case PIPE_SHADER_IR_NIR_SERIALIZED: {
2401 struct blob_reader reader;
2402 const struct pipe_binary_program_header *hdr = state->prog;
2403 blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
2404 nir = nir_deserialize(NULL, options, &reader);
2405 break;
2406 }
2407
2408 default:
2409 unreachable("Unsupported IR");
2410 }
2411
2412 /* Most of iris doesn't really care about the difference between compute
2413 * shaders and kernels. We also tend to hard-code COMPUTE everywhere so
2414 * it's way easier if we just normalize to COMPUTE here.
2415 */
2416 assert(nir->info.stage == MESA_SHADER_COMPUTE ||
2417 nir->info.stage == MESA_SHADER_KERNEL);
2418 nir->info.stage = MESA_SHADER_COMPUTE;
2419
2420 struct iris_uncompiled_shader *ish =
2421 iris_create_uncompiled_shader(ctx, nir, NULL);
2422 ish->kernel_input_size = state->req_input_mem;
2423 ish->kernel_shared_size = state->req_local_mem;
2424
2425 // XXX: disallow more than 64KB of shared variables
2426
2427 if (screen->precompile) {
2428 struct iris_cs_prog_key key = { KEY_ID(base) };
2429
2430 if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2431 iris_compile_cs(ice, ish, &key);
2432 }
2433
2434 return ish;
2435 }
2436
2437 /**
2438 * The pipe->delete_[stage]_state() driver hooks.
2439 *
2440 * Frees the iris_uncompiled_shader.
2441 */
2442 static void
2443 iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
2444 {
2445 struct iris_uncompiled_shader *ish = state;
2446 struct iris_context *ice = (void *) ctx;
2447
2448 if (ice->shaders.uncompiled[stage] == ish) {
2449 ice->shaders.uncompiled[stage] = NULL;
2450 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2451 }
2452
2453 iris_delete_shader_variants(ice, ish);
2454
2455 ralloc_free(ish->nir);
2456 free(ish);
2457 }
2458
2459 static void
2460 iris_delete_vs_state(struct pipe_context *ctx, void *state)
2461 {
2462 iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
2463 }
2464
2465 static void
2466 iris_delete_tcs_state(struct pipe_context *ctx, void *state)
2467 {
2468 iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
2469 }
2470
2471 static void
2472 iris_delete_tes_state(struct pipe_context *ctx, void *state)
2473 {
2474 iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
2475 }
2476
2477 static void
2478 iris_delete_gs_state(struct pipe_context *ctx, void *state)
2479 {
2480 iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
2481 }
2482
2483 static void
2484 iris_delete_fs_state(struct pipe_context *ctx, void *state)
2485 {
2486 iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
2487 }
2488
2489 static void
2490 iris_delete_cs_state(struct pipe_context *ctx, void *state)
2491 {
2492 iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
2493 }
2494
2495 /**
2496 * The pipe->bind_[stage]_state() driver hook.
2497 *
2498 * Binds an uncompiled shader as the current one for a particular stage.
2499 * Updates dirty tracking to account for the shader's NOS.
2500 */
2501 static void
2502 bind_shader_state(struct iris_context *ice,
2503 struct iris_uncompiled_shader *ish,
2504 gl_shader_stage stage)
2505 {
2506 uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2507 const uint64_t nos = ish ? ish->nos : 0;
2508
2509 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2510 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2511
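/* If the number of texture units in use changes, the sampler state
 * table has a different size, so flag the stage's sampler states for
 * re-upload.
 */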
2512 if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
2513 (new_info ? util_last_bit(new_info->textures_used) : 0)) {
2514 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2515 }
2516
2517 ice->shaders.uncompiled[stage] = ish;
2518 ice->state.stage_dirty |= stage_dirty_bit;
2519
2520 /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
2521 * (or that they no longer need to do so).
2522 */
2523 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2524 if (nos & (1 << i))
2525 ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
2526 else
2527 ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
2528 }
2529 }
2530
2531 static void
2532 iris_bind_vs_state(struct pipe_context *ctx, void *state)
2533 {
2534 struct iris_context *ice = (struct iris_context *)ctx;
2535 struct iris_uncompiled_shader *new_ish = state;
2536
2537 if (new_ish &&
2538 ice->state.window_space_position !=
2539 new_ish->nir->info.vs.window_space_position) {
2540 ice->state.window_space_position =
2541 new_ish->nir->info.vs.window_space_position;
2542
2543 ice->state.dirty |= IRIS_DIRTY_CLIP |
2544 IRIS_DIRTY_RASTER |
2545 IRIS_DIRTY_CC_VIEWPORT;
2546 }
2547
2548 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2549 }
2550
2551 static void
2552 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2553 {
2554 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2555 }
2556
2557 static void
2558 iris_bind_tes_state(struct pipe_context *ctx, void *state)
2559 {
2560 struct iris_context *ice = (struct iris_context *)ctx;
2561
2562 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2563 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2564 ice->state.dirty |= IRIS_DIRTY_URB;
2565
2566 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2567 }
2568
2569 static void
2570 iris_bind_gs_state(struct pipe_context *ctx, void *state)
2571 {
2572 struct iris_context *ice = (struct iris_context *)ctx;
2573
2574 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2575 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2576 ice->state.dirty |= IRIS_DIRTY_URB;
2577
2578 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2579 }
2580
2581 static void
2582 iris_bind_fs_state(struct pipe_context *ctx, void *state)
2583 {
2584 struct iris_context *ice = (struct iris_context *) ctx;
2585 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2586 const struct gen_device_info *devinfo = &screen->devinfo;
2587 struct iris_uncompiled_shader *old_ish =
2588 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2589 struct iris_uncompiled_shader *new_ish = state;
2590
2591 const unsigned color_bits =
2592 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
2593 BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
2594
2595 /* Fragment shader outputs influence HasWriteableRT */
2596 if (!old_ish || !new_ish ||
2597 (old_ish->nir->info.outputs_written & color_bits) !=
2598 (new_ish->nir->info.outputs_written & color_bits))
2599 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
2600
2601 if (devinfo->gen == 8)
2602 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
2603
2604 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
2605 }
2606
2607 static void
2608 iris_bind_cs_state(struct pipe_context *ctx, void *state)
2609 {
2610 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
2611 }
2612
2613 void
2614 iris_init_program_functions(struct pipe_context *ctx)
2615 {
2616 ctx->create_vs_state = iris_create_vs_state;
2617 ctx->create_tcs_state = iris_create_tcs_state;
2618 ctx->create_tes_state = iris_create_tes_state;
2619 ctx->create_gs_state = iris_create_gs_state;
2620 ctx->create_fs_state = iris_create_fs_state;
2621 ctx->create_compute_state = iris_create_compute_state;
2622
2623 ctx->delete_vs_state = iris_delete_vs_state;
2624 ctx->delete_tcs_state = iris_delete_tcs_state;
2625 ctx->delete_tes_state = iris_delete_tes_state;
2626 ctx->delete_gs_state = iris_delete_gs_state;
2627 ctx->delete_fs_state = iris_delete_fs_state;
2628 ctx->delete_compute_state = iris_delete_cs_state;
2629
2630 ctx->bind_vs_state = iris_bind_vs_state;
2631 ctx->bind_tcs_state = iris_bind_tcs_state;
2632 ctx->bind_tes_state = iris_bind_tes_state;
2633 ctx->bind_gs_state = iris_bind_gs_state;
2634 ctx->bind_fs_state = iris_bind_fs_state;
2635 ctx->bind_compute_state = iris_bind_cs_state;
2636 }
2637