1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "d3d12_nir_passes.h"
25 #include "d3d12_compiler.h"
26 #include "nir_builder.h"
27 #include "nir_builtin_builder.h"
28 #include "nir_deref.h"
29 #include "nir_format_convert.h"
30 #include "program/prog_instruction.h"
31 #include "dxil_nir.h"
32
33 /**
34 * Lower Y Flip:
35 *
36 * We can't do a Y flip simply by negating the viewport height,
37 * so we need to lower the flip into the NIR shader.
38 */
39
40 nir_def *
d3d12_get_state_var(nir_builder * b,enum d3d12_state_var var_enum,const char * var_name,const struct glsl_type * var_type,nir_variable ** out_var)41 d3d12_get_state_var(nir_builder *b,
42 enum d3d12_state_var var_enum,
43 const char *var_name,
44 const struct glsl_type *var_type,
45 nir_variable **out_var)
46 {
47 const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER, var_enum };
48 if (*out_var == NULL) {
49 nir_variable *var = nir_state_variable_create(b->shader, var_type,
50 var_name, tokens);
51 var->data.how_declared = nir_var_hidden;
52 *out_var = var;
53 }
54 return nir_load_var(b, *out_var);
55 }
56
/* Multiply the Y channel of a gl_Position store by the d3d12_FlipY state
 * variable (presumably ±1.0 set by the driver — confirm against the state
 * upload code), leaving X/Z/W untouched. */
static void
lower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
{
   /* Only stores to the VARYING_SLOT_POS shader output are rewritten. */
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (var->data.mode != nir_var_shader_out ||
       var->data.location != VARYING_SLOT_POS)
      return;

   b->cursor = nir_before_instr(&intr->instr);

   /* src[1] of a store_deref is the value being stored. */
   nir_def *pos = intr->src[1].ssa;
   nir_def *flip_y = d3d12_get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
                                         glsl_float_type(), flip);
   nir_def *def = nir_vec4(b,
                           nir_channel(b, pos, 0),
                           nir_fmul(b, nir_channel(b, pos, 1), flip_y),
                           nir_channel(b, pos, 2),
                           nir_channel(b, pos, 3));
   nir_src_rewrite(intr->src + 1, def);
}
84
85 void
d3d12_lower_yflip(nir_shader * nir)86 d3d12_lower_yflip(nir_shader *nir)
87 {
88 nir_variable *flip = NULL;
89
90 if (nir->info.stage != MESA_SHADER_VERTEX &&
91 nir->info.stage != MESA_SHADER_TESS_EVAL &&
92 nir->info.stage != MESA_SHADER_GEOMETRY)
93 return;
94
95 nir_foreach_function_impl(impl, nir) {
96 nir_builder b = nir_builder_create(impl);
97
98 nir_foreach_block(block, impl) {
99 nir_foreach_instr_safe(instr, block) {
100 lower_pos_write(&b, instr, &flip);
101 }
102 }
103
104 nir_metadata_preserve(impl, nir_metadata_block_index |
105 nir_metadata_dominance);
106 }
107 }
108
/* Rewrite loads of gl_FragCoord so that the Z channel is transformed by
 * the two-component d3d12_DepthTransform state variable:
 *    z' = z * transform.x + transform.y
 * Uses of the original load are redirected to the patched vector. */
static void
lower_pos_read(nir_builder *b, struct nir_instr *instr,
               nir_variable **depth_transform_var)
{
   /* Only loads of the VARYING_SLOT_POS fragment input are rewritten. */
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (var->data.mode != nir_var_shader_in ||
       var->data.location != VARYING_SLOT_POS)
      return;

   /* Insert the fixup after the load so we can reuse its result. */
   b->cursor = nir_after_instr(instr);

   nir_def *pos = nir_instr_def(instr);
   nir_def *depth = nir_channel(b, pos, 2);

   assert(depth_transform_var);
   nir_def *depth_transform = d3d12_get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
                                                  "d3d12_DepthTransform",
                                                  glsl_vec_type(2),
                                                  depth_transform_var);
   depth = nir_fmad(b, depth, nir_channel(b, depth_transform, 0),
                    nir_channel(b, depth_transform, 1));

   pos = nir_vector_insert_imm(b, pos, depth, 2);

   /* Rewrite all uses except the fixup chain we just emitted. */
   nir_def_rewrite_uses_after(&intr->def, pos,
                              pos->parent_instr);
}
143
144 void
d3d12_lower_depth_range(nir_shader * nir)145 d3d12_lower_depth_range(nir_shader *nir)
146 {
147 assert(nir->info.stage == MESA_SHADER_FRAGMENT);
148 nir_variable *depth_transform = NULL;
149 nir_foreach_function_impl(impl, nir) {
150 nir_builder b = nir_builder_create(impl);
151
152 nir_foreach_block(block, impl) {
153 nir_foreach_instr_safe(instr, block) {
154 lower_pos_read(&b, instr, &depth_transform);
155 }
156 }
157
158 nir_metadata_preserve(impl, nir_metadata_block_index |
159 nir_metadata_dominance);
160 }
161 }
162
/* Per-pass cache of lazily-created compute state variables. */
struct compute_state_vars {
   nir_variable *num_workgroups; /* d3d12_NumWorkgroups, uvec3 */
};
166
/* Replace compute system-value intrinsics that D3D12 has no direct
 * equivalent for with loads of driver-uploaded state variables.
 * Currently only load_num_workgroups is handled. */
static bool
lower_compute_state_vars(nir_builder *b, nir_instr *instr, void *_state)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   b->cursor = nir_after_instr(instr);
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   struct compute_state_vars *vars = _state;
   nir_def *result = NULL;
   switch (intr->intrinsic) {
   case nir_intrinsic_load_num_workgroups:
      result = d3d12_get_state_var(b, D3D12_STATE_VAR_NUM_WORKGROUPS, "d3d12_NumWorkgroups",
                                   glsl_vec_type(3), &vars->num_workgroups);
      break;
   default:
      return false;
   }

   /* Redirect all uses to the state-var load and drop the intrinsic. */
   nir_def_rewrite_uses(&intr->def, result);
   nir_instr_remove(instr);
   return true;
}
190
191 bool
d3d12_lower_compute_state_vars(nir_shader * nir)192 d3d12_lower_compute_state_vars(nir_shader *nir)
193 {
194 assert(nir->info.stage == MESA_SHADER_COMPUTE);
195 struct compute_state_vars vars = { 0 };
196 return nir_shader_instructions_pass(nir, lower_compute_state_vars,
197 nir_metadata_block_index | nir_metadata_dominance, &vars);
198 }
199
200 static bool
is_color_output(nir_variable * var)201 is_color_output(nir_variable *var)
202 {
203 return (var->data.mode == nir_var_shader_out &&
204 (var->data.location == FRAG_RESULT_COLOR ||
205 var->data.location >= FRAG_RESULT_DATA0));
206 }
207
/* Convert a float color store into the 8-bit normalized integer encoding
 * expected when the render target is really an integer format.  For the
 * signed path, negative snorm results are wrapped into the unsigned
 * 8-bit two's-complement range by adding 2^NUM_BITS. */
static void
lower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
{
   const unsigned NUM_BITS = 8;
   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };

   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!is_color_output(var))
      return;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *col = intr->src[1].ssa;
   nir_def *def = is_signed ? nir_format_float_to_snorm(b, col, bits) :
                              nir_format_float_to_unorm(b, col, bits);
   /* snorm yields values in [-2^(N-1), 2^(N-1)-1]; fold the negative half
    * into [2^(N-1), 2^N) so the stored bits match the signed encoding. */
   if (is_signed)
      def = nir_bcsel(b, nir_ilt_imm(b, def, 0),
                      nir_iadd_imm(b, def, 1ull << NUM_BITS),
                      def);
   nir_src_rewrite(intr->src + 1, def);
}
236
237 void
d3d12_lower_uint_cast(nir_shader * nir,bool is_signed)238 d3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
239 {
240 if (nir->info.stage != MESA_SHADER_FRAGMENT)
241 return;
242
243 nir_foreach_function_impl(impl, nir) {
244 nir_builder b = nir_builder_create(impl);
245
246 nir_foreach_block(block, impl) {
247 nir_foreach_instr_safe(instr, block) {
248 lower_uint_color_write(&b, instr, is_signed);
249 }
250 }
251
252 nir_metadata_preserve(impl, nir_metadata_block_index |
253 nir_metadata_dominance);
254 }
255 }
256
/* Replace draw-parameter system values with channels of the uvec4
 * d3d12_DrawParams state variable:
 *   .x = first_vertex, .y = base_instance, .z = draw_id, .w = is_indexed_draw
 */
static bool
lower_load_draw_params(nir_builder *b, nir_intrinsic_instr *intr,
                       void *draw_params)
{
   if (intr->intrinsic != nir_intrinsic_load_first_vertex &&
       intr->intrinsic != nir_intrinsic_load_base_instance &&
       intr->intrinsic != nir_intrinsic_load_draw_id &&
       intr->intrinsic != nir_intrinsic_load_is_indexed_draw)
      return false;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *load = d3d12_get_state_var(b, D3D12_STATE_VAR_DRAW_PARAMS, "d3d12_DrawParams",
                                       glsl_uvec4_type(), draw_params);
   /* Pick the channel matching the intrinsic being replaced. */
   unsigned channel = intr->intrinsic == nir_intrinsic_load_first_vertex ? 0 :
      intr->intrinsic == nir_intrinsic_load_base_instance ? 1 :
      intr->intrinsic == nir_intrinsic_load_draw_id ? 2 : 3;
   nir_def_rewrite_uses(&intr->def, nir_channel(b, load, channel));
   nir_instr_remove(&intr->instr);

   return true;
}
279
280 bool
d3d12_lower_load_draw_params(struct nir_shader * nir)281 d3d12_lower_load_draw_params(struct nir_shader *nir)
282 {
283 nir_variable *draw_params = NULL;
284 if (nir->info.stage != MESA_SHADER_VERTEX)
285 return false;
286
287 return nir_shader_intrinsics_pass(nir, lower_load_draw_params,
288 nir_metadata_block_index | nir_metadata_dominance,
289 &draw_params);
290 }
291
/* Replace load_patch_vertices_in:
 *  - TCS: the value is dynamic, load it from a state variable.
 *    NOTE(review): the variable name passed here is "d3d12_FirstVertex"
 *    even though the enum is PATCH_VERTICES_IN — looks like a copy-paste
 *    misnomer; confirm whether anything keys off the string.
 *  - TES: the value is the TCS's declared vertices_out, a compile-time
 *    constant, so emit an immediate. */
static bool
lower_load_patch_vertices_in(nir_builder *b, nir_intrinsic_instr *intr,
                             void *_state)
{
   if (intr->intrinsic != nir_intrinsic_load_patch_vertices_in)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *load = b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
      d3d12_get_state_var(b, D3D12_STATE_VAR_PATCH_VERTICES_IN, "d3d12_FirstVertex", glsl_uint_type(), _state) :
      nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
   nir_def_rewrite_uses(&intr->def, load);
   nir_instr_remove(&intr->instr);
   return true;
}
307
308 bool
d3d12_lower_load_patch_vertices_in(struct nir_shader * nir)309 d3d12_lower_load_patch_vertices_in(struct nir_shader *nir)
310 {
311 nir_variable *var = NULL;
312
313 if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
314 nir->info.stage != MESA_SHADER_TESS_EVAL)
315 return false;
316
317 return nir_shader_intrinsics_pass(nir, lower_load_patch_vertices_in,
318 nir_metadata_block_index | nir_metadata_dominance,
319 &var);
320 }
321
/* Walk state for the depth-inversion pass. */
struct invert_depth_state
{
   unsigned viewport_mask;      /* bitmask of viewports that need the flip */
   bool clip_halfz;             /* zero-to-one clip: use 1-z instead of -z */
   nir_def *viewport_index;     /* last value stored to VARYING_SLOT_VIEWPORT, if any */
   nir_instr *store_pos_instr;  /* pending gl_Position store to patch */
};
329
/* Patch the pending gl_Position store so its depth is inverted
 * (z' = -z, or z' = 1 - z when clip_halfz).  If a viewport index was
 * written, the inversion is made conditional on the viewport being in
 * viewport_mask, with a phi to select between patched and original pos. */
static void
invert_depth_impl(nir_builder *b, struct invert_depth_state *state)
{
   assert(state->store_pos_instr);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(state->store_pos_instr);
   if (state->viewport_index) {
      /* Cursor is assigned before calling. Make sure that storing pos comes
       * after computing the viewport.
       */
      nir_instr_move(b->cursor, &intr->instr);
   }

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *pos = intr->src[1].ssa;

   if (state->viewport_index) {
      /* Only invert when bit (1 << viewport_index) is set in the mask. */
      nir_push_if(b, nir_test_mask(b, nir_ishl(b, nir_imm_int(b, 1), state->viewport_index), state->viewport_mask));
   }
   nir_def *old_depth = nir_channel(b, pos, 2);
   nir_def *new_depth = nir_fneg(b, old_depth);
   if (state->clip_halfz)
      new_depth = nir_fadd_imm(b, new_depth, 1.0);
   nir_def *def = nir_vec4(b,
                           nir_channel(b, pos, 0),
                           nir_channel(b, pos, 1),
                           new_depth,
                           nir_channel(b, pos, 3));
   if (state->viewport_index) {
      nir_pop_if(b, NULL);
      def = nir_if_phi(b, def, pos);
   }
   nir_src_rewrite(intr->src + 1, def);

   /* Reset so the walker can accumulate the next vertex's stores. */
   state->viewport_index = NULL;
   state->store_pos_instr = NULL;
}
368
/* Per-instruction walker: remember the latest viewport-index and
 * gl_Position stores, and flush the pending patch at each emit_vertex
 * (GS) so the inversion lands before the vertex is emitted. */
static void
invert_depth_instr(nir_builder *b, struct nir_instr *instr, struct invert_depth_state *state)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_store_deref) {
      nir_variable *var = nir_intrinsic_get_var(intr, 0);
      if (var->data.mode != nir_var_shader_out)
         return;

      if (var->data.location == VARYING_SLOT_VIEWPORT)
         state->viewport_index = intr->src[1].ssa;
      if (var->data.location == VARYING_SLOT_POS)
         state->store_pos_instr = instr;
   } else if (intr->intrinsic == nir_intrinsic_emit_vertex) {
      b->cursor = nir_before_instr(instr);
      invert_depth_impl(b, state);
   }
}
390
391 /* In OpenGL the windows space depth value z_w is evaluated according to "s * z_d + b"
392 * with "s = (far - near) / 2" (depth clip:minus_one_to_one) [OpenGL 3.3, 2.13.1].
393 * When we switch the far and near value to satisfy DirectX requirements we have
394 * to compensate by inverting "z_d' = -z_d" with this lowering pass.
395 * When depth clip is set zero_to_one, we compensate with "z_d' = 1.0f - z_d" instead.
396 */
void
d3d12_nir_invert_depth(nir_shader *shader, unsigned viewport_mask, bool clip_halfz)
{
   /* Only stages that write gl_Position can carry the compensation. */
   if (shader->info.stage != MESA_SHADER_VERTEX &&
       shader->info.stage != MESA_SHADER_TESS_EVAL &&
       shader->info.stage != MESA_SHADER_GEOMETRY)
      return;

   struct invert_depth_state state = { viewport_mask, clip_halfz };
   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            invert_depth_instr(&b, instr, &state);
         }
      }

      /* Non-GS stages never hit emit_vertex, so flush any pending
       * gl_Position patch at the end of the impl. */
      if (state.store_pos_instr) {
         b.cursor = nir_after_block(impl->end_block);
         invert_depth_impl(&b, &state);
      }

      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }
}
424
425
426 /**
427 * Lower State Vars:
428 *
429 * All uniforms related to internal D3D12 variables are
430 * condensed into a UBO that is appended at the end of the
431 * current ones.
432 */
433
/* Return the UBO offset (in 4-byte words / 4) assigned to a state var,
 * allocating a new 4-word slot on first use.
 * NOTE(review): no visible bounds check against the capacity of
 * shader->state_vars[] — verify the array is sized for every
 * d3d12_state_var enum value. */
static unsigned
get_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
{
   /* Already allocated? */
   for (unsigned i = 0; i < shader->num_state_vars; ++i) {
      if (shader->state_vars[i].var == var)
         return shader->state_vars[i].offset;
   }

   unsigned offset = shader->state_vars_size;
   shader->state_vars[shader->num_state_vars].offset = offset;
   shader->state_vars[shader->num_state_vars].var = var;
   shader->state_vars_size += 4; /* Use 4-words slots no matter the variable size */
   shader->num_state_vars++;

   return offset;
}
450
/* Rewrite a load of a STATE_INTERNAL_DRIVER uniform into a load_ubo from
 * the dedicated state-var UBO at the given binding.  Handles both
 * load_uniform (matched by driver_location) and load_deref forms;
 * returns true on progress. */
static bool
lower_instr(nir_intrinsic_instr *instr, nir_builder *b,
            struct d3d12_shader *shader, unsigned binding)
{
   nir_variable *variable = NULL;
   nir_deref_instr *deref = NULL;

   b->cursor = nir_before_instr(&instr->instr);

   if (instr->intrinsic == nir_intrinsic_load_uniform) {
      /* Match the uniform variable by its driver location. */
      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
         if (var->data.driver_location == nir_intrinsic_base(instr)) {
            variable = var;
            break;
         }
      }
   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
      deref = nir_src_as_deref(instr->src[0]);
      variable = nir_intrinsic_get_var(instr, 0);
   }

   /* Only variables tagged as internal driver state are lowered. */
   if (variable == NULL ||
       variable->num_state_slots != 1 ||
       variable->state_slots[0].tokens[0] != STATE_INTERNAL_DRIVER)
      return false;

   enum d3d12_state_var var = variable->state_slots[0].tokens[1];
   nir_def *ubo_idx = nir_imm_int(b, binding);
   /* Offsets are tracked in vec4 slots; convert to bytes (*4 words *4 bytes
    * is folded into the *4 here because offsets are already word counts). */
   nir_def *ubo_offset = nir_imm_int(b, get_state_var_offset(shader, var) * 4);
   nir_def *load =
      nir_load_ubo(b, instr->num_components, instr->def.bit_size,
                   ubo_idx, ubo_offset,
                   .align_mul = 16,
                   .align_offset = 0,
                   .range_base = 0,
                   .range = ~0,
                   );

   nir_def_rewrite_uses(&instr->def, load);

   /* Remove the old load_* instruction and any parent derefs */
   nir_instr_remove(&instr->instr);
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      if (!list_is_empty(&d->def.uses))
         break;

      nir_instr_remove(&d->instr);
   }

   return true;
}
503
/* Collect every STATE_INTERNAL_DRIVER uniform into a single UBO appended
 * after the shader's existing UBOs, rewriting all loads to load_ubo and
 * deleting the original uniform variables.  Returns true on progress. */
bool
d3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
{
   bool progress = false;

   /* The state var UBO is added after all the other UBOs if it already
    * exists it will be replaced by using the same binding.
    * In the event there are no other UBO's, use binding slot 1 to
    * be consistent with other non-default UBO's */
   unsigned binding = MAX2(nir->info.num_ubos, 1);

   /* Reuse the binding of a previously-created state-var UBO, if any. */
   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
         if (var->data.mode == nir_var_mem_ubo) {
            binding = var->data.binding;
         }
      }
   }

   nir_foreach_function_impl(impl, nir) {
      nir_builder builder = nir_builder_create(impl);
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic)
               progress |= lower_instr(nir_instr_as_intrinsic(instr),
                                       &builder,
                                       shader,
                                       binding);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   if (progress) {
      assert(shader->num_state_vars > 0);

      shader->state_vars_used = true;

      /* Remove state variables */
      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
         if (var->num_state_slots == 1 &&
             var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
            exec_node_remove(&var->node);
            nir->num_uniforms--;
         }
      }

      /* Create the backing UBO variable, itself tagged as internal driver
       * state so a later run of this pass can find and reuse it. */
      const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER };
      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
                                                     shader->state_vars_size / 4, 0);
      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
                                              "d3d12_state_vars");
      if (binding >= nir->info.num_ubos)
         nir->info.num_ubos = binding + 1;
      ubo->data.binding = binding;
      ubo->num_state_slots = 1;
      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
      memcpy(ubo->state_slots[0].tokens, tokens,
             sizeof(ubo->state_slots[0].tokens));

      struct glsl_struct_field field = {
          .type = type,
          .name = "data",
          .location = -1,
      };
      ubo->interface_type =
            glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
                                false, "__d3d12_state_vars_interface");
   }

   return progress;
}
579
/* For dual-source blending, create any missing color output (index 0
 * and/or 1 per missing_mask) and store zero to it at the top of the
 * entrypoint, so both blend sources are always written. */
void
d3d12_add_missing_dual_src_target(struct nir_shader *s,
                                  unsigned missing_mask)
{
   assert(missing_mask != 0);
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   b = nir_builder_at(nir_before_impl(impl));

   nir_def *zero = nir_imm_zero(&b, 4, 32);
   for (unsigned i = 0; i < 2; ++i) {

      if (!(missing_mask & (1u << i)))
         continue;

      /* Bit 0 is the primary source, bit 1 the secondary (data.index). */
      const char *name = i == 0 ? "gl_FragData[0]" :
                                  "gl_SecondaryFragDataEXT[0]";
      nir_variable *out = nir_variable_create(s, nir_var_shader_out,
                                              glsl_vec4_type(), name);
      out->data.location = FRAG_RESULT_DATA0;
      out->data.driver_location = i;
      out->data.index = i;

      nir_store_var(&b, out, zero, 0xf);
   }
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}
608
/* Make a geometry shader pass the primitive ID downstream: create a flat
 * VARYING_SLOT_PRIMITIVE_ID output and store load_primitive_id to it just
 * before every emit_vertex. */
void
d3d12_lower_primitive_id(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_def *primitive_id;
   b = nir_builder_create(impl);

   nir_variable *primitive_id_var = nir_variable_create(shader, nir_var_shader_out,
                                                        glsl_uint_type(), "primitive_id");
   primitive_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
   primitive_id_var->data.interpolation = INTERP_MODE_FLAT;

   nir_foreach_block(block, impl) {
      /* Load once per block, then reuse for every emit_vertex in it. */
      b.cursor = nir_before_block(block);
      primitive_id = nir_load_primitive_id(&b);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic ||
             nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_emit_vertex)
            continue;

         b.cursor = nir_before_instr(instr);
         nir_store_var(&b, primitive_id_var, primitive_id, 0x1);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
}
638
639 static void
lower_triangle_strip_store(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * vertex_count_var,nir_variable ** varyings)640 lower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
641 nir_variable *vertex_count_var,
642 nir_variable **varyings)
643 {
644 /**
645 * tmp_varying[slot][min(vertex_count, 2)] = src
646 */
647 nir_def *vertex_count = nir_load_var(b, vertex_count_var);
648 nir_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
649 nir_variable *var = nir_intrinsic_get_var(intr, 0);
650
651 if (var->data.mode != nir_var_shader_out)
652 return;
653
654 nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, varyings[var->data.location]), index);
655 nir_def *value = intr->src[1].ssa;
656 nir_store_deref(b, deref, value, 0xf);
657 nir_instr_remove(&intr->instr);
658 }
659
/* Replace EmitVertex() in a triangle-strip GS: once three vertices have
 * been buffered, emit them as an independent triangle, end the primitive,
 * and slide the window by copying vertex 2 over the slot the next vertex
 * will overwrite. */
static void
lower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
                                 nir_variable *vertex_count_var,
                                 nir_variable **varyings,
                                 nir_variable **out_varyings)
{
   // TODO xfb + flat shading + last_pv
   /**
    * if (vertex_count >= 2) {
    *    for (i = 0; i < 3; i++) {
    *       foreach(slot)
    *          out[slot] = tmp_varying[slot][i];
    *       EmitVertex();
    *    }
    *    EndPrimitive();
    *    foreach(slot)
    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
    * }
    * vertex_count++;
    */

   nir_def *two = nir_imm_int(b, 2);
   nir_def *vertex_count = nir_load_var(b, vertex_count_var);
   nir_def *count_cmp = nir_uge(b, vertex_count, two);
   nir_if *count_check = nir_push_if(b, count_cmp);

   /* Emit the three buffered vertices as one triangle. */
   for (int j = 0; j < 3; ++j) {
      for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
         if (!varyings[i])
            continue;
         nir_copy_deref(b, nir_build_deref_var(b, out_varyings[i]),
                        nir_build_deref_array_imm(b, nir_build_deref_var(b, varyings[i]), j));
      }
      nir_emit_vertex(b, 0);
   }

   /* Keep the last vertex: vertex_count % 2 alternates which of the two
    * retained slots gets replaced, preserving strip winding. */
   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
      if (!varyings[i])
         continue;
      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), nir_umod(b, vertex_count, two)),
                     nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), two));
   }

   nir_end_primitive(b, .stream_id = 0);

   nir_pop_if(b, count_check);

   vertex_count = nir_iadd_imm(b, vertex_count, 1);
   nir_store_var(b, vertex_count_var, vertex_count, 0x1);

   nir_instr_remove(&intr->instr);
}
712
713 static void
lower_triangle_strip_end_primitive(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * vertex_count_var)714 lower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
715 nir_variable *vertex_count_var)
716 {
717 /**
718 * vertex_count = 0;
719 */
720 nir_store_var(b, vertex_count_var, nir_imm_int(b, 0), 0x1);
721 nir_instr_remove(&intr->instr);
722 }
723
/* Convert a triangle-strip-output GS into a triangle-list one: outputs
 * are buffered in 3-element local arrays and each EmitVertex past the
 * second flushes a full triangle.  vertices_out grows accordingly
 * ((n - 2) strip triangles * 3 vertices each). */
void
d3d12_lower_triangle_strip(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_variable *tmp_vars[VARYING_SLOT_MAX] = {0};
   nir_variable *out_vars[VARYING_SLOT_MAX] = {0};
   b = nir_builder_create(impl);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;

   nir_variable *vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");

   /* Mirror every output with a tmp_var[3] staging array. */
   nir_block *first = nir_start_block(impl);
   b.cursor = nir_before_block(first);
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
      tmp_vars[var->data.location] = nir_local_variable_create(impl, type, "tmp_var");
      out_vars[var->data.location] = var;
   }
   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_deref:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
            break;
         case nir_intrinsic_emit_vertex_with_counter:
         case nir_intrinsic_emit_vertex:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var,
                                             tmp_vars, out_vars);
            break;
         case nir_intrinsic_end_primitive:
         case nir_intrinsic_end_primitive_with_counter:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
            break;
         default:
            break;
         }
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
   /* The copy_derefs emitted above need lowering to loads/stores. */
   NIR_PASS_V(shader, nir_lower_var_copies);
}
778
779 static bool
is_multisampling_instr(const nir_instr * instr,const void * _data)780 is_multisampling_instr(const nir_instr *instr, const void *_data)
781 {
782 if (instr->type != nir_instr_type_intrinsic)
783 return false;
784 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
785 if (intr->intrinsic == nir_intrinsic_store_output) {
786 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
787 return semantics.location == FRAG_RESULT_SAMPLE_MASK;
788 } else if (intr->intrinsic == nir_intrinsic_store_deref) {
789 nir_variable *var = nir_intrinsic_get_var(intr, 0);
790 return var->data.location == FRAG_RESULT_SAMPLE_MASK;
791 } else if (intr->intrinsic == nir_intrinsic_load_sample_id ||
792 intr->intrinsic == nir_intrinsic_load_sample_mask_in)
793 return true;
794 return false;
795 }
796
/* Rewrite multisampling instructions for single-sample rendering:
 * sample-mask stores are deleted, sample_id becomes 0, and
 * sample_mask_in is squashed to 0/1 coverage. */
static nir_def *
lower_multisampling_instr(nir_builder *b, nir_instr *instr, void *_data)
{
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_deref:
      /* Drop the store entirely. */
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   case nir_intrinsic_load_sample_id:
      return nir_imm_int(b, 0);
   case nir_intrinsic_load_sample_mask_in:
      /* Normalize the incoming mask to 0 or 1. */
      return nir_b2i32(b, nir_ine_imm(b, &intr->def, 0));
   default:
      unreachable("Invalid intrinsic");
   }
}
813
/* Strip all multisampling state from a fragment shader: lower the
 * related intrinsics, delete sample-mask outputs and sample-id/mask
 * system values, and clear per-sample shading info bits. */
bool
d3d12_disable_multisampling(nir_shader *s)
{
   if (s->info.stage != MESA_SHADER_FRAGMENT)
      return false;
   bool progress = nir_shader_lower_instructions(s, is_multisampling_instr, lower_multisampling_instr, NULL);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
      if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
         exec_node_remove(&var->node);
         s->info.outputs_written &= ~(1ull << FRAG_RESULT_SAMPLE_MASK);
         progress = true;
      }
   }
   nir_foreach_variable_with_modes_safe(var, s, nir_var_system_value) {
      if (var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN ||
          var->data.location == SYSTEM_VALUE_SAMPLE_ID) {
         exec_node_remove(&var->node);
         progress = true;
      }
      /* Clear the per-sample qualifier on every remaining sysval. */
      var->data.sample = false;
   }
   BITSET_CLEAR(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   s->info.fs.uses_sample_qualifier = false;
   s->info.fs.uses_sample_shading = false;
   return progress;
}
841
/* One piece of a varying that was split (e.g. by GS stream). */
struct var_split_subvar_state {
   nir_variable *var;      /* the variable backing this piece */
   uint8_t stream;         /* GS stream the piece belongs to */
   uint8_t num_components; /* channel count of the piece */
};
/* Split plan for a single varying slot (up to one subvar per channel). */
struct var_split_var_state {
   unsigned num_subvars;
   struct var_split_subvar_state subvars[4];
};
/* Whole-shader split plan: [0] = outputs, [1] = inputs, per location. */
struct var_split_state {
   struct var_split_var_state vars[2][VARYING_SLOT_MAX];
};
854
/* Rewrite a load/store of a varying that was split into subvars:
 * rebuild the deref chain once per subvar and either store the matching
 * channel group or load the pieces and re-concatenate them. */
static bool
split_varying_accesses(nir_builder *b, nir_intrinsic_instr *intr,
                       void *_state)
{
   if (intr->intrinsic != nir_intrinsic_store_deref &&
       intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_shader_out) &&
       !nir_deref_mode_is(deref, nir_var_shader_in))
      return false;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (!var)
      return false;

   uint32_t mode_index = deref->modes == nir_var_shader_out ? 0 : 1;

   struct var_split_state *state = _state;
   struct var_split_var_state *var_state = &state->vars[mode_index][var->data.location];
   /* Nothing to do for unsplit varyings. */
   if (var_state->num_subvars <= 1)
      return false;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, b->shader);
   assert(path.path[0]->deref_type == nir_deref_type_var && path.path[0]->var == var);

   unsigned first_channel = 0;
   nir_def *loads[2];
   for (unsigned subvar = 0; subvar < var_state->num_subvars; ++subvar) {
      /* Clone the original deref chain, rooted at the subvar. */
      b->cursor = nir_after_instr(&path.path[0]->instr);
      nir_deref_instr *new_path = nir_build_deref_var(b, var_state->subvars[subvar].var);

      for (unsigned i = 1; path.path[i]; ++i) {
         b->cursor = nir_after_instr(&path.path[i]->instr);
         new_path = nir_build_deref_follower(b, new_path, path.path[i]);
      }

      b->cursor = nir_before_instr(&intr->instr);
      if (intr->intrinsic == nir_intrinsic_store_deref) {
         unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1;
         unsigned orig_write_mask = nir_intrinsic_write_mask(intr);
         nir_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel);

         first_channel += var_state->subvars[subvar].num_components;

         /* NOTE(review): first_channel is advanced *before* this shift, so
          * the write mask is taken from the channels following this subvar
          * rather than the ones just extracted — verify against upstream. */
         unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels;
         nir_build_store_deref(b, &new_path->def, sub_value, new_write_mask, nir_intrinsic_access(intr));
      } else {
         /* The load path only handles splitting dvec3/dvec4 */
         assert(subvar == 0 || subvar == 1);
         assert(intr->def.num_components >= 3);
         loads[subvar] = nir_build_load_deref(b, var_state->subvars[subvar].num_components, intr->def.bit_size, &new_path->def);
      }
   }

   nir_deref_path_finish(&path);
   if (intr->intrinsic == nir_intrinsic_load_deref) {
      /* Stitch the two partial loads back into the original vector. */
      nir_def *result = nir_extract_bits(b, loads, 2, 0, intr->def.num_components, intr->def.bit_size);
      nir_def_rewrite_uses(&intr->def, result);
   }
   nir_instr_free_and_dce(&intr->instr);
   return true;
}
920
bool
d3d12_split_needed_varyings(nir_shader *s)
{
   /* Per-(mode, location) bookkeeping describing how each varying is carved
    * into "subvars" (contiguous component runs that must become separate
    * nir_variables). Indexed [0] = outputs, [1] = inputs (see mode_index). */
   struct var_split_state state;
   memset(&state, 0, sizeof(state));

   bool progress = false;
   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out | nir_var_shader_in) {
      uint32_t mode_index = var->data.mode == nir_var_shader_out ? 0 : 1;
      struct var_split_var_state *var_state = &state.vars[mode_index][var->data.location];
      struct var_split_subvar_state *subvars = var_state->subvars;
      if ((var->data.stream & NIR_STREAM_PACKED) != 0 &&
          s->info.stage == MESA_SHADER_GEOMETRY &&
          var->data.mode == nir_var_shader_out) {
         /* GS output whose per-component stream assignments are packed into
          * data.stream (2 bits per component). Walk the components in order
          * and coalesce consecutive components on the same stream into one
          * subvar; a stream change starts a new subvar. */
         for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) {
            unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3;
            if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) {
               subvars[var_state->num_subvars].stream = stream;
               subvars[var_state->num_subvars].num_components = 1;
               var_state->num_subvars++;
            } else {
               subvars[var_state->num_subvars - 1].num_components++;
            }
         }

         /* The original variable keeps the first run's stream, now unpacked. */
         var->data.stream = subvars[0].stream;
         if (var_state->num_subvars == 1)
            continue;

         progress = true;

         /* Shrink the original variable to cover only the first subvar; the
          * remaining runs each get a freshly created output variable placed
          * at successive location_fracs within the same location. */
         subvars[0].var = var;
         var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components);
         unsigned location_frac = var->data.location_frac + subvars[0].num_components;
         for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) {
            char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream);
            /* Note: var->type was just reassigned above, but its base type is
             * unchanged, so glsl_get_base_type(var->type) is still correct. */
            nir_variable *new_var = nir_variable_create(s, nir_var_shader_out,
                                                        glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components),
                                                        name);

            /* Inherit all data fields, then override what differs. */
            new_var->data = var->data;
            new_var->data.stream = subvars[subvar].stream;
            new_var->data.location_frac = location_frac;
            location_frac += subvars[subvar].num_components;
            subvars[subvar].var = new_var;
         }
      } else if (glsl_type_is_64bit(glsl_without_array(var->type)) &&
                 glsl_get_components(glsl_without_array(var->type)) >= 3) {
         /* dvec3/dvec4 varyings do not fit in a single 128-bit register row:
          * split into a dvec2 (original var) plus the remaining 1-2 doubles
          * in a clone that occupies the next location. */
         progress = true;
         assert(var->data.location_frac == 0);
         uint32_t components = glsl_get_components(glsl_without_array(var->type));
         var_state->num_subvars = 2;
         subvars[0].var = var;
         subvars[0].num_components = 2;
         subvars[0].stream = var->data.stream;
         const struct glsl_type *base_type = glsl_without_array(var->type);
         /* Keep any array wrapping of the original type, but replace the
          * innermost vector with a 2-component one. */
         var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), 2), var->type);

         subvars[1].var = nir_variable_clone(var, s);
         subvars[1].num_components = components - 2;
         subvars[1].stream = var->data.stream;
         /* Keep the clone adjacent to the original in the variable list. */
         exec_node_insert_after(&var->node, &subvars[1].var->node);
         /* var->type is already the rewritten (array-of-)vec2 here; it is only
          * used as the array-shape template, so the wrapping is still right. */
         subvars[1].var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), components - 2), var->type);
         subvars[1].var->data.location++;
         subvars[1].var->data.driver_location++;
      }
   }

   if (progress) {
      /* Rewrite every load/store deref so it targets the new subvars. */
      nir_shader_intrinsics_pass(s, split_varying_accesses,
                                 nir_metadata_block_index | nir_metadata_dominance,
                                 &state);
   } else {
      nir_shader_preserve_all_metadata(s);
   }

   return progress;
}
999
1000 static void
write_0(nir_builder * b,nir_deref_instr * deref)1001 write_0(nir_builder *b, nir_deref_instr *deref)
1002 {
1003 if (glsl_type_is_array_or_matrix(deref->type)) {
1004 for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
1005 write_0(b, nir_build_deref_array_imm(b, deref, i));
1006 } else if (glsl_type_is_struct(deref->type)) {
1007 for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
1008 write_0(b, nir_build_deref_struct(b, deref, i));
1009 } else {
1010 nir_def *scalar = nir_imm_intN_t(b, 0, glsl_get_bit_size(deref->type));
1011 nir_def *scalar_arr[NIR_MAX_VEC_COMPONENTS];
1012 unsigned num_comps = glsl_get_components(deref->type);
1013 unsigned writemask = (1 << num_comps) - 1;
1014 for (unsigned i = 0; i < num_comps; ++i)
1015 scalar_arr[i] = scalar;
1016 nir_def *zero_val = nir_vec(b, scalar_arr, num_comps);
1017 nir_store_deref(b, deref, zero_val, writemask);
1018 }
1019 }
1020
1021 void
d3d12_write_0_to_new_varying(nir_shader * s,nir_variable * var)1022 d3d12_write_0_to_new_varying(nir_shader *s, nir_variable *var)
1023 {
1024 /* Skip per-vertex HS outputs */
1025 if (s->info.stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
1026 return;
1027
1028 nir_foreach_function_impl(impl, s) {
1029 nir_builder b = nir_builder_create(impl);
1030
1031 nir_foreach_block(block, impl) {
1032 b.cursor = nir_before_block(block);
1033 if (s->info.stage != MESA_SHADER_GEOMETRY) {
1034 write_0(&b, nir_build_deref_var(&b, var));
1035 break;
1036 }
1037
1038 nir_foreach_instr_safe(instr, block) {
1039 if (instr->type != nir_instr_type_intrinsic)
1040 continue;
1041 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1042 if (intr->intrinsic != nir_intrinsic_emit_vertex)
1043 continue;
1044
1045 b.cursor = nir_before_instr(instr);
1046 write_0(&b, nir_build_deref_var(&b, var));
1047 }
1048 }
1049
1050 nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
1051 }
1052 }
1053