• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_nir.h"
25 
26 #include "spirv_to_dxil.h"
27 #include "nir_to_dxil.h"
28 #include "nir_builder.h"
29 #include "nir_builtin_builder.h"
30 #include "dxil_nir.h"
31 #include "vk_nir_convert_ycbcr.h"
32 
33 static nir_def *
dzn_nir_create_bo_desc(nir_builder * b,nir_variable_mode mode,uint32_t desc_set,uint32_t binding,const char * name,unsigned access)34 dzn_nir_create_bo_desc(nir_builder *b,
35                        nir_variable_mode mode,
36                        uint32_t desc_set,
37                        uint32_t binding,
38                        const char *name,
39                        unsigned access)
40 {
41    struct glsl_struct_field field = {
42       .type = mode == nir_var_mem_ubo ?
43               glsl_array_type(glsl_uint_type(), 4096, 4) :
44               glsl_uint_type(),
45       .name = "dummy_int",
46    };
47    const struct glsl_type *dummy_type =
48       glsl_struct_type(&field, 1, "dummy_type", false);
49 
50    nir_variable *var =
51       nir_variable_create(b->shader, mode, dummy_type, name);
52    var->data.descriptor_set = desc_set;
53    var->data.binding = binding;
54    var->data.access = access;
55 
56    assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
57    if (mode == nir_var_mem_ubo)
58       b->shader->info.num_ubos++;
59    else
60       b->shader->info.num_ssbos++;
61 
62    VkDescriptorType desc_type =
63       var->data.mode == nir_var_mem_ubo ?
64       VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
65       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
66    nir_address_format addr_format = nir_address_format_32bit_index_offset;
67    nir_def *index =
68       nir_vulkan_resource_index(b,
69                                 nir_address_format_num_components(addr_format),
70                                 nir_address_format_bit_size(addr_format),
71                                 nir_imm_int(b, 0),
72                                 .desc_set = desc_set,
73                                 .binding = binding,
74                                 .desc_type = desc_type);
75 
76    nir_def *desc =
77       nir_load_vulkan_descriptor(b,
78                                  nir_address_format_num_components(addr_format),
79                                  nir_address_format_bit_size(addr_format),
80                                  index,
81                                  .desc_type = desc_type);
82 
83    return nir_channel(b, desc, 0);
84 }
85 
86 nir_shader *
dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)87 dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
88 {
89    const char *type_str[] = {
90       "draw",
91       "draw_count",
92       "indexed_draw",
93       "indexed_draw_count",
94       "draw_triangle_fan",
95       "draw_count_triangle_fan",
96       "indexed_draw_triangle_fan",
97       "indexed_draw_count_triangle_fan",
98       "indexed_draw_triangle_fan_prim_restart",
99       "indexed_draw_count_triangle_fan_prim_restart",
100    };
101 
102    assert(type < ARRAY_SIZE(type_str));
103 
104    bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
105                   type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
106                   type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
107                   type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
108                   type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
109                   type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
110    bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
111                        type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
112                        type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
113                        type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
114                        type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
115                        type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
116    bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
117                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
118                          type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
119                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
120                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
121    bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
122                        type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
123    nir_builder b =
124       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
125                                      dxil_get_base_nir_compiler_options(),
126                                      "dzn_meta_indirect_%s()",
127                                      type_str[type]);
128    b.shader->info.internal = true;
129 
130    nir_def *params_desc =
131       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
132    nir_def *draw_buf_desc =
133       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
134    nir_def *exec_buf_desc =
135       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
136 
137    unsigned params_size;
138    if (triangle_fan)
139       params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
140    else
141       params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
142 
143    nir_def *params =
144       nir_load_ubo(&b, params_size / 4, 32,
145                    params_desc, nir_imm_int(&b, 0),
146                    .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
147 
148    nir_def *draw_stride = nir_channel(&b, params, 0);
149    nir_def *exec_stride =
150       triangle_fan ?
151       nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_draw_exec_params)) :
152       nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params));
153    nir_def *index =
154       nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
155 
156    if (indirect_count) {
157       nir_def *count_buf_desc =
158          dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE);
159 
160       nir_def *draw_count =
161          nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4);
162 
163       nir_push_if(&b, nir_ieq_imm(&b, index, 0));
164       nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0),
165                     .write_mask = 0x1, .access = ACCESS_NON_READABLE,
166                     .align_mul = 16);
167       nir_pop_if(&b, NULL);
168 
169       nir_push_if(&b, nir_ult(&b, index, draw_count));
170    }
171 
172    nir_def *draw_offset = nir_imul(&b, draw_stride, index);
173 
174    /* The first entry contains the indirect count */
175    nir_def *exec_offset =
176       indirect_count ?
177       nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) :
178       nir_imul(&b, exec_stride, index);
179 
180    nir_def *draw_info1 =
181       nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
182    nir_def *draw_info2 =
183       indexed ?
184       nir_load_ssbo(&b, 1, 32, draw_buf_desc,
185                     nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
186       nir_imm_int(&b, 0);
187 
188    nir_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2);
189    nir_def *base_instance =
190       indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);
191 
192    nir_def *exec_vals[8] = {
193       first_vertex,
194       base_instance,
195       index,
196    };
197 
198    if (triangle_fan) {
199       /* Patch {vertex,index}_count and first_index */
200       nir_def *triangle_count =
201          nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
202       exec_vals[3] = nir_imul_imm(&b, triangle_count, 3);
203       exec_vals[4] = nir_channel(&b, draw_info1, 1);
204       exec_vals[5] = nir_imm_int(&b, 0);
205       exec_vals[6] = first_vertex;
206       exec_vals[7] = base_instance;
207 
208       nir_def *triangle_fan_exec_buf_desc =
209          dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4,
210                                 "triangle_fan_exec_buf",
211                                 ACCESS_NON_READABLE);
212       nir_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
213       nir_def *triangle_fan_index_buf_addr_lo =
214          nir_iadd(&b, nir_channel(&b, params, 2),
215                   nir_imul(&b, triangle_fan_index_buf_stride, index));
216 
217       nir_def *triangle_fan_exec_vals[9] = { 0 };
218       uint32_t triangle_fan_exec_param_count = 0;
219       nir_def *addr_lo_overflow =
220          nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
221       nir_def *triangle_fan_index_buf_addr_hi =
222          nir_iadd(&b, nir_channel(&b, params, 3),
223                   nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
224 
225       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
226       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;
227 
228       if (prim_restart) {
229          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
230          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
231          uint32_t index_count_offset =
232             offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count);
233          nir_def *exec_buf_start =
234             nir_load_ubo(&b, 2, 32,
235                          params_desc, nir_imm_int(&b, 16),
236                          .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
237          nir_def *exec_buf_start_lo =
238             nir_iadd(&b, nir_imm_int(&b, index_count_offset),
239                      nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
240                               nir_imul(&b, exec_stride, index)));
241          addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
242          nir_def *exec_buf_start_hi =
243             nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
244                      nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
245          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
246          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
247          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
248       } else {
249          triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
250             indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
251          triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
252             triangle_count;
253       }
254       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
255       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
256 
257       unsigned rewrite_index_exec_params =
258          prim_restart ?
259          sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
260          sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
261       nir_def *triangle_fan_exec_stride =
262          nir_imm_int(&b, rewrite_index_exec_params);
263       nir_def *triangle_fan_exec_offset =
264          nir_imul(&b, triangle_fan_exec_stride, index);
265 
266       for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
267          unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
268          uint32_t mask = (1 << comps) - 1;
269 
270          nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
271                         triangle_fan_exec_buf_desc,
272                         nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
273                         .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
274       }
275 
276       nir_def *ibview_vals[] = {
277          triangle_fan_index_buf_addr_lo,
278          triangle_fan_index_buf_addr_hi,
279          triangle_fan_index_buf_stride,
280          nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
281       };
282 
283       nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
284                      exec_buf_desc, exec_offset,
285                      .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
286       exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
287    } else {
288       exec_vals[3] = nir_channel(&b, draw_info1, 0);
289       exec_vals[4] = nir_channel(&b, draw_info1, 1);
290       exec_vals[5] = nir_channel(&b, draw_info1, 2);
291       exec_vals[6] = nir_channel(&b, draw_info1, 3);
292       exec_vals[7] = draw_info2;
293    }
294 
295    nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4),
296                   exec_buf_desc, exec_offset,
297                   .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
298    nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 4),
299                   exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
300                   .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
301 
302    if (indirect_count)
303       nir_pop_if(&b, NULL);
304 
305    return b.shader;
306 }
307 
/*
 * Builds a compute shader that converts a triangle-fan index buffer with
 * primitive-restart entries into a plain triangle-list index buffer
 * (32-bit indices), and writes the resulting index count to a separate
 * buffer so the subsequent indirect draw knows how many indices to use.
 * old_index_size is the source index size in bytes (2 or 4).
 */
nir_shader *
dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_base_nir_compiler_options(),
                                     "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   /* set=0: binding 0 = rewrite params (UBO), binding 1 = output index
    * buffer, binding 2 = input index buffer, binding 3 = output index
    * count (all SSBOs).
    */
   nir_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);
   nir_def *old_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                             "old_index_buf", ACCESS_NON_WRITEABLE);
   nir_def *new_index_count_ptr_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
                             "new_index_count_ptr", ACCESS_NON_READABLE);

   nir_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   /* Loop state lives in local variables: read cursor into the old buffer,
    * write cursor into the new buffer, and the fan's hub vertex (index0),
    * which is "unset" while it holds the restart value.
    */
   nir_def *prim_restart_val =
      nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
   nir_variable *old_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
   nir_def *old_index_ptr = nir_channel(&b, params, 0);
   nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
   nir_variable *new_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
   nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
   nir_def *old_index_count = nir_channel(&b, params, 1);
   nir_variable *index0_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
   nir_store_var(&b, index0_var, prim_restart_val, 1);

   /*
    * Filter out all primitive-restart magic values, and generate a triangle list
    * from the triangle fan definition.
    *
    * Basically:
    *
    * new_index_ptr = 0;
    * index0 = restart_prim_value; // 0xffff or 0xffffffff
    * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
    *    // If we have no starting-point we need at least 3 vertices,
    *    // otherwise we can do with two. If there's not enough vertices
    *    // to form a primitive, we just bail out.
    *    min_indices = index0 == restart_prim_value ? 3 : 2;
    *    if (old_index_ptr + min_indices > firstIndex + indexCount)
    *       break;
    *
    *    if (index0 == restart_prim_value) {
    *       // No starting point, skip all entries until we have a
    *       // non-primitive-restart value
    *       index0 = old_index_buf[old_index_ptr++];
    *       continue;
    *    }
    *
    *    // If at least one index contains the primitive-restart pattern,
    *    // ignore this triangle, and skip the unused entries
    *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
    *       old_index_ptr += 2;
    *       continue;
    *    }
    *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
    *       old_index_ptr++;
    *       continue;
    *    }
    *
    *    // We have a valid primitive, queue it to the new index buffer
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
    *    new_index_buf[new_index_ptr++] = index0;
    * }
    *
    * expressed in NIR, which admitedly is not super easy to grasp with.
    * TODO: Might be a good thing to use use the CL compiler we have and turn
    * those shaders into CL kernels.
    */
   nir_push_loop(&b);

   old_index_ptr = nir_load_var(&b, old_index_ptr_var);
   nir_def *index0 = nir_load_var(&b, index0_var);

   /* Need 3 indices when the hub vertex is unset, 2 otherwise. */
   nir_def *read_index_count =
      nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
                nir_imm_int(&b, 3), nir_imm_int(&b, 2));
   nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, NULL);

   nir_def *old_index_offset =
      nir_imul_imm(&b, old_index_ptr, old_index_size);

   /* No hub vertex yet: consume one index and retry. 16-bit indices are
    * read as dword-aligned 32-bit loads, then the right half is selected
    * based on bit 1 of the byte offset.
    */
   nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
   nir_def *index_val =
      nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
     index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                           nir_ushr_imm(&b, index_val, 16),
                           nir_iand_imm(&b, index_val, 0xffff));
   }

   nir_store_var(&b, index0_var, index_val, 1);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   nir_jump(&b, nir_jump_continue);
   nir_pop_if(&b, NULL);

   /* Fetch the next two fan indices (the triangle's outer edge). For
    * 16-bit sources, load enough dwords to cover both and unpack.
    */
   nir_def *index12 =
      nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
      nir_def *indices[] = {
         nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
         nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
         nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
      };

      index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                          nir_vec2(&b, indices[1], indices[2]),
                          nir_vec2(&b, indices[0], indices[1]));
   }

   /* Restart in the second slot: skip both entries and reset the hub. */
   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   /* Restart in the first slot: skip it and reset the hub. */
   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   /* Valid triangle: emit (edge0, edge1, hub) as 32-bit indices. */
   nir_def *new_indices =
      nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
   nir_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
   nir_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
   nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);
   nir_pop_loop(&b, NULL);

   /* Publish the number of indices actually emitted. */
   nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
                  new_index_count_ptr_desc, nir_imm_int(&b, 0),
                  .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}
470 
471 nir_shader *
dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)472 dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
473 {
474    assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);
475 
476    nir_builder b =
477       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
478                                      dxil_get_base_nir_compiler_options(),
479                                      "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
480                                      old_index_size);
481    b.shader->info.internal = true;
482 
483    nir_def *params_desc =
484       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
485    nir_def *new_index_buf_desc =
486       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
487                              "new_index_buf", ACCESS_NON_READABLE);
488 
489    nir_def *old_index_buf_desc = NULL;
490    if (old_index_size > 0) {
491       old_index_buf_desc =
492          dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
493                                 "old_index_buf", ACCESS_NON_WRITEABLE);
494    }
495 
496    nir_def *params =
497       nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
498                    params_desc, nir_imm_int(&b, 0),
499                    .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
500 
501    nir_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
502    nir_def *new_indices;
503 
504    if (old_index_size > 0) {
505       nir_def *old_first_index = nir_channel(&b, params, 0);
506       nir_def *old_index0_offset =
507          nir_imul_imm(&b, old_first_index, old_index_size);
508       nir_def *old_index1_offset =
509          nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
510                       old_index_size);
511 
512       nir_def *old_index0 =
513          nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
514                        old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
515                        .align_mul = 4);
516 
517       if (old_index_size == 2) {
518         old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2),
519                                nir_ushr_imm(&b, old_index0, 16),
520                                nir_iand_imm(&b, old_index0, 0xffff));
521       }
522 
523       nir_def *old_index12 =
524          nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
525                        old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
526                        .align_mul = 4);
527       if (old_index_size == 2) {
528          nir_def *indices[] = {
529             nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
530             nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
531             nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
532          };
533 
534          old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2),
535                                  nir_vec2(&b, indices[1], indices[2]),
536                                  nir_vec2(&b, indices[0], indices[1]));
537       }
538 
539       /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
540       new_indices =
541          nir_vec3(&b, nir_channel(&b, old_index12, 0),
542                   nir_channel(&b, old_index12, 1), old_index0);
543    } else {
544       new_indices =
545          nir_vec3(&b,
546                   nir_iadd_imm(&b, triangle, 1),
547                   nir_iadd_imm(&b, triangle, 2),
548                   nir_imm_int(&b, 0));
549    }
550 
551    nir_def *new_index_offset =
552       nir_imul_imm(&b, triangle, 4 * 3);
553 
554    nir_store_ssbo(&b, new_indices, new_index_buf_desc,
555                   new_index_offset,
556                   .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
557 
558    return b.shader;
559 }
560 
561 nir_shader *
dzn_nir_blit_vs(void)562 dzn_nir_blit_vs(void)
563 {
564    nir_builder b =
565       nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
566                                      dxil_get_base_nir_compiler_options(),
567                                      "dzn_meta_blit_vs()");
568    b.shader->info.internal = true;
569 
570    nir_def *params_desc =
571       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
572 
573    nir_variable *out_pos =
574       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
575                           "gl_Position");
576    out_pos->data.location = VARYING_SLOT_POS;
577    out_pos->data.driver_location = 0;
578 
579    nir_variable *out_coords =
580       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
581                           "coords");
582    out_coords->data.location = VARYING_SLOT_TEX0;
583    out_coords->data.driver_location = 1;
584 
585    nir_def *vertex = nir_load_vertex_id(&b);
586    nir_def *coords_arr[4] = {
587       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 0),
588                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
589       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 16),
590                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
591       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 32),
592                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
593       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 48),
594                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
595    };
596    nir_def *coords =
597       nir_bcsel(&b, nir_ieq_imm(&b, vertex, 0), coords_arr[0],
598                 nir_bcsel(&b, nir_ieq_imm(&b, vertex, 1), coords_arr[1],
599                           nir_bcsel(&b, nir_ieq_imm(&b, vertex, 2), coords_arr[2], coords_arr[3])));
600    nir_def *pos =
601       nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
602                nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
603    nir_def *z_coord =
604       nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
605                    .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
606    coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);
607 
608    nir_store_var(&b, out_pos, pos, 0xf);
609    nir_store_var(&b, out_coords, coords, 0x7);
610    return b.shader;
611 }
612 
613 nir_shader *
dzn_nir_blit_fs(const struct dzn_nir_blit_info * info)614 dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
615 {
616    bool ms = info->src_samples > 1;
617    nir_alu_type nir_out_type =
618       nir_get_nir_type_for_glsl_base_type(info->out_type);
619    uint32_t coord_comps =
620       glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
621       info->src_is_array;
622 
623    nir_builder b =
624       nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
625                                      dxil_get_base_nir_compiler_options(),
626                                      "dzn_meta_blit_fs()");
627    b.shader->info.internal = true;
628 
629    const struct glsl_type *tex_type =
630       glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
631    nir_variable *tex_var =
632       nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
633    nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);
634 
635    nir_variable *pos_var =
636       nir_variable_create(b.shader, nir_var_shader_in,
637                           glsl_vector_type(GLSL_TYPE_FLOAT, 4),
638                           "gl_FragCoord");
639    pos_var->data.location = VARYING_SLOT_POS;
640    pos_var->data.driver_location = 0;
641 
642    nir_variable *coord_var =
643       nir_variable_create(b.shader, nir_var_shader_in,
644                           glsl_vector_type(GLSL_TYPE_FLOAT, 3),
645                           "coord");
646    coord_var->data.location = VARYING_SLOT_TEX0;
647    coord_var->data.driver_location = 1;
648    nir_def *coord =
649       nir_trim_vector(&b, nir_load_var(&b, coord_var), coord_comps);
650 
651    uint32_t out_comps =
652       (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
653    nir_variable *out = NULL;
654    if (!info->stencil_fallback) {
655       out = nir_variable_create(b.shader, nir_var_shader_out,
656                                 glsl_vector_type(info->out_type, out_comps),
657                                 "out");
658       out->data.location = info->loc;
659    }
660 
661    nir_def *res = NULL;
662 
663    if (info->resolve_mode != dzn_blit_resolve_none) {
664       enum dzn_blit_resolve_mode resolve_mode = info->resolve_mode;
665 
666       nir_op resolve_op = nir_op_mov;
667       switch (resolve_mode) {
668       case dzn_blit_resolve_average:
669          /* When resolving a float type, we need to calculate the average of all
670           * samples. For integer resolve, Vulkan says that one sample should be
671           * chosen without telling which. Let's just pick the first one in that
672           * case.
673           */
674          if (info->out_type == GLSL_TYPE_FLOAT)
675             resolve_op = nir_op_fadd;
676          else
677             resolve_mode = dzn_blit_resolve_sample_zero;
678          break;
679       case dzn_blit_resolve_min:
680          switch (info->out_type) {
681          case GLSL_TYPE_FLOAT: resolve_op = nir_op_fmin; break;
682          case GLSL_TYPE_INT: resolve_op = nir_op_imin; break;
683          case GLSL_TYPE_UINT: resolve_op = nir_op_umin; break;
684          }
685          break;
686       case dzn_blit_resolve_max:
687          switch (info->out_type) {
688          case GLSL_TYPE_FLOAT: resolve_op = nir_op_fmax; break;
689          case GLSL_TYPE_INT: resolve_op = nir_op_imax; break;
690          case GLSL_TYPE_UINT: resolve_op = nir_op_umax; break;
691          }
692          break;
693       case dzn_blit_resolve_none:
694       case dzn_blit_resolve_sample_zero:
695          break;
696       }
697 
698       unsigned nsamples = resolve_mode == dzn_blit_resolve_sample_zero ?
699                           1 : info->src_samples;
700       for (unsigned s = 0; s < nsamples; s++) {
701          nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);
702 
703          tex->op = nir_texop_txf_ms;
704          tex->dest_type = nir_out_type;
705          tex->texture_index = 0;
706          tex->is_array = info->src_is_array;
707          tex->sampler_dim = info->sampler_dim;
708 
709          tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
710                                            nir_f2i32(&b, coord));
711          tex->coord_components = coord_comps;
712 
713          tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
714                                            nir_imm_int(&b, s));
715 
716          tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_lod,
717                                            nir_imm_int(&b, 0));
718 
719          tex->src[3] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
720                                            &tex_deref->def);
721 
722          nir_def_init(&tex->instr, &tex->def, 4, 32);
723 
724          nir_builder_instr_insert(&b, &tex->instr);
725          res = res ? nir_build_alu2(&b, resolve_op, res, &tex->def) : &tex->def;
726       }
727 
728       if (resolve_mode == dzn_blit_resolve_average)
729          res = nir_fmul_imm(&b, res, 1.0f / nsamples);
730    } else {
731       nir_tex_instr *tex =
732          nir_tex_instr_create(b.shader, ms ? 4 : 3);
733 
734       tex->dest_type = nir_out_type;
735       tex->is_array = info->src_is_array;
736       tex->sampler_dim = info->sampler_dim;
737 
738       if (ms) {
739          tex->op = nir_texop_txf_ms;
740 
741          tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
742                                            nir_f2i32(&b, coord));
743          tex->coord_components = coord_comps;
744 
745          tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
746                                            nir_load_sample_id(&b));
747 
748          tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_lod,
749                                            nir_imm_int(&b, 0));
750 
751          tex->src[3] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
752                                            &tex_deref->def);
753       } else {
754          nir_variable *sampler_var =
755             nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
756          nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);
757 
758          tex->op = nir_texop_tex;
759          tex->sampler_index = 0;
760 
761          tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
762          tex->coord_components = coord_comps;
763 
764          tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
765                                            &tex_deref->def);
766 
767          tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
768                                            &sampler_deref->def);
769       }
770 
771       nir_def_init(&tex->instr, &tex->def, 4, 32);
772       nir_builder_instr_insert(&b, &tex->instr);
773       res = &tex->def;
774    }
775 
776    if (info->stencil_fallback) {
777       nir_def *mask_desc =
778          dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "mask", 0);
779       nir_def *mask = nir_load_ubo(&b, 1, 32, mask_desc, nir_imm_int(&b, 0),
780          .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
781       nir_def *fail = nir_ieq_imm(&b, nir_iand(&b, nir_channel(&b, res, 0), mask), 0);
782       nir_discard_if(&b, fail);
783    } else {
784       nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
785    }
786 
787    return b.shader;
788 }
789 
790 static nir_def *
cull_face(nir_builder * b,nir_variable * vertices,bool ccw)791 cull_face(nir_builder *b, nir_variable *vertices, bool ccw)
792 {
793    nir_def *v0 =
794       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 0)));
795    nir_def *v1 =
796       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 1)));
797    nir_def *v2 =
798       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 2)));
799 
800    nir_def *dir = nir_fdot(b, nir_cross4(b, nir_fsub(b, v1, v0),
801                                                 nir_fsub(b, v2, v0)),
802                                nir_imm_vec4(b, 0.0, 0.0, -1.0, 0.0));
803    if (ccw)
804       return nir_fle_imm(b, dir, 0.0f);
805    else
806       return nir_fgt_imm(b, dir, 0.0f);
807 }
808 
809 static void
copy_vars(nir_builder * b,nir_deref_instr * dst,nir_deref_instr * src)810 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
811 {
812    assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
813    if (glsl_type_is_struct(dst->type)) {
814       for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
815          copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
816       }
817    } else if (glsl_type_is_array_or_matrix(dst->type)) {
818       copy_vars(b, nir_build_deref_array_wildcard(b, dst), nir_build_deref_array_wildcard(b, src));
819    } else {
820       nir_copy_deref(b, dst, src);
821    }
822 }
823 
824 static nir_def *
load_dynamic_depth_bias(nir_builder * b,struct dzn_nir_point_gs_info * info)825 load_dynamic_depth_bias(nir_builder *b, struct dzn_nir_point_gs_info *info)
826 {
827    nir_address_format ubo_format = nir_address_format_32bit_index_offset;
828    unsigned offset = offsetof(struct dxil_spirv_vertex_runtime_data, depth_bias);
829 
830    nir_def *index = nir_vulkan_resource_index(
831       b, nir_address_format_num_components(ubo_format),
832       nir_address_format_bit_size(ubo_format),
833       nir_imm_int(b, 0),
834       .desc_set = info->runtime_data_cbv.register_space,
835       .binding = info->runtime_data_cbv.base_shader_register,
836       .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
837 
838    nir_def *load_desc = nir_load_vulkan_descriptor(
839       b, nir_address_format_num_components(ubo_format),
840       nir_address_format_bit_size(ubo_format),
841       index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
842 
843    return nir_load_ubo(
844       b, 1, 32,
845       nir_channel(b, load_desc, 0),
846       nir_imm_int(b, offset),
847       .align_mul = 256,
848       .align_offset = offset);
849 }
850 
/*
 * Build an internal geometry shader implementing point polygon mode
 * (D3D12 has no point fill mode): it consumes triangles, performs face
 * culling and depth bias in shader code, and re-emits each of the three
 * vertices as a point. Every output written by previous_shader is forwarded
 * unchanged, plus a flat front-facing flag on VARYING_SLOT_VAR12 —
 * presumably consumed by the fragment shader in place of the native
 * front-facing system value (TODO confirm against the FS lowering).
 */
nir_shader *
dzn_nir_polygon_point_mode_gs(const nir_shader *previous_shader, struct dzn_nir_point_gs_info *info)
{
   nir_builder builder;
   nir_builder *b = &builder;
   nir_variable *pos_var = NULL;

   unsigned num_vars = 0;
   nir_variable *in[VARYING_SLOT_MAX];
   nir_variable *out[VARYING_SLOT_MAX];


   builder = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                            dxil_get_base_nir_compiler_options(),
                                            "implicit_gs");

   /* Triangles in, points out, one point emitted per input vertex. */
   nir_shader *nir = b->shader;
   nir->info.inputs_read = nir->info.outputs_written = previous_shader->info.outputs_written;
   nir->info.outputs_written |= (1ull << VARYING_SLOT_VAR12);
   nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
   nir->info.gs.output_primitive = MESA_PRIM_POINTS;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   /* Mirror every output of the previous stage as an in[i]/out[i] pair;
    * GS inputs are 3-element arrays (one entry per triangle vertex). */
   nir_foreach_shader_out_variable(var, previous_shader) {
      char tmp[100];
      snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", num_vars);
      in[num_vars] = nir_variable_create(nir,
                                         nir_var_shader_in,
                                         glsl_array_type(var->type, 3, 0),
                                         tmp);
      in[num_vars]->data = var->data;
      in[num_vars]->data.mode = nir_var_shader_in;

      /* Remember the position input: it drives culling and depth bias. */
      if (var->data.location == VARYING_SLOT_POS)
         pos_var = in[num_vars];

      snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", num_vars);
      out[num_vars] = nir_variable_create(nir, nir_var_shader_out, var->type, tmp);
      out[num_vars]->data = var->data;

      num_vars++;
   }

   /* Extra flat output carrying the front-facing decision computed below. */
   nir_variable *front_facing_var = nir_variable_create(nir,
                                                        nir_var_shader_out,
                                                        glsl_uint_type(),
                                                        "gl_FrontFacing");
   front_facing_var->data.location = VARYING_SLOT_VAR12;
   front_facing_var->data.driver_location = num_vars;
   front_facing_var->data.interpolation = INTERP_MODE_FLAT;

   /* Per-format scale turning the API depth-bias constant into a clip-space
    * depth delta. */
   nir_def *depth_bias_scale = NULL;
   if (info->depth_bias) {
      switch (info->ds_fmt) {
      case DXGI_FORMAT_D16_UNORM:
         depth_bias_scale = nir_imm_float(b, 1.0f / (1 << 16));
         break;
      case DXGI_FORMAT_D24_UNORM_S8_UINT:
         depth_bias_scale = nir_imm_float(b, 1.0f / (1 << 24));
         break;
      case DXGI_FORMAT_D32_FLOAT:
      case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: {
         /* Float depth: scale is 2^(e - 23), where e is the exponent of the
          * triangle's largest |z| and 23 is the float32 mantissa width. */
         nir_deref_instr *deref_pos = nir_build_deref_var(b, pos_var);
         nir_def *max_z = NULL;
         for (uint32_t i = 0; i < 3; ++i) {
            nir_def *pos = nir_load_deref(b, nir_build_deref_array_imm(b, deref_pos, i));
            /* Mask off the sign bit so the integer max picks the largest
             * magnitude. */
            nir_def *z = nir_iand_imm(b, nir_channel(b, pos, 2), 0x7fffffff);
            max_z = i == 0 ? z : nir_imax(b, z, max_z);
         }
         /* Extract the biased exponent field from the float bit pattern. */
         nir_def *exponent = nir_ishr_imm(b, nir_iand_imm(b, max_z, 0x7f800000), 23);
         depth_bias_scale = nir_fexp2(b, nir_i2f32(b, nir_iadd_imm(b, exponent, -23)));
         break;
      }
      default:
         depth_bias_scale = nir_imm_float(b, 0.0f);
      }
   }

   /* Temporary variable "loop_index" to loop over input vertices */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_variable *loop_index_var =
      nir_local_variable_create(impl, glsl_uint_type(), "loop_index");
   nir_deref_instr *loop_index_deref = nir_build_deref_var(b, loop_index_var);
   nir_store_deref(b, loop_index_deref, nir_imm_int(b, 0), 1);

   /* Face culling happens here: by the time the rasterizer runs we only
    * have points, so fixed-function culling can't apply. */
   nir_def *cull_pass = nir_imm_true(b);
   nir_def *front_facing;
   assert(info->cull_mode != VK_CULL_MODE_FRONT_AND_BACK);
   if (info->cull_mode == VK_CULL_MODE_FRONT_BIT) {
      cull_pass = cull_face(b, pos_var, info->front_ccw);
      front_facing = nir_b2i32(b, cull_pass);
   } else if (info->cull_mode == VK_CULL_MODE_BACK_BIT) {
      cull_pass = cull_face(b, pos_var, !info->front_ccw);
      front_facing = nir_inot(b, nir_b2i32(b, cull_pass));
   } else
      /* No culling: convert the 1-bit facing bool to 32 bits (i2i32
       * sign-extends, so true becomes ~0u — nonzero either way). */
      front_facing = nir_i2i32(b, cull_face(b, pos_var, info->front_ccw));

   /**
    *  if (cull_pass) {
    *     while {
    *        if (loop_index >= 3)
    *           break;
    */
   nir_if *cull_check = nir_push_if(b, cull_pass);
   nir_loop *loop = nir_push_loop(b);

   nir_def *loop_index = nir_load_deref(b, loop_index_deref);
   nir_def *cmp = nir_ige(b, loop_index,
                              nir_imm_int(b, 3));
   nir_if *loop_check = nir_push_if(b, cmp);
   nir_jump(b, nir_jump_break);
   nir_pop_if(b, loop_check);

   /**
    *        [...] // Copy all variables
    *        EmitVertex();
    */
   for (unsigned i = 0; i < num_vars; ++i) {
      nir_def *index = loop_index;
      nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, in[i]), index);
      if (in[i] == pos_var && info->depth_bias) {
         /* Apply depth bias to the position's z channel before emitting. */
         nir_def *bias_val;
         if (info->depth_bias_dynamic) {
            bias_val = load_dynamic_depth_bias(b, info);
         } else {
            /* Slope-scaled bias isn't supported on this path. */
            assert(info->slope_scaled_depth_bias == 0.0f);
            bias_val = nir_imm_float(b, info->constant_depth_bias);
         }
         bias_val = nir_fmul(b, bias_val, depth_bias_scale);
         nir_def *old_val = nir_load_deref(b, in_value);
         nir_def *new_val = nir_vector_insert_imm(b, old_val,
                                                      nir_fadd(b, nir_channel(b, old_val, 2), bias_val),
                                                      2);
         nir_store_var(b, out[i], new_val, 0xf);
      } else {
         copy_vars(b, nir_build_deref_var(b, out[i]), in_value);
      }
   }
   nir_store_var(b, front_facing_var, front_facing, 0x1);
   nir_emit_vertex(b, 0);

   /**
    *        loop_index++;
    *     }
    *  }
    */
   nir_store_deref(b, loop_index_deref, nir_iadd_imm(b, loop_index, 1), 1);
   nir_pop_loop(b, loop);
   nir_pop_if(b, cull_check);

   nir_validate_shader(nir, "in dzn_nir_polygon_point_mode_gs");

   /* copy_vars() emits copy_deref instructions; lower them to load/store. */
   NIR_PASS_V(nir, nir_lower_var_copies);
   return b->shader;
}
1009