/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_nir.h"

#include "spirv_to_dxil.h"
#include "nir_to_dxil.h"
#include "nir_builder.h"
#include "nir_vulkan.h"

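/* Create a dummy UBO/SSBO variable for the given set/binding and return the
 * 32-bit descriptor index that nir_load_ubo()/nir_load_ssbo() expect, using
 * the 32bit_index_offset address format.
 */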
static nir_ssa_def *
dzn_nir_create_bo_desc(nir_builder *b,
                       nir_variable_mode mode,
                       uint32_t desc_set,
                       uint32_t binding,
                       const char *name,
                       unsigned access)
{
   struct glsl_struct_field field = {
      .type = mode == nir_var_mem_ubo ?
              glsl_array_type(glsl_uint_type(), 4096, 4) :
              glsl_uint_type(),
      .name = "dummy_int",
   };
   const struct glsl_type *dummy_type =
      glsl_struct_type(&field, 1, "dummy_type", false);

   nir_variable *var =
      nir_variable_create(b->shader, mode, dummy_type, name);
   var->data.descriptor_set = desc_set;
   var->data.binding = binding;
   var->data.access = access;

   assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
   if (mode == nir_var_mem_ubo)
      b->shader->info.num_ubos++;
   else
      b->shader->info.num_ssbos++;

   VkDescriptorType desc_type =
      var->data.mode == nir_var_mem_ubo ?
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   nir_address_format addr_format = nir_address_format_32bit_index_offset;
   nir_ssa_def *index =
      nir_vulkan_resource_index(b,
                                nir_address_format_num_components(addr_format),
                                nir_address_format_bit_size(addr_format),
                                nir_imm_int(b, 0),
                                .desc_set = desc_set,
                                .binding = binding,
                                .desc_type = desc_type);

   nir_ssa_def *desc =
      nir_load_vulkan_descriptor(b,
                                 nir_address_format_num_components(addr_format),
                                 nir_address_format_bit_size(addr_format),
                                 index,
                                 .desc_type = desc_type);

   return nir_channel(b, desc, 0);
}

nir_shader *
dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
{
   const char *type_str[] = {
      "draw",
      "draw_count",
      "indexed_draw",
      "indexed_draw_count",
      "draw_triangle_fan",
      "draw_count_triangle_fan",
      "indexed_draw_triangle_fan",
      "indexed_draw_count_triangle_fan",
      "indexed_draw_triangle_fan_prim_restart",
      "indexed_draw_count_triangle_fan_prim_restart",
   };

   assert(type < ARRAY_SIZE(type_str));

   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                         type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_indirect_%s()",
                                     type_str[type]);
   b.shader->info.internal = true;

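   /* Fixed binding layout for this shader: set 0, binding 0 is the params
    * UBO, binding 1 the source indirect draw buffer, binding 2 the rewritten
    * exec buffer. Binding 3 (count buffer) and binding 4 (triangle-fan
    * rewrite exec buffer) are only added by the variants that need them.
    */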
   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_ssa_def *draw_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
   nir_ssa_def *exec_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);

   unsigned params_size;
   if (triangle_fan)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
   else
      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);

   nir_ssa_def *params =
      nir_load_ubo(&b, params_size / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   nir_ssa_def *draw_stride = nir_channel(&b, params, 0);
   nir_ssa_def *exec_stride =
      triangle_fan ?
      nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_draw_exec_params)) :
      nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params));
   nir_ssa_def *index =
      nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);

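   /* For the *_COUNT variants the actual draw count lives in a separate
    * buffer: invocation 0 copies it to the head of exec_buf, and every
    * invocation whose index is >= draw_count is skipped entirely.
    */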
   if (indirect_count) {
      nir_ssa_def *count_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE);

      nir_ssa_def *draw_count =
         nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4);

      nir_push_if(&b, nir_ieq(&b, index, nir_imm_int(&b, 0)));
      nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0),
                     .write_mask = 0x1, .access = ACCESS_NON_READABLE,
                     .align_mul = 16);
      nir_pop_if(&b, NULL);

      nir_push_if(&b, nir_ult(&b, index, draw_count));
   }

   nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index);

   /* The first entry contains the indirect count */
   nir_ssa_def *exec_offset =
      indirect_count ?
      nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) :
      nir_imul(&b, exec_stride, index);

   nir_ssa_def *draw_info1 =
      nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
   nir_ssa_def *draw_info2 =
      indexed ?
      nir_load_ssbo(&b, 1, 32, draw_buf_desc,
                    nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
      nir_imm_int(&b, 0);

   nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2);
   nir_ssa_def *base_instance =
      indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);

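   /* exec_vals[0..2] are the values written unconditionally (first_vertex,
    * base_instance and the draw index); exec_vals[3..7] receive the draw
    * arguments, which get patched below for the triangle-fan case.
    */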
   nir_ssa_def *exec_vals[8] = {
      first_vertex,
      base_instance,
      index,
   };

   if (triangle_fan) {
      /* Patch {vertex,index}_count and first_index */
      nir_ssa_def *triangle_count =
         nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
      exec_vals[3] = nir_imul_imm(&b, triangle_count, 3);
      exec_vals[4] = nir_channel(&b, draw_info1, 1);
      exec_vals[5] = nir_imm_int(&b, 0);
      exec_vals[6] = first_vertex;
      exec_vals[7] = base_instance;

      nir_ssa_def *triangle_fan_exec_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4,
                                "triangle_fan_exec_buf",
                                ACCESS_NON_READABLE);
      nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
      nir_ssa_def *triangle_fan_index_buf_addr_lo =
         nir_iadd(&b, nir_channel(&b, params, 2),
                  nir_imul(&b, triangle_fan_index_buf_stride, index));

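      /* The index buffer address is a 64-bit value split across two 32-bit
       * params entries; the carry from the low-dword addition is propagated
       * to the high dword manually.
       */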
      nir_ssa_def *triangle_fan_exec_vals[9] = { 0 };
      uint32_t triangle_fan_exec_param_count = 0;
      nir_ssa_def *addr_lo_overflow =
         nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
      nir_ssa_def *triangle_fan_index_buf_addr_hi =
         nir_iadd(&b, nir_channel(&b, params, 3),
                  nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));

      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;

      if (prim_restart) {
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
         uint32_t index_count_offset =
            offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count);
         nir_ssa_def *exec_buf_start =
            nir_load_ubo(&b, 2, 32,
                         params_desc, nir_imm_int(&b, 16),
                         .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
         nir_ssa_def *exec_buf_start_lo =
            nir_iadd(&b, nir_imm_int(&b, index_count_offset),
                     nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
                              nir_imul(&b, exec_stride, index)));
         addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
         nir_ssa_def *exec_buf_start_hi =
            nir_iadd(&b, nir_channel(&b, exec_buf_start, 1),
                     nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
      } else {
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
            indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
            triangle_count;
      }
      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);

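      /* The values gathered above follow the
       * dzn_indirect_triangle_fan_[prim_restart_]rewrite_index_exec_params
       * layout consumed by the triangle-fan rewrite-index pass; they are
       * flushed to triangle_fan_exec_buf in vec4 chunks below.
       */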
      unsigned rewrite_index_exec_params =
         prim_restart ?
         sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
         sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
      nir_ssa_def *triangle_fan_exec_stride =
         nir_imm_int(&b, rewrite_index_exec_params);
      nir_ssa_def *triangle_fan_exec_offset =
         nir_imul(&b, triangle_fan_exec_stride, index);

      for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
         unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
         uint32_t mask = (1 << comps) - 1;

         nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
                        triangle_fan_exec_buf_desc,
                        nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
                        .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
      }

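      /* These four dwords match the layout of a D3D12 index buffer view
       * (64-bit buffer location, size, DXGI format) pointing at the
       * R32_UINT index buffer the draw will use once the fan has been
       * rewritten to a triangle list.
       */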
      nir_ssa_def *ibview_vals[] = {
         triangle_fan_index_buf_addr_lo,
         triangle_fan_index_buf_addr_hi,
         triangle_fan_index_buf_stride,
         nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
      };

      nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
                     exec_buf_desc, exec_offset,
                     .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
      exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
   } else {
      exec_vals[3] = nir_channel(&b, draw_info1, 0);
      exec_vals[4] = nir_channel(&b, draw_info1, 1);
      exec_vals[5] = nir_channel(&b, draw_info1, 2);
      exec_vals[6] = nir_channel(&b, draw_info1, 3);
      exec_vals[7] = draw_info2;
   }

   nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4),
                  exec_buf_desc, exec_offset,
                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
   nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 4),
                  exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);

   if (indirect_count)
      nir_pop_if(&b, NULL);

   return b.shader;
}

nir_shader *
dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_ssa_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);
   nir_ssa_def *old_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                             "old_index_buf", ACCESS_NON_WRITEABLE);
   nir_ssa_def *new_index_count_ptr_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
                             "new_index_count_ptr", ACCESS_NON_READABLE);

   nir_ssa_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   nir_ssa_def *prim_restart_val =
      nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
   nir_variable *old_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
   nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0);
   nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
   nir_variable *new_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
   nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
   nir_ssa_def *old_index_count = nir_channel(&b, params, 1);
   nir_variable *index0_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
   nir_store_var(&b, index0_var, prim_restart_val, 1);

   /*
    * Filter out all primitive-restart magic values, and generate a triangle list
    * from the triangle fan definition.
    *
    * Basically:
    *
    * new_index_ptr = 0;
    * index0 = restart_prim_value; // 0xffff or 0xffffffff
    * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
    *    // If we have no starting-point we need at least 3 vertices,
    *    // otherwise we can do with two. If there aren't enough vertices
    *    // to form a primitive, we just bail out.
    *    min_indices = index0 == restart_prim_value ? 3 : 2;
    *    if (old_index_ptr + min_indices > firstIndex + indexCount)
    *       break;
    *
    *    if (index0 == restart_prim_value) {
    *       // No starting point, skip all entries until we have a
    *       // non-primitive-restart value
    *       index0 = old_index_buf[old_index_ptr++];
    *       continue;
    *    }
    *
    *    // If at least one index contains the primitive-restart pattern,
    *    // ignore this triangle, and skip the unused entries
    *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
    *       old_index_ptr += 2;
    *       continue;
    *    }
    *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
    *       old_index_ptr++;
    *       continue;
    *    }
    *
    *    // We have a valid primitive, queue it to the new index buffer
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
    *    new_index_buf[new_index_ptr++] = index0;
    * }
    *
    * expressed in NIR, which admittedly is not super easy to grasp.
    * TODO: Might be a good thing to use the CL compiler we have and turn
    * those shaders into CL kernels.
    */
   nir_push_loop(&b);

   old_index_ptr = nir_load_var(&b, old_index_ptr_var);
   nir_ssa_def *index0 = nir_load_var(&b, index0_var);

   nir_ssa_def *read_index_count =
      nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
                nir_imm_int(&b, 3), nir_imm_int(&b, 2));
   nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, NULL);

   nir_ssa_def *old_index_offset =
      nir_imul_imm(&b, old_index_ptr, old_index_size);

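   /* SSBO loads are done at 32-bit granularity. For 16-bit indices, load the
    * dword containing the index (offset rounded down to a multiple of 4) and
    * pick the low or high half depending on bit 1 of the byte offset.
    */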
   nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
   nir_ssa_def *index_val =
      nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
     index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                           nir_ushr_imm(&b, index_val, 16),
                           nir_iand_imm(&b, index_val, 0xffff));
   }

   nir_store_var(&b, index0_var, index_val, 1);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   nir_jump(&b, nir_jump_continue);
   nir_pop_if(&b, NULL);

   nir_ssa_def *index12 =
      nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
      nir_ssa_def *indices[] = {
         nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
         nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
         nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
      };

      index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                          nir_vec2(&b, indices[1], indices[2]),
                          nir_vec2(&b, indices[0], indices[1]));
   }

   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   nir_ssa_def *new_indices =
      nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
   nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
   nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
   nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);
   nir_pop_loop(&b, NULL);

   nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
                  new_index_count_ptr_desc, nir_imm_int(&b, 0),
                  .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}

nir_shader *
dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_ssa_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);

   nir_ssa_def *old_index_buf_desc = NULL;
   if (old_index_size > 0) {
      old_index_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                                "old_index_buf", ACCESS_NON_WRITEABLE);
   }

   nir_ssa_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
   nir_ssa_def *new_indices;

   if (old_index_size > 0) {
      nir_ssa_def *old_first_index = nir_channel(&b, params, 0);
      nir_ssa_def *old_index0_offset =
         nir_imul_imm(&b, old_first_index, old_index_size);
      nir_ssa_def *old_index1_offset =
         nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
                      old_index_size);

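      /* Each invocation emits one triangle: the fan vertex at old_first_index
       * plus the two vertices at positions triangle + 1 and triangle + 2.
       * The 16-bit handling below mirrors the primitive-restart variant:
       * load the containing dword(s) and select halves based on bit 1 of
       * the byte offset.
       */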
      nir_ssa_def *old_index0 =
         nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                       old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
                       .align_mul = 4);

      if (old_index_size == 2) {
        old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2),
                               nir_ushr_imm(&b, old_index0, 16),
                               nir_iand_imm(&b, old_index0, 0xffff));
      }

      nir_ssa_def *old_index12 =
         nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                       old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
                       .align_mul = 4);
      if (old_index_size == 2) {
         nir_ssa_def *indices[] = {
            nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
            nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
            nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
         };

         old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2),
                                 nir_vec2(&b, indices[1], indices[2]),
                                 nir_vec2(&b, indices[0], indices[1]));
      }

      /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
      new_indices =
         nir_vec3(&b, nir_channel(&b, old_index12, 0),
                  nir_channel(&b, old_index12, 1), old_index0);
   } else {
      new_indices =
         nir_vec3(&b,
                  nir_iadd_imm(&b, triangle, 1),
                  nir_iadd_imm(&b, triangle, 2),
                  nir_imm_int(&b, 0));
   }

   nir_ssa_def *new_index_offset =
      nir_imul_imm(&b, triangle, 4 * 3);

   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}

nir_shader *
dzn_nir_blit_vs(void)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_blit_vs()");
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);

   nir_variable *out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "gl_Position");
   out_pos->data.location = VARYING_SLOT_POS;
   out_pos->data.driver_location = 0;

   nir_variable *out_coords =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
                          "coords");
   out_coords->data.location = VARYING_SLOT_TEX0;
   out_coords->data.driver_location = 1;

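   /* The params UBO holds one vec4 per vertex packing {pos.x, pos.y,
    * coord.x, coord.y}, followed (at byte offset 64) by the Z texture
    * coordinate shared by all vertices.
    */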
   nir_ssa_def *vertex = nir_load_vertex_id(&b);
   nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float));
   nir_ssa_def *coords =
      nir_load_ubo(&b, 4, 32, params_desc, base,
                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
   nir_ssa_def *pos =
      nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
               nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
   nir_ssa_def *z_coord =
      nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
                   .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
   coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);

   nir_store_var(&b, out_pos, pos, 0xf);
   nir_store_var(&b, out_coords, coords, 0x7);
   return b.shader;
}

nir_shader *
dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
{
   bool ms = info->src_samples > 1;
   nir_alu_type nir_out_type =
      nir_get_nir_type_for_glsl_base_type(info->out_type);
   uint32_t coord_comps =
      glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
      info->src_is_array;

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_blit_fs()");
   b.shader->info.internal = true;

   const struct glsl_type *tex_type =
      glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
   nir_variable *tex_var =
      nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);

   nir_variable *pos_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 4),
                          "gl_FragCoord");
   pos_var->data.location = VARYING_SLOT_POS;
   pos_var->data.driver_location = 0;

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   coord_var->data.driver_location = 1;
   nir_ssa_def *coord =
      nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1);

   uint32_t out_comps =
      (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(info->out_type, out_comps),
                          "out");
   out->data.location = info->loc;

   nir_ssa_def *res = NULL;

   if (info->resolve) {
      /* When resolving a float type, we need to calculate the average of all
       * samples. For integer resolve, Vulkan says that one sample should be
       * chosen without telling which. Let's just pick the first one in that
       * case.
       */

      unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ?
                          info->src_samples : 1;
      for (unsigned s = 0; s < nsamples; s++) {
         nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);

         tex->op = nir_texop_txf_ms;
         tex->dest_type = nir_out_type;
         tex->texture_index = 0;
         tex->is_array = info->src_is_array;
         tex->sampler_dim = info->sampler_dim;

         tex->src[0].src_type = nir_tex_src_coord;
         tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
         tex->coord_components = coord_comps;

         tex->src[1].src_type = nir_tex_src_ms_index;
         tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));

         tex->src[2].src_type = nir_tex_src_lod;
         tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));

         tex->src[3].src_type = nir_tex_src_texture_deref;
         tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);

         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);

         nir_builder_instr_insert(&b, &tex->instr);
         res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
      }

      if (nsamples > 1) {
         unsigned type_sz = nir_alu_type_get_type_size(nir_out_type);
         res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
      }
   } else {
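      /* Non-resolve path: multisampled sources are fetched with txf_ms at
       * the current sample ID, single-sampled sources go through a regular
       * sampled tex op with an explicit sampler.
       */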
      nir_tex_instr *tex =
         nir_tex_instr_create(b.shader, ms ? 4 : 3);

      tex->dest_type = nir_out_type;
      tex->is_array = info->src_is_array;
      tex->sampler_dim = info->sampler_dim;

      if (ms) {
         tex->op = nir_texop_txf_ms;

         tex->src[0].src_type = nir_tex_src_coord;
         tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
         tex->coord_components = coord_comps;

         tex->src[1].src_type = nir_tex_src_ms_index;
         tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));

         tex->src[2].src_type = nir_tex_src_lod;
         tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));

         tex->src[3].src_type = nir_tex_src_texture_deref;
         tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);
      } else {
         nir_variable *sampler_var =
            nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
         nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);

         tex->op = nir_texop_tex;
         tex->sampler_index = 0;

         tex->src[0].src_type = nir_tex_src_coord;
         tex->src[0].src = nir_src_for_ssa(coord);
         tex->coord_components = coord_comps;

         tex->src[1].src_type = nir_tex_src_texture_deref;
         tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);

         tex->src[2].src_type = nir_tex_src_sampler_deref;
         tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
      }

      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
      nir_builder_instr_insert(&b, &tex->instr);
      res = &tex->dest.ssa;
   }

   nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf);

   return b.shader;
}
741