/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_nir.h"

#include "spirv_to_dxil.h"
#include "nir_to_dxil.h"
#include "nir_builder.h"
#include "nir_vulkan.h"

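/*
 * Creates a dummy UBO/SSBO variable for the given set/binding, emits the
 * matching vulkan_resource_index + load_vulkan_descriptor pair using the
 * 32bit_index_offset address format, and returns the index component of the
 * resulting descriptor, which the callers below pass straight to
 * nir_load_ubo()/nir_load_ssbo()/nir_store_ssbo().
 */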
static nir_ssa_def *
dzn_nir_create_bo_desc(nir_builder *b,
                       nir_variable_mode mode,
                       uint32_t desc_set,
                       uint32_t binding,
                       const char *name,
                       unsigned access)
{
   struct glsl_struct_field field = {
      .type = mode == nir_var_mem_ubo ?
              glsl_array_type(glsl_uint_type(), 4096, 4) :
              glsl_uint_type(),
      .name = "dummy_int",
   };
   const struct glsl_type *dummy_type =
      glsl_struct_type(&field, 1, "dummy_type", false);

   nir_variable *var =
      nir_variable_create(b->shader, mode, dummy_type, name);
   var->data.descriptor_set = desc_set;
   var->data.binding = binding;
   var->data.access = access;

   assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
   if (mode == nir_var_mem_ubo)
      b->shader->info.num_ubos++;
   else
      b->shader->info.num_ssbos++;

   VkDescriptorType desc_type =
      var->data.mode == nir_var_mem_ubo ?
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   nir_address_format addr_format = nir_address_format_32bit_index_offset;
   nir_ssa_def *index =
      nir_vulkan_resource_index(b,
                                nir_address_format_num_components(addr_format),
                                nir_address_format_bit_size(addr_format),
                                nir_imm_int(b, 0),
                                .desc_set = desc_set,
                                .binding = binding,
                                .desc_type = desc_type);

   nir_ssa_def *desc =
      nir_load_vulkan_descriptor(b,
                                 nir_address_format_num_components(addr_format),
                                 nir_address_format_bit_size(addr_format),
                                 index,
                                 .desc_type = desc_type);

   return nir_channel(b, desc, 0);
}

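/*
 * Builds the internal compute shader that rewrites VK_*_indirect draw
 * parameters into the exec-buffer layout consumed by ExecuteIndirect: one
 * invocation per draw, optionally clamped by a draw-count buffer, with extra
 * patching for triangle-fan emulation (index-buffer rewrite parameters and,
 * when needed, primitive-restart handling).
 */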
nir_shader *
dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
{
   const char *type_str[] = {
      "draw",
      "draw_count",
      "indexed_draw",
      "indexed_draw_count",
      "draw_triangle_fan",
      "draw_count_triangle_fan",
      "indexed_draw_triangle_fan",
      "indexed_draw_count_triangle_fan",
      "indexed_draw_triangle_fan_prim_restart",
      "indexed_draw_count_triangle_fan_prim_restart",
   };

   assert(type < ARRAY_SIZE(type_str));

   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
                         type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_indirect_%s()",
                                     type_str[type]);
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_ssa_def *draw_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
   nir_ssa_def *exec_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);

   unsigned params_size;
   if (triangle_fan)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
   else
      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);

   nir_ssa_def *params =
      nir_load_ubo(&b, params_size / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   nir_ssa_def *draw_stride = nir_channel(&b, params, 0);
   nir_ssa_def *exec_stride =
      triangle_fan ?
      nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_draw_exec_params)) :
      nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params));
   nir_ssa_def *index =
      nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);

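   /*
    * For indirect-count draws, thread 0 copies the draw count into the first
    * exec-buffer entry, and every thread whose index is past the count read
    * from the count buffer is skipped.
    */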
   if (indirect_count) {
      nir_ssa_def *count_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE);

      nir_ssa_def *draw_count =
         nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4);

      nir_push_if(&b, nir_ieq(&b, index, nir_imm_int(&b, 0)));
      nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0),
                     .write_mask = 0x1, .access = ACCESS_NON_READABLE,
                     .align_mul = 16);
      nir_pop_if(&b, NULL);

      nir_push_if(&b, nir_ult(&b, index, draw_count));
   }

   nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index);

   /* The first entry contains the indirect count */
   nir_ssa_def *exec_offset =
      indirect_count ?
      nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) :
      nir_imul(&b, exec_stride, index);

   nir_ssa_def *draw_info1 =
      nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
   nir_ssa_def *draw_info2 =
      indexed ?
      nir_load_ssbo(&b, 1, 32, draw_buf_desc,
                    nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
      nir_imm_int(&b, 0);

   nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2);
   nir_ssa_def *base_instance =
      indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);

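   /*
    * exec_vals[] is flushed to the exec buffer as two vec4s at the end of the
    * shader: components 0-2 hold first_vertex, base_instance and the draw
    * index, while components 3-7 hold the draw arguments (vertex/index count,
    * instance count, start offsets), which get patched below for triangle-fan
    * emulation.
    */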
   nir_ssa_def *exec_vals[8] = {
      first_vertex,
      base_instance,
      index,
   };

   if (triangle_fan) {
      /* Patch {vertex,index}_count and first_index */
      nir_ssa_def *triangle_count =
         nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
      exec_vals[3] = nir_imul_imm(&b, triangle_count, 3);
      exec_vals[4] = nir_channel(&b, draw_info1, 1);
      exec_vals[5] = nir_imm_int(&b, 0);
      exec_vals[6] = first_vertex;
      exec_vals[7] = base_instance;

      nir_ssa_def *triangle_fan_exec_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4,
                                "triangle_fan_exec_buf",
                                ACCESS_NON_READABLE);
      nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
      nir_ssa_def *triangle_fan_index_buf_addr_lo =
         nir_iadd(&b, nir_channel(&b, params, 2),
                  nir_imul(&b, triangle_fan_index_buf_stride, index));

      nir_ssa_def *triangle_fan_exec_vals[9] = { 0 };
      uint32_t triangle_fan_exec_param_count = 0;
      nir_ssa_def *addr_lo_overflow =
         nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
      nir_ssa_def *triangle_fan_index_buf_addr_hi =
         nir_iadd(&b, nir_channel(&b, params, 3),
                  nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));

      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;

      if (prim_restart) {
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
         uint32_t index_count_offset =
            offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count);
         nir_ssa_def *exec_buf_start =
            nir_load_ubo(&b, 2, 32,
                         params_desc, nir_imm_int(&b, 16),
                         .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
         nir_ssa_def *exec_buf_start_lo =
            nir_iadd(&b, nir_imm_int(&b, index_count_offset),
                     nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
                              nir_imul(&b, exec_stride, index)));
         /* Carry any low-word overflow into the high 32 bits of the address. */
         addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
         nir_ssa_def *exec_buf_start_hi =
            nir_iadd(&b, nir_channel(&b, exec_buf_start, 1),
                     nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
      } else {
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
            indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
            triangle_count;
      }
      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);

      unsigned rewrite_index_exec_params =
         prim_restart ?
         sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
         sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
      nir_ssa_def *triangle_fan_exec_stride =
         nir_imm_int(&b, rewrite_index_exec_params);
      nir_ssa_def *triangle_fan_exec_offset =
         nir_imul(&b, triangle_fan_exec_stride, index);

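      /*
       * The rewrite-index exec params are flushed in vec4 chunks, matching
       * the at-most-4-component SSBO stores emitted below.
       */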
      for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
         unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
         uint32_t mask = (1 << comps) - 1;

         nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
                        triangle_fan_exec_buf_desc,
                        nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
                        .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
      }

      nir_ssa_def *ibview_vals[] = {
         triangle_fan_index_buf_addr_lo,
         triangle_fan_index_buf_addr_hi,
         triangle_fan_index_buf_stride,
         nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
      };

      nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
                     exec_buf_desc, exec_offset,
                     .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
      exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
   } else {
      exec_vals[3] = nir_channel(&b, draw_info1, 0);
      exec_vals[4] = nir_channel(&b, draw_info1, 1);
      exec_vals[5] = nir_channel(&b, draw_info1, 2);
      exec_vals[6] = nir_channel(&b, draw_info1, 3);
      exec_vals[7] = draw_info2;
   }

   nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4),
                  exec_buf_desc, exec_offset,
                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
   nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 4),
                  exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);

   if (indirect_count)
      nir_pop_if(&b, NULL);

   return b.shader;
}

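/*
 * Builds the compute shader that converts a primitive-restart triangle fan
 * into a plain triangle list: it walks the old index buffer, drops restart
 * values, emits a triangle built from each pair of consecutive fan indices
 * plus the fan origin (index0) into the new index buffer, and finally writes
 * the resulting index count.
 */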
nir_shader *
dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_ssa_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);
   nir_ssa_def *old_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                             "old_index_buf", ACCESS_NON_WRITEABLE);
   nir_ssa_def *new_index_count_ptr_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
                             "new_index_count_ptr", ACCESS_NON_READABLE);

   nir_ssa_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   nir_ssa_def *prim_restart_val =
      nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
   nir_variable *old_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
   nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0);
   nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
   nir_variable *new_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
   nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
   nir_ssa_def *old_index_count = nir_channel(&b, params, 1);
   nir_variable *index0_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
   nir_store_var(&b, index0_var, prim_restart_val, 1);

   /*
    * Filter out all primitive-restart magic values, and generate a triangle
    * list from the triangle fan definition.
    *
    * Basically:
    *
    * new_index_ptr = 0;
    * index0 = restart_prim_value; // 0xffff or 0xffffffff
    * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
    *    // If we have no starting point we need at least 3 vertices,
    *    // otherwise we can do with two. If there aren't enough vertices
    *    // to form a primitive, we just bail out.
    *    min_indices = index0 == restart_prim_value ? 3 : 2;
    *    if (old_index_ptr + min_indices > firstIndex + indexCount)
    *       break;
    *
    *    if (index0 == restart_prim_value) {
    *       // No starting point, skip all entries until we have a
    *       // non-primitive-restart value
    *       index0 = old_index_buf[old_index_ptr++];
    *       continue;
    *    }
    *
    *    // If at least one index contains the primitive-restart pattern,
    *    // ignore this triangle, and skip the unused entries
    *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
    *       old_index_ptr += 2;
    *       continue;
    *    }
    *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
    *       old_index_ptr++;
    *       continue;
    *    }
    *
    *    // We have a valid primitive, queue it to the new index buffer
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
    *    new_index_buf[new_index_ptr++] = index0;
    * }
    *
    * expressed in NIR, which admittedly is not super easy to grasp.
    * TODO: It might be a good idea to use the CL compiler we have and turn
    * those shaders into CL kernels.
    */
   nir_push_loop(&b);

   old_index_ptr = nir_load_var(&b, old_index_ptr_var);
   nir_ssa_def *index0 = nir_load_var(&b, index0_var);

   nir_ssa_def *read_index_count =
      nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
                nir_imm_int(&b, 3), nir_imm_int(&b, 2));
   nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, NULL);

   nir_ssa_def *old_index_offset =
      nir_imul_imm(&b, old_index_ptr, old_index_size);

   nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
   nir_ssa_def *index_val =
      nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
      index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                            nir_ushr_imm(&b, index_val, 16),
                            nir_iand_imm(&b, index_val, 0xffff));
   }

   nir_store_var(&b, index0_var, index_val, 1);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   nir_jump(&b, nir_jump_continue);
   nir_pop_if(&b, NULL);

   nir_ssa_def *index12 =
      nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
      nir_ssa_def *indices[] = {
         nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
         nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
         nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
      };

      index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                          nir_vec2(&b, indices[1], indices[2]),
                          nir_vec2(&b, indices[0], indices[1]));
   }

   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   nir_ssa_def *new_indices =
      nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
   nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
   nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
   nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);
   nir_pop_loop(&b, NULL);

   nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
                  new_index_count_ptr_desc, nir_imm_int(&b, 0),
                  .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}

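/*
 * Builds the compute shader that expands a (non-primitive-restart) triangle
 * fan into a triangle list: each invocation emits one triangle, either by
 * reading two consecutive fan indices plus the fan origin from the old index
 * buffer, or by synthesizing indices for non-indexed draws
 * (old_index_size == 0).
 */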
nir_shader *
dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_ssa_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);

   nir_ssa_def *old_index_buf_desc = NULL;
   if (old_index_size > 0) {
      old_index_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                                "old_index_buf", ACCESS_NON_WRITEABLE);
   }

   nir_ssa_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

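   /* One invocation per output triangle. */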
   nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
   nir_ssa_def *new_indices;

   if (old_index_size > 0) {
      nir_ssa_def *old_first_index = nir_channel(&b, params, 0);
      nir_ssa_def *old_index0_offset =
         nir_imul_imm(&b, old_first_index, old_index_size);
      nir_ssa_def *old_index1_offset =
         nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
                      old_index_size);

      nir_ssa_def *old_index0 =
         nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                       old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
                       .align_mul = 4);

      if (old_index_size == 2) {
         old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2),
                                nir_ushr_imm(&b, old_index0, 16),
                                nir_iand_imm(&b, old_index0, 0xffff));
      }

      nir_ssa_def *old_index12 =
         nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                       old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
                       .align_mul = 4);
      if (old_index_size == 2) {
         nir_ssa_def *indices[] = {
            nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
            nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
            nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
         };

         old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2),
                                 nir_vec2(&b, indices[1], indices[2]),
                                 nir_vec2(&b, indices[0], indices[1]));
      }

      /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
      new_indices =
         nir_vec3(&b, nir_channel(&b, old_index12, 0),
                  nir_channel(&b, old_index12, 1), old_index0);
   } else {
      new_indices =
         nir_vec3(&b,
                  nir_iadd_imm(&b, triangle, 1),
                  nir_iadd_imm(&b, triangle, 2),
                  nir_imm_int(&b, 0));
   }

   nir_ssa_def *new_index_offset =
      nir_imul_imm(&b, triangle, 4 * 3);

   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}

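/*
 * Builds the vertex shader used by the meta blit path: each vertex fetches a
 * vec4 from the params UBO (position in .xy, texture coordinates in .zw),
 * emits gl_Position, and forwards (u, v, z) to the fragment shader through
 * VARYING_SLOT_TEX0, with the shared z coordinate read from the end of the
 * UBO.
 */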
nir_shader *
dzn_nir_blit_vs(void)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_blit_vs()");
   b.shader->info.internal = true;

   nir_ssa_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);

   nir_variable *out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "gl_Position");
   out_pos->data.location = VARYING_SLOT_POS;
   out_pos->data.driver_location = 0;

   nir_variable *out_coords =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
                          "coords");
   out_coords->data.location = VARYING_SLOT_TEX0;
   out_coords->data.driver_location = 1;

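   /*
    * The params UBO holds one vec4 per vertex (position in .xy, texture
    * coordinates in .zw) followed by the texture z coordinate at byte
    * offset 64.
    */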
   nir_ssa_def *vertex = nir_load_vertex_id(&b);
   nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float));
   nir_ssa_def *coords =
      nir_load_ubo(&b, 4, 32, params_desc, base,
                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
   nir_ssa_def *pos =
      nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
               nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
   nir_ssa_def *z_coord =
      nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
                   .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
   coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);

   nir_store_var(&b, out_pos, pos, 0xf);
   nir_store_var(&b, out_coords, coords, 0x7);
   return b.shader;
}

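/*
 * Builds the fragment shader used by the meta blit path: it samples (or
 * txf_ms-fetches) the source texture at the interpolated coordinates and
 * writes the result to the color/depth/stencil output. When info->resolve is
 * set, float formats are averaged across all samples while integer formats
 * take sample 0, per the Vulkan resolve rules noted in the comment below.
 */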
nir_shader *
dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
{
   bool ms = info->src_samples > 1;
   nir_alu_type nir_out_type =
      nir_get_nir_type_for_glsl_base_type(info->out_type);
   uint32_t coord_comps =
      glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
      info->src_is_array;

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     dxil_get_nir_compiler_options(),
                                     "dzn_meta_blit_fs()");
   b.shader->info.internal = true;

   const struct glsl_type *tex_type =
      glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
   nir_variable *tex_var =
      nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);

   nir_variable *pos_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 4),
                          "gl_FragCoord");
   pos_var->data.location = VARYING_SLOT_POS;
   pos_var->data.driver_location = 0;

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   coord_var->data.driver_location = 1;
   nir_ssa_def *coord =
      nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1);

   uint32_t out_comps =
      (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(info->out_type, out_comps),
                          "out");
   out->data.location = info->loc;

   nir_ssa_def *res = NULL;

   if (info->resolve) {
      /* When resolving a float type, we need to calculate the average of all
       * samples. For integer resolve, Vulkan says that one sample should be
       * chosen without telling which. Let's just pick the first one in that
       * case.
       */

      unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ?
                          info->src_samples : 1;
      for (unsigned s = 0; s < nsamples; s++) {
         nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);

         tex->op = nir_texop_txf_ms;
         tex->dest_type = nir_out_type;
         tex->texture_index = 0;
         tex->is_array = info->src_is_array;
         tex->sampler_dim = info->sampler_dim;

         tex->src[0].src_type = nir_tex_src_coord;
         tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
         tex->coord_components = coord_comps;

         tex->src[1].src_type = nir_tex_src_ms_index;
         tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));

         tex->src[2].src_type = nir_tex_src_lod;
         tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));

         tex->src[3].src_type = nir_tex_src_texture_deref;
         tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);

         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);

         nir_builder_instr_insert(&b, &tex->instr);
         res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
      }

      if (nsamples > 1) {
         unsigned type_sz = nir_alu_type_get_type_size(nir_out_type);
         res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
      }
   } else {
      nir_tex_instr *tex =
         nir_tex_instr_create(b.shader, ms ? 4 : 3);

      tex->dest_type = nir_out_type;
      tex->is_array = info->src_is_array;
      tex->sampler_dim = info->sampler_dim;

      if (ms) {
         tex->op = nir_texop_txf_ms;

         tex->src[0].src_type = nir_tex_src_coord;
         tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
         tex->coord_components = coord_comps;

         tex->src[1].src_type = nir_tex_src_ms_index;
         tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));

         tex->src[2].src_type = nir_tex_src_lod;
         tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));

         tex->src[3].src_type = nir_tex_src_texture_deref;
         tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);
      } else {
         nir_variable *sampler_var =
            nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
         nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);

         tex->op = nir_texop_tex;
         tex->sampler_index = 0;

         tex->src[0].src_type = nir_tex_src_coord;
         tex->src[0].src = nir_src_for_ssa(coord);
         tex->coord_components = coord_comps;

         tex->src[1].src_type = nir_tex_src_texture_deref;
         tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);

         tex->src[2].src_type = nir_tex_src_sampler_deref;
         tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
      }

      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
      nir_builder_instr_insert(&b, &tex->instr);
      res = &tex->dest.ssa;
   }

   nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf);

   return b.shader;
}