1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "main/enums.h"
25 #include "main/context.h"
26
27 #include "st_context.h"
28 #include "st_nir.h"
29 #include "st_draw.h"
30
31 #include "nir.h"
32 #include "nir_builtin_builder.h"
33
34 #include "util/u_memory.h"
35
/* Key identifying one geometry-shader variant.  The bitfields and 'u32'
 * share storage, so the whole key can be read as one 32-bit word (it is
 * used that way as a hash-table key and as a "valid key" test).
 */
union state_key {
   struct {
      /* number of enabled user clip planes */
      unsigned int num_user_clip_planes : 4;
      /* non-zero when face culling code is emitted */
      unsigned int face_culling_enabled : 1;
      /* non-zero when the result offset is read from a vertex attribute
       * instead of a uniform
       */
      unsigned int result_offset_from_attribute : 1;
      /* one of enum primitive_state */
      unsigned int primitive : 4;
   };
   uint32_t u32;
};
45
/* Input primitive class a shader variant is built for.  NONE is zero so
 * that an all-zero state key never names a real primitive.
 */
enum primitive_state {
   HW_SELECT_PRIM_NONE = 0,
   HW_SELECT_PRIM_POINTS = 1,
   HW_SELECT_PRIM_LINES = 2,
   HW_SELECT_PRIM_TRIANGLES = 3,
   HW_SELECT_PRIM_QUADS = 4,
};
53
/* CPU-side image of the constant buffer read by the hw select geometry
 * shader.  st_draw_hw_select_prepare_common() fills and binds it, and
 * set_uniform_location() derives uniform locations from the field offsets,
 * so the field order and sizes must not change independently of them.
 */
struct geometry_constant {
   /* (far - near) / 2 of viewport 0's depth range */
   float depth_scale;
   /* (far + near) / 2 of viewport 0's depth range */
   float depth_transport;
   /* (CullFaceMode == GL_BACK) ^ (FrontFace == GL_CCW); only read by the
    * shader when face culling is enabled
    */
   uint32_t culling_config;
   /* byte offset into the result buffer; only read by the shader when the
    * offset is not passed by vertex attribute
    */
   uint32_t result_offset;
   /* enabled user clip planes, packed contiguously from index 0 */
   float clip_planes[MAX_CLIP_PLANES][4];
};
61
/* Point the uniform variable 'var' at 'field' of struct geometry_constant.
 *
 * driver_location is the byte offset of the field expressed in dwords when
 * 'packed' is true, and in 16-byte (vec4) slots otherwise; location_frac is
 * the dword index of the field inside its 16-byte slot.
 *
 * Arguments are parenthesized and the internal temporary uses a name that
 * cannot collide with a caller variable called 'offset'.
 */
#define set_uniform_location(var, field, packed)                          \
   do {                                                                   \
      const unsigned uniform_byte_offset_ =                               \
         (unsigned)offsetof(struct geometry_constant, field);             \
      (var)->data.driver_location =                                       \
         uniform_byte_offset_ >> ((packed) ? 2 : 4);                      \
      (var)->data.location_frac = (uniform_byte_offset_ >> 2) & 0x3;      \
   } while (0)
68
69 static nir_def *
has_nan_or_inf(nir_builder * b,nir_def * v)70 has_nan_or_inf(nir_builder *b, nir_def *v)
71 {
72 nir_def *nan = nir_bany_fnequal4(b, v, v);
73
74 nir_def *inf = nir_bany(b, nir_feq_imm(b, nir_fabs(b, v), INFINITY));
75
76 return nir_ior(b, nan, inf);
77 }
78
79 static void
return_if_true(nir_builder * b,nir_def * cond)80 return_if_true(nir_builder *b, nir_def *cond)
81 {
82 nir_if *if_cond = nir_push_if(b, cond);
83 nir_jump(b, nir_jump_return);
84 nir_pop_if(b, if_cond);
85 }
86
87 static void
get_input_vertices(nir_builder * b,nir_def ** v)88 get_input_vertices(nir_builder *b, nir_def **v)
89 {
90 const int num_in_vert = b->shader->info.gs.vertices_in;
91
92 nir_def *is_nan_or_inf = NULL;
93 for (int i = 0; i < num_in_vert; i++) {
94 v[i] = nir_load_per_vertex_input(b, 4, 32, nir_imm_int(b, i),
95 nir_imm_int(b, 0),
96 .io_semantics.location = VARYING_SLOT_POS);
97 nir_def *r = has_nan_or_inf(b, v[i]);
98 is_nan_or_inf = i ? nir_ior(b, is_nan_or_inf, r) : r;
99 }
100 return_if_true(b, is_nan_or_inf);
101 }
102
103 static void
face_culling(nir_builder * b,nir_def ** v,bool packed)104 face_culling(nir_builder *b, nir_def **v, bool packed)
105 {
106 /* use the z value of the face normal to determine if the face points to us:
107 * Nz = (x1 - x0) * (y2 - y0) - (y1 - y0) * (x2 - x0)
108 *
109 * it should be in NDC (Normalized Device Coordinate), but now we are in clip
110 * space (Vd = Vc / Vc.w), so multiply Nz with w0*w1*w2 to get the clip space
111 * value:
112 * det = x0 * (y1 * w2 - y2 * w1) +
113 * x1 * (y2 * w0 - y0 * w2) +
114 * x2 * (y0 * w1 - y1 * w0)
115 *
116 * we only care about the sign of the det, but also need to count the sign of
117 * w0/w1/w2 as a negtive w would change the direction of Nz < 0
118 */
119 nir_def *y1w2 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[2], 3));
120 nir_def *y2w1 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[1], 3));
121 nir_def *y2w0 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[0], 3));
122 nir_def *y0w2 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[2], 3));
123 nir_def *y0w1 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[1], 3));
124 nir_def *y1w0 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[0], 3));
125 nir_def *t0 = nir_fmul(b, nir_channel(b, v[0], 0), nir_fsub(b, y1w2, y2w1));
126 nir_def *t1 = nir_fmul(b, nir_channel(b, v[1], 0), nir_fsub(b, y2w0, y0w2));
127 nir_def *t2 = nir_fmul(b, nir_channel(b, v[2], 0), nir_fsub(b, y0w1, y1w0));
128 nir_def *det = nir_fadd(b, nir_fadd(b, t0, t1), t2);
129
130 /* invert det sign once any vertex w < 0 */
131 nir_def *n0 = nir_flt_imm(b, nir_channel(b, v[0], 3), 0);
132 nir_def *n1 = nir_flt_imm(b, nir_channel(b, v[1], 3), 0);
133 nir_def *n2 = nir_flt_imm(b, nir_channel(b, v[2], 3), 0);
134 nir_def *cond = nir_ixor(b, nir_ixor(b, n0, n1), n2);
135 det = nir_bcsel(b, cond, nir_fneg(b, det), det);
136
137 nir_variable *culling_config = nir_variable_create(
138 b->shader, nir_var_uniform, glsl_uint_type(), "culling_config");
139 set_uniform_location(culling_config, culling_config, packed);
140 nir_def *config = nir_i2b(b, nir_load_var(b, culling_config));
141
142 /* det < 0 then z points to camera */
143 nir_def *zero = nir_imm_zero(b, 1, det->bit_size);
144 nir_def *is_zero = nir_feq(b, det, zero);
145 nir_def *is_neg = nir_flt(b, det, zero);
146 nir_def *cull = nir_ixor(b, is_neg, config);
147 return_if_true(b, nir_ior(b, is_zero, cull));
148 }
149
150 static void
fast_frustum_culling(nir_builder * b,nir_def ** v)151 fast_frustum_culling(nir_builder *b, nir_def **v)
152 {
153 nir_def *cull = NULL;
154
155 /* there are six culling planes for the visible volume:
156 * 1. x + w = 0
157 * 2. -x + w = 0
158 * 3. y + w = 0
159 * 4. -y + w = 0
160 * 5. z + w = 0
161 * 6. -z + w = 0
162 *
163 * if all vertices of the primitive are outside (plane equation <0) of
164 * any plane, the primitive must be invisible.
165 */
166 for (int i = 0; i < 6; i++) {
167 nir_def *outside = NULL;
168
169 for (int j = 0; j < b->shader->info.gs.vertices_in; j++) {
170 nir_def *c = nir_channel(b, v[j], i >> 1);
171 if (i & 1)
172 c = nir_fneg(b, c);
173
174 nir_def *r = nir_flt(b, nir_channel(b, v[j], 3), c);
175 outside = j ? nir_iand(b, outside, r) : r;
176 }
177
178 cull = i ? nir_ior(b, cull, outside) : outside;
179 }
180
181 return_if_true(b, cull);
182 }
183
184 static nir_def *
get_intersection(nir_builder * b,nir_def * v1,nir_def * v2,nir_def * d1,nir_def * d2)185 get_intersection(nir_builder *b, nir_def *v1, nir_def *v2,
186 nir_def *d1, nir_def *d2)
187 {
188 nir_def *factor = nir_fdiv(b, d1, nir_fsub(b, d1, d2));
189 return nir_fmad(b, nir_fsub(b, v2, v1), factor, v1);
190 }
191
/* begin_for_loop(name, max) / end_for_loop(name) bracket the body of a
 * counted NIR loop "for (idx = 0; idx < max; idx++)".  They must be used as
 * a pair with the same 'name' and need a nir_builder pointer called 'b' in
 * scope.
 *
 * begin_for_loop declares the local counter variable 'name##_index'
 * (initialized to 0) and the nir_loop 'name', then opens a C scope in which
 * 'idx' holds the counter value loaded at the top of the iteration and an
 * "if (idx < max)" is pushed.  'max' is expanded inside the loop, so any
 * NIR it emits is placed there.
 *
 * end_for_loop stores idx + 1 back to the counter, emits a break in the
 * else branch of that if, and pops the if and the loop.
 */
#define begin_for_loop(name, max)                                       \
   nir_variable *name##_index =                                         \
      nir_local_variable_create(b->impl, glsl_int_type(), #name "_i");  \
   nir_store_var(b, name##_index, nir_imm_int(b, 0), 1);                \
                                                                        \
   nir_loop *name = nir_push_loop(b);                                   \
   {                                                                    \
      nir_def *idx = nir_load_var(b, name##_index);                     \
      nir_if *if_in_loop = nir_push_if(b, nir_ilt(b, idx, max));

#define end_for_loop(name)                                              \
      nir_store_var(b, name##_index, nir_iadd_imm(b, idx, 1), 1);       \
      nir_push_else(b, if_in_loop);                                     \
      nir_jump(b, nir_jump_break);                                      \
      nir_pop_if(b, if_in_loop);                                        \
   }                                                                    \
   nir_pop_loop(b, name);
209
210 static void
clip_with_plane(nir_builder * b,nir_variable * vert,nir_variable * num_vert,int max_vert,nir_def * plane)211 clip_with_plane(nir_builder *b, nir_variable *vert, nir_variable *num_vert,
212 int max_vert, nir_def *plane)
213 {
214 nir_variable *all_clipped = nir_local_variable_create(
215 b->impl, glsl_bool_type(), "all_clipped");
216 nir_store_var(b, all_clipped, nir_imm_true(b), 1);
217
218 nir_variable *dist = nir_local_variable_create(
219 b->impl, glsl_array_type(glsl_float_type(), max_vert, 0), "dist");
220
221 nir_def *num = nir_load_var(b, num_vert);
222 begin_for_loop(dist_loop, num)
223 {
224 nir_def *v = nir_load_array_var(b, vert, idx);
225 nir_def *d = nir_fdot(b, v, plane);
226 nir_store_array_var(b, dist, idx, d, 1);
227
228 nir_def *clipped = nir_flt_imm(b, d, 0);
229 nir_store_var(b, all_clipped,
230 nir_iand(b, nir_load_var(b, all_clipped), clipped), 1);
231 }
232 end_for_loop(dist_loop)
233
234 return_if_true(b, nir_load_var(b, all_clipped));
235
236 /* Use +/0/- to denote the dist[i] sign, which means:
237 * +: inside plane
238 * -: outside plane
239 * 0: just on the plane
240 *
241 * Some example:
242 * ++++: all vertex not clipped
243 * ----: all vertex clipped
244 * +-++: one vertex clipped, need to insert two vertex at '-', array grow
245 * +--+: two vertex clipped, need to insert two vertex at '--', array same
246 * +---: three vertex clipped, need to insert two vertex at '---', array trim
247 * +-0+: one vertex clipped, need to insert one vertex at '-', array same
248 *
249 * Plane clip only produce convex polygon, so '-' must be contigous, there's
250 * no '+-+-', so one clip plane can only grow array by 1.
251 */
252
253 /* when array grow or '-' has been replaced with inserted vertex, save the
254 * original vert to be used by following calculation.
255 */
256 nir_variable *saved =
257 nir_local_variable_create(b->impl, glsl_vec4_type(), "saved");
258
259 nir_variable *vert_index =
260 nir_local_variable_create(b->impl, glsl_int_type(), "vert_index");
261 nir_store_var(b, vert_index, nir_imm_int(b, 0), 1);
262
263 begin_for_loop(vert_loop, num)
264 {
265 nir_def *di = nir_load_array_var(b, dist, idx);
266 nir_if *if_clipped = nir_push_if(b, nir_flt_imm(b, di, 0));
267 {
268 /* - case, we need to take care of sign change and insert vertex */
269
270 nir_def *prev = nir_bcsel(b, nir_ieq_imm(b, idx, 0),
271 nir_iadd_imm(b, num, -1),
272 nir_iadd_imm(b, idx, -1));
273 nir_def *dp = nir_load_array_var(b, dist, prev);
274 nir_if *prev_if = nir_push_if(b, nir_fgt_imm(b, dp, 0));
275 {
276 /* +- case, replace - with inserted vertex
277 * assert(vert_index <= idx), array is sure to not grow here
278 * but need to save vert[idx] when vert_index==idx
279 */
280
281 nir_def *vi = nir_load_array_var(b, vert, idx);
282 nir_store_var(b, saved, vi, 0xf);
283
284 nir_def *vp = nir_load_array_var(b, vert, prev);
285 nir_def *iv = get_intersection(b, vp, vi, dp, di);
286 nir_def *index = nir_load_var(b, vert_index);
287 nir_store_array_var(b, vert, index, iv, 0xf);
288
289 nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
290 }
291 nir_pop_if(b, prev_if);
292
293 nir_def *next = nir_bcsel(b, nir_ieq(b, idx, nir_iadd_imm(b, num, -1)),
294 nir_imm_int(b, 0), nir_iadd_imm(b, idx, 1));
295 nir_def *dn = nir_load_array_var(b, dist, next);
296 nir_if *next_if = nir_push_if(b, nir_fgt_imm(b, dn, 0));
297 {
298 /* -+ case, may grow array:
299 * vert_index > idx: +-+ case, grow array, current vertex in 'saved',
300 * save next + to 'saved', will replace it with inserted vertex.
301 * vert_index <= idx: --+ case, will replace last - with inserted vertex,
302 * no need to save last -, because + case won't use - value.
303 */
304
305 nir_def *index = nir_load_var(b, vert_index);
306 nir_def *vi = nir_bcsel(b, nir_flt(b, idx, index),
307 nir_load_var(b, saved),
308 nir_load_array_var(b, vert, idx));
309 nir_def *vn = nir_load_array_var(b, vert, next);
310 nir_def *iv = get_intersection(b, vn, vi, dn, di);
311
312 nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf);
313 nir_store_array_var(b, vert, index, iv, 0xf);
314
315 nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
316 }
317 nir_pop_if(b, next_if);
318 }
319 nir_push_else(b, if_clipped);
320 {
321 /* +/0 case, just keep the vert
322 * vert_index > idx: array grew case, vert[idx] is inserted vertex or prev
323 * +/0 vertex, current vertex is in 'saved', need to save next vertex
324 * vert_index < idx: array trim case
325 */
326
327 nir_def *index = nir_load_var(b, vert_index);
328 nir_def *vi = nir_bcsel(b, nir_flt(b, idx, index),
329 nir_load_var(b, saved),
330 nir_load_array_var(b, vert, idx));
331
332 nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf);
333 nir_store_array_var(b, vert, index, vi, 0xf);
334
335 nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
336 }
337 nir_pop_if(b, if_clipped);
338 }
339 end_for_loop(vert_loop);
340
341 nir_store_var(b, num_vert, nir_load_var(b, vert_index), 0x1);
342 }
343
344 static nir_def *
get_user_clip_plane(nir_builder * b,int index,bool packed)345 get_user_clip_plane(nir_builder *b, int index, bool packed)
346 {
347 char name[16];
348 snprintf(name, sizeof(name), "gl_ClipPlane%d", index);
349 nir_variable *plane = nir_variable_create(
350 b->shader, nir_var_uniform, glsl_vec4_type(), name);
351
352 set_uniform_location(plane, clip_planes[index][0], packed);
353
354 return nir_load_var(b, plane);
355 }
356
357 static void
get_depth_range_transform(nir_builder * b,bool packed,nir_def ** trans)358 get_depth_range_transform(nir_builder *b, bool packed, nir_def **trans)
359 {
360 nir_variable *depth_scale = nir_variable_create(
361 b->shader, nir_var_uniform, glsl_float_type(), "depth_scale");
362 set_uniform_location(depth_scale, depth_scale, packed);
363
364 nir_variable *depth_transport = nir_variable_create(
365 b->shader, nir_var_uniform, glsl_float_type(), "depth_transport");
366 set_uniform_location(depth_transport, depth_transport, packed);
367
368 trans[0] = nir_load_var(b, depth_scale);
369 trans[1] = nir_load_var(b, depth_transport);
370 }
371
372 static nir_def *
get_window_space_depth(nir_builder * b,nir_def * v,nir_def ** trans)373 get_window_space_depth(nir_builder *b, nir_def *v, nir_def **trans)
374 {
375 nir_def *z = nir_channel(b, v, 2);
376 nir_def *w = nir_channel(b, v, 3);
377
378 /* do perspective division, if w==0, xyz must be 0 too (otherwise can't pass
379 * the clip test), 0/0=NaN, but we want it to be the nearest point.
380 */
381 nir_def *c = nir_feq_imm(b, w, 0);
382 nir_def *d = nir_bcsel(b, c, nir_imm_float(b, -1), nir_fdiv(b, z, w));
383
384 /* map [-1, 1] to [near, far] set by glDepthRange(near, far) */
385 return nir_fmad(b, trans[0], d, trans[1]);
386 }
387
388 static void
update_result_buffer(nir_builder * b,nir_def * dmin,nir_def * dmax,bool offset_from_attribute,bool packed)389 update_result_buffer(nir_builder *b, nir_def *dmin, nir_def *dmax,
390 bool offset_from_attribute, bool packed)
391 {
392 nir_def *offset;
393 if (offset_from_attribute) {
394 offset = nir_load_per_vertex_input(b, 4, 32, nir_imm_int(b, 0),
395 nir_imm_int(b, 0),
396 .io_semantics.location = VARYING_SLOT_VAR0);
397 } else {
398 nir_variable *uni_offset = nir_variable_create(
399 b->shader, nir_var_uniform, glsl_uint_type(), "result_offset");
400 set_uniform_location(uni_offset, result_offset, packed);
401 offset = nir_load_var(b, uni_offset);
402 }
403
404 nir_variable_create(b->shader, nir_var_mem_ssbo,
405 glsl_array_type(glsl_uint_type(), 0, 0), "result");
406 /* driver_location = 0 (slot 0) */
407
408 nir_def *ssbo = nir_imm_int(b, 0);
409 nir_ssbo_atomic(b, 32, ssbo, offset, nir_imm_int(b, 1),
410 .atomic_op = nir_atomic_op_xchg);
411 nir_ssbo_atomic(b, 32, ssbo, nir_iadd_imm(b, offset, 4), dmin,
412 .atomic_op = nir_atomic_op_umin);
413 nir_ssbo_atomic(b, 32, ssbo, nir_iadd_imm(b, offset, 8), dmax,
414 .atomic_op = nir_atomic_op_umax);
415 }
416
417 static void
build_point_nir_shader(nir_builder * b,union state_key state,bool packed)418 build_point_nir_shader(nir_builder *b, union state_key state, bool packed)
419 {
420 assert(b->shader->info.gs.vertices_in == 1);
421
422 nir_def *v;
423 get_input_vertices(b, &v);
424
425 fast_frustum_culling(b, &v);
426
427 nir_def *outside = NULL;
428 for (int i = 0; i < state.num_user_clip_planes; i++) {
429 nir_def *p = get_user_clip_plane(b, i, packed);
430 nir_def *d = nir_fdot(b, v, p);
431 nir_def *r = nir_flt_imm(b, d, 0);
432 outside = i ? nir_ior(b, outside, r) : r;
433 }
434 if (outside)
435 return_if_true(b, outside);
436
437 nir_def *trans[2];
438 get_depth_range_transform(b, packed, trans);
439
440 nir_def *depth = get_window_space_depth(b, v, trans);
441 nir_def *fdepth = nir_fmul_imm(b, depth, 4294967295.0);
442 nir_def *idepth = nir_f2uN(b, fdepth, 32);
443
444 update_result_buffer(b, idepth, idepth, state.result_offset_from_attribute, packed);
445 }
446
447 static nir_variable *
create_clip_planes(nir_builder * b,int num_clip_planes,bool packed)448 create_clip_planes(nir_builder *b, int num_clip_planes, bool packed)
449 {
450 nir_variable *clip_planes = nir_local_variable_create(
451 b->impl, glsl_array_type(glsl_vec4_type(), num_clip_planes, 0), "clip_planes");
452
453 nir_def *unit_clip_planes[6] = {
454 nir_imm_vec4(b, 1, 0, 0, 1),
455 nir_imm_vec4(b, -1, 0, 0, 1),
456 nir_imm_vec4(b, 0, 1, 0, 1),
457 nir_imm_vec4(b, 0, -1, 0, 1),
458 nir_imm_vec4(b, 0, 0, 1, 1),
459 nir_imm_vec4(b, 0, 0, -1, 1),
460 };
461 for (int i = 0; i < 6; i++)
462 nir_store_array_var_imm(b, clip_planes, i, unit_clip_planes[i], 0xf);
463
464 for (int i = 6; i < num_clip_planes; i++) {
465 nir_def *p = get_user_clip_plane(b, i - 6, packed);
466 nir_store_array_var_imm(b, clip_planes, i, p, 0xf);
467 }
468
469 return clip_planes;
470 }
471
472 static void
build_line_nir_shader(nir_builder * b,union state_key state,bool packed)473 build_line_nir_shader(nir_builder *b, union state_key state, bool packed)
474 {
475 assert(b->shader->info.gs.vertices_in == 2);
476
477 nir_def *v[2];
478 get_input_vertices(b, v);
479
480 fast_frustum_culling(b, v);
481
482 nir_variable *vert0 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert0");
483 nir_store_var(b, vert0, v[0], 0xf);
484
485 nir_variable *vert1 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert1");
486 nir_store_var(b, vert1, v[1], 0xf);
487
488 const int num_clip_planes = 6 + state.num_user_clip_planes;
489 nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed);
490
491 begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes))
492 {
493 nir_def *plane = nir_load_array_var(b, clip_planes, idx);
494 nir_def *v0 = nir_load_var(b, vert0);
495 nir_def *v1 = nir_load_var(b, vert1);
496 nir_def *d0 = nir_fdot(b, v0, plane);
497 nir_def *d1 = nir_fdot(b, v1, plane);
498 nir_def *n0 = nir_flt_imm(b, d0, 0);
499 nir_def *n1 = nir_flt_imm(b, d1, 0);
500
501 return_if_true(b, nir_iand(b, n0, n1));
502
503 nir_if *clip_if = nir_push_if(b, nir_ior(b, n0, n1));
504 {
505 nir_def *iv = get_intersection(b, v0, v1, d0, d1);
506 nir_store_var(b, vert0, nir_bcsel(b, n0, iv, v0), 0xf);
507 nir_store_var(b, vert1, nir_bcsel(b, n1, iv, v1), 0xf);
508 }
509 nir_pop_if(b, clip_if);
510 }
511 end_for_loop(clip_loop)
512
513 nir_def *trans[2];
514 get_depth_range_transform(b, packed, trans);
515
516 nir_def *d0 = get_window_space_depth(b, nir_load_var(b, vert0), trans);
517 nir_def *d1 = get_window_space_depth(b, nir_load_var(b, vert1), trans);
518
519 nir_def *dmin = nir_fmin(b, d0, d1);
520 nir_def *dmax = nir_fmax(b, d0, d1);
521
522 nir_def *fdmin = nir_fmul_imm(b, dmin, 4294967295.0);
523 nir_def *idmin = nir_f2uN(b, fdmin, 32);
524
525 nir_def *fdmax = nir_fmul_imm(b, dmax, 4294967295.0);
526 nir_def *idmax = nir_f2uN(b, fdmax, 32);
527
528 update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed);
529 }
530
531 static void
build_planar_primitive_nir_shader(nir_builder * b,union state_key state,bool packed)532 build_planar_primitive_nir_shader(nir_builder *b, union state_key state, bool packed)
533 {
534 const int num_in_vert = b->shader->info.gs.vertices_in;
535 assert(num_in_vert == 3 || num_in_vert == 4);
536
537 nir_def *v[4];
538 get_input_vertices(b, v);
539
540 if (state.face_culling_enabled)
541 face_culling(b, v, packed);
542
543 /* fast frustum culling, this should filter out most primitives */
544 fast_frustum_culling(b, v);
545
546 const int num_clip_planes = 6 + state.num_user_clip_planes;
547 const int max_vert = num_in_vert + num_clip_planes;
548
549 /* TODO: could use shared memory (ie. AMD GPU LDS) for this array
550 * to reduce register usage.
551 */
552 nir_variable *vert = nir_local_variable_create(
553 b->impl, glsl_array_type(glsl_vec4_type(), max_vert, 0), "vert");
554 for (int i = 0; i < num_in_vert; i++)
555 nir_store_array_var_imm(b, vert, i, v[i], 0xf);
556
557 nir_variable *num_vert =
558 nir_local_variable_create(b->impl, glsl_int_type(), "num_vert");
559 nir_store_var(b, num_vert, nir_imm_int(b, num_in_vert), 1);
560
561 nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed);
562
563 /* accurate clipping with all clip planes */
564 begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes))
565 {
566 nir_def *plane = nir_load_array_var(b, clip_planes, idx);
567 clip_with_plane(b, vert, num_vert, max_vert, plane);
568 }
569 end_for_loop(clip_loop)
570
571 nir_def *trans[2];
572 get_depth_range_transform(b, packed, trans);
573
574 nir_variable *dmin =
575 nir_local_variable_create(b->impl, glsl_float_type(), "dmin");
576 nir_store_var(b, dmin, nir_imm_float(b, 1), 1);
577
578 nir_variable *dmax =
579 nir_local_variable_create(b->impl, glsl_float_type(), "dmax");
580 nir_store_var(b, dmax, nir_imm_float(b, 0), 1);
581
582 begin_for_loop(depth_loop, nir_load_var(b, num_vert))
583 {
584 nir_def *vtx = nir_load_array_var(b, vert, idx);
585 nir_def *depth = get_window_space_depth(b, vtx, trans);
586 nir_store_var(b, dmin, nir_fmin(b, nir_load_var(b, dmin), depth), 1);
587 nir_store_var(b, dmax, nir_fmax(b, nir_load_var(b, dmax), depth), 1);
588 }
589 end_for_loop(depth_loop)
590
591 nir_def *fdmin = nir_fmul_imm(b, nir_load_var(b, dmin), 4294967295.0);
592 nir_def *idmin = nir_f2uN(b, fdmin, 32);
593
594 nir_def *fdmax = nir_fmul_imm(b, nir_load_var(b, dmax), 4294967295.0);
595 nir_def *idmax = nir_f2uN(b, fdmax, 32);
596
597 update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed);
598 }
599
600 static void *
hw_select_create_gs(struct st_context * st,union state_key state)601 hw_select_create_gs(struct st_context *st, union state_key state)
602 {
603 const nir_shader_compiler_options *options =
604 st_get_nir_compiler_options(st, MESA_SHADER_GEOMETRY);
605
606 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
607 "hw select GS");
608
609 nir_shader *nir = b.shader;
610 nir->info.inputs_read = VARYING_BIT_POS;
611 nir->num_uniforms = DIV_ROUND_UP(sizeof(struct geometry_constant), (4 * sizeof(float)));
612 nir->info.num_ssbos = 1;
613 nir->info.gs.output_primitive = MESA_PRIM_POINTS;
614 nir->info.gs.vertices_out = 1;
615 nir->info.gs.invocations = 1;
616 nir->info.gs.active_stream_mask = 1;
617 nir->info.io_lowered = true;
618
619 if (state.result_offset_from_attribute)
620 nir->info.inputs_read |= VARYING_BIT_VAR(0);
621
622 bool packed = st->ctx->Const.PackedDriverUniformStorage;
623
624 switch (state.primitive) {
625 case HW_SELECT_PRIM_POINTS:
626 nir->info.gs.input_primitive = MESA_PRIM_POINTS;
627 nir->info.gs.vertices_in = 1;
628 build_point_nir_shader(&b, state, packed);
629 break;
630 case HW_SELECT_PRIM_LINES:
631 nir->info.gs.input_primitive = MESA_PRIM_LINES;
632 nir->info.gs.vertices_in = 2;
633 build_line_nir_shader(&b, state, packed);
634 break;
635 case HW_SELECT_PRIM_TRIANGLES:
636 nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
637 nir->info.gs.vertices_in = 3;
638 build_planar_primitive_nir_shader(&b, state, packed);
639 break;
640 case HW_SELECT_PRIM_QUADS:
641 /* geometry shader has no quad primitive, use lines_adjacency instead */
642 nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
643 nir->info.gs.vertices_in = 4;
644 build_planar_primitive_nir_shader(&b, state, packed);
645 break;
646 default:
647 unreachable("unexpected primitive");
648 }
649
650 nir_lower_returns(nir);
651
652 return st_nir_finish_builtin_shader(st, nir);
653 }
654
655 bool
st_draw_hw_select_prepare_common(struct gl_context * ctx)656 st_draw_hw_select_prepare_common(struct gl_context *ctx)
657 {
658 struct st_context *st = st_context(ctx);
659 if (ctx->GeometryProgram._Current ||
660 ctx->TessCtrlProgram._Current ||
661 ctx->TessEvalProgram._Current) {
662 fprintf(stderr, "HW GL_SELECT does not support user geometry/tessellation shader\n");
663 return false;
664 }
665
666 struct geometry_constant consts;
667
668 float n = ctx->ViewportArray[0].Near;
669 float f = ctx->ViewportArray[0].Far;
670 consts.depth_scale = (f - n) / 2;
671 consts.depth_transport = (f + n) / 2;
672
673 /* this field is not used when face culling disabled */
674 consts.culling_config =
675 (ctx->Polygon.CullFaceMode == GL_BACK) ^
676 (ctx->Polygon.FrontFace == GL_CCW);
677
678 /* this field is not used when passing result offset by attribute */
679 consts.result_offset = st->ctx->Select.ResultOffset;
680
681 int num_planes = 0;
682 u_foreach_bit(i, ctx->Transform.ClipPlanesEnabled) {
683 COPY_4V(consts.clip_planes[num_planes], ctx->Transform._ClipUserPlane[i]);
684 num_planes++;
685 }
686
687 struct pipe_constant_buffer cb;
688 cb.buffer = NULL;
689 cb.user_buffer = &consts;
690 cb.buffer_offset = 0;
691 cb.buffer_size = sizeof(consts) - (MAX_CLIP_PLANES - num_planes) * 4 * sizeof(float);
692
693 struct pipe_context *pipe = st->pipe;
694 pipe->set_constant_buffer(pipe, PIPE_SHADER_GEOMETRY, 0, false, &cb);
695
696 struct pipe_shader_buffer buffer;
697 memset(&buffer, 0, sizeof(buffer));
698 buffer.buffer = ctx->Select.Result->buffer;
699 buffer.buffer_size = MAX_NAME_STACK_RESULT_NUM * 3 * sizeof(int);
700
701 pipe->set_shader_buffers(pipe, PIPE_SHADER_GEOMETRY, 0, 1, &buffer, 0x1);
702
703 return true;
704 }
705
706 static union state_key
make_state_key(struct gl_context * ctx,int mode)707 make_state_key(struct gl_context *ctx, int mode)
708 {
709 union state_key state = {0};
710
711 switch (mode) {
712 case GL_POINTS:
713 state.primitive = HW_SELECT_PRIM_POINTS;
714 break;
715 case GL_LINES:
716 case GL_LINE_STRIP:
717 case GL_LINE_LOOP:
718 state.primitive = HW_SELECT_PRIM_LINES;
719 break;
720 case GL_QUADS:
721 state.primitive = HW_SELECT_PRIM_QUADS;
722 break;
723 case GL_TRIANGLES:
724 case GL_TRIANGLE_STRIP:
725 case GL_TRIANGLE_FAN:
726 /* These will be broken into triangles. */
727 case GL_QUAD_STRIP:
728 case GL_POLYGON:
729 state.primitive = HW_SELECT_PRIM_TRIANGLES;
730 break;
731 default:
732 fprintf(stderr, "HW GL_SELECT does not support draw mode %s\n",
733 _mesa_enum_to_string(mode));
734 return (union state_key){0};
735 }
736
737 /* TODO: support gl_ClipDistance/gl_CullDistance, but it costs more regs */
738 struct gl_program *vp = ctx->VertexProgram._Current;
739 if (vp->info.clip_distance_array_size || vp->info.cull_distance_array_size) {
740 fprintf(stderr, "HW GL_SELECT does not support gl_ClipDistance/gl_CullDistance\n");
741 return (union state_key){0};
742 }
743
744 state.num_user_clip_planes = util_bitcount(ctx->Transform.ClipPlanesEnabled);
745
746 /* face culling only apply to 2D primitives */
747 if (state.primitive == HW_SELECT_PRIM_QUADS ||
748 state.primitive == HW_SELECT_PRIM_TRIANGLES)
749 state.face_culling_enabled = ctx->Polygon.CullFlag;
750
751 state.result_offset_from_attribute =
752 ctx->VertexProgram._VPMode == VP_MODE_FF &&
753 (ctx->VertexProgram._VaryingInputs & VERT_BIT_SELECT_RESULT_OFFSET);
754
755 return state;
756 }
757
758 bool
st_draw_hw_select_prepare_mode(struct gl_context * ctx,struct pipe_draw_info * info)759 st_draw_hw_select_prepare_mode(struct gl_context *ctx, struct pipe_draw_info *info)
760 {
761 union state_key key = make_state_key(ctx, info->mode);
762 if (!key.u32)
763 return false;
764
765 struct st_context *st = st_context(ctx);
766 if (!st->hw_select_shaders)
767 st->hw_select_shaders = _mesa_hash_table_create_u32_keys(NULL);
768
769 struct hash_entry *he = _mesa_hash_table_search(st->hw_select_shaders,
770 (void*)(uintptr_t)key.u32);
771 void *gs;
772 if (!he) {
773 gs = hw_select_create_gs(st, key);
774 if (!gs)
775 return false;
776
777 _mesa_hash_table_insert(st->hw_select_shaders, (void*)(uintptr_t)key.u32, gs);
778 } else
779 gs = he->data;
780
781 struct cso_context *cso = st->cso_context;
782 cso_set_geometry_shader_handle(cso, gs);
783
784 /* Replace draw mode with equivalent one which geometry shader support.
785 *
786 * New mode consume same vertex buffer structure and produce primitive with
787 * same vertices (no need to be same type of primitive, because geometry shader
788 * operate on vertives and emit nothing).
789 *
790 * We can break QUAD and POLYGON to triangles with same shape. But we can't futher
791 * break them into single line or point because new primitive need to contain >=3
792 * vertices so that it's still handled in 2D (planar) way instead of 1D (line) or
793 * 0D (point) way which have different algorithm.
794 */
795 switch (info->mode) {
796 case GL_QUADS:
797 info->mode = GL_LINES_ADJACENCY;
798 break;
799 case GL_QUAD_STRIP:
800 info->mode = GL_TRIANGLE_STRIP;
801 break;
802 case GL_POLYGON:
803 info->mode = GL_TRIANGLE_FAN;
804 break;
805 default:
806 break;
807 }
808
809 /* Only normal glBegin/End draws pass result offset by attribute to avoid flush
810 * vertices when change name stack, so multiple glBegin/End sections before/after
811 * name stack calls can be merged to a single draw call. To achieve this We mark
812 * name stack result buffer used in glEnd instead of the last draw call.
813 *
814 * Other case like glDrawArrays and display list replay won't merge draws cross
815 * name stack calls, so we just mark name stack result buffer used here.
816 */
817 if (!key.result_offset_from_attribute)
818 ctx->Select.ResultUsed = GL_TRUE;
819
820 return true;
821 }
822