1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "main/enums.h"
25 #include "main/context.h"
26
27 #include "st_context.h"
28 #include "st_nir.h"
29 #include "st_draw.h"
30
31 #include "nir.h"
32 #include "nir_builtin_builder.h"
33
34 #include "u_memory.h"
35
/* Key selecting one variant of the HW select geometry shader.
 *
 * The bitfields describe the state the shader is specialised for; u32
 * overlays them so the whole key can be used as a single integer (it is
 * the hash-table key for the shader cache, and a value of 0 is treated as
 * "no valid key" by st_draw_hw_select_prepare_mode()).
 */
union state_key {
   struct {
      /* number of enabled user clip planes (bit count of ClipPlanesEnabled) */
      unsigned num_user_clip_planes:4;
      /* polygon face culling is enabled; only set for triangles/quads */
      unsigned face_culling_enabled:1;
      /* result buffer offset comes from a vertex attribute, not a uniform */
      unsigned result_offset_from_attribute:1;
      /* one of enum primitive_state */
      unsigned primitive:4;
   };
   uint32_t u32;
};
45
/* Input primitive class the geometry shader variant is built for.
 *
 * HW_SELECT_PRIM_NONE is 0 so that a zeroed state_key never names a real
 * primitive; hw_select_create_gs() treats it as unreachable.
 */
enum primitive_state {
   HW_SELECT_PRIM_NONE,
   HW_SELECT_PRIM_POINTS,    /* 1 input vertex */
   HW_SELECT_PRIM_LINES,     /* 2 input vertices */
   HW_SELECT_PRIM_TRIANGLES, /* 3 input vertices */
   HW_SELECT_PRIM_QUADS,     /* 4 input vertices, fed as lines-adjacency */
};
53
/* Layout of the constant buffer uploaded for the geometry shader by
 * st_draw_hw_select_prepare_common(). The shader-side uniform locations
 * are derived from the member offsets of this struct (see
 * set_uniform_location), so the member order is part of the interface.
 */
struct geometry_constant {
   /* (far - near) / 2 of viewport 0 */
   float depth_scale;
   /* (far + near) / 2 of viewport 0 */
   float depth_transport;
   /* (CullFaceMode == GL_BACK) ^ (FrontFace == GL_CCW); unused when face
    * culling is disabled
    */
   uint32_t culling_config;
   /* offset into the result buffer; unused when the offset is passed by
    * vertex attribute
    */
   uint32_t result_offset;
   /* enabled user clip planes, packed to the front of the array */
   float clip_planes[MAX_CLIP_PLANES][4];
};
61
/* Point a uniform variable at a member of struct geometry_constant.
 *
 * var:    pointer to the variable whose data.driver_location and
 *         data.location_frac are written
 * field:  member designator inside struct geometry_constant
 * packed: non-zero when driver_location is counted in 4-byte units,
 *         zero when it is counted in 16-byte units
 *
 * location_frac always receives the dword index within the 16-byte slot.
 *
 * All arguments are parenthesised and the temporary carries a trailing
 * underscore so the macro neither mis-parses expression arguments nor
 * shadows a caller variable named 'offset'.
 */
#define set_uniform_location(var, field, packed)                           \
   do {                                                                    \
      const unsigned byte_offset_ =                                        \
         Offset(struct geometry_constant, field);                          \
      (var)->data.driver_location = byte_offset_ >> ((packed) ? 2 : 4);    \
      (var)->data.location_frac = (byte_offset_ >> 2) & 0x3;               \
   } while (0)
68
69 static nir_ssa_def *
has_nan_or_inf(nir_builder * b,nir_ssa_def * v)70 has_nan_or_inf(nir_builder *b, nir_ssa_def *v)
71 {
72 nir_ssa_def *nan = nir_bany_fnequal4(b, v, v);
73
74 nir_ssa_def *imm = nir_imm_float(b, INFINITY);
75 nir_ssa_def *inf = nir_bany(b, nir_feq(b, nir_fabs(b, v), imm));
76
77 return nir_ior(b, nan, inf);
78 }
79
80 static void
return_if_true(nir_builder * b,nir_ssa_def * cond)81 return_if_true(nir_builder *b, nir_ssa_def *cond)
82 {
83 nir_if *if_cond = nir_push_if(b, cond);
84 nir_jump(b, nir_jump_return);
85 nir_pop_if(b, if_cond);
86 }
87
88 static void
get_input_vertices(nir_builder * b,nir_ssa_def ** v)89 get_input_vertices(nir_builder *b, nir_ssa_def **v)
90 {
91 const int num_in_vert = b->shader->info.gs.vertices_in;
92
93 nir_variable *in_pos = nir_variable_create(
94 b->shader, nir_var_shader_in, glsl_array_type(glsl_vec4_type(), num_in_vert, 0),
95 "gl_Position");
96 in_pos->data.location = VARYING_SLOT_POS;
97
98 nir_ssa_def *is_nan_or_inf = NULL;
99 for (int i = 0; i < num_in_vert; i++) {
100 v[i] = nir_load_array_var_imm(b, in_pos, i);
101 nir_ssa_def *r = has_nan_or_inf(b, v[i]);
102 is_nan_or_inf = i ? nir_ior(b, is_nan_or_inf, r) : r;
103 }
104 return_if_true(b, is_nan_or_inf);
105 }
106
107 static void
face_culling(nir_builder * b,nir_ssa_def ** v,bool packed)108 face_culling(nir_builder *b, nir_ssa_def **v, bool packed)
109 {
110 /* use the z value of the face normal to determine if the face points to us:
111 * Nz = (x1 - x0) * (y2 - y0) - (y1 - y0) * (x2 - x0)
112 *
113 * it should be in NDC (Normalized Device Coordinate), but now we are in clip
114 * space (Vd = Vc / Vc.w), so multiply Nz with w0*w1*w2 to get the clip space
115 * value:
116 * det = x0 * (y1 * w2 - y2 * w1) +
117 * x1 * (y2 * w0 - y0 * w2) +
118 * x2 * (y0 * w1 - y1 * w0)
119 *
120 * we only care about the sign of the det, but also need to count the sign of
121 * w0/w1/w2 as a negtive w would change the direction of Nz < 0
122 */
123 nir_ssa_def *y1w2 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[2], 3));
124 nir_ssa_def *y2w1 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[1], 3));
125 nir_ssa_def *y2w0 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[0], 3));
126 nir_ssa_def *y0w2 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[2], 3));
127 nir_ssa_def *y0w1 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[1], 3));
128 nir_ssa_def *y1w0 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[0], 3));
129 nir_ssa_def *t0 = nir_fmul(b, nir_channel(b, v[0], 0), nir_fsub(b, y1w2, y2w1));
130 nir_ssa_def *t1 = nir_fmul(b, nir_channel(b, v[1], 0), nir_fsub(b, y2w0, y0w2));
131 nir_ssa_def *t2 = nir_fmul(b, nir_channel(b, v[2], 0), nir_fsub(b, y0w1, y1w0));
132 nir_ssa_def *det = nir_fadd(b, nir_fadd(b, t0, t1), t2);
133
134 /* invert det sign once any vertex w < 0 */
135 nir_ssa_def *n0 = nir_flt(b, nir_channel(b, v[0], 3), nir_imm_float(b, 0));
136 nir_ssa_def *n1 = nir_flt(b, nir_channel(b, v[1], 3), nir_imm_float(b, 0));
137 nir_ssa_def *n2 = nir_flt(b, nir_channel(b, v[2], 3), nir_imm_float(b, 0));
138 nir_ssa_def *cond = nir_ixor(b, nir_ixor(b, n0, n1), n2);
139 det = nir_bcsel(b, cond, nir_fneg(b, det), det);
140
141 nir_variable *culling_config = nir_variable_create(
142 b->shader, nir_var_uniform, glsl_uint_type(), "culling_config");
143 set_uniform_location(culling_config, culling_config, packed);
144 nir_ssa_def *config = nir_i2b(b, nir_load_var(b, culling_config));
145
146 /* det < 0 then z points to camera */
147 nir_ssa_def *zero = nir_imm_zero(b, 1, det->bit_size);
148 nir_ssa_def *is_zero = nir_feq(b, det, zero);
149 nir_ssa_def *is_neg = nir_flt(b, det, zero);
150 nir_ssa_def *cull = nir_ixor(b, is_neg, config);
151 return_if_true(b, nir_ior(b, is_zero, cull));
152 }
153
154 static void
fast_frustum_culling(nir_builder * b,nir_ssa_def ** v)155 fast_frustum_culling(nir_builder *b, nir_ssa_def **v)
156 {
157 nir_ssa_def *cull = NULL;
158
159 /* there are six culling planes for the visible volume:
160 * 1. x + w = 0
161 * 2. -x + w = 0
162 * 3. y + w = 0
163 * 4. -y + w = 0
164 * 5. z + w = 0
165 * 6. -z + w = 0
166 *
167 * if all vertices of the primitive are outside (plane equation <0) of
168 * any plane, the primitive must be invisible.
169 */
170 for (int i = 0; i < 6; i++) {
171 nir_ssa_def *outside = NULL;
172
173 for (int j = 0; j < b->shader->info.gs.vertices_in; j++) {
174 nir_ssa_def *c = nir_channel(b, v[j], i >> 1);
175 if (i & 1)
176 c = nir_fneg(b, c);
177
178 nir_ssa_def *r = nir_flt(b, nir_channel(b, v[j], 3), c);
179 outside = j ? nir_iand(b, outside, r) : r;
180 }
181
182 cull = i ? nir_ior(b, cull, outside) : outside;
183 }
184
185 return_if_true(b, cull);
186 }
187
188 static nir_ssa_def *
get_intersection(nir_builder * b,nir_ssa_def * v1,nir_ssa_def * v2,nir_ssa_def * d1,nir_ssa_def * d2)189 get_intersection(nir_builder *b, nir_ssa_def *v1, nir_ssa_def *v2,
190 nir_ssa_def *d1, nir_ssa_def *d2)
191 {
192 nir_ssa_def *factor = nir_fdiv(b, d1, nir_fsub(b, d1, d2));
193 return nir_fmad(b, nir_fsub(b, v2, v1), factor, v1);
194 }
195
/* Emit the head of a counted NIR loop equivalent to
 *
 *    for (idx = 0; idx < max; idx++) { ... }
 *
 * 'name' becomes the nir_loop pointer and, suffixed, the local counter
 * variable and the range-check nir_if. Between begin_for_loop() and the
 * matching end_for_loop() the current counter is available as the SSA
 * value 'idx'. The 'max' expression is expanded after the loop has been
 * pushed, so instructions it builds land inside the loop.
 *
 * The macro opens a C block that end_for_loop() closes; it expects a
 * nir_builder pointer called 'b' in scope.
 */
#define begin_for_loop(name, max)                                          \
   nir_variable *name##_index =                                            \
      nir_local_variable_create(b->impl, glsl_int_type(), #name "_i");     \
   nir_store_var(b, name##_index, nir_imm_int(b, 0), 1);                   \
                                                                           \
   nir_loop *name = nir_push_loop(b);                                      \
   {                                                                       \
      nir_ssa_def *idx = nir_load_var(b, name##_index);                    \
      nir_if *name##_in_range = nir_push_if(b, nir_ilt(b, idx, max));

/* Close a loop opened with begin_for_loop(name, ...): increment the
 * counter at the end of the body, and break out of the loop on the else
 * side of the range check.
 */
#define end_for_loop(name)                                                 \
      nir_store_var(b, name##_index, nir_iadd_imm(b, idx, 1), 1);          \
      nir_push_else(b, name##_in_range);                                   \
      nir_jump(b, nir_jump_break);                                         \
      nir_pop_if(b, name##_in_range);                                      \
   }                                                                       \
   nir_pop_loop(b, name);
213
214 static void
clip_with_plane(nir_builder * b,nir_variable * vert,nir_variable * num_vert,int max_vert,nir_ssa_def * plane)215 clip_with_plane(nir_builder *b, nir_variable *vert, nir_variable *num_vert,
216 int max_vert, nir_ssa_def *plane)
217 {
218 nir_variable *all_clipped = nir_local_variable_create(
219 b->impl, glsl_bool_type(), "all_clipped");
220 nir_store_var(b, all_clipped, nir_imm_true(b), 1);
221
222 nir_variable *dist = nir_local_variable_create(
223 b->impl, glsl_array_type(glsl_float_type(), max_vert, 0), "dist");
224
225 nir_ssa_def *num = nir_load_var(b, num_vert);
226 begin_for_loop(dist_loop, num)
227 {
228 nir_ssa_def *v = nir_load_array_var(b, vert, idx);
229 nir_ssa_def *d = nir_fdot(b, v, plane);
230 nir_store_array_var(b, dist, idx, d, 1);
231
232 nir_ssa_def *clipped = nir_flt(b, d, nir_imm_float(b, 0));
233 nir_store_var(b, all_clipped,
234 nir_iand(b, nir_load_var(b, all_clipped), clipped), 1);
235 }
236 end_for_loop(dist_loop)
237
238 return_if_true(b, nir_load_var(b, all_clipped));
239
240 /* Use +/0/- to denote the dist[i] sign, which means:
241 * +: inside plane
242 * -: outside plane
243 * 0: just on the plane
244 *
245 * Some example:
246 * ++++: all vertex not clipped
247 * ----: all vertex clipped
248 * +-++: one vertex clipped, need to insert two vertex at '-', array grow
249 * +--+: two vertex clipped, need to insert two vertex at '--', array same
250 * +---: three vertex clipped, need to insert two vertex at '---', array trim
251 * +-0+: one vertex clipped, need to insert one vertex at '-', array same
252 *
253 * Plane clip only produce convex polygon, so '-' must be contigous, there's
254 * no '+-+-', so one clip plane can only grow array by 1.
255 */
256
257 /* when array grow or '-' has been replaced with inserted vertex, save the
258 * original vert to be used by following calculation.
259 */
260 nir_variable *saved =
261 nir_local_variable_create(b->impl, glsl_vec4_type(), "saved");
262
263 nir_variable *vert_index =
264 nir_local_variable_create(b->impl, glsl_int_type(), "vert_index");
265 nir_store_var(b, vert_index, nir_imm_int(b, 0), 1);
266
267 begin_for_loop(vert_loop, num)
268 {
269 nir_ssa_def *di = nir_load_array_var(b, dist, idx);
270 nir_if *if_clipped = nir_push_if(b, nir_flt(b, di, nir_imm_float(b, 0)));
271 {
272 /* - case, we need to take care of sign change and insert vertex */
273
274 nir_ssa_def *prev = nir_bcsel(b, nir_ieq_imm(b, idx, 0),
275 nir_iadd_imm(b, num, -1),
276 nir_iadd_imm(b, idx, -1));
277 nir_ssa_def *dp = nir_load_array_var(b, dist, prev);
278 nir_if *prev_if = nir_push_if(b, nir_flt(b, nir_imm_float(b, 0), dp));
279 {
280 /* +- case, replace - with inserted vertex
281 * assert(vert_index <= idx), array is sure to not grow here
282 * but need to save vert[idx] when vert_index==idx
283 */
284
285 nir_ssa_def *vi = nir_load_array_var(b, vert, idx);
286 nir_store_var(b, saved, vi, 0xf);
287
288 nir_ssa_def *vp = nir_load_array_var(b, vert, prev);
289 nir_ssa_def *iv = get_intersection(b, vp, vi, dp, di);
290 nir_ssa_def *index = nir_load_var(b, vert_index);
291 nir_store_array_var(b, vert, index, iv, 0xf);
292
293 nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
294 }
295 nir_pop_if(b, prev_if);
296
297 nir_ssa_def *next = nir_bcsel(b, nir_ieq(b, idx, nir_iadd_imm(b, num, -1)),
298 nir_imm_int(b, 0), nir_iadd_imm(b, idx, 1));
299 nir_ssa_def *dn = nir_load_array_var(b, dist, next);
300 nir_if *next_if = nir_push_if(b, nir_flt(b, nir_imm_float(b, 0), dn));
301 {
302 /* -+ case, may grow array:
303 * vert_index > idx: +-+ case, grow array, current vertex in 'saved',
304 * save next + to 'saved', will replace it with inserted vertex.
305 * vert_index <= idx: --+ case, will replace last - with inserted vertex,
306 * no need to save last -, because + case won't use - value.
307 */
308
309 nir_ssa_def *index = nir_load_var(b, vert_index);
310 nir_ssa_def *vi = nir_bcsel(b, nir_flt(b, idx, index),
311 nir_load_var(b, saved),
312 nir_load_array_var(b, vert, idx));
313 nir_ssa_def *vn = nir_load_array_var(b, vert, next);
314 nir_ssa_def *iv = get_intersection(b, vn, vi, dn, di);
315
316 nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf);
317 nir_store_array_var(b, vert, index, iv, 0xf);
318
319 nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
320 }
321 nir_pop_if(b, next_if);
322 }
323 nir_push_else(b, if_clipped);
324 {
325 /* +/0 case, just keep the vert
326 * vert_index > idx: array grew case, vert[idx] is inserted vertex or prev
327 * +/0 vertex, current vertex is in 'saved', need to save next vertex
328 * vert_index < idx: array trim case
329 */
330
331 nir_ssa_def *index = nir_load_var(b, vert_index);
332 nir_ssa_def *vi = nir_bcsel(b, nir_flt(b, idx, index),
333 nir_load_var(b, saved),
334 nir_load_array_var(b, vert, idx));
335
336 nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf);
337 nir_store_array_var(b, vert, index, vi, 0xf);
338
339 nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
340 }
341 nir_pop_if(b, if_clipped);
342 }
343 end_for_loop(vert_loop);
344
345 nir_copy_var(b, num_vert, vert_index);
346 }
347
348 static nir_ssa_def *
get_user_clip_plane(nir_builder * b,int index,bool packed)349 get_user_clip_plane(nir_builder *b, int index, bool packed)
350 {
351 char name[16];
352 snprintf(name, sizeof(name), "gl_ClipPlane%d", index);
353 nir_variable *plane = nir_variable_create(
354 b->shader, nir_var_uniform, glsl_vec4_type(), name);
355
356 set_uniform_location(plane, clip_planes[index][0], packed);
357
358 return nir_load_var(b, plane);
359 }
360
361 static void
get_depth_range_transform(nir_builder * b,bool packed,nir_ssa_def ** trans)362 get_depth_range_transform(nir_builder *b, bool packed, nir_ssa_def **trans)
363 {
364 nir_variable *depth_scale = nir_variable_create(
365 b->shader, nir_var_uniform, glsl_float_type(), "depth_scale");
366 set_uniform_location(depth_scale, depth_scale, packed);
367
368 nir_variable *depth_transport = nir_variable_create(
369 b->shader, nir_var_uniform, glsl_float_type(), "depth_transport");
370 set_uniform_location(depth_transport, depth_transport, packed);
371
372 trans[0] = nir_load_var(b, depth_scale);
373 trans[1] = nir_load_var(b, depth_transport);
374 }
375
376 static nir_ssa_def *
get_window_space_depth(nir_builder * b,nir_ssa_def * v,nir_ssa_def ** trans)377 get_window_space_depth(nir_builder *b, nir_ssa_def *v, nir_ssa_def **trans)
378 {
379 nir_ssa_def *z = nir_channel(b, v, 2);
380 nir_ssa_def *w = nir_channel(b, v, 3);
381
382 /* do perspective division, if w==0, xyz must be 0 too (otherwise can't pass
383 * the clip test), 0/0=NaN, but we want it to be the nearest point.
384 */
385 nir_ssa_def *c = nir_feq(b, w, nir_imm_float(b, 0));
386 nir_ssa_def *d = nir_bcsel(b, c, nir_imm_float(b, -1), nir_fdiv(b, z, w));
387
388 /* map [-1, 1] to [near, far] set by glDepthRange(near, far) */
389 return nir_fmad(b, trans[0], d, trans[1]);
390 }
391
392 static void
update_result_buffer(nir_builder * b,nir_ssa_def * dmin,nir_ssa_def * dmax,bool offset_from_attribute,bool packed)393 update_result_buffer(nir_builder *b, nir_ssa_def *dmin, nir_ssa_def *dmax,
394 bool offset_from_attribute, bool packed)
395 {
396 nir_ssa_def *offset;
397 if (offset_from_attribute) {
398 nir_variable *in_offset = nir_variable_create(
399 b->shader, nir_var_shader_in,
400 glsl_array_type(glsl_uint_type(), b->shader->info.gs.vertices_in, 0),
401 "result_offset");
402 in_offset->data.location = VARYING_SLOT_VAR0;
403 offset = nir_load_array_var_imm(b, in_offset, 0);
404 } else {
405 nir_variable *uni_offset = nir_variable_create(
406 b->shader, nir_var_uniform, glsl_uint_type(), "result_offset");
407 set_uniform_location(uni_offset, result_offset, packed);
408 offset = nir_load_var(b, uni_offset);
409 }
410
411 nir_variable_create(b->shader, nir_var_mem_ssbo,
412 glsl_array_type(glsl_uint_type(), 0, 0), "result");
413 /* driver_location = 0 (slot 0) */
414
415 nir_ssa_def *ssbo = nir_imm_int(b, 0);
416 nir_ssbo_atomic_exchange(b, 32, ssbo, offset, nir_imm_int(b, 1));
417 nir_ssbo_atomic_umin(b, 32, ssbo, nir_iadd_imm(b, offset, 4), dmin);
418 nir_ssbo_atomic_umax(b, 32, ssbo, nir_iadd_imm(b, offset, 8), dmax);
419 }
420
421 static void
build_point_nir_shader(nir_builder * b,union state_key state,bool packed)422 build_point_nir_shader(nir_builder *b, union state_key state, bool packed)
423 {
424 assert(b->shader->info.gs.vertices_in == 1);
425
426 nir_ssa_def *v;
427 get_input_vertices(b, &v);
428
429 fast_frustum_culling(b, &v);
430
431 nir_ssa_def *outside = NULL;
432 for (int i = 0; i < state.num_user_clip_planes; i++) {
433 nir_ssa_def *p = get_user_clip_plane(b, i, packed);
434 nir_ssa_def *d = nir_fdot(b, v, p);
435 nir_ssa_def *r = nir_flt(b, d, nir_imm_float(b, 0));
436 outside = i ? nir_ior(b, outside, r) : r;
437 }
438 if (outside)
439 return_if_true(b, outside);
440
441 nir_ssa_def *trans[2];
442 get_depth_range_transform(b, packed, trans);
443
444 nir_ssa_def *depth = get_window_space_depth(b, v, trans);
445 nir_ssa_def *fdepth = nir_fmul_imm(b, depth, 4294967295.0);
446 nir_ssa_def *idepth = nir_f2uN(b, fdepth, 32);
447
448 update_result_buffer(b, idepth, idepth, state.result_offset_from_attribute, packed);
449 }
450
451 static nir_variable *
create_clip_planes(nir_builder * b,int num_clip_planes,bool packed)452 create_clip_planes(nir_builder *b, int num_clip_planes, bool packed)
453 {
454 nir_variable *clip_planes = nir_local_variable_create(
455 b->impl, glsl_array_type(glsl_vec4_type(), num_clip_planes, 0), "clip_planes");
456
457 nir_ssa_def *unit_clip_planes[6] = {
458 nir_imm_vec4(b, 1, 0, 0, 1),
459 nir_imm_vec4(b, -1, 0, 0, 1),
460 nir_imm_vec4(b, 0, 1, 0, 1),
461 nir_imm_vec4(b, 0, -1, 0, 1),
462 nir_imm_vec4(b, 0, 0, 1, 1),
463 nir_imm_vec4(b, 0, 0, -1, 1),
464 };
465 for (int i = 0; i < 6; i++)
466 nir_store_array_var_imm(b, clip_planes, i, unit_clip_planes[i], 0xf);
467
468 for (int i = 6; i < num_clip_planes; i++) {
469 nir_ssa_def *p = get_user_clip_plane(b, i - 6, packed);
470 nir_store_array_var_imm(b, clip_planes, i, p, 0xf);
471 }
472
473 return clip_planes;
474 }
475
476 static void
build_line_nir_shader(nir_builder * b,union state_key state,bool packed)477 build_line_nir_shader(nir_builder *b, union state_key state, bool packed)
478 {
479 assert(b->shader->info.gs.vertices_in == 2);
480
481 nir_ssa_def *v[2];
482 get_input_vertices(b, v);
483
484 fast_frustum_culling(b, v);
485
486 nir_variable *vert0 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert0");
487 nir_store_var(b, vert0, v[0], 0xf);
488
489 nir_variable *vert1 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert1");
490 nir_store_var(b, vert1, v[1], 0xf);
491
492 const int num_clip_planes = 6 + state.num_user_clip_planes;
493 nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed);
494
495 begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes))
496 {
497 nir_ssa_def *plane = nir_load_array_var(b, clip_planes, idx);
498 nir_ssa_def *v0 = nir_load_var(b, vert0);
499 nir_ssa_def *v1 = nir_load_var(b, vert1);
500 nir_ssa_def *d0 = nir_fdot(b, v0, plane);
501 nir_ssa_def *d1 = nir_fdot(b, v1, plane);
502 nir_ssa_def *n0 = nir_flt(b, d0, nir_imm_float(b, 0));
503 nir_ssa_def *n1 = nir_flt(b, d1, nir_imm_float(b, 0));
504
505 return_if_true(b, nir_iand(b, n0, n1));
506
507 nir_if *clip_if = nir_push_if(b, nir_ior(b, n0, n1));
508 {
509 nir_ssa_def *iv = get_intersection(b, v0, v1, d0, d1);
510 nir_store_var(b, vert0, nir_bcsel(b, n0, iv, v0), 0xf);
511 nir_store_var(b, vert1, nir_bcsel(b, n1, iv, v1), 0xf);
512 }
513 nir_pop_if(b, clip_if);
514 }
515 end_for_loop(clip_loop)
516
517 nir_ssa_def *trans[2];
518 get_depth_range_transform(b, packed, trans);
519
520 nir_ssa_def *d0 = get_window_space_depth(b, nir_load_var(b, vert0), trans);
521 nir_ssa_def *d1 = get_window_space_depth(b, nir_load_var(b, vert1), trans);
522
523 nir_ssa_def *dmin = nir_fmin(b, d0, d1);
524 nir_ssa_def *dmax = nir_fmax(b, d0, d1);
525
526 nir_ssa_def *fdmin = nir_fmul_imm(b, dmin, 4294967295.0);
527 nir_ssa_def *idmin = nir_f2uN(b, fdmin, 32);
528
529 nir_ssa_def *fdmax = nir_fmul_imm(b, dmax, 4294967295.0);
530 nir_ssa_def *idmax = nir_f2uN(b, fdmax, 32);
531
532 update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed);
533 }
534
535 static void
build_planar_primitive_nir_shader(nir_builder * b,union state_key state,bool packed)536 build_planar_primitive_nir_shader(nir_builder *b, union state_key state, bool packed)
537 {
538 const int num_in_vert = b->shader->info.gs.vertices_in;
539 assert(num_in_vert == 3 || num_in_vert == 4);
540
541 nir_ssa_def *v[4];
542 get_input_vertices(b, v);
543
544 if (state.face_culling_enabled)
545 face_culling(b, v, packed);
546
547 /* fast frustum culling, this should filter out most primitives */
548 fast_frustum_culling(b, v);
549
550 const int num_clip_planes = 6 + state.num_user_clip_planes;
551 const int max_vert = num_in_vert + num_clip_planes;
552
553 /* TODO: could use shared memory (ie. AMD GPU LDS) for this array
554 * to reduce register usage.
555 */
556 nir_variable *vert = nir_local_variable_create(
557 b->impl, glsl_array_type(glsl_vec4_type(), max_vert, 0), "vert");
558 for (int i = 0; i < num_in_vert; i++)
559 nir_store_array_var_imm(b, vert, i, v[i], 0xf);
560
561 nir_variable *num_vert =
562 nir_local_variable_create(b->impl, glsl_int_type(), "num_vert");
563 nir_store_var(b, num_vert, nir_imm_int(b, num_in_vert), 1);
564
565 nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed);
566
567 /* accurate clipping with all clip planes */
568 begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes))
569 {
570 nir_ssa_def *plane = nir_load_array_var(b, clip_planes, idx);
571 clip_with_plane(b, vert, num_vert, max_vert, plane);
572 }
573 end_for_loop(clip_loop)
574
575 nir_ssa_def *trans[2];
576 get_depth_range_transform(b, packed, trans);
577
578 nir_variable *dmin =
579 nir_local_variable_create(b->impl, glsl_float_type(), "dmin");
580 nir_store_var(b, dmin, nir_imm_float(b, 1), 1);
581
582 nir_variable *dmax =
583 nir_local_variable_create(b->impl, glsl_float_type(), "dmax");
584 nir_store_var(b, dmax, nir_imm_float(b, 0), 1);
585
586 begin_for_loop(depth_loop, nir_load_var(b, num_vert))
587 {
588 nir_ssa_def *vtx = nir_load_array_var(b, vert, idx);
589 nir_ssa_def *depth = get_window_space_depth(b, vtx, trans);
590 nir_store_var(b, dmin, nir_fmin(b, nir_load_var(b, dmin), depth), 1);
591 nir_store_var(b, dmax, nir_fmax(b, nir_load_var(b, dmax), depth), 1);
592 }
593 end_for_loop(depth_loop)
594
595 nir_ssa_def *fdmin = nir_fmul_imm(b, nir_load_var(b, dmin), 4294967295.0);
596 nir_ssa_def *idmin = nir_f2uN(b, fdmin, 32);
597
598 nir_ssa_def *fdmax = nir_fmul_imm(b, nir_load_var(b, dmax), 4294967295.0);
599 nir_ssa_def *idmax = nir_f2uN(b, fdmax, 32);
600
601 update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed);
602 }
603
604 static void *
hw_select_create_gs(struct st_context * st,union state_key state)605 hw_select_create_gs(struct st_context *st, union state_key state)
606 {
607 const nir_shader_compiler_options *options =
608 st_get_nir_compiler_options(st, MESA_SHADER_GEOMETRY);
609
610 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
611 "hw select GS");
612
613 nir_shader *nir = b.shader;
614 nir->info.inputs_read = VARYING_BIT_POS;
615 nir->num_uniforms = DIV_ROUND_UP(sizeof(struct geometry_constant), (4 * sizeof(float)));
616 nir->info.num_ssbos = 1;
617 nir->info.gs.output_primitive = SHADER_PRIM_POINTS;
618 nir->info.gs.vertices_out = 1;
619 nir->info.gs.invocations = 1;
620 nir->info.gs.active_stream_mask = 1;
621
622 if (state.result_offset_from_attribute)
623 nir->info.inputs_read |= VARYING_BIT_VAR(0);
624
625 bool packed = st->ctx->Const.PackedDriverUniformStorage;
626
627 switch (state.primitive) {
628 case HW_SELECT_PRIM_POINTS:
629 nir->info.gs.input_primitive = SHADER_PRIM_POINTS;
630 nir->info.gs.vertices_in = 1;
631 build_point_nir_shader(&b, state, packed);
632 break;
633 case HW_SELECT_PRIM_LINES:
634 nir->info.gs.input_primitive = SHADER_PRIM_LINES;
635 nir->info.gs.vertices_in = 2;
636 build_line_nir_shader(&b, state, packed);
637 break;
638 case HW_SELECT_PRIM_TRIANGLES:
639 nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
640 nir->info.gs.vertices_in = 3;
641 build_planar_primitive_nir_shader(&b, state, packed);
642 break;
643 case HW_SELECT_PRIM_QUADS:
644 /* geometry shader has no quad primitive, use lines_adjacency instead */
645 nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
646 nir->info.gs.vertices_in = 4;
647 build_planar_primitive_nir_shader(&b, state, packed);
648 break;
649 default:
650 unreachable("unexpected primitive");
651 }
652
653 nir_lower_returns(nir);
654
655 return st_nir_finish_builtin_shader(st, nir);
656 }
657
658 bool
st_draw_hw_select_prepare_common(struct gl_context * ctx)659 st_draw_hw_select_prepare_common(struct gl_context *ctx)
660 {
661 struct st_context *st = st_context(ctx);
662 if (st->gp || st->tcp || st->tep) {
663 fprintf(stderr, "HW GL_SELECT does not support user geometry/tessellation shader\n");
664 return false;
665 }
666
667 struct geometry_constant consts;
668
669 float n = ctx->ViewportArray[0].Near;
670 float f = ctx->ViewportArray[0].Far;
671 consts.depth_scale = (f - n) / 2;
672 consts.depth_transport = (f + n) / 2;
673
674 /* this field is not used when face culling disabled */
675 consts.culling_config =
676 (ctx->Polygon.CullFaceMode == GL_BACK) ^
677 (ctx->Polygon.FrontFace == GL_CCW);
678
679 /* this field is not used when passing result offset by attribute */
680 consts.result_offset = st->ctx->Select.ResultOffset;
681
682 int num_planes = 0;
683 u_foreach_bit(i, ctx->Transform.ClipPlanesEnabled) {
684 COPY_4V(consts.clip_planes[num_planes], ctx->Transform._ClipUserPlane[i]);
685 num_planes++;
686 }
687
688 struct pipe_constant_buffer cb;
689 cb.buffer = NULL;
690 cb.user_buffer = &consts;
691 cb.buffer_offset = 0;
692 cb.buffer_size = sizeof(consts) - (MAX_CLIP_PLANES - num_planes) * 4 * sizeof(float);
693
694 struct pipe_context *pipe = st->pipe;
695 pipe->set_constant_buffer(pipe, PIPE_SHADER_GEOMETRY, 0, false, &cb);
696
697 struct pipe_shader_buffer buffer;
698 memset(&buffer, 0, sizeof(buffer));
699 buffer.buffer = ctx->Select.Result->buffer;
700 buffer.buffer_size = MAX_NAME_STACK_RESULT_NUM * 3 * sizeof(int);
701
702 pipe->set_shader_buffers(pipe, PIPE_SHADER_GEOMETRY, 0, 1, &buffer, 0x1);
703
704 return true;
705 }
706
707 static union state_key
make_state_key(struct gl_context * ctx,int mode)708 make_state_key(struct gl_context *ctx, int mode)
709 {
710 union state_key state = {0};
711
712 switch (mode) {
713 case GL_POINTS:
714 state.primitive = HW_SELECT_PRIM_POINTS;
715 break;
716 case GL_LINES:
717 case GL_LINE_STRIP:
718 case GL_LINE_LOOP:
719 state.primitive = HW_SELECT_PRIM_LINES;
720 break;
721 case GL_QUADS:
722 state.primitive = HW_SELECT_PRIM_QUADS;
723 break;
724 case GL_TRIANGLES:
725 case GL_TRIANGLE_STRIP:
726 case GL_TRIANGLE_FAN:
727 /* These will be broken into triangles. */
728 case GL_QUAD_STRIP:
729 case GL_POLYGON:
730 state.primitive = HW_SELECT_PRIM_TRIANGLES;
731 break;
732 default:
733 fprintf(stderr, "HW GL_SELECT does not support draw mode %s\n",
734 _mesa_enum_to_string(mode));
735 return (union state_key){0};
736 }
737
738 /* TODO: support gl_ClipDistance/gl_CullDistance, but it costs more regs */
739 struct gl_program *vp = ctx->st->vp;
740 if (vp->info.clip_distance_array_size || vp->info.cull_distance_array_size) {
741 fprintf(stderr, "HW GL_SELECT does not support gl_ClipDistance/gl_CullDistance\n");
742 return (union state_key){0};
743 }
744
745 state.num_user_clip_planes = util_bitcount(ctx->Transform.ClipPlanesEnabled);
746
747 /* face culling only apply to 2D primitives */
748 if (state.primitive == HW_SELECT_PRIM_QUADS ||
749 state.primitive == HW_SELECT_PRIM_TRIANGLES)
750 state.face_culling_enabled = ctx->Polygon.CullFlag;
751
752 state.result_offset_from_attribute =
753 ctx->VertexProgram._VPMode == VP_MODE_FF &&
754 (ctx->VertexProgram._VaryingInputs & VERT_BIT_SELECT_RESULT_OFFSET);
755
756 return state;
757 }
758
759 bool
st_draw_hw_select_prepare_mode(struct gl_context * ctx,struct pipe_draw_info * info)760 st_draw_hw_select_prepare_mode(struct gl_context *ctx, struct pipe_draw_info *info)
761 {
762 union state_key key = make_state_key(ctx, info->mode);
763 if (!key.u32)
764 return false;
765
766 struct st_context *st = st_context(ctx);
767 if (!st->hw_select_shaders)
768 st->hw_select_shaders = _mesa_hash_table_create_u32_keys(NULL);
769
770 struct hash_entry *he = _mesa_hash_table_search(st->hw_select_shaders,
771 (void*)(uintptr_t)key.u32);
772 void *gs;
773 if (!he) {
774 gs = hw_select_create_gs(st, key);
775 if (!gs)
776 return false;
777
778 _mesa_hash_table_insert(st->hw_select_shaders, (void*)(uintptr_t)key.u32, gs);
779 } else
780 gs = he->data;
781
782 struct cso_context *cso = st->cso_context;
783 cso_set_geometry_shader_handle(cso, gs);
784
785 /* Replace draw mode with equivalent one which geometry shader support.
786 *
787 * New mode consume same vertex buffer structure and produce primitive with
788 * same vertices (no need to be same type of primitive, because geometry shader
789 * operate on vertives and emit nothing).
790 *
791 * We can break QUAD and POLYGON to triangles with same shape. But we can't futher
792 * break them into single line or point because new primitive need to contain >=3
793 * vertices so that it's still handled in 2D (planar) way instead of 1D (line) or
794 * 0D (point) way which have different algorithm.
795 */
796 switch (info->mode) {
797 case GL_QUADS:
798 info->mode = GL_LINES_ADJACENCY;
799 break;
800 case GL_QUAD_STRIP:
801 info->mode = GL_TRIANGLE_STRIP;
802 break;
803 case GL_POLYGON:
804 info->mode = GL_TRIANGLE_FAN;
805 break;
806 default:
807 break;
808 }
809
810 /* Only normal glBegin/End draws pass result offset by attribute to avoid flush
811 * vertices when change name stack, so multiple glBegin/End sections before/after
812 * name stack calls can be merged to a single draw call. To achieve this We mark
813 * name stack result buffer used in glEnd instead of the last draw call.
814 *
815 * Other case like glDrawArrays and display list replay won't merge draws cross
816 * name stack calls, so we just mark name stack result buffer used here.
817 */
818 if (!key.result_offset_from_attribute)
819 ctx->Select.ResultUsed = GL_TRUE;
820
821 return true;
822 }
823