• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included
13  * in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
#include <stddef.h>

#include "main/enums.h"
#include "main/context.h"

#include "st_context.h"
#include "st_nir.h"
#include "st_draw.h"

#include "nir.h"
#include "nir_builtin_builder.h"

#include "u_memory.h"
35 
/* Key selecting one variant of the HW select geometry shader.
 *
 * The bitfields hold everything the shader builders in this file branch on.
 * u32 aliases them so that the whole key can be tested against zero and used
 * as a 32-bit hash table key.  A valid key always has a non-zero primitive,
 * so an all-zero key is used as the "unsupported" sentinel.
 */
union state_key {
   struct {
      /* number of enabled user clip planes */
      unsigned num_user_clip_planes:4;
      /* polygon face culling is enabled (planar primitives only) */
      unsigned face_culling_enabled:1;
      /* result offset is read from a vertex attribute, not a uniform */
      unsigned result_offset_from_attribute:1;
      /* one of enum primitive_state */
      unsigned primitive:4;
   };
   uint32_t u32;
};
45 
/* Primitive class handled by a geometry shader variant.
 *
 * HW_SELECT_PRIM_NONE must stay zero: an all-zero state_key is how
 * make_state_key() reports an unsupported draw.
 */
enum primitive_state {
   HW_SELECT_PRIM_NONE = 0,
   HW_SELECT_PRIM_POINTS,
   HW_SELECT_PRIM_LINES,
   HW_SELECT_PRIM_TRIANGLES,
   HW_SELECT_PRIM_QUADS,
};
53 
54 struct geometry_constant {
55    float depth_scale;
56    float depth_transport;
57    uint32_t culling_config;
58    uint32_t result_offset;
59    float clip_planes[MAX_CLIP_PLANES][4];
60 };
61 
/* Point the uniform variable `var` at `field` of struct geometry_constant.
 *
 *   var:    nir_variable * to update
 *   field:  member designator inside struct geometry_constant
 *   packed: non-zero when driver_location counts 4-byte units, zero when it
 *           counts 16-byte units
 *
 * location_frac is always the 4-byte component index inside a 16-byte slot.
 *
 * The local is given a trailing underscore so that it cannot capture a
 * caller variable named `offset` that is passed as an argument.
 */
#define set_uniform_location(var, field, packed)                              \
   do {                                                                       \
      unsigned uniform_byte_offset_ =                                         \
         (unsigned)offsetof(struct geometry_constant, field);                 \
      (var)->data.driver_location =                                           \
         uniform_byte_offset_ >> ((packed) ? 2 : 4);                          \
      (var)->data.location_frac = (uniform_byte_offset_ >> 2) & 0x3;          \
   } while (0)
68 
69 static nir_ssa_def *
has_nan_or_inf(nir_builder * b,nir_ssa_def * v)70 has_nan_or_inf(nir_builder *b, nir_ssa_def *v)
71 {
72    nir_ssa_def *nan = nir_bany_fnequal4(b, v, v);
73 
74    nir_ssa_def *imm = nir_imm_float(b, INFINITY);
75    nir_ssa_def *inf = nir_bany(b, nir_feq(b, nir_fabs(b, v), imm));
76 
77    return nir_ior(b, nan, inf);
78 }
79 
80 static void
return_if_true(nir_builder * b,nir_ssa_def * cond)81 return_if_true(nir_builder *b, nir_ssa_def *cond)
82 {
83    nir_if *if_cond = nir_push_if(b, cond);
84    nir_jump(b, nir_jump_return);
85    nir_pop_if(b, if_cond);
86 }
87 
88 static void
get_input_vertices(nir_builder * b,nir_ssa_def ** v)89 get_input_vertices(nir_builder *b, nir_ssa_def **v)
90 {
91    const int num_in_vert = b->shader->info.gs.vertices_in;
92 
93    nir_variable *in_pos = nir_variable_create(
94       b->shader, nir_var_shader_in, glsl_array_type(glsl_vec4_type(), num_in_vert, 0),
95       "gl_Position");
96    in_pos->data.location = VARYING_SLOT_POS;
97 
98    nir_ssa_def *is_nan_or_inf = NULL;
99    for (int i = 0; i < num_in_vert; i++) {
100       v[i] = nir_load_array_var_imm(b, in_pos, i);
101       nir_ssa_def *r = has_nan_or_inf(b, v[i]);
102       is_nan_or_inf = i ? nir_ior(b, is_nan_or_inf, r) : r;
103    }
104    return_if_true(b, is_nan_or_inf);
105 }
106 
107 static void
face_culling(nir_builder * b,nir_ssa_def ** v,bool packed)108 face_culling(nir_builder *b, nir_ssa_def **v, bool packed)
109 {
110    /* use the z value of the face normal to determine if the face points to us:
111     *   Nz = (x1 - x0) * (y2 - y0) - (y1 - y0) * (x2 - x0)
112     *
113     * it should be in NDC (Normalized Device Coordinate), but now we are in clip
114     * space (Vd = Vc / Vc.w), so multiply Nz with w0*w1*w2 to get the clip space
115     * value:
116     *   det = x0 * (y1 * w2 - y2 * w1) +
117     *         x1 * (y2 * w0 - y0 * w2) +
118     *         x2 * (y0 * w1 - y1 * w0)
119     *
120     * we only care about the sign of the det, but also need to count the sign of
121     * w0/w1/w2 as a negtive w would change the direction of Nz < 0
122     */
123    nir_ssa_def *y1w2 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[2], 3));
124    nir_ssa_def *y2w1 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[1], 3));
125    nir_ssa_def *y2w0 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[0], 3));
126    nir_ssa_def *y0w2 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[2], 3));
127    nir_ssa_def *y0w1 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[1], 3));
128    nir_ssa_def *y1w0 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[0], 3));
129    nir_ssa_def *t0 = nir_fmul(b, nir_channel(b, v[0], 0), nir_fsub(b, y1w2, y2w1));
130    nir_ssa_def *t1 = nir_fmul(b, nir_channel(b, v[1], 0), nir_fsub(b, y2w0, y0w2));
131    nir_ssa_def *t2 = nir_fmul(b, nir_channel(b, v[2], 0), nir_fsub(b, y0w1, y1w0));
132    nir_ssa_def *det = nir_fadd(b, nir_fadd(b, t0, t1), t2);
133 
134    /* invert det sign once any vertex w < 0 */
135    nir_ssa_def *n0 = nir_flt(b, nir_channel(b, v[0], 3), nir_imm_float(b, 0));
136    nir_ssa_def *n1 = nir_flt(b, nir_channel(b, v[1], 3), nir_imm_float(b, 0));
137    nir_ssa_def *n2 = nir_flt(b, nir_channel(b, v[2], 3), nir_imm_float(b, 0));
138    nir_ssa_def *cond = nir_ixor(b, nir_ixor(b, n0, n1), n2);
139    det = nir_bcsel(b, cond, nir_fneg(b, det), det);
140 
141    nir_variable *culling_config = nir_variable_create(
142       b->shader, nir_var_uniform, glsl_uint_type(), "culling_config");
143    set_uniform_location(culling_config, culling_config, packed);
144    nir_ssa_def *config = nir_i2b(b, nir_load_var(b, culling_config));
145 
146    /* det < 0 then z points to camera */
147    nir_ssa_def *zero = nir_imm_zero(b, 1, det->bit_size);
148    nir_ssa_def *is_zero = nir_feq(b, det, zero);
149    nir_ssa_def *is_neg = nir_flt(b, det, zero);
150    nir_ssa_def *cull = nir_ixor(b, is_neg, config);
151    return_if_true(b, nir_ior(b, is_zero, cull));
152 }
153 
154 static void
fast_frustum_culling(nir_builder * b,nir_ssa_def ** v)155 fast_frustum_culling(nir_builder *b, nir_ssa_def **v)
156 {
157    nir_ssa_def *cull = NULL;
158 
159    /* there are six culling planes for the visible volume:
160     *   1.  x + w = 0
161     *   2. -x + w = 0
162     *   3.  y + w = 0
163     *   4. -y + w = 0
164     *   5.  z + w = 0
165     *   6. -z + w = 0
166     *
167     * if all vertices of the primitive are outside (plane equation <0) of
168     * any plane, the primitive must be invisible.
169     */
170    for (int i = 0; i < 6; i++) {
171       nir_ssa_def *outside = NULL;
172 
173       for (int j = 0; j < b->shader->info.gs.vertices_in; j++) {
174          nir_ssa_def *c = nir_channel(b, v[j], i >> 1);
175          if (i & 1)
176             c = nir_fneg(b, c);
177 
178          nir_ssa_def *r = nir_flt(b, nir_channel(b, v[j], 3), c);
179          outside = j ? nir_iand(b, outside, r) : r;
180       }
181 
182       cull = i ? nir_ior(b, cull, outside) : outside;
183    }
184 
185    return_if_true(b, cull);
186 }
187 
188 static nir_ssa_def *
get_intersection(nir_builder * b,nir_ssa_def * v1,nir_ssa_def * v2,nir_ssa_def * d1,nir_ssa_def * d2)189 get_intersection(nir_builder *b, nir_ssa_def *v1, nir_ssa_def *v2,
190                  nir_ssa_def *d1, nir_ssa_def *d2)
191 {
192    nir_ssa_def *factor = nir_fdiv(b, d1, nir_fsub(b, d1, d2));
193    return nir_fmad(b, nir_fsub(b, v2, v1), factor, v1);
194 }
195 
/* begin_for_loop(name, max) ... end_for_loop(name) emit the NIR equivalent of
 *
 *    for (int idx = 0; idx < max; idx++) { ... }
 *
 * The two macros must be used as a pair in the same C scope, with a builder
 * named `b` in scope.  begin_for_loop declares:
 *   - name##_index: the local int variable holding the counter
 *   - name:         the nir_loop
 *   - idx:          the counter value loaded at the top of the iteration
 *   - if_in_loop:   the nir_if guarding the body
 * idx and if_in_loop live in a C block that end_for_loop closes.
 *
 * `max` is expanded after the loop has been pushed, so any instructions it
 * builds are emitted inside the loop.
 */
#define begin_for_loop(name, max)                                       \
   nir_variable *name##_index =                                         \
      nir_local_variable_create(b->impl, glsl_int_type(), #name "_i");  \
   nir_store_var(b, name##_index, nir_imm_int(b, 0), 1);                \
                                                                        \
   nir_loop *name = nir_push_loop(b);                                   \
   {                                                                    \
      nir_ssa_def *idx = nir_load_var(b, name##_index);                 \
      nir_if *if_in_loop = nir_push_if(b, nir_ilt(b, idx, max));

/* Increment the counter at the end of the body; break out of the loop in the
 * else branch of the bound check.
 */
#define end_for_loop(name)                                              \
         nir_store_var(b, name##_index, nir_iadd_imm(b, idx, 1), 1);    \
      nir_push_else(b, if_in_loop);                                     \
         nir_jump(b, nir_jump_break);                                   \
      nir_pop_if(b, if_in_loop);                                        \
   }                                                                    \
   nir_pop_loop(b, name);
213 
214 static void
clip_with_plane(nir_builder * b,nir_variable * vert,nir_variable * num_vert,int max_vert,nir_ssa_def * plane)215 clip_with_plane(nir_builder *b, nir_variable *vert, nir_variable *num_vert,
216                 int max_vert, nir_ssa_def *plane)
217 {
218    nir_variable *all_clipped = nir_local_variable_create(
219       b->impl, glsl_bool_type(), "all_clipped");
220    nir_store_var(b, all_clipped, nir_imm_true(b), 1);
221 
222    nir_variable *dist = nir_local_variable_create(
223       b->impl, glsl_array_type(glsl_float_type(), max_vert, 0), "dist");
224 
225    nir_ssa_def *num = nir_load_var(b, num_vert);
226    begin_for_loop(dist_loop, num)
227    {
228       nir_ssa_def *v = nir_load_array_var(b, vert, idx);
229       nir_ssa_def *d = nir_fdot(b, v, plane);
230       nir_store_array_var(b, dist, idx, d, 1);
231 
232       nir_ssa_def *clipped = nir_flt(b, d, nir_imm_float(b, 0));
233       nir_store_var(b, all_clipped,
234                     nir_iand(b, nir_load_var(b, all_clipped), clipped), 1);
235    }
236    end_for_loop(dist_loop)
237 
238    return_if_true(b, nir_load_var(b, all_clipped));
239 
240    /* Use +/0/- to denote the dist[i] sign, which means:
241     * +: inside plane
242     * -: outside plane
243     * 0: just on the plane
244     *
245     * Some example:
246     * ++++: all vertex not clipped
247     * ----: all vertex clipped
248     * +-++: one vertex clipped, need to insert two vertex at '-', array grow
249     * +--+: two vertex clipped, need to insert two vertex at '--', array same
250     * +---: three vertex clipped, need to insert two vertex at '---', array trim
251     * +-0+: one vertex clipped, need to insert one vertex at '-', array same
252     *
253     * Plane clip only produce convex polygon, so '-' must be contigous, there's
254     * no '+-+-', so one clip plane can only grow array by 1.
255     */
256 
257    /* when array grow or '-' has been replaced with inserted vertex, save the
258     * original vert to be used by following calculation.
259     */
260    nir_variable *saved =
261       nir_local_variable_create(b->impl, glsl_vec4_type(), "saved");
262 
263    nir_variable *vert_index =
264       nir_local_variable_create(b->impl, glsl_int_type(), "vert_index");
265    nir_store_var(b, vert_index, nir_imm_int(b, 0), 1);
266 
267    begin_for_loop(vert_loop, num)
268    {
269       nir_ssa_def *di = nir_load_array_var(b, dist, idx);
270       nir_if *if_clipped = nir_push_if(b, nir_flt(b, di, nir_imm_float(b, 0)));
271       {
272          /* - case, we need to take care of sign change and insert vertex */
273 
274          nir_ssa_def *prev = nir_bcsel(b, nir_ieq_imm(b, idx, 0),
275                                        nir_iadd_imm(b, num, -1),
276                                        nir_iadd_imm(b, idx, -1));
277          nir_ssa_def *dp = nir_load_array_var(b, dist, prev);
278          nir_if *prev_if = nir_push_if(b, nir_flt(b, nir_imm_float(b, 0), dp));
279          {
280             /* +- case, replace - with inserted vertex
281              * assert(vert_index <= idx), array is sure to not grow here
282              * but need to save vert[idx] when vert_index==idx
283              */
284 
285             nir_ssa_def *vi = nir_load_array_var(b, vert, idx);
286             nir_store_var(b, saved, vi, 0xf);
287 
288             nir_ssa_def *vp = nir_load_array_var(b, vert, prev);
289             nir_ssa_def *iv = get_intersection(b, vp, vi, dp, di);
290             nir_ssa_def *index = nir_load_var(b, vert_index);
291             nir_store_array_var(b, vert, index, iv, 0xf);
292 
293             nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
294          }
295          nir_pop_if(b, prev_if);
296 
297          nir_ssa_def *next = nir_bcsel(b, nir_ieq(b, idx, nir_iadd_imm(b, num, -1)),
298                                        nir_imm_int(b, 0), nir_iadd_imm(b, idx, 1));
299          nir_ssa_def *dn = nir_load_array_var(b, dist, next);
300          nir_if *next_if = nir_push_if(b, nir_flt(b, nir_imm_float(b, 0), dn));
301          {
302             /* -+ case, may grow array:
303              *   vert_index > idx: +-+ case, grow array, current vertex in 'saved',
304              *     save next + to 'saved', will replace it with inserted vertex.
305              *   vert_index <= idx: --+ case, will replace last - with inserted vertex,
306              *     no need to save last -, because + case won't use - value.
307              */
308 
309             nir_ssa_def *index = nir_load_var(b, vert_index);
310             nir_ssa_def *vi = nir_bcsel(b, nir_flt(b, idx, index),
311                                         nir_load_var(b, saved),
312                                         nir_load_array_var(b, vert, idx));
313             nir_ssa_def *vn = nir_load_array_var(b, vert, next);
314             nir_ssa_def *iv = get_intersection(b, vn, vi, dn, di);
315 
316             nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf);
317             nir_store_array_var(b, vert, index, iv, 0xf);
318 
319             nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
320          }
321          nir_pop_if(b, next_if);
322       }
323       nir_push_else(b, if_clipped);
324       {
325          /* +/0 case, just keep the vert
326           *   vert_index > idx: array grew case, vert[idx] is inserted vertex or prev
327           *     +/0 vertex, current vertex is in 'saved', need to save next vertex
328           *   vert_index < idx: array trim case
329           */
330 
331          nir_ssa_def *index = nir_load_var(b, vert_index);
332          nir_ssa_def *vi = nir_bcsel(b, nir_flt(b, idx, index),
333                                      nir_load_var(b, saved),
334                                      nir_load_array_var(b, vert, idx));
335 
336          nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf);
337          nir_store_array_var(b, vert, index, vi, 0xf);
338 
339          nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1);
340       }
341       nir_pop_if(b, if_clipped);
342    }
343    end_for_loop(vert_loop);
344 
345    nir_copy_var(b, num_vert, vert_index);
346 }
347 
348 static nir_ssa_def *
get_user_clip_plane(nir_builder * b,int index,bool packed)349 get_user_clip_plane(nir_builder *b, int index, bool packed)
350 {
351    char name[16];
352    snprintf(name, sizeof(name), "gl_ClipPlane%d", index);
353    nir_variable *plane = nir_variable_create(
354       b->shader, nir_var_uniform, glsl_vec4_type(), name);
355 
356    set_uniform_location(plane, clip_planes[index][0], packed);
357 
358    return nir_load_var(b, plane);
359 }
360 
361 static void
get_depth_range_transform(nir_builder * b,bool packed,nir_ssa_def ** trans)362 get_depth_range_transform(nir_builder *b, bool packed, nir_ssa_def **trans)
363 {
364    nir_variable *depth_scale = nir_variable_create(
365       b->shader, nir_var_uniform, glsl_float_type(), "depth_scale");
366    set_uniform_location(depth_scale, depth_scale, packed);
367 
368    nir_variable *depth_transport = nir_variable_create(
369       b->shader, nir_var_uniform, glsl_float_type(), "depth_transport");
370    set_uniform_location(depth_transport, depth_transport, packed);
371 
372    trans[0] = nir_load_var(b, depth_scale);
373    trans[1] = nir_load_var(b, depth_transport);
374 }
375 
376 static nir_ssa_def *
get_window_space_depth(nir_builder * b,nir_ssa_def * v,nir_ssa_def ** trans)377 get_window_space_depth(nir_builder *b, nir_ssa_def *v, nir_ssa_def **trans)
378 {
379    nir_ssa_def *z = nir_channel(b, v, 2);
380    nir_ssa_def *w = nir_channel(b, v, 3);
381 
382    /* do perspective division, if w==0, xyz must be 0 too (otherwise can't pass
383     * the clip test), 0/0=NaN, but we want it to be the nearest point.
384     */
385    nir_ssa_def *c = nir_feq(b, w, nir_imm_float(b, 0));
386    nir_ssa_def *d = nir_bcsel(b, c, nir_imm_float(b, -1), nir_fdiv(b, z, w));
387 
388    /* map [-1, 1] to [near, far] set by glDepthRange(near, far) */
389    return nir_fmad(b, trans[0], d, trans[1]);
390 }
391 
392 static void
update_result_buffer(nir_builder * b,nir_ssa_def * dmin,nir_ssa_def * dmax,bool offset_from_attribute,bool packed)393 update_result_buffer(nir_builder *b, nir_ssa_def *dmin, nir_ssa_def *dmax,
394                      bool offset_from_attribute, bool packed)
395 {
396    nir_ssa_def *offset;
397    if (offset_from_attribute) {
398       nir_variable *in_offset = nir_variable_create(
399          b->shader, nir_var_shader_in,
400          glsl_array_type(glsl_uint_type(), b->shader->info.gs.vertices_in, 0),
401          "result_offset");
402       in_offset->data.location = VARYING_SLOT_VAR0;
403       offset = nir_load_array_var_imm(b, in_offset, 0);
404    } else {
405       nir_variable *uni_offset = nir_variable_create(
406          b->shader, nir_var_uniform, glsl_uint_type(), "result_offset");
407       set_uniform_location(uni_offset, result_offset, packed);
408       offset = nir_load_var(b, uni_offset);
409    }
410 
411    nir_variable_create(b->shader, nir_var_mem_ssbo,
412                        glsl_array_type(glsl_uint_type(), 0, 0), "result");
413    /* driver_location = 0 (slot 0) */
414 
415    nir_ssa_def *ssbo = nir_imm_int(b, 0);
416    nir_ssbo_atomic_exchange(b, 32, ssbo, offset, nir_imm_int(b, 1));
417    nir_ssbo_atomic_umin(b, 32, ssbo, nir_iadd_imm(b, offset, 4), dmin);
418    nir_ssbo_atomic_umax(b, 32, ssbo, nir_iadd_imm(b, offset, 8), dmax);
419 }
420 
421 static void
build_point_nir_shader(nir_builder * b,union state_key state,bool packed)422 build_point_nir_shader(nir_builder *b, union state_key state, bool packed)
423 {
424    assert(b->shader->info.gs.vertices_in == 1);
425 
426    nir_ssa_def *v;
427    get_input_vertices(b, &v);
428 
429    fast_frustum_culling(b, &v);
430 
431    nir_ssa_def *outside = NULL;
432    for (int i = 0; i < state.num_user_clip_planes; i++) {
433       nir_ssa_def *p = get_user_clip_plane(b, i, packed);
434       nir_ssa_def *d = nir_fdot(b, v, p);
435       nir_ssa_def *r = nir_flt(b, d, nir_imm_float(b, 0));
436       outside = i ? nir_ior(b, outside, r) : r;
437    }
438    if (outside)
439       return_if_true(b, outside);
440 
441    nir_ssa_def *trans[2];
442    get_depth_range_transform(b, packed, trans);
443 
444    nir_ssa_def *depth = get_window_space_depth(b, v, trans);
445    nir_ssa_def *fdepth = nir_fmul_imm(b, depth, 4294967295.0);
446    nir_ssa_def *idepth = nir_f2uN(b, fdepth, 32);
447 
448    update_result_buffer(b, idepth, idepth, state.result_offset_from_attribute, packed);
449 }
450 
451 static nir_variable *
create_clip_planes(nir_builder * b,int num_clip_planes,bool packed)452 create_clip_planes(nir_builder *b, int num_clip_planes, bool packed)
453 {
454    nir_variable *clip_planes = nir_local_variable_create(
455       b->impl, glsl_array_type(glsl_vec4_type(), num_clip_planes, 0), "clip_planes");
456 
457    nir_ssa_def *unit_clip_planes[6] = {
458       nir_imm_vec4(b,  1,  0,  0,  1),
459       nir_imm_vec4(b, -1,  0,  0,  1),
460       nir_imm_vec4(b,  0,  1,  0,  1),
461       nir_imm_vec4(b,  0, -1,  0,  1),
462       nir_imm_vec4(b,  0,  0,  1,  1),
463       nir_imm_vec4(b,  0,  0, -1,  1),
464    };
465    for (int i = 0; i < 6; i++)
466       nir_store_array_var_imm(b, clip_planes, i, unit_clip_planes[i], 0xf);
467 
468    for (int i = 6; i < num_clip_planes; i++) {
469       nir_ssa_def *p = get_user_clip_plane(b, i - 6, packed);
470       nir_store_array_var_imm(b, clip_planes, i, p, 0xf);
471    }
472 
473    return clip_planes;
474 }
475 
476 static void
build_line_nir_shader(nir_builder * b,union state_key state,bool packed)477 build_line_nir_shader(nir_builder *b, union state_key state, bool packed)
478 {
479    assert(b->shader->info.gs.vertices_in == 2);
480 
481    nir_ssa_def *v[2];
482    get_input_vertices(b, v);
483 
484    fast_frustum_culling(b, v);
485 
486    nir_variable *vert0 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert0");
487    nir_store_var(b, vert0, v[0], 0xf);
488 
489    nir_variable *vert1 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert1");
490    nir_store_var(b, vert1, v[1], 0xf);
491 
492    const int num_clip_planes = 6 + state.num_user_clip_planes;
493    nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed);
494 
495    begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes))
496    {
497       nir_ssa_def *plane = nir_load_array_var(b, clip_planes, idx);
498       nir_ssa_def *v0 = nir_load_var(b, vert0);
499       nir_ssa_def *v1 = nir_load_var(b, vert1);
500       nir_ssa_def *d0 = nir_fdot(b, v0, plane);
501       nir_ssa_def *d1 = nir_fdot(b, v1, plane);
502       nir_ssa_def *n0 = nir_flt(b, d0, nir_imm_float(b, 0));
503       nir_ssa_def *n1 = nir_flt(b, d1, nir_imm_float(b, 0));
504 
505       return_if_true(b, nir_iand(b, n0, n1));
506 
507       nir_if *clip_if = nir_push_if(b, nir_ior(b, n0, n1));
508       {
509          nir_ssa_def *iv = get_intersection(b, v0, v1, d0, d1);
510          nir_store_var(b, vert0, nir_bcsel(b, n0, iv, v0), 0xf);
511          nir_store_var(b, vert1, nir_bcsel(b, n1, iv, v1), 0xf);
512       }
513       nir_pop_if(b, clip_if);
514    }
515    end_for_loop(clip_loop)
516 
517    nir_ssa_def *trans[2];
518    get_depth_range_transform(b, packed, trans);
519 
520    nir_ssa_def *d0 = get_window_space_depth(b, nir_load_var(b, vert0), trans);
521    nir_ssa_def *d1 = get_window_space_depth(b, nir_load_var(b, vert1), trans);
522 
523    nir_ssa_def *dmin = nir_fmin(b, d0, d1);
524    nir_ssa_def *dmax = nir_fmax(b, d0, d1);
525 
526    nir_ssa_def *fdmin = nir_fmul_imm(b, dmin, 4294967295.0);
527    nir_ssa_def *idmin = nir_f2uN(b, fdmin, 32);
528 
529    nir_ssa_def *fdmax = nir_fmul_imm(b, dmax, 4294967295.0);
530    nir_ssa_def *idmax = nir_f2uN(b, fdmax, 32);
531 
532    update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed);
533 }
534 
535 static void
build_planar_primitive_nir_shader(nir_builder * b,union state_key state,bool packed)536 build_planar_primitive_nir_shader(nir_builder *b, union state_key state, bool packed)
537 {
538    const int num_in_vert = b->shader->info.gs.vertices_in;
539    assert(num_in_vert == 3 || num_in_vert == 4);
540 
541    nir_ssa_def *v[4];
542    get_input_vertices(b, v);
543 
544    if (state.face_culling_enabled)
545       face_culling(b, v, packed);
546 
547    /* fast frustum culling, this should filter out most primitives */
548    fast_frustum_culling(b, v);
549 
550    const int num_clip_planes = 6 + state.num_user_clip_planes;
551    const int max_vert = num_in_vert + num_clip_planes;
552 
553    /* TODO: could use shared memory (ie. AMD GPU LDS) for this array
554     * to reduce register usage.
555     */
556    nir_variable *vert = nir_local_variable_create(
557       b->impl, glsl_array_type(glsl_vec4_type(), max_vert, 0), "vert");
558    for (int i = 0; i < num_in_vert; i++)
559       nir_store_array_var_imm(b, vert, i, v[i], 0xf);
560 
561    nir_variable *num_vert =
562       nir_local_variable_create(b->impl, glsl_int_type(), "num_vert");
563    nir_store_var(b, num_vert, nir_imm_int(b, num_in_vert), 1);
564 
565    nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed);
566 
567    /* accurate clipping with all clip planes */
568    begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes))
569    {
570       nir_ssa_def *plane = nir_load_array_var(b, clip_planes, idx);
571       clip_with_plane(b, vert, num_vert, max_vert, plane);
572    }
573    end_for_loop(clip_loop)
574 
575    nir_ssa_def *trans[2];
576    get_depth_range_transform(b, packed, trans);
577 
578    nir_variable *dmin =
579       nir_local_variable_create(b->impl, glsl_float_type(), "dmin");
580    nir_store_var(b, dmin, nir_imm_float(b, 1), 1);
581 
582    nir_variable *dmax =
583       nir_local_variable_create(b->impl, glsl_float_type(), "dmax");
584    nir_store_var(b, dmax, nir_imm_float(b, 0), 1);
585 
586    begin_for_loop(depth_loop, nir_load_var(b, num_vert))
587    {
588       nir_ssa_def *vtx = nir_load_array_var(b, vert, idx);
589       nir_ssa_def *depth = get_window_space_depth(b, vtx, trans);
590       nir_store_var(b, dmin, nir_fmin(b, nir_load_var(b, dmin), depth), 1);
591       nir_store_var(b, dmax, nir_fmax(b, nir_load_var(b, dmax), depth), 1);
592    }
593    end_for_loop(depth_loop)
594 
595    nir_ssa_def *fdmin = nir_fmul_imm(b, nir_load_var(b, dmin), 4294967295.0);
596    nir_ssa_def *idmin = nir_f2uN(b, fdmin, 32);
597 
598    nir_ssa_def *fdmax = nir_fmul_imm(b, nir_load_var(b, dmax), 4294967295.0);
599    nir_ssa_def *idmax = nir_f2uN(b, fdmax, 32);
600 
601    update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed);
602 }
603 
604 static void *
hw_select_create_gs(struct st_context * st,union state_key state)605 hw_select_create_gs(struct st_context *st, union state_key state)
606 {
607    const nir_shader_compiler_options *options =
608       st_get_nir_compiler_options(st, MESA_SHADER_GEOMETRY);
609 
610    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
611                                                   "hw select GS");
612 
613    nir_shader *nir = b.shader;
614    nir->info.inputs_read = VARYING_BIT_POS;
615    nir->num_uniforms = DIV_ROUND_UP(sizeof(struct geometry_constant), (4 * sizeof(float)));
616    nir->info.num_ssbos = 1;
617    nir->info.gs.output_primitive = SHADER_PRIM_POINTS;
618    nir->info.gs.vertices_out = 1;
619    nir->info.gs.invocations = 1;
620    nir->info.gs.active_stream_mask = 1;
621 
622    if (state.result_offset_from_attribute)
623       nir->info.inputs_read |= VARYING_BIT_VAR(0);
624 
625    bool packed = st->ctx->Const.PackedDriverUniformStorage;
626 
627    switch (state.primitive) {
628    case HW_SELECT_PRIM_POINTS:
629       nir->info.gs.input_primitive = SHADER_PRIM_POINTS;
630       nir->info.gs.vertices_in = 1;
631       build_point_nir_shader(&b, state, packed);
632       break;
633    case HW_SELECT_PRIM_LINES:
634       nir->info.gs.input_primitive = SHADER_PRIM_LINES;
635       nir->info.gs.vertices_in = 2;
636       build_line_nir_shader(&b, state, packed);
637       break;
638    case HW_SELECT_PRIM_TRIANGLES:
639       nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
640       nir->info.gs.vertices_in = 3;
641       build_planar_primitive_nir_shader(&b, state, packed);
642       break;
643    case HW_SELECT_PRIM_QUADS:
644       /* geometry shader has no quad primitive, use lines_adjacency instead */
645       nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
646       nir->info.gs.vertices_in = 4;
647       build_planar_primitive_nir_shader(&b, state, packed);
648       break;
649    default:
650       unreachable("unexpected primitive");
651    }
652 
653    nir_lower_returns(nir);
654 
655    return st_nir_finish_builtin_shader(st, nir);
656 }
657 
658 bool
st_draw_hw_select_prepare_common(struct gl_context * ctx)659 st_draw_hw_select_prepare_common(struct gl_context *ctx)
660 {
661    struct st_context *st = st_context(ctx);
662    if (st->gp || st->tcp || st->tep) {
663       fprintf(stderr, "HW GL_SELECT does not support user geometry/tessellation shader\n");
664       return false;
665    }
666 
667    struct geometry_constant consts;
668 
669    float n = ctx->ViewportArray[0].Near;
670    float f = ctx->ViewportArray[0].Far;
671    consts.depth_scale = (f - n) / 2;
672    consts.depth_transport = (f + n) / 2;
673 
674    /* this field is not used when face culling disabled */
675    consts.culling_config =
676       (ctx->Polygon.CullFaceMode == GL_BACK) ^
677       (ctx->Polygon.FrontFace == GL_CCW);
678 
679    /* this field is not used when passing result offset by attribute */
680    consts.result_offset = st->ctx->Select.ResultOffset;
681 
682    int num_planes = 0;
683    u_foreach_bit(i, ctx->Transform.ClipPlanesEnabled) {
684       COPY_4V(consts.clip_planes[num_planes], ctx->Transform._ClipUserPlane[i]);
685       num_planes++;
686    }
687 
688    struct pipe_constant_buffer cb;
689    cb.buffer = NULL;
690    cb.user_buffer = &consts;
691    cb.buffer_offset = 0;
692    cb.buffer_size = sizeof(consts) - (MAX_CLIP_PLANES - num_planes) * 4 * sizeof(float);
693 
694    struct pipe_context *pipe = st->pipe;
695    pipe->set_constant_buffer(pipe, PIPE_SHADER_GEOMETRY, 0, false, &cb);
696 
697    struct pipe_shader_buffer buffer;
698    memset(&buffer, 0, sizeof(buffer));
699    buffer.buffer = ctx->Select.Result->buffer;
700    buffer.buffer_size = MAX_NAME_STACK_RESULT_NUM * 3 * sizeof(int);
701 
702    pipe->set_shader_buffers(pipe, PIPE_SHADER_GEOMETRY, 0, 1, &buffer, 0x1);
703 
704    return true;
705 }
706 
707 static union state_key
make_state_key(struct gl_context * ctx,int mode)708 make_state_key(struct gl_context *ctx, int mode)
709 {
710    union state_key state = {0};
711 
712    switch (mode) {
713    case GL_POINTS:
714       state.primitive = HW_SELECT_PRIM_POINTS;
715       break;
716    case GL_LINES:
717    case GL_LINE_STRIP:
718    case GL_LINE_LOOP:
719       state.primitive = HW_SELECT_PRIM_LINES;
720       break;
721    case GL_QUADS:
722       state.primitive = HW_SELECT_PRIM_QUADS;
723       break;
724    case GL_TRIANGLES:
725    case GL_TRIANGLE_STRIP:
726    case GL_TRIANGLE_FAN:
727       /* These will be broken into triangles. */
728    case GL_QUAD_STRIP:
729    case GL_POLYGON:
730       state.primitive = HW_SELECT_PRIM_TRIANGLES;
731       break;
732    default:
733       fprintf(stderr, "HW GL_SELECT does not support draw mode %s\n",
734               _mesa_enum_to_string(mode));
735       return (union state_key){0};
736    }
737 
738    /* TODO: support gl_ClipDistance/gl_CullDistance, but it costs more regs */
739    struct gl_program *vp = ctx->st->vp;
740    if (vp->info.clip_distance_array_size || vp->info.cull_distance_array_size) {
741       fprintf(stderr, "HW GL_SELECT does not support gl_ClipDistance/gl_CullDistance\n");
742       return (union state_key){0};
743    }
744 
745    state.num_user_clip_planes = util_bitcount(ctx->Transform.ClipPlanesEnabled);
746 
747    /* face culling only apply to 2D primitives */
748    if (state.primitive == HW_SELECT_PRIM_QUADS ||
749        state.primitive == HW_SELECT_PRIM_TRIANGLES)
750       state.face_culling_enabled = ctx->Polygon.CullFlag;
751 
752    state.result_offset_from_attribute =
753       ctx->VertexProgram._VPMode == VP_MODE_FF &&
754       (ctx->VertexProgram._VaryingInputs & VERT_BIT_SELECT_RESULT_OFFSET);
755 
756    return state;
757 }
758 
759 bool
st_draw_hw_select_prepare_mode(struct gl_context * ctx,struct pipe_draw_info * info)760 st_draw_hw_select_prepare_mode(struct gl_context *ctx, struct pipe_draw_info *info)
761 {
762    union state_key key = make_state_key(ctx, info->mode);
763    if (!key.u32)
764       return false;
765 
766    struct st_context *st = st_context(ctx);
767    if (!st->hw_select_shaders)
768       st->hw_select_shaders = _mesa_hash_table_create_u32_keys(NULL);
769 
770    struct hash_entry *he = _mesa_hash_table_search(st->hw_select_shaders,
771                                                    (void*)(uintptr_t)key.u32);
772    void *gs;
773    if (!he) {
774       gs = hw_select_create_gs(st, key);
775       if (!gs)
776          return false;
777 
778       _mesa_hash_table_insert(st->hw_select_shaders, (void*)(uintptr_t)key.u32, gs);
779    } else
780       gs = he->data;
781 
782    struct cso_context *cso = st->cso_context;
783    cso_set_geometry_shader_handle(cso, gs);
784 
785    /* Replace draw mode with equivalent one which geometry shader support.
786     *
787     * New mode consume same vertex buffer structure and produce primitive with
788     * same vertices (no need to be same type of primitive, because geometry shader
789     * operate on vertives and emit nothing).
790     *
791     * We can break QUAD and POLYGON to triangles with same shape. But we can't futher
792     * break them into single line or point because new primitive need to contain >=3
793     * vertices so that it's still handled in 2D (planar) way instead of 1D (line) or
794     * 0D (point) way which have different algorithm.
795     */
796    switch (info->mode) {
797    case GL_QUADS:
798       info->mode = GL_LINES_ADJACENCY;
799       break;
800    case GL_QUAD_STRIP:
801       info->mode = GL_TRIANGLE_STRIP;
802       break;
803    case GL_POLYGON:
804       info->mode = GL_TRIANGLE_FAN;
805       break;
806    default:
807       break;
808    }
809 
810    /* Only normal glBegin/End draws pass result offset by attribute to avoid flush
811     * vertices when change name stack, so multiple glBegin/End sections before/after
812     * name stack calls can be merged to a single draw call. To achieve this We mark
813     * name stack result buffer used in glEnd instead of the last draw call.
814     *
815     * Other case like glDrawArrays and display list replay won't merge draws cross
816     * name stack calls, so we just mark name stack result buffer used here.
817     */
818    if (!key.result_offset_from_attribute)
819       ctx->Select.ResultUsed = GL_TRUE;
820 
821    return true;
822 }
823