/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 * Copyright 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
7 
8 #include "ac_nir.h"
9 #include "ac_nir_helpers.h"
10 #include "nir_builder.h"
11 
/* This code is adapted from ac_llvm_cull.c, hence the copyright to AMD. */
13 
14 typedef struct
15 {
16    nir_def *w_reflection;
17    nir_def *all_w_negative_or_zero_or_nan;
18    nir_def *any_w_negative;
19 } position_w_info;
20 
21 static void
analyze_position_w(nir_builder * b,nir_def * pos[][4],unsigned num_vertices,position_w_info * w_info)22 analyze_position_w(nir_builder *b, nir_def *pos[][4], unsigned num_vertices,
23                    position_w_info *w_info)
24 {
25    w_info->all_w_negative_or_zero_or_nan = nir_imm_true(b);
26    w_info->w_reflection = nir_imm_false(b);
27    w_info->any_w_negative = nir_imm_false(b);
28 
29    for (unsigned i = 0; i < num_vertices; ++i) {
30       nir_def *neg_w = nir_flt_imm(b, pos[i][3], 0.0f);
31       nir_def *neg_or_zero_or_nan_w = nir_fgeu(b, nir_imm_float(b, 0.0f), pos[i][3]);
32 
33       w_info->w_reflection = nir_ixor(b, neg_w, w_info->w_reflection);
34       w_info->any_w_negative = nir_ior(b, neg_w, w_info->any_w_negative);
35       w_info->all_w_negative_or_zero_or_nan = nir_iand(b, neg_or_zero_or_nan_w, w_info->all_w_negative_or_zero_or_nan);
36    }
37 }
38 
39 static nir_def *
cull_face_triangle(nir_builder * b,nir_def * pos[3][4],const position_w_info * w_info)40 cull_face_triangle(nir_builder *b, nir_def *pos[3][4], const position_w_info *w_info)
41 {
42    nir_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]);
43    nir_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]);
44    nir_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]);
45    nir_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]);
46    nir_def *det_p0 = nir_fmul(b, det_t0, det_t1);
47    nir_def *det_p1 = nir_fmul(b, det_t2, det_t3);
48    nir_def *det = nir_fsub(b, det_p0, det_p1);
49 
50    det = nir_bcsel(b, w_info->w_reflection, nir_fneg(b, det), det);
51 
52    nir_def *front_facing_ccw = nir_fgt_imm(b, det, 0.0f);
53    nir_def *zero_area = nir_feq_imm(b, det, 0.0f);
54    nir_def *ccw = nir_load_cull_ccw_amd(b);
55    nir_def *front_facing = nir_ieq(b, front_facing_ccw, ccw);
56    nir_def *cull_front = nir_load_cull_front_face_enabled_amd(b);
57    nir_def *cull_back = nir_load_cull_back_face_enabled_amd(b);
58 
59    nir_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back);
60    face_culled = nir_ior(b, face_culled, zero_area);
61 
62    /* Don't reject NaN and +/-infinity, these are tricky.
63     * Just trust fixed-function HW to handle these cases correctly.
64     */
65    return nir_iand(b, face_culled, nir_fisfinite(b, det));
66 }
67 
68 static void
calc_bbox_triangle(nir_builder * b,nir_def * pos[3][4],nir_def * bbox_min[2],nir_def * bbox_max[2])69 calc_bbox_triangle(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
70 {
71    for (unsigned chan = 0; chan < 2; ++chan) {
72       bbox_min[chan] = nir_fmin(b, pos[0][chan], nir_fmin(b, pos[1][chan], pos[2][chan]));
73       bbox_max[chan] = nir_fmax(b, pos[0][chan], nir_fmax(b, pos[1][chan], pos[2][chan]));
74    }
75 }
76 
77 static nir_def *
cull_frustrum(nir_builder * b,nir_def * bbox_min[2],nir_def * bbox_max[2])78 cull_frustrum(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2])
79 {
80    nir_def *prim_outside_view = nir_imm_false(b);
81 
82    for (unsigned chan = 0; chan < 2; ++chan) {
83       prim_outside_view = nir_ior(b, prim_outside_view, nir_flt_imm(b, bbox_max[chan], -1.0f));
84       prim_outside_view = nir_ior(b, prim_outside_view, nir_fgt_imm(b, bbox_min[chan], 1.0f));
85    }
86 
87    return prim_outside_view;
88 }
89 
90 static nir_def *
cull_small_primitive_triangle(nir_builder * b,nir_def * bbox_min[2],nir_def * bbox_max[2])91 cull_small_primitive_triangle(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2])
92 {
93    nir_def *vp = nir_load_cull_triangle_viewport_xy_scale_and_offset_amd(b);
94    nir_def *small_prim_precision = nir_load_cull_small_triangle_precision_amd(b);
95    nir_def *rejected = nir_imm_false(b);
96 
97    for (unsigned chan = 0; chan < 2; ++chan) {
98       nir_def *vp_scale = nir_channel(b, vp, chan);
99       nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
100 
101       /* Convert the position to screen-space coordinates. */
102       nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale, vp_translate);
103       nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale, vp_translate);
104 
105       /* Scale the bounding box according to precision. */
106       min = nir_fsub(b, min, small_prim_precision);
107       max = nir_fadd(b, max, small_prim_precision);
108 
109       /* Determine if the bbox intersects the sample point, by checking if the min and max round to the same int. */
110       min = nir_fround_even(b, min);
111       max = nir_fround_even(b, max);
112 
113       nir_def *rounded_to_eq = nir_feq(b, min, max);
114       rejected = nir_ior(b, rejected, rounded_to_eq);
115    }
116 
117    return rejected;
118 }
119 
120 static nir_def *
ac_nir_cull_triangle(nir_builder * b,nir_def * initially_accepted,nir_def * pos[3][4],position_w_info * w_info,ac_nir_cull_accepted accept_func,void * state)121 ac_nir_cull_triangle(nir_builder *b,
122                      nir_def *initially_accepted,
123                      nir_def *pos[3][4],
124                      position_w_info *w_info,
125                      ac_nir_cull_accepted accept_func,
126                      void *state)
127 {
128    nir_def *accepted = initially_accepted;
129    accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative_or_zero_or_nan));
130    accepted = nir_iand(b, accepted, nir_inot(b, cull_face_triangle(b, pos, w_info)));
131 
132    nir_def *bbox_accepted = NULL;
133 
134    nir_if *if_accepted = nir_push_if(b, accepted);
135    {
136       nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
137       calc_bbox_triangle(b, pos, bbox_min, bbox_max);
138 
139       nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
140       nir_def *bbox_rejected = prim_outside_view;
141 
142       nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_triangles_enabled_amd(b));
143       {
144          nir_def *small_prim_rejected = cull_small_primitive_triangle(b, bbox_min, bbox_max);
145          bbox_rejected = nir_ior(b, bbox_rejected, small_prim_rejected);
146       }
147       nir_pop_if(b, if_cull_small_prims);
148 
149       bbox_rejected = nir_if_phi(b, bbox_rejected, prim_outside_view);
150       bbox_accepted = nir_ior(b, nir_inot(b, bbox_rejected), w_info->any_w_negative);
151 
152       /* for caller which need to react when primitive is accepted */
153       if (accept_func) {
154          nir_if *if_still_accepted = nir_push_if(b, bbox_accepted);
155          if_still_accepted->control = nir_selection_control_divergent_always_taken;
156          {
157             accept_func(b, state);
158          }
159          nir_pop_if(b, if_still_accepted);
160       }
161    }
162    nir_pop_if(b, if_accepted);
163 
164    return nir_if_phi(b, bbox_accepted, accepted);
165 }
166 
167 static void
rotate_45degrees(nir_builder * b,nir_def * v[2])168 rotate_45degrees(nir_builder *b, nir_def *v[2])
169 {
170    /* Rotating a triangle by 45 degrees:
171     *
172     *    x2  =  x*cos(45) - y*sin(45)
173     *    y2  =  x*sin(45) + y*cos(45)
174     *
175     * Since sin(45) == cos(45), we can write:
176     *
177     *    x2  =  x*cos(45) - y*cos(45)  =  (x - y) * cos(45)
178     *    y2  =  x*cos(45) + y*cos(45)  =  (x + y) * cos(45)
179     *
180     * The width of each square (rotated diamond) is sqrt(0.5), so we have to scale it to 1
181     * by multiplying by 1/sqrt(0.5) = sqrt(2) because we want round() to give us the position
182     * of the closest center of the square (rotated diamond). After scaling, we get:
183     *
184     *    x2  =  (x - y) * cos(45) * sqrt(2)
185     *    y2  =  (x + y) * cos(45) * sqrt(2)
186     *
187     * Since cos(45) * sqrt(2) = 1, we get:
188     *
189     *    x2  =  x - y
190     *    y2  =  x + y
191     */
192    nir_def *result[2];
193    result[0] = nir_fsub(b, v[0], v[1]);
194    result[1] = nir_fadd(b, v[0], v[1]);
195 
196    memcpy(v, result, sizeof(result));
197 }
198 
199 static void
calc_bbox_line(nir_builder * b,nir_def * pos[3][4],nir_def * bbox_min[2],nir_def * bbox_max[2])200 calc_bbox_line(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
201 {
202    nir_def *clip_half_line_width = nir_load_clip_half_line_width_amd(b);
203 
204    for (unsigned chan = 0; chan < 2; ++chan) {
205       bbox_min[chan] = nir_fmin(b, pos[0][chan], pos[1][chan]);
206       bbox_max[chan] = nir_fmax(b, pos[0][chan], pos[1][chan]);
207 
208       nir_def *width = nir_channel(b, clip_half_line_width, chan);
209       bbox_min[chan] = nir_fsub(b, bbox_min[chan], width);
210       bbox_max[chan] = nir_fadd(b, bbox_max[chan], width);
211    }
212 }
213 
214 static nir_def *
cull_small_primitive_line(nir_builder * b,nir_def * pos[3][4],nir_def * bbox_min[2],nir_def * bbox_max[2],nir_def * prim_is_small_else)215 cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4],
216                           nir_def *bbox_min[2], nir_def *bbox_max[2],
217                           nir_def *prim_is_small_else)
218 {
219    nir_def *prim_is_small = NULL;
220 
221    /* Small primitive filter - eliminate lines that are too small to affect a sample. */
222    nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_lines_enabled_amd(b));
223    {
224       /* This only works with lines without perpendicular end caps (lines with perpendicular
225        * end caps are rasterized as quads and thus can't be culled as small prims in 99% of
226        * cases because line_width >= 1).
227        *
228        * This takes advantage of the diamond exit rule, which says that every pixel
229        * has a diamond inside it touching the pixel boundary and only if a line exits
230        * the diamond, that pixel is filled. If a line enters the diamond or stays
231        * outside the diamond, the pixel isn't filled.
232        *
233        * This algorithm is a little simpler than that. The space outside all diamonds also
234        * has the same diamond shape, which we'll call corner diamonds.
235        *
236        * The idea is to cull all lines that are entirely inside a diamond, including
237        * corner diamonds. If a line is entirely inside a diamond, it can be culled because
238        * it doesn't exit it. If a line is entirely inside a corner diamond, it can be culled
239        * because it doesn't enter any diamond and thus can't exit any diamond.
240        *
241        * The viewport is rotated by 45 degrees to turn diamonds into squares, and a bounding
242        * box test is used to determine whether a line is entirely inside any square (diamond).
243        *
244        * The line width doesn't matter. Wide lines only duplicate filled pixels in either X or
245        * Y direction from the filled pixels. MSAA also doesn't matter. MSAA should ideally use
246        * perpendicular end caps that enable quad rasterization for lines. Thus, this should
247        * always use non-MSAA viewport transformation and non-MSAA small prim precision.
248        *
249        * A good test is piglit/lineloop because it draws 10k subpixel lines in a circle.
250        * It should contain no holes if this matches hw behavior.
251        */
252       nir_def *v0[2], *v1[2];
253       nir_def *vp = nir_load_cull_line_viewport_xy_scale_and_offset_amd(b);
254 
255       /* Get vertex positions in pixels. */
256       for (unsigned chan = 0; chan < 2; chan++) {
257          nir_def *vp_scale = nir_channel(b, vp, chan);
258          nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
259 
260          v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate);
261          v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
262       }
263 
264       /* Rotate the viewport by 45 degrees, so that diamonds become squares. */
265       rotate_45degrees(b, v0);
266       rotate_45degrees(b, v1);
267 
268       nir_def *small_prim_precision = nir_load_cull_small_line_precision_amd(b);
269 
270       nir_def *rounded_to_eq[2];
271       for (unsigned chan = 0; chan < 2; chan++) {
272          /* Compute the bounding box around both vertices. We do this because we must
273           * enlarge the line area by the precision of the rasterizer.
274           */
275          nir_def *min = nir_fmin(b, v0[chan], v1[chan]);
276          nir_def *max = nir_fmax(b, v0[chan], v1[chan]);
277 
278          /* Enlarge the bounding box by the precision of the rasterizer. */
279          min = nir_fsub(b, min, small_prim_precision);
280          max = nir_fadd(b, max, small_prim_precision);
281 
282          /* Round the bounding box corners. If both rounded corners are equal,
283           * the bounding box is entirely inside a square (diamond).
284           */
285          min = nir_fround_even(b, min);
286          max = nir_fround_even(b, max);
287 
288          rounded_to_eq[chan] = nir_feq(b, min, max);
289       }
290 
291       prim_is_small = nir_iand(b, rounded_to_eq[0], rounded_to_eq[1]);
292       prim_is_small = nir_ior(b, prim_is_small, prim_is_small_else);
293    }
294    nir_pop_if(b, if_cull_small_prims);
295 
296    return nir_if_phi(b, prim_is_small, prim_is_small_else);
297 }
298 
299 static nir_def *
ac_nir_cull_line(nir_builder * b,nir_def * initially_accepted,nir_def * pos[3][4],position_w_info * w_info,ac_nir_cull_accepted accept_func,void * state)300 ac_nir_cull_line(nir_builder *b,
301                  nir_def *initially_accepted,
302                  nir_def *pos[3][4],
303                  position_w_info *w_info,
304                  ac_nir_cull_accepted accept_func,
305                  void *state)
306 {
307    nir_def *accepted = initially_accepted;
308    accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative_or_zero_or_nan));
309 
310    nir_def *bbox_accepted = NULL;
311 
312    nir_if *if_accepted = nir_push_if(b, accepted);
313    {
314       nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
315       calc_bbox_line(b, pos, bbox_min, bbox_max);
316 
317       /* Frustrum culling - eliminate lines that are fully outside the view. */
318       nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
319       nir_def *prim_invisible =
320          cull_small_primitive_line(b, pos, bbox_min, bbox_max, prim_outside_view);
321 
322       bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
323 
324       /* for caller which need to react when primitive is accepted */
325       if (accept_func) {
326          nir_if *if_still_accepted = nir_push_if(b, bbox_accepted);
327          {
328             accept_func(b, state);
329          }
330          nir_pop_if(b, if_still_accepted);
331       }
332    }
333    nir_pop_if(b, if_accepted);
334 
335    return nir_if_phi(b, bbox_accepted, accepted);
336 }
337 
338 nir_def *
ac_nir_cull_primitive(nir_builder * b,nir_def * initially_accepted,nir_def * pos[3][4],unsigned num_vertices,ac_nir_cull_accepted accept_func,void * state)339 ac_nir_cull_primitive(nir_builder *b,
340                       nir_def *initially_accepted,
341                       nir_def *pos[3][4],
342                       unsigned num_vertices,
343                       ac_nir_cull_accepted accept_func,
344                       void *state)
345 {
346    position_w_info w_info = {0};
347    analyze_position_w(b, pos, num_vertices, &w_info);
348 
349    if (num_vertices == 3)
350       return ac_nir_cull_triangle(b, initially_accepted, pos, &w_info, accept_func, state);
351    else if (num_vertices == 2)
352       return ac_nir_cull_line(b, initially_accepted, pos, &w_info, accept_func, state);
353    else
354       unreachable("point culling not implemented");
355 
356    return NULL;
357 }
358