• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18  * USE OR OTHER DEALINGS IN THE SOFTWARE.
19  *
20  * The above copyright notice and this permission notice (including the
21  * next paragraph) shall be included in all copies or substantial portions
22  * of the Software.
23  *
24  */
25 
26 #include "ac_llvm_cull.h"
27 
28 #include <llvm-c/Core.h>
29 
30 struct ac_position_w_info {
31    /* If a primitive intersects the W=0 plane, it causes a reflection
32     * of the determinant used for face culling. Every vertex behind
33     * the W=0 plane negates the determinant, so having 2 vertices behind
34     * the plane has no effect. This is i1 true if the determinant should be
35     * negated.
36     */
37    LLVMValueRef w_reflection;
38 
39    /* If we simplify the "-w <= p <= w" view culling equation, we get
40     * "-w <= w", which can't be satisfied when w is negative.
41     * In perspective projection, a negative W means that the primitive
42     * is behind the viewer, but the equation is independent of the type
43     * of projection.
44     *
45     * w_accepted is false when all W are negative and therefore
46     * the primitive is invisible.
47     */
48    LLVMValueRef w_accepted;
49 
50    LLVMValueRef all_w_positive;
51    LLVMValueRef any_w_negative;
52 };
53 
/* Inspect the sign of W for the three vertices of a primitive and fill in
 * every member of *w.
 *
 * \param ctx  Provides the IR builder and the i1/f32 constants used here.
 * \param pos  Vertex positions 3x vec4; only component 3 (W) is read.
 * \param w    Output; all four members are overwritten.
 */
static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                                  struct ac_position_w_info *w)
{
   LLVMBuilderRef b = ctx->builder;

   /* Running results over the vertices: parity (XOR), any (OR), all (AND). */
   LLVMValueRef reflect = ctx->i1false;
   LLVMValueRef any_neg = ctx->i1false;
   LLVMValueRef all_neg = ctx->i1true;

   for (unsigned vtx = 0; vtx < 3; vtx++) {
      /* Ordered compare: a NaN W does not count as negative. */
      LLVMValueRef is_neg = LLVMBuildFCmp(b, LLVMRealOLT, pos[vtx][3], ctx->f32_0, "");

      /* Each negative W toggles the reflection flag. */
      reflect = LLVMBuildXor(b, reflect, is_neg, "");
      any_neg = LLVMBuildOr(b, any_neg, is_neg, "");
      all_neg = LLVMBuildAnd(b, all_neg, is_neg, "");
   }

   w->w_reflection = reflect;
   w->any_w_negative = any_neg;
   w->all_w_positive = LLVMBuildNot(b, any_neg, "");
   w->w_accepted = LLVMBuildNot(b, all_neg, "");
}
75 
76 /* Perform front/back face culling and return true if the primitive is accepted. */
ac_cull_face(struct ac_llvm_context * ctx,LLVMValueRef pos[3][4],struct ac_position_w_info * w,bool cull_front,bool cull_back,bool cull_zero_area)77 static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
78                                  struct ac_position_w_info *w, bool cull_front, bool cull_back,
79                                  bool cull_zero_area)
80 {
81    LLVMBuilderRef builder = ctx->builder;
82 
83    if (cull_front && cull_back)
84       return ctx->i1false;
85 
86    if (!cull_front && !cull_back && !cull_zero_area)
87       return ctx->i1true;
88 
89    /* Front/back face culling. Also if the determinant == 0, the triangle
90     * area is 0.
91     */
92    LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
93    LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
94    LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
95    LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
96    LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
97    LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
98    LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
99 
100    /* Negative W negates the determinant. */
101    det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, "");
102 
103    LLVMValueRef accepted = NULL;
104    if (cull_front) {
105       LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
106       accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
107    } else if (cull_back) {
108       LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
109       accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
110    } else if (cull_zero_area) {
111       accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
112    }
113    return accepted;
114 }
115 
116 /* Perform view culling and small primitive elimination and return true
117  * if the primitive is accepted and initially_accepted == true. */
cull_bbox(struct ac_llvm_context * ctx,LLVMValueRef pos[3][4],LLVMValueRef initially_accepted,struct ac_position_w_info * w,LLVMValueRef vp_scale[2],LLVMValueRef vp_translate[2],LLVMValueRef small_prim_precision,bool cull_view_xy,bool cull_view_near_z,bool cull_view_far_z,bool cull_small_prims,bool use_halfz_clip_space)118 static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
119                               LLVMValueRef initially_accepted, struct ac_position_w_info *w,
120                               LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
121                               LLVMValueRef small_prim_precision, bool cull_view_xy,
122                               bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
123                               bool use_halfz_clip_space)
124 {
125    LLVMBuilderRef builder = ctx->builder;
126 
127    if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
128       return initially_accepted;
129 
130    /* Skip the culling if the primitive has already been rejected or
131     * if any W is negative. The bounding box culling doesn't work when
132     * W is negative.
133     */
134    LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, w->all_w_positive, "");
135    LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
136    LLVMBuildStore(builder, initially_accepted, accepted_var);
137 
138    ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
139    {
140       LLVMValueRef bbox_min[3], bbox_max[3];
141       LLVMValueRef accepted = initially_accepted;
142 
143       /* Compute the primitive bounding box for easy culling. */
144       for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
145          bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
146          bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
147 
148          bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
149          bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
150       }
151 
152       /* View culling. */
153       if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
154          for (unsigned chan = 0; chan < 3; chan++) {
155             LLVMValueRef visible;
156 
157             if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) {
158                float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
159                visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
160                                        LLVMConstReal(ctx->f32, t), "");
161                accepted = LLVMBuildAnd(builder, accepted, visible, "");
162             }
163 
164             if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) {
165                visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, "");
166                accepted = LLVMBuildAnd(builder, accepted, visible, "");
167             }
168          }
169       }
170 
171       /* Small primitive elimination. */
172       if (cull_small_prims) {
173          /* Assuming a sample position at (0.5, 0.5), if we round
174           * the bounding box min/max extents and the results of
175           * the rounding are equal in either the X or Y direction,
176           * the bounding box does not intersect the sample.
177           *
178           * See these GDC slides for pictures:
179           * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
180           */
181          LLVMValueRef min, max, not_equal[2], visible;
182 
183          for (unsigned chan = 0; chan < 2; chan++) {
184             /* Convert the position to screen-space coordinates. */
185             min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
186             max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
187             /* Scale the bounding box according to the precision of
188              * the rasterizer and the number of MSAA samples. */
189             min = LLVMBuildFSub(builder, min, small_prim_precision, "");
190             max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
191 
192             /* Determine if the bbox intersects the sample point.
193              * It also works for MSAA, but vp_scale, vp_translate,
194              * and small_prim_precision are computed differently.
195              */
196             min = ac_build_round(ctx, min);
197             max = ac_build_round(ctx, max);
198             not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
199          }
200          visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
201          accepted = LLVMBuildAnd(builder, accepted, visible, "");
202       }
203 
204       LLVMBuildStore(builder, accepted, accepted_var);
205    }
206    ac_build_endif(ctx, 10000000);
207 
208    return LLVMBuildLoad(builder, accepted_var, "");
209 }
210 
211 /**
212  * Return i1 true if the primitive is accepted (not culled).
213  *
214  * \param pos                   Vertex positions 3x vec4
215  * \param initially_accepted    AND'ed with the result. Some computations can be
216  *                              skipped if this is false.
217  * \param vp_scale              Viewport scale XY.
218  *                              For MSAA, multiply them by the number of samples.
219  * \param vp_translate          Viewport translation XY.
220  *                              For MSAA, multiply them by the number of samples.
221  * \param small_prim_precision  Precision of small primitive culling. This should
222  *                              be the same as or greater than the precision of
223  *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
224  *                              subpixel_bits are defined by the quantization mode.
225  * \param options               See ac_cull_options.
226  */
ac_cull_triangle(struct ac_llvm_context * ctx,LLVMValueRef pos[3][4],LLVMValueRef initially_accepted,LLVMValueRef vp_scale[2],LLVMValueRef vp_translate[2],LLVMValueRef small_prim_precision,struct ac_cull_options * options)227 LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
228                               LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
229                               LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
230                               struct ac_cull_options *options)
231 {
232    struct ac_position_w_info w;
233    ac_analyze_position_w(ctx, pos, &w);
234 
235    /* W culling. */
236    LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
237    accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
238 
239    /* Face culling. */
240    accepted = LLVMBuildAnd(
241       ctx->builder, accepted,
242       ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area),
243       "");
244 
245    /* View culling and small primitive elimination. */
246    accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
247                         options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
248                         options->cull_small_prims, options->use_halfz_clip_space);
249    return accepted;
250 }
251