• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/os_time.h"
32 #include "gallivm/lp_bld_arit.h"
33 #include "gallivm/lp_bld_bitarit.h"
34 #include "gallivm/lp_bld_const.h"
35 #include "gallivm/lp_bld_debug.h"
36 #include "gallivm/lp_bld_init.h"
37 #include "gallivm/lp_bld_logic.h"
38 #include "gallivm/lp_bld_intr.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_type.h"
41 
42 #include "lp_perf.h"
43 #include "lp_debug.h"
44 #include "lp_flush.h"
45 #include "lp_screen.h"
46 #include "lp_context.h"
47 #include "lp_state.h"
48 #include "lp_state_fs.h"
49 #include "lp_state_setup.h"
50 
51 
/**
 * Running counter used to number generated setup variants (for debugging).
 * Static storage duration, so it starts at zero.
 */
static unsigned setup_no;
54 
55 
56 /* currently organized to interpolate full float[4] attributes even
57  * when some elements are unused.  Later, can pack vertex data more
58  * closely.
59  */
60 
61 
62 struct lp_setup_args
63 {
64    /* Function arguments:
65     */
66    LLVMValueRef v0;
67    LLVMValueRef v1;
68    LLVMValueRef v2;
69    LLVMValueRef facing;		/* boolean */
70    LLVMValueRef a0;
71    LLVMValueRef dadx;
72    LLVMValueRef dady;
73    LLVMValueRef key;
74 
75    /* Derived:
76     */
77    LLVMValueRef x0_center;
78    LLVMValueRef y0_center;
79    LLVMValueRef dy20_ooa;
80    LLVMValueRef dy01_ooa;
81    LLVMValueRef dx20_ooa;
82    LLVMValueRef dx01_ooa;
83    struct lp_build_context bld;
84 };
85 
86 
87 static void
store_coef(struct gallivm_state * gallivm,const struct lp_setup_args * args,unsigned slot,LLVMValueRef a0,LLVMValueRef dadx,LLVMValueRef dady)88 store_coef(struct gallivm_state *gallivm,
89            const struct lp_setup_args *args,
90            unsigned slot,
91            LLVMValueRef a0,
92            LLVMValueRef dadx,
93            LLVMValueRef dady)
94 {
95    LLVMBuilderRef builder = gallivm->builder;
96    LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
97 
98    LLVMBuildStore(builder,
99                   a0,
100                   LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
101 
102    LLVMBuildStore(builder,
103                   dadx,
104                   LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
105 
106    LLVMBuildStore(builder,
107                   dady,
108                   LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
109 }
110 
111 
112 static void
emit_constant_coef4(struct gallivm_state * gallivm,const struct lp_setup_args * args,unsigned slot,LLVMValueRef vert)113 emit_constant_coef4(struct gallivm_state *gallivm,
114                     const struct lp_setup_args *args,
115                     unsigned slot,
116                     LLVMValueRef vert)
117 {
118    store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero);
119 }
120 
121 
122 /**
123  * Setup the fragment input attribute with the front-facing value.
124  * \param frontface  is the triangle front facing?
125  */
126 static void
emit_facing_coef(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot)127 emit_facing_coef(struct gallivm_state *gallivm,
128                  struct lp_setup_args *args,
129                  unsigned slot)
130 {
131    LLVMBuilderRef builder = gallivm->builder;
132    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
133    LLVMValueRef a0_0 = args->facing;
134    LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
135    LLVMValueRef a0, face_val;
136    const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
137                                        PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
138    /* Our face val is either 1 or 0 so we do
139     * face = (val * 2) - 1
140     * to make it 1 or -1
141     */
142    face_val =
143       LLVMBuildFAdd(builder,
144                     LLVMBuildFMul(builder, a0_0f,
145                                   lp_build_const_float(gallivm, 2.0),
146                                   ""),
147                     lp_build_const_float(gallivm, -1.0),
148                     "facing");
149    face_val = lp_build_broadcast_scalar(&args->bld, face_val);
150    a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles);
151 
152    store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero);
153 }
154 
155 
156 static LLVMValueRef
vert_attrib(struct gallivm_state * gallivm,LLVMValueRef vert,int attr,int elem,const char * name)157 vert_attrib(struct gallivm_state *gallivm,
158             LLVMValueRef vert,
159             int attr,
160             int elem,
161             const char *name)
162 {
163    LLVMBuilderRef b = gallivm->builder;
164    LLVMValueRef idx[2];
165    idx[0] = lp_build_const_int32(gallivm, attr);
166    idx[1] = lp_build_const_int32(gallivm, elem);
167    return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
168 }
169 
170 
171 static void
lp_twoside(struct gallivm_state * gallivm,struct lp_setup_args * args,const struct lp_setup_variant_key * key,int bcolor_slot,LLVMValueRef attribv[3])172 lp_twoside(struct gallivm_state *gallivm,
173            struct lp_setup_args *args,
174            const struct lp_setup_variant_key *key,
175            int bcolor_slot,
176            LLVMValueRef attribv[3])
177 {
178    LLVMBuilderRef b = gallivm->builder;
179    LLVMValueRef a0_back, a1_back, a2_back;
180    LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
181 
182    LLVMValueRef facing = args->facing;
183    LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing,
184                                              lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
185 
186    a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
187    a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
188    a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
189 
190    /* Possibly swap the front and back attrib values,
191     *
192     * Prefer select to if so we don't have to worry about phis or
193     * allocas.
194     */
195    attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
196    attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
197    attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
198 }
199 
200 
201 static LLVMValueRef
lp_do_offset_tri(struct gallivm_state * gallivm,struct lp_setup_args * args,const struct lp_setup_variant_key * key,LLVMValueRef inv_det,LLVMValueRef dxyz01,LLVMValueRef dxyz20,LLVMValueRef attribv[3])202 lp_do_offset_tri(struct gallivm_state *gallivm,
203                  struct lp_setup_args *args,
204                  const struct lp_setup_variant_key *key,
205                  LLVMValueRef inv_det,
206                  LLVMValueRef dxyz01,
207                  LLVMValueRef dxyz20,
208                  LLVMValueRef attribv[3])
209 {
210    LLVMBuilderRef b = gallivm->builder;
211    struct lp_build_context flt_scalar_bld;
212    struct lp_build_context int_scalar_bld;
213    struct lp_build_context *bld = &args->bld;
214    LLVMValueRef zoffset, mult;
215    LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
216    LLVMValueRef max, max_value, res12;
217    LLVMValueRef shuffles[4];
218    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
219    LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
220    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
221    LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
222    LLVMValueRef threei  = lp_build_const_int32(gallivm, 3);
223 
224    /* (res12) = cross(e,f).xy */
225    shuffles[0] = twoi;
226    shuffles[1] = zeroi;
227    shuffles[2] = onei;
228    shuffles[3] = twoi;
229    dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");
230 
231    shuffles[0] = onei;
232    shuffles[1] = twoi;
233    shuffles[2] = twoi;
234    shuffles[3] = zeroi;
235    dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");
236 
237    dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");
238 
239    shuffles[0] = twoi;
240    shuffles[1] = threei;
241    shuffles[2] = LLVMGetUndef(shuf_type);
242    shuffles[3] = LLVMGetUndef(shuf_type);
243    dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
244                                         LLVMConstVector(shuffles, 4), "");
245 
246    res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");
247 
248    /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
249    dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
250    dzdxdzdy = lp_build_abs(bld, dzdxdzdy);
251 
252    dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
253    dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");
254 
255    /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
256    max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
257    max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
258 
259    mult = LLVMBuildFMul(b, max_value,
260                         lp_build_const_float(gallivm,
261                                              key->pgon_offset_scale), "");
262 
263    lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32));
264 
265    if (key->floating_point_depth) {
266       /*
267        * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) -
268        *           mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale
269        *
270        * NOTE: Assumes IEEE float32.
271        */
272       LLVMValueRef c23_shifted, exp_mask, bias, exp;
273       LLVMValueRef maxz_value, maxz0z1_value;
274 
275       lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32));
276 
277       c23_shifted = lp_build_const_int32(gallivm, 23 << 23);
278       exp_mask = lp_build_const_int32(gallivm, 0xff << 23);
279 
280       maxz0z1_value = lp_build_max(&flt_scalar_bld,
281                          lp_build_abs(&flt_scalar_bld,
282                             LLVMBuildExtractElement(b, attribv[0], twoi, "")),
283                          lp_build_abs(&flt_scalar_bld,
284                             LLVMBuildExtractElement(b, attribv[1], twoi, "")));
285 
286       maxz_value = lp_build_max(&flt_scalar_bld,
287                       lp_build_abs(&flt_scalar_bld,
288                          LLVMBuildExtractElement(b, attribv[2], twoi, "")),
289                       maxz0z1_value);
290 
291       exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
292       exp = lp_build_and(&int_scalar_bld, exp, exp_mask);
293       exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted);
294       /* Clamping to zero means mrd will be zero for very small numbers,
295        * but specs do not indicate this should be prevented by clamping
296        * mrd to smallest normal number instead. */
297       exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero);
298       exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, "");
299 
300       bias = LLVMBuildFMul(b, exp,
301                            lp_build_const_float(gallivm, key->pgon_offset_units),
302                            "bias");
303 
304       zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset");
305    } else {
306       /*
307        * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
308        */
309       zoffset = LLVMBuildFAdd(b,
310                               lp_build_const_float(gallivm, key->pgon_offset_units),
311                               mult, "zoffset");
312    }
313 
314    if (key->pgon_offset_clamp > 0) {
315       zoffset = lp_build_min(&flt_scalar_bld,
316                              lp_build_const_float(gallivm, key->pgon_offset_clamp),
317                              zoffset);
318    } else if (key->pgon_offset_clamp < 0) {
319       zoffset = lp_build_max(&flt_scalar_bld,
320                              lp_build_const_float(gallivm, key->pgon_offset_clamp),
321                              zoffset);
322    }
323 
324    return zoffset;
325 }
326 
327 
328 static void
load_attribute(struct gallivm_state * gallivm,struct lp_setup_args * args,const struct lp_setup_variant_key * key,unsigned vert_attr,LLVMValueRef attribv[3])329 load_attribute(struct gallivm_state *gallivm,
330                struct lp_setup_args *args,
331                const struct lp_setup_variant_key *key,
332                unsigned vert_attr,
333                LLVMValueRef attribv[3])
334 {
335    LLVMBuilderRef b = gallivm->builder;
336    LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
337 
338    /* Load the vertex data
339     */
340    attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
341    attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
342    attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
343 
344    /* Potentially modify it according to twoside, etc:
345     */
346    if (key->twoside) {
347       if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
348          lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
349       else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
350          lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
351    }
352 }
353 
354 
355 /*
356  * FIXME: interpolation is always done wrt fb origin (0/0).
357  * However, if some (small) tri is far away from the origin and gradients
358  * are large, this can lead to HUGE errors, since the a0 value calculated
359  * here can get very large (with the actual values inside the triangle way
360  * smaller), leading to complete loss of accuracy. This could be prevented
361  * by using some point inside (or at corner) of the tri as interpolation
362  * origin, or just use barycentric interpolation (which GL suggests and is
363  * what real hw does - you can get the barycentric coordinates from the
364  * edge functions in rasterization in principle (though we skip these
365  * sometimes completely in case of tris covering a block fully,
366  * which obviously wouldn't work)).
367  */
368 static void
calc_coef4(struct gallivm_state * gallivm,struct lp_setup_args * args,LLVMValueRef a0,LLVMValueRef a1,LLVMValueRef a2,LLVMValueRef out[3])369 calc_coef4(struct gallivm_state *gallivm,
370            struct lp_setup_args *args,
371            LLVMValueRef a0,
372            LLVMValueRef a1,
373            LLVMValueRef a2,
374            LLVMValueRef out[3])
375 {
376    LLVMBuilderRef b = gallivm->builder;
377    LLVMValueRef attr_0;
378    LLVMValueRef dy20_ooa = args->dy20_ooa;
379    LLVMValueRef dy01_ooa = args->dy01_ooa;
380    LLVMValueRef dx20_ooa = args->dx20_ooa;
381    LLVMValueRef dx01_ooa = args->dx01_ooa;
382    LLVMValueRef x0_center = args->x0_center;
383    LLVMValueRef y0_center = args->y0_center;
384    LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
385    LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
386 
387    /* Calculate dadx (vec4f)
388     */
389    LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
390    LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
391    LLVMValueRef dadx          = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
392 
393    /* Calculate dady (vec4f)
394     */
395    LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
396    LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
397    LLVMValueRef dady          = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
398 
399    /* Calculate a0 - the attribute value at the origin
400     */
401    LLVMValueRef dadx_x0    = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
402    LLVMValueRef dady_y0    = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
403    LLVMValueRef attr_v0    = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
404    attr_0                  = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
405 
406    out[0] = attr_0;
407    out[1] = dadx;
408    out[2] = dady;
409 }
410 
411 
412 static void
emit_coef4(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot,LLVMValueRef a0,LLVMValueRef a1,LLVMValueRef a2)413 emit_coef4(struct gallivm_state *gallivm,
414            struct lp_setup_args *args,
415            unsigned slot,
416            LLVMValueRef a0,
417            LLVMValueRef a1,
418            LLVMValueRef a2)
419 {
420    LLVMValueRef coeffs[3];
421    calc_coef4(gallivm, args, a0, a1, a2, coeffs);
422    store_coef(gallivm, args, slot, coeffs[0], coeffs[1], coeffs[2]);
423 }
424 
425 
426 static void
emit_linear_coef(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot,LLVMValueRef attribv[3])427 emit_linear_coef(struct gallivm_state *gallivm,
428                  struct lp_setup_args *args,
429                  unsigned slot,
430                  LLVMValueRef attribv[3])
431 {
432    /* nothing to do anymore */
433    emit_coef4(gallivm, args, slot, attribv[0], attribv[1], attribv[2]);
434 }
435 
436 
437 /**
438  * Compute a0, dadx and dady for a perspective-corrected interpolant,
439  * for a triangle.
440  * We basically multiply the vertex value by 1/w before computing
441  * the plane coefficients (a0, dadx, dady).
442  * Later, when we compute the value at a particular fragment position we'll
443  * divide the interpolated value by the interpolated W at that fragment.
444  */
445 static void
apply_perspective_corr(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot,LLVMValueRef attribv[3])446 apply_perspective_corr(struct gallivm_state *gallivm,
447                        struct lp_setup_args *args,
448                        unsigned slot,
449                        LLVMValueRef attribv[3])
450 {
451    LLVMBuilderRef b = gallivm->builder;
452 
453    /* premultiply by 1/w  (v[0][3] is always 1/w):
454     */
455    LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld,
456                             vert_attrib(gallivm, args->v0, 0, 3, "v0_oow"));
457    LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld,
458                             vert_attrib(gallivm, args->v1, 0, 3, "v1_oow"));
459    LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld,
460                             vert_attrib(gallivm, args->v2, 0, 3, "v2_oow"));
461 
462    attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
463    attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
464    attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
465 }
466 
467 
468 /**
469  * Compute the inputs-> dadx, dady, a0 values.
470  */
471 static void
emit_tri_coef(struct gallivm_state * gallivm,const struct lp_setup_variant_key * key,struct lp_setup_args * args)472 emit_tri_coef(struct gallivm_state *gallivm,
473               const struct lp_setup_variant_key *key,
474               struct lp_setup_args *args)
475 {
476    LLVMValueRef attribs[3];
477 
478    /* setup interpolation for all the remaining attributes */
479    for (unsigned slot = 0; slot < key->num_inputs; slot++) {
480       switch (key->inputs[slot].interp) {
481       case LP_INTERP_CONSTANT:
482          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
483          if (key->flatshade_first) {
484             emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
485          } else {
486             emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
487          }
488          break;
489 
490       case LP_INTERP_LINEAR:
491          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
492          emit_linear_coef(gallivm, args, slot+1, attribs);
493          break;
494 
495       case LP_INTERP_PERSPECTIVE:
496          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
497          apply_perspective_corr(gallivm, args, slot+1, attribs);
498          emit_linear_coef(gallivm, args, slot+1, attribs);
499          break;
500 
501       case LP_INTERP_POSITION:
502          /*
503           * The generated pixel interpolators will pick up the coeffs from
504           * slot 0.
505           */
506          break;
507 
508       case LP_INTERP_FACING:
509          emit_facing_coef(gallivm, args, slot+1);
510          break;
511 
512       default:
513          assert(0);
514       }
515    }
516 }
517 
518 
519 /* XXX: generic code:
520  */
521 static void
set_noalias(LLVMBuilderRef builder,LLVMValueRef function,const LLVMTypeRef * arg_types,int nr_args)522 set_noalias(LLVMBuilderRef builder,
523             LLVMValueRef function,
524             const LLVMTypeRef *arg_types,
525             int nr_args)
526 {
527    for (int i = 0; i < nr_args; ++i) {
528       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
529          lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
530       }
531    }
532 }
533 
534 
535 static void
init_args(struct gallivm_state * gallivm,const struct lp_setup_variant_key * key,struct lp_setup_args * args)536 init_args(struct gallivm_state *gallivm,
537           const struct lp_setup_variant_key *key,
538           struct lp_setup_args *args)
539 {
540    LLVMBuilderRef b = gallivm->builder;
541    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
542    LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
543    LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
544    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
545    LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
546    LLVMValueRef e, f, ef, ooa;
547    LLVMValueRef shuffles[4], shuf10;
548    LLVMValueRef attr_pos[3];
549    LLVMValueRef polygon_offset;
550    struct lp_type typef4 = lp_type_float_vec(32, 128);
551    struct lp_build_context bld;
552 
553    lp_build_context_init(&bld, gallivm, typef4);
554    args->bld = bld;
555 
556    /* The internal position input is in slot zero:
557     */
558    load_attribute(gallivm, args, key, 0, attr_pos);
559 
560    pixel_center = lp_build_const_vec(gallivm, typef4,
561                                      (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0);
562 
563    /*
564     * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
565     * also offset_tri uses actually xyz in them
566     */
567    xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );
568 
569    dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
570    dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");
571 
572    shuffles[0] = onei;
573    shuffles[1] = zeroi;
574    shuffles[2] = LLVMGetUndef(shuf_type);
575    shuffles[3] = LLVMGetUndef(shuf_type);
576    shuf10 = LLVMConstVector(shuffles, 4);
577 
578    dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, "");
579 
580    ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
581    e = LLVMBuildExtractElement(b, ef, zeroi, "");
582    f = LLVMBuildExtractElement(b, ef, onei, "");
583 
584    ooa  = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");
585 
586    ooa = lp_build_broadcast_scalar(&bld, ooa);
587 
588    /* tri offset calc shares a lot of arithmetic, do it here */
589    if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
590       polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
591    } else {
592       polygon_offset = lp_build_const_float(gallivm, 0.0f);
593    }
594 
595    dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
596    dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
597 
598    args->dy20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
599    args->dy01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
600 
601    args->dx20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
602    args->dx01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);
603 
604    args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
605    args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
606 
607    LLVMValueRef coeffs[3];
608    calc_coef4(gallivm, args, attr_pos[0], attr_pos[1], attr_pos[2], coeffs);
609 
610    /* This is a bit sneaky:
611     * Because we observe that the X component of A0 is otherwise unused,
612     * we can overwrite it with the computed polygon-offset value, to make
613     * sure it's available in the fragment shader without having to change
614     * the interface (which is error-prone).
615     */
616    coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset,
617                                       lp_build_const_int32(gallivm, 0), "");
618 
619    store_coef(gallivm, args, 0, coeffs[0], coeffs[1], coeffs[2]);
620 }
621 
622 
623 /**
624  * Generate the runtime callable function for the coefficient calculation.
625  *
626  */
627 static struct lp_setup_variant *
generate_setup_variant(struct lp_setup_variant_key * key,struct llvmpipe_context * lp)628 generate_setup_variant(struct lp_setup_variant_key *key,
629                        struct llvmpipe_context *lp)
630 {
631    int64_t t0 = 0, t1;
632 
633    if (0)
634       goto fail;
635 
636    struct lp_setup_variant *variant = CALLOC_STRUCT(lp_setup_variant);
637    if (!variant)
638       goto fail;
639 
640    variant->no = setup_no++;
641 
642    char func_name[64];
643    snprintf(func_name, sizeof(func_name), "setup_variant_%u",
644             variant->no);
645 
646    struct gallivm_state *gallivm;
647    variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL);
648    if (!variant->gallivm) {
649       goto fail;
650    }
651 
652    LLVMBuilderRef builder = gallivm->builder;
653 
654    if (LP_DEBUG & DEBUG_COUNTERS) {
655       t0 = os_time_get();
656    }
657 
658    memcpy(&variant->key, key, key->size);
659    variant->list_item_global.base = variant;
660 
661    /* Currently always deal with full 4-wide vertex attributes from
662     * the vertices.
663     */
664 
665    LLVMTypeRef vec4f_type =
666       LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
667 
668    LLVMTypeRef arg_types[8];
669    arg_types[0] = LLVMPointerType(vec4f_type, 0);        /* v0 */
670    arg_types[1] = LLVMPointerType(vec4f_type, 0);        /* v1 */
671    arg_types[2] = LLVMPointerType(vec4f_type, 0);        /* v2 */
672    arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
673    arg_types[4] = LLVMPointerType(vec4f_type, 0);	/* a0, aligned */
674    arg_types[5] = LLVMPointerType(vec4f_type, 0);	/* dadx, aligned */
675    arg_types[6] = LLVMPointerType(vec4f_type, 0);	/* dady, aligned */
676    arg_types[7] = LLVMPointerType(vec4f_type, 0);	/* key (placeholder) */
677 
678    LLVMTypeRef func_type =
679       LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
680                        arg_types, ARRAY_SIZE(arg_types), 0);
681 
682    variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
683    if (!variant->function)
684       goto fail;
685 
686    LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
687 
688    struct lp_setup_args args;
689    args.v0       = LLVMGetParam(variant->function, 0);
690    args.v1       = LLVMGetParam(variant->function, 1);
691    args.v2       = LLVMGetParam(variant->function, 2);
692    args.facing   = LLVMGetParam(variant->function, 3);
693    args.a0       = LLVMGetParam(variant->function, 4);
694    args.dadx     = LLVMGetParam(variant->function, 5);
695    args.dady     = LLVMGetParam(variant->function, 6);
696    args.key      = LLVMGetParam(variant->function, 7);
697 
698    lp_build_name(args.v0, "in_v0");
699    lp_build_name(args.v1, "in_v1");
700    lp_build_name(args.v2, "in_v2");
701    lp_build_name(args.facing, "in_facing");
702    lp_build_name(args.a0, "out_a0");
703    lp_build_name(args.dadx, "out_dadx");
704    lp_build_name(args.dady, "out_dady");
705    lp_build_name(args.key, "key");
706 
707    /*
708     * Function body
709     */
710    LLVMBasicBlockRef block =
711       LLVMAppendBasicBlockInContext(gallivm->context,
712                                     variant->function, "entry");
713    LLVMPositionBuilderAtEnd(builder, block);
714 
715    set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
716    init_args(gallivm, &variant->key, &args);
717    emit_tri_coef(gallivm, &variant->key, &args);
718 
719    LLVMBuildRetVoid(builder);
720 
721    gallivm_verify_function(gallivm, variant->function);
722 
723    gallivm_compile_module(gallivm);
724 
725    variant->jit_function = (lp_jit_setup_triangle)
726       gallivm_jit_function(gallivm, variant->function);
727    if (!variant->jit_function)
728       goto fail;
729 
730    gallivm_free_ir(variant->gallivm);
731 
732    /*
733     * Update timing information:
734     */
735    if (LP_DEBUG & DEBUG_COUNTERS) {
736       t1 = os_time_get();
737       LP_COUNT_ADD(llvm_compile_time, t1 - t0);
738       LP_COUNT_ADD(nr_llvm_compiles, 1);
739    }
740 
741    return variant;
742 
743 fail:
744    if (variant) {
745       if (variant->gallivm) {
746          gallivm_destroy(variant->gallivm);
747       }
748       FREE(variant);
749    }
750 
751    return NULL;
752 }
753 
754 
755 static void
lp_make_setup_variant_key(const struct llvmpipe_context * lp,struct lp_setup_variant_key * key)756 lp_make_setup_variant_key(const struct llvmpipe_context *lp,
757                           struct lp_setup_variant_key *key)
758 {
759    const struct lp_fragment_shader *fs = lp->fs;
760 
761    assert(sizeof key->inputs[0] == sizeof(uint));
762 
763    key->num_inputs = fs->info.base.num_inputs;
764    key->flatshade_first = lp->rasterizer->flatshade_first;
765    key->pixel_center_half = lp->rasterizer->half_pixel_center;
766    key->multisample = lp->rasterizer->multisample;
767    key->twoside = lp->rasterizer->light_twoside;
768    key->size = Offset(struct lp_setup_variant_key, inputs[key->num_inputs]);
769 
770    key->color_slot = lp->color_slot[0];
771    key->bcolor_slot = lp->bcolor_slot[0];
772    key->spec_slot = lp->color_slot[1];
773    key->bspec_slot = lp->bcolor_slot[1];
774 
775    /*
776     * If depth is floating point, depth bias is calculated with respect
777     * to the primitive's maximum Z value. Retain the original depth bias
778     * value until that stage.
779     */
780    key->floating_point_depth = lp->floating_point_depth;
781 
782    if (key->floating_point_depth) {
783       key->pgon_offset_units = (float) lp->rasterizer->offset_units;
784    } else {
785       key->pgon_offset_units =
786          (float) (lp->rasterizer->offset_units * lp->mrd * 2);
787    }
788 
789    key->pgon_offset_scale = lp->rasterizer->offset_scale;
790    key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
791    key->uses_constant_interp = 0;
792    key->pad = 0;
793 
794    memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
795 
796    for (unsigned i = 0; i < key->num_inputs; i++) {
797       if (key->inputs[i].interp == LP_INTERP_COLOR) {
798          if (lp->rasterizer->flatshade)
799             key->inputs[i].interp = LP_INTERP_CONSTANT;
800          else
801             key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
802       }
803       if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
804          key->uses_constant_interp = 1;
805       }
806    }
807 }
808 
809 
810 static void
remove_setup_variant(struct llvmpipe_context * lp,struct lp_setup_variant * variant)811 remove_setup_variant(struct llvmpipe_context *lp,
812                      struct lp_setup_variant *variant)
813 {
814    if (gallivm_debug & GALLIVM_DEBUG_IR) {
815       debug_printf("llvmpipe: del setup_variant #%u total %u\n",
816                    variant->no, lp->nr_setup_variants);
817    }
818 
819    if (variant->gallivm) {
820       gallivm_destroy(variant->gallivm);
821    }
822 
823    list_del(&variant->list_item_global.list);
824    lp->nr_setup_variants--;
825    FREE(variant);
826 }
827 
828 
829 /* When the number of setup variants exceeds a threshold, cull a
830  * fraction (currently a quarter) of them.
831  */
832 static void
cull_setup_variants(struct llvmpipe_context * lp)833 cull_setup_variants(struct llvmpipe_context *lp)
834 {
835    struct pipe_context *pipe = &lp->pipe;
836 
837    /*
838     * XXX: we need to flush the context until we have some sort of reference
839     * counting in fragment shaders as they may still be binned
840     * Flushing alone might not be sufficient we need to wait on it too.
841     */
842    llvmpipe_finish(pipe, __FUNCTION__);
843 
844    for (int i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
845       struct lp_setup_variant_list_item *item;
846       if (list_is_empty(&lp->setup_variants_list.list)) {
847          break;
848       }
849       item = list_last_entry(&lp->setup_variants_list.list,
850                              struct lp_setup_variant_list_item, list);
851       assert(item);
852       assert(item->base);
853       remove_setup_variant(lp, item->base);
854    }
855 }
856 
857 
858 /**
859  * Update fragment/vertex shader linkage state.  This is called just
860  * prior to drawing something when some fragment-related state has
861  * changed.
862  */
863 void
llvmpipe_update_setup(struct llvmpipe_context * lp)864 llvmpipe_update_setup(struct llvmpipe_context *lp)
865 {
866    struct lp_setup_variant_key *key = &lp->setup_variant.key;
867    struct lp_setup_variant *variant = NULL;
868    struct lp_setup_variant_list_item *li;
869 
870    lp_make_setup_variant_key(lp, key);
871 
872    LIST_FOR_EACH_ENTRY(li, &lp->setup_variants_list.list, list) {
873       if (li->base->key.size == key->size &&
874          memcmp(&li->base->key, key, key->size) == 0) {
875          variant = li->base;
876          break;
877       }
878    }
879 
880    if (variant) {
881       list_move_to(&variant->list_item_global.list, &lp->setup_variants_list.list);
882    } else {
883       if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
884          cull_setup_variants(lp);
885       }
886 
887       variant = generate_setup_variant(key, lp);
888       if (variant) {
889          list_add(&variant->list_item_global.list, &lp->setup_variants_list.list);
890          lp->nr_setup_variants++;
891       }
892    }
893 
894    lp_setup_set_setup_variant(lp->setup, variant);
895 }
896 
897 
898 void
lp_delete_setup_variants(struct llvmpipe_context * lp)899 lp_delete_setup_variants(struct llvmpipe_context *lp)
900 {
901    struct lp_setup_variant_list_item *li, *next;
902    LIST_FOR_EACH_ENTRY_SAFE(li, next, &lp->setup_variants_list.list, list) {
903       remove_setup_variant(lp, li->base);
904    }
905 }
906 
907 
908 void
lp_dump_setup_coef(const struct lp_setup_variant_key * key,const float (* sa0)[4],const float (* sdadx)[4],const float (* sdady)[4])909 lp_dump_setup_coef(const struct lp_setup_variant_key *key,
910                    const float (*sa0)[4],
911                    const float (*sdadx)[4],
912                    const float (*sdady)[4])
913 {
914    for (int i = 0; i < TGSI_NUM_CHANNELS; i++) {
915       float a0   = sa0  [0][i];
916       float dadx = sdadx[0][i];
917       float dady = sdady[0][i];
918 
919       debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
920                    "xyzw"[i], a0, dadx, dady);
921    }
922 
923    for (int slot = 0; slot < key->num_inputs; slot++) {
924       unsigned usage_mask = key->inputs[slot].usage_mask;
925       for (int i = 0; i < TGSI_NUM_CHANNELS; i++) {
926          if (usage_mask & (1 << i)) {
927             float a0   = sa0  [1 + slot][i];
928             float dadx = sdadx[1 + slot][i];
929             float dady = sdady[1 + slot][i];
930 
931             debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
932                          slot, "xyzw"[i], a0, dadx, dady);
933          }
934       }
935    }
936 }
937