1 /**************************************************************************
2 *
3 * Copyright 2010 VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/os_time.h"
32 #include "gallivm/lp_bld_arit.h"
33 #include "gallivm/lp_bld_bitarit.h"
34 #include "gallivm/lp_bld_const.h"
35 #include "gallivm/lp_bld_debug.h"
36 #include "gallivm/lp_bld_init.h"
37 #include "gallivm/lp_bld_logic.h"
38 #include "gallivm/lp_bld_intr.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_type.h"
41
42 #include "lp_perf.h"
43 #include "lp_debug.h"
44 #include "lp_flush.h"
45 #include "lp_screen.h"
46 #include "lp_context.h"
47 #include "lp_state.h"
48 #include "lp_state_fs.h"
49 #include "lp_state_setup.h"
50
51
/**
 * Setup shader number (for debugging).
 *
 * generate_setup_variant() stamps each new variant with the current value
 * and post-increments it, so the number also appears in the generated
 * function's name ("setup_variant_%u").
 */
static unsigned setup_no = 0;
54
55
56 /* currently organized to interpolate full float[4] attributes even
57 * when some elements are unused. Later, can pack vertex data more
58 * closely.
59 */
60
61
/**
 * Values shared by the code-generation helpers in this file while one
 * setup function is being built: the LLVM values of the generated
 * function's parameters, plus values init_args() derives from them.
 */
struct lp_setup_args
{
   /* Function arguments:
    */
   LLVMValueRef v0;       /* pointer to vec4f attributes of vertex 0 */
   LLVMValueRef v1;       /* pointer to vec4f attributes of vertex 1 */
   LLVMValueRef v2;       /* pointer to vec4f attributes of vertex 2 */
   LLVMValueRef facing;  /* boolean */
   LLVMValueRef a0;       /* output: per-slot vec4f a0 coefficients */
   LLVMValueRef dadx;     /* output: per-slot vec4f d/dx coefficients */
   LLVMValueRef dady;     /* output: per-slot vec4f d/dy coefficients */
   LLVMValueRef key;      /* placeholder parameter; not read in this file */

   /* Derived:
    */
   LLVMValueRef x0_center;  /* broadcast x of (vertex 0 pos - pixel center) */
   LLVMValueRef y0_center;  /* broadcast y of (vertex 0 pos - pixel center) */
   LLVMValueRef dy20_ooa;   /* broadcast y of (pos2 - pos0) * ooa */
   LLVMValueRef dy01_ooa;   /* broadcast y of (pos0 - pos1) * ooa */
   LLVMValueRef dx20_ooa;   /* broadcast x of (pos2 - pos0) * ooa */
   LLVMValueRef dx01_ooa;   /* broadcast x of (pos0 - pos1) * ooa */
   struct lp_build_context bld;  /* 4 x float32 build context */
};
85
86
87 static void
store_coef(struct gallivm_state * gallivm,const struct lp_setup_args * args,unsigned slot,LLVMValueRef a0,LLVMValueRef dadx,LLVMValueRef dady)88 store_coef(struct gallivm_state *gallivm,
89 const struct lp_setup_args *args,
90 unsigned slot,
91 LLVMValueRef a0,
92 LLVMValueRef dadx,
93 LLVMValueRef dady)
94 {
95 LLVMBuilderRef builder = gallivm->builder;
96 LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
97
98 LLVMBuildStore(builder,
99 a0,
100 LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
101
102 LLVMBuildStore(builder,
103 dadx,
104 LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
105
106 LLVMBuildStore(builder,
107 dady,
108 LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
109 }
110
111
112 static void
emit_constant_coef4(struct gallivm_state * gallivm,const struct lp_setup_args * args,unsigned slot,LLVMValueRef vert)113 emit_constant_coef4(struct gallivm_state *gallivm,
114 const struct lp_setup_args *args,
115 unsigned slot,
116 LLVMValueRef vert)
117 {
118 store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero);
119 }
120
121
122 /**
123 * Setup the fragment input attribute with the front-facing value.
124 * \param frontface is the triangle front facing?
125 */
126 static void
emit_facing_coef(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot)127 emit_facing_coef(struct gallivm_state *gallivm,
128 struct lp_setup_args *args,
129 unsigned slot)
130 {
131 LLVMBuilderRef builder = gallivm->builder;
132 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
133 LLVMValueRef a0_0 = args->facing;
134 LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
135 LLVMValueRef a0, face_val;
136 const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
137 PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
138 /* Our face val is either 1 or 0 so we do
139 * face = (val * 2) - 1
140 * to make it 1 or -1
141 */
142 face_val =
143 LLVMBuildFAdd(builder,
144 LLVMBuildFMul(builder, a0_0f,
145 lp_build_const_float(gallivm, 2.0),
146 ""),
147 lp_build_const_float(gallivm, -1.0),
148 "facing");
149 face_val = lp_build_broadcast_scalar(&args->bld, face_val);
150 a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles);
151
152 store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero);
153 }
154
155
156 static LLVMValueRef
vert_attrib(struct gallivm_state * gallivm,LLVMValueRef vert,int attr,int elem,const char * name)157 vert_attrib(struct gallivm_state *gallivm,
158 LLVMValueRef vert,
159 int attr,
160 int elem,
161 const char *name)
162 {
163 LLVMBuilderRef b = gallivm->builder;
164 LLVMValueRef idx[2];
165 idx[0] = lp_build_const_int32(gallivm, attr);
166 idx[1] = lp_build_const_int32(gallivm, elem);
167 return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
168 }
169
170
171 static void
lp_twoside(struct gallivm_state * gallivm,struct lp_setup_args * args,const struct lp_setup_variant_key * key,int bcolor_slot,LLVMValueRef attribv[3])172 lp_twoside(struct gallivm_state *gallivm,
173 struct lp_setup_args *args,
174 const struct lp_setup_variant_key *key,
175 int bcolor_slot,
176 LLVMValueRef attribv[3])
177 {
178 LLVMBuilderRef b = gallivm->builder;
179 LLVMValueRef a0_back, a1_back, a2_back;
180 LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
181
182 LLVMValueRef facing = args->facing;
183 LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing,
184 lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
185
186 a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
187 a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
188 a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
189
190 /* Possibly swap the front and back attrib values,
191 *
192 * Prefer select to if so we don't have to worry about phis or
193 * allocas.
194 */
195 attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
196 attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
197 attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
198 }
199
200
/**
 * Emit code computing the polygon-offset (depth bias) value of a triangle.
 *
 * \param inv_det  vector multiplied into the cross-product terms
 *                 (init_args() passes its broadcast "ooa" value)
 * \param dxyz01   position delta v0 - v1 (x, y, z lanes are used)
 * \param dxyz20   position delta v2 - v0 (x, y, z lanes are used)
 * \param attribv  the three vertex positions; lane 2 (z) is read in the
 *                 floating-point depth path
 * \return scalar float offset: units term + max(|dzdx|, |dzdy|) * scale,
 *         limited by key->pgon_offset_clamp when that is non-zero
 */
static LLVMValueRef
lp_do_offset_tri(struct gallivm_state *gallivm,
                 struct lp_setup_args *args,
                 const struct lp_setup_variant_key *key,
                 LLVMValueRef inv_det,
                 LLVMValueRef dxyz01,
                 LLVMValueRef dxyz20,
                 LLVMValueRef attribv[3])
{
   LLVMBuilderRef b = gallivm->builder;
   struct lp_build_context flt_scalar_bld;
   struct lp_build_context int_scalar_bld;
   struct lp_build_context *bld = &args->bld;
   LLVMValueRef zoffset, mult;
   LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
   LLVMValueRef max, max_value, res12;
   LLVMValueRef shuffles[4];
   LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
   LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
   LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
   LLVMValueRef threei = lp_build_const_int32(gallivm, 3);

   /* (res12) = cross(e,f).xy
    *
    * dzxyz20 = (z20, x20, y20, z20)
    */
   shuffles[0] = twoi;
   shuffles[1] = zeroi;
   shuffles[2] = onei;
   shuffles[3] = twoi;
   dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");

   /* dyzzx01 = (y01, z01, z01, x01) */
   shuffles[0] = onei;
   shuffles[1] = twoi;
   shuffles[2] = twoi;
   shuffles[3] = zeroi;
   dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");

   /* (z20*y01, x20*z01, y20*z01, z20*x01) */
   dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");

   /* Move lanes 2 and 3 of the product down to lanes 0 and 1; the upper
    * two lanes are don't-care.
    */
   shuffles[0] = twoi;
   shuffles[1] = threei;
   shuffles[2] = LLVMGetUndef(shuf_type);
   shuffles[3] = LLVMGetUndef(shuf_type);
   dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
                                        LLVMConstVector(shuffles, 4), "");

   /* Lanes 0 and 1 now hold the two cross-product components. */
   res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");

   /* dzdx = fabsf(res1 * inv_det), dzdy = fabsf(res2 * inv_det)*/
   dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
   dzdxdzdy = lp_build_abs(bld, dzdxdzdy);

   dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
   dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");

   /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
   max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
   max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");

   mult = LLVMBuildFMul(b, max_value,
                        lp_build_const_float(gallivm,
                                             key->pgon_offset_scale), "");

   /* Scalar float context, used for the min/max/abs helpers below. */
   lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32));

   if (key->floating_point_depth) {
      /*
       * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) -
       *        mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale
       *
       * NOTE: Assumes IEEE float32.
       */
      LLVMValueRef c23_shifted, exp_mask, bias, exp;
      LLVMValueRef maxz_value, maxz0z1_value;

      lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32));

      /* 23 and the 8-bit exponent mask, both positioned at bit 23. */
      c23_shifted = lp_build_const_int32(gallivm, 23 << 23);
      exp_mask = lp_build_const_int32(gallivm, 0xff << 23);

      maxz0z1_value = lp_build_max(&flt_scalar_bld,
                         lp_build_abs(&flt_scalar_bld,
                            LLVMBuildExtractElement(b, attribv[0], twoi, "")),
                         lp_build_abs(&flt_scalar_bld,
                            LLVMBuildExtractElement(b, attribv[1], twoi, "")));

      maxz_value = lp_build_max(&flt_scalar_bld,
                      lp_build_abs(&flt_scalar_bld,
                         LLVMBuildExtractElement(b, attribv[2], twoi, "")),
                      maxz0z1_value);

      /* Work on the raw bits: keep only the exponent field and subtract
       * 23 from it.
       */
      exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
      exp = lp_build_and(&int_scalar_bld, exp, exp_mask);
      exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted);
      /* Clamping to zero means mrd will be zero for very small numbers,
       * but specs do not indicate this should be prevented by clamping
       * mrd to smallest normal number instead. */
      exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero);
      exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, "");

      bias = LLVMBuildFMul(b, exp,
                           lp_build_const_float(gallivm, key->pgon_offset_units),
                           "bias");

      zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset");
   } else {
      /*
       * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
       */
      zoffset = LLVMBuildFAdd(b,
                              lp_build_const_float(gallivm, key->pgon_offset_units),
                              mult, "zoffset");
   }

   /* A positive clamp is an upper bound, a negative clamp a lower bound;
    * zero leaves the offset unclamped.
    */
   if (key->pgon_offset_clamp > 0) {
      zoffset = lp_build_min(&flt_scalar_bld,
                             lp_build_const_float(gallivm, key->pgon_offset_clamp),
                             zoffset);
   } else if (key->pgon_offset_clamp < 0) {
      zoffset = lp_build_max(&flt_scalar_bld,
                             lp_build_const_float(gallivm, key->pgon_offset_clamp),
                             zoffset);
   }

   return zoffset;
}
326
327
328 static void
load_attribute(struct gallivm_state * gallivm,struct lp_setup_args * args,const struct lp_setup_variant_key * key,unsigned vert_attr,LLVMValueRef attribv[3])329 load_attribute(struct gallivm_state *gallivm,
330 struct lp_setup_args *args,
331 const struct lp_setup_variant_key *key,
332 unsigned vert_attr,
333 LLVMValueRef attribv[3])
334 {
335 LLVMBuilderRef b = gallivm->builder;
336 LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
337
338 /* Load the vertex data
339 */
340 attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
341 attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
342 attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
343
344 /* Potentially modify it according to twoside, etc:
345 */
346 if (key->twoside) {
347 if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
348 lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
349 else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
350 lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
351 }
352 }
353
354
355 /*
356 * FIXME: interpolation is always done wrt fb origin (0/0).
357 * However, if some (small) tri is far away from the origin and gradients
358 * are large, this can lead to HUGE errors, since the a0 value calculated
359 * here can get very large (with the actual values inside the triangle way
360 * smaller), leading to complete loss of accuracy. This could be prevented
361 * by using some point inside (or at corner) of the tri as interpolation
362 * origin, or just use barycentric interpolation (which GL suggests and is
363 * what real hw does - you can get the barycentric coordinates from the
364 * edge functions in rasterization in principle (though we skip these
365 * sometimes completely in case of tris covering a block fully,
366 * which obviously wouldn't work)).
367 */
368 static void
calc_coef4(struct gallivm_state * gallivm,struct lp_setup_args * args,LLVMValueRef a0,LLVMValueRef a1,LLVMValueRef a2,LLVMValueRef out[3])369 calc_coef4(struct gallivm_state *gallivm,
370 struct lp_setup_args *args,
371 LLVMValueRef a0,
372 LLVMValueRef a1,
373 LLVMValueRef a2,
374 LLVMValueRef out[3])
375 {
376 LLVMBuilderRef b = gallivm->builder;
377 LLVMValueRef attr_0;
378 LLVMValueRef dy20_ooa = args->dy20_ooa;
379 LLVMValueRef dy01_ooa = args->dy01_ooa;
380 LLVMValueRef dx20_ooa = args->dx20_ooa;
381 LLVMValueRef dx01_ooa = args->dx01_ooa;
382 LLVMValueRef x0_center = args->x0_center;
383 LLVMValueRef y0_center = args->y0_center;
384 LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
385 LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
386
387 /* Calculate dadx (vec4f)
388 */
389 LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
390 LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
391 LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
392
393 /* Calculate dady (vec4f)
394 */
395 LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
396 LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
397 LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
398
399 /* Calculate a0 - the attribute value at the origin
400 */
401 LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
402 LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
403 LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
404 attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
405
406 out[0] = attr_0;
407 out[1] = dadx;
408 out[2] = dady;
409 }
410
411
412 static void
emit_coef4(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot,LLVMValueRef a0,LLVMValueRef a1,LLVMValueRef a2)413 emit_coef4(struct gallivm_state *gallivm,
414 struct lp_setup_args *args,
415 unsigned slot,
416 LLVMValueRef a0,
417 LLVMValueRef a1,
418 LLVMValueRef a2)
419 {
420 LLVMValueRef coeffs[3];
421 calc_coef4(gallivm, args, a0, a1, a2, coeffs);
422 store_coef(gallivm, args, slot, coeffs[0], coeffs[1], coeffs[2]);
423 }
424
425
426 static void
emit_linear_coef(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot,LLVMValueRef attribv[3])427 emit_linear_coef(struct gallivm_state *gallivm,
428 struct lp_setup_args *args,
429 unsigned slot,
430 LLVMValueRef attribv[3])
431 {
432 /* nothing to do anymore */
433 emit_coef4(gallivm, args, slot, attribv[0], attribv[1], attribv[2]);
434 }
435
436
437 /**
438 * Compute a0, dadx and dady for a perspective-corrected interpolant,
439 * for a triangle.
440 * We basically multiply the vertex value by 1/w before computing
441 * the plane coefficients (a0, dadx, dady).
442 * Later, when we compute the value at a particular fragment position we'll
443 * divide the interpolated value by the interpolated W at that fragment.
444 */
445 static void
apply_perspective_corr(struct gallivm_state * gallivm,struct lp_setup_args * args,unsigned slot,LLVMValueRef attribv[3])446 apply_perspective_corr(struct gallivm_state *gallivm,
447 struct lp_setup_args *args,
448 unsigned slot,
449 LLVMValueRef attribv[3])
450 {
451 LLVMBuilderRef b = gallivm->builder;
452
453 /* premultiply by 1/w (v[0][3] is always 1/w):
454 */
455 LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld,
456 vert_attrib(gallivm, args->v0, 0, 3, "v0_oow"));
457 LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld,
458 vert_attrib(gallivm, args->v1, 0, 3, "v1_oow"));
459 LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld,
460 vert_attrib(gallivm, args->v2, 0, 3, "v2_oow"));
461
462 attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
463 attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
464 attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
465 }
466
467
468 /**
469 * Compute the inputs-> dadx, dady, a0 values.
470 */
471 static void
emit_tri_coef(struct gallivm_state * gallivm,const struct lp_setup_variant_key * key,struct lp_setup_args * args)472 emit_tri_coef(struct gallivm_state *gallivm,
473 const struct lp_setup_variant_key *key,
474 struct lp_setup_args *args)
475 {
476 LLVMValueRef attribs[3];
477
478 /* setup interpolation for all the remaining attributes */
479 for (unsigned slot = 0; slot < key->num_inputs; slot++) {
480 switch (key->inputs[slot].interp) {
481 case LP_INTERP_CONSTANT:
482 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
483 if (key->flatshade_first) {
484 emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
485 } else {
486 emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
487 }
488 break;
489
490 case LP_INTERP_LINEAR:
491 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
492 emit_linear_coef(gallivm, args, slot+1, attribs);
493 break;
494
495 case LP_INTERP_PERSPECTIVE:
496 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
497 apply_perspective_corr(gallivm, args, slot+1, attribs);
498 emit_linear_coef(gallivm, args, slot+1, attribs);
499 break;
500
501 case LP_INTERP_POSITION:
502 /*
503 * The generated pixel interpolators will pick up the coeffs from
504 * slot 0.
505 */
506 break;
507
508 case LP_INTERP_FACING:
509 emit_facing_coef(gallivm, args, slot+1);
510 break;
511
512 default:
513 assert(0);
514 }
515 }
516 }
517
518
519 /* XXX: generic code:
520 */
521 static void
set_noalias(LLVMBuilderRef builder,LLVMValueRef function,const LLVMTypeRef * arg_types,int nr_args)522 set_noalias(LLVMBuilderRef builder,
523 LLVMValueRef function,
524 const LLVMTypeRef *arg_types,
525 int nr_args)
526 {
527 for (int i = 0; i < nr_args; ++i) {
528 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
529 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
530 }
531 }
532 }
533
534
/**
 * Emit the per-triangle preamble of the setup function.
 *
 * Initializes args->bld, loads the position attribute (slot 0) of the
 * three vertices, derives the values stored in the "Derived" members of
 * \p args, and stores the position coefficients (with the polygon offset
 * placed in a0.x) at output slot 0.
 */
static void
init_args(struct gallivm_state *gallivm,
          const struct lp_setup_variant_key *key,
          struct lp_setup_args *args)
{
   LLVMBuilderRef b = gallivm->builder;
   LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
   LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
   LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
   LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
   LLVMValueRef e, f, ef, ooa;
   LLVMValueRef shuffles[4], shuf10;
   LLVMValueRef attr_pos[3];
   LLVMValueRef polygon_offset;
   struct lp_type typef4 = lp_type_float_vec(32, 128);
   struct lp_build_context bld;

   /* All vector arithmetic below uses a 4 x float32 context. */
   lp_build_context_init(&bld, gallivm, typef4);
   args->bld = bld;

   /* The internal position input is in slot zero:
    */
   load_attribute(gallivm, args, key, 0, attr_pos);

   /* The half-pixel offset is applied only when not multisampling. */
   pixel_center = lp_build_const_vec(gallivm, typef4,
                                     (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0);

   /*
    * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
    * also offset_tri uses actually xyz in them
    */
   xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );

   dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
   dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");

   /* Swap x and y of dxy20; the upper two lanes are don't-care. */
   shuffles[0] = onei;
   shuffles[1] = zeroi;
   shuffles[2] = LLVMGetUndef(shuf_type);
   shuffles[3] = LLVMGetUndef(shuf_type);
   shuf10 = LLVMConstVector(shuffles, 4);

   dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, "");

   /* e = dx01 * dy20, f = dy01 * dx20 */
   ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
   e = LLVMBuildExtractElement(b, ef, zeroi, "");
   f = LLVMBuildExtractElement(b, ef, onei, "");

   /* ooa = 1 / (e - f).
    * NOTE(review): there is no guard against e == f here; presumably
    * degenerate triangles are rejected before this code runs - confirm.
    */
   ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");

   ooa = lp_build_broadcast_scalar(&bld, ooa);

   /* tri offset calc shares a lot of arithmetic, do it here */
   if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
      polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
   } else {
      polygon_offset = lp_build_const_float(gallivm, 0.0f);
   }

   /* Pre-scale the edge deltas by ooa, after the offset calculation
    * above has consumed the unscaled deltas.
    */
   dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
   dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");

   /* Broadcast the individual x/y lanes for use by calc_coef4(). */
   args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
   args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);

   args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
   args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);

   args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
   args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);

   LLVMValueRef coeffs[3];
   calc_coef4(gallivm, args, attr_pos[0], attr_pos[1], attr_pos[2], coeffs);

   /* This is a bit sneaky:
    * Because we observe that the X component of A0 is otherwise unused,
    * we can overwrite it with the computed polygon-offset value, to make
    * sure it's available in the fragment shader without having to change
    * the interface (which is error-prone).
    */
   coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset,
                                      lp_build_const_int32(gallivm, 0), "");

   store_coef(gallivm, args, 0, coeffs[0], coeffs[1], coeffs[2]);
}
621
622
623 /**
624 * Generate the runtime callable function for the coefficient calculation.
625 *
626 */
627 static struct lp_setup_variant *
generate_setup_variant(struct lp_setup_variant_key * key,struct llvmpipe_context * lp)628 generate_setup_variant(struct lp_setup_variant_key *key,
629 struct llvmpipe_context *lp)
630 {
631 int64_t t0 = 0, t1;
632
633 if (0)
634 goto fail;
635
636 struct lp_setup_variant *variant = CALLOC_STRUCT(lp_setup_variant);
637 if (!variant)
638 goto fail;
639
640 variant->no = setup_no++;
641
642 char func_name[64];
643 snprintf(func_name, sizeof(func_name), "setup_variant_%u",
644 variant->no);
645
646 struct gallivm_state *gallivm;
647 variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL);
648 if (!variant->gallivm) {
649 goto fail;
650 }
651
652 LLVMBuilderRef builder = gallivm->builder;
653
654 if (LP_DEBUG & DEBUG_COUNTERS) {
655 t0 = os_time_get();
656 }
657
658 memcpy(&variant->key, key, key->size);
659 variant->list_item_global.base = variant;
660
661 /* Currently always deal with full 4-wide vertex attributes from
662 * the vertices.
663 */
664
665 LLVMTypeRef vec4f_type =
666 LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
667
668 LLVMTypeRef arg_types[8];
669 arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */
670 arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */
671 arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */
672 arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
673 arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
674 arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
675 arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
676 arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key (placeholder) */
677
678 LLVMTypeRef func_type =
679 LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
680 arg_types, ARRAY_SIZE(arg_types), 0);
681
682 variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
683 if (!variant->function)
684 goto fail;
685
686 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
687
688 struct lp_setup_args args;
689 args.v0 = LLVMGetParam(variant->function, 0);
690 args.v1 = LLVMGetParam(variant->function, 1);
691 args.v2 = LLVMGetParam(variant->function, 2);
692 args.facing = LLVMGetParam(variant->function, 3);
693 args.a0 = LLVMGetParam(variant->function, 4);
694 args.dadx = LLVMGetParam(variant->function, 5);
695 args.dady = LLVMGetParam(variant->function, 6);
696 args.key = LLVMGetParam(variant->function, 7);
697
698 lp_build_name(args.v0, "in_v0");
699 lp_build_name(args.v1, "in_v1");
700 lp_build_name(args.v2, "in_v2");
701 lp_build_name(args.facing, "in_facing");
702 lp_build_name(args.a0, "out_a0");
703 lp_build_name(args.dadx, "out_dadx");
704 lp_build_name(args.dady, "out_dady");
705 lp_build_name(args.key, "key");
706
707 /*
708 * Function body
709 */
710 LLVMBasicBlockRef block =
711 LLVMAppendBasicBlockInContext(gallivm->context,
712 variant->function, "entry");
713 LLVMPositionBuilderAtEnd(builder, block);
714
715 set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
716 init_args(gallivm, &variant->key, &args);
717 emit_tri_coef(gallivm, &variant->key, &args);
718
719 LLVMBuildRetVoid(builder);
720
721 gallivm_verify_function(gallivm, variant->function);
722
723 gallivm_compile_module(gallivm);
724
725 variant->jit_function = (lp_jit_setup_triangle)
726 gallivm_jit_function(gallivm, variant->function);
727 if (!variant->jit_function)
728 goto fail;
729
730 gallivm_free_ir(variant->gallivm);
731
732 /*
733 * Update timing information:
734 */
735 if (LP_DEBUG & DEBUG_COUNTERS) {
736 t1 = os_time_get();
737 LP_COUNT_ADD(llvm_compile_time, t1 - t0);
738 LP_COUNT_ADD(nr_llvm_compiles, 1);
739 }
740
741 return variant;
742
743 fail:
744 if (variant) {
745 if (variant->gallivm) {
746 gallivm_destroy(variant->gallivm);
747 }
748 FREE(variant);
749 }
750
751 return NULL;
752 }
753
754
755 static void
lp_make_setup_variant_key(const struct llvmpipe_context * lp,struct lp_setup_variant_key * key)756 lp_make_setup_variant_key(const struct llvmpipe_context *lp,
757 struct lp_setup_variant_key *key)
758 {
759 const struct lp_fragment_shader *fs = lp->fs;
760
761 assert(sizeof key->inputs[0] == sizeof(uint));
762
763 key->num_inputs = fs->info.base.num_inputs;
764 key->flatshade_first = lp->rasterizer->flatshade_first;
765 key->pixel_center_half = lp->rasterizer->half_pixel_center;
766 key->multisample = lp->rasterizer->multisample;
767 key->twoside = lp->rasterizer->light_twoside;
768 key->size = Offset(struct lp_setup_variant_key, inputs[key->num_inputs]);
769
770 key->color_slot = lp->color_slot[0];
771 key->bcolor_slot = lp->bcolor_slot[0];
772 key->spec_slot = lp->color_slot[1];
773 key->bspec_slot = lp->bcolor_slot[1];
774
775 /*
776 * If depth is floating point, depth bias is calculated with respect
777 * to the primitive's maximum Z value. Retain the original depth bias
778 * value until that stage.
779 */
780 key->floating_point_depth = lp->floating_point_depth;
781
782 if (key->floating_point_depth) {
783 key->pgon_offset_units = (float) lp->rasterizer->offset_units;
784 } else {
785 key->pgon_offset_units =
786 (float) (lp->rasterizer->offset_units * lp->mrd * 2);
787 }
788
789 key->pgon_offset_scale = lp->rasterizer->offset_scale;
790 key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
791 key->uses_constant_interp = 0;
792 key->pad = 0;
793
794 memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
795
796 for (unsigned i = 0; i < key->num_inputs; i++) {
797 if (key->inputs[i].interp == LP_INTERP_COLOR) {
798 if (lp->rasterizer->flatshade)
799 key->inputs[i].interp = LP_INTERP_CONSTANT;
800 else
801 key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
802 }
803 if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
804 key->uses_constant_interp = 1;
805 }
806 }
807 }
808
809
810 static void
remove_setup_variant(struct llvmpipe_context * lp,struct lp_setup_variant * variant)811 remove_setup_variant(struct llvmpipe_context *lp,
812 struct lp_setup_variant *variant)
813 {
814 if (gallivm_debug & GALLIVM_DEBUG_IR) {
815 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
816 variant->no, lp->nr_setup_variants);
817 }
818
819 if (variant->gallivm) {
820 gallivm_destroy(variant->gallivm);
821 }
822
823 list_del(&variant->list_item_global.list);
824 lp->nr_setup_variants--;
825 FREE(variant);
826 }
827
828
829 /* When the number of setup variants exceeds a threshold, cull a
830 * fraction (currently a quarter) of them.
831 */
832 static void
cull_setup_variants(struct llvmpipe_context * lp)833 cull_setup_variants(struct llvmpipe_context *lp)
834 {
835 struct pipe_context *pipe = &lp->pipe;
836
837 /*
838 * XXX: we need to flush the context until we have some sort of reference
839 * counting in fragment shaders as they may still be binned
840 * Flushing alone might not be sufficient we need to wait on it too.
841 */
842 llvmpipe_finish(pipe, __FUNCTION__);
843
844 for (int i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
845 struct lp_setup_variant_list_item *item;
846 if (list_is_empty(&lp->setup_variants_list.list)) {
847 break;
848 }
849 item = list_last_entry(&lp->setup_variants_list.list,
850 struct lp_setup_variant_list_item, list);
851 assert(item);
852 assert(item->base);
853 remove_setup_variant(lp, item->base);
854 }
855 }
856
857
858 /**
859 * Update fragment/vertex shader linkage state. This is called just
860 * prior to drawing something when some fragment-related state has
861 * changed.
862 */
863 void
llvmpipe_update_setup(struct llvmpipe_context * lp)864 llvmpipe_update_setup(struct llvmpipe_context *lp)
865 {
866 struct lp_setup_variant_key *key = &lp->setup_variant.key;
867 struct lp_setup_variant *variant = NULL;
868 struct lp_setup_variant_list_item *li;
869
870 lp_make_setup_variant_key(lp, key);
871
872 LIST_FOR_EACH_ENTRY(li, &lp->setup_variants_list.list, list) {
873 if (li->base->key.size == key->size &&
874 memcmp(&li->base->key, key, key->size) == 0) {
875 variant = li->base;
876 break;
877 }
878 }
879
880 if (variant) {
881 list_move_to(&variant->list_item_global.list, &lp->setup_variants_list.list);
882 } else {
883 if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
884 cull_setup_variants(lp);
885 }
886
887 variant = generate_setup_variant(key, lp);
888 if (variant) {
889 list_add(&variant->list_item_global.list, &lp->setup_variants_list.list);
890 lp->nr_setup_variants++;
891 }
892 }
893
894 lp_setup_set_setup_variant(lp->setup, variant);
895 }
896
897
898 void
lp_delete_setup_variants(struct llvmpipe_context * lp)899 lp_delete_setup_variants(struct llvmpipe_context *lp)
900 {
901 struct lp_setup_variant_list_item *li, *next;
902 LIST_FOR_EACH_ENTRY_SAFE(li, next, &lp->setup_variants_list.list, list) {
903 remove_setup_variant(lp, li->base);
904 }
905 }
906
907
908 void
lp_dump_setup_coef(const struct lp_setup_variant_key * key,const float (* sa0)[4],const float (* sdadx)[4],const float (* sdady)[4])909 lp_dump_setup_coef(const struct lp_setup_variant_key *key,
910 const float (*sa0)[4],
911 const float (*sdadx)[4],
912 const float (*sdady)[4])
913 {
914 for (int i = 0; i < TGSI_NUM_CHANNELS; i++) {
915 float a0 = sa0 [0][i];
916 float dadx = sdadx[0][i];
917 float dady = sdady[0][i];
918
919 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
920 "xyzw"[i], a0, dadx, dady);
921 }
922
923 for (int slot = 0; slot < key->num_inputs; slot++) {
924 unsigned usage_mask = key->inputs[slot].usage_mask;
925 for (int i = 0; i < TGSI_NUM_CHANNELS; i++) {
926 if (usage_mask & (1 << i)) {
927 float a0 = sa0 [1 + slot][i];
928 float dadx = sdadx[1 + slot][i];
929 float dady = sdady[1 + slot][i];
930
931 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
932 slot, "xyzw"[i], a0, dadx, dady);
933 }
934 }
935 }
936 }
937