1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- common code.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52
53
54 /*
55 * Bri-linear factor. Should be greater than one.
56 */
57 #define BRILINEAR_FACTOR 2
58
59 /**
60 * Does the given texture wrap mode allow sampling the texture border color?
61 * XXX maybe move this into gallium util code.
62 */
63 boolean
lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,enum pipe_tex_filter min_img_filter,enum pipe_tex_filter mag_img_filter)64 lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
65 enum pipe_tex_filter min_img_filter,
66 enum pipe_tex_filter mag_img_filter)
67 {
68 switch (mode) {
69 case PIPE_TEX_WRAP_REPEAT:
70 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
71 case PIPE_TEX_WRAP_MIRROR_REPEAT:
72 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
73 return FALSE;
74 case PIPE_TEX_WRAP_CLAMP:
75 case PIPE_TEX_WRAP_MIRROR_CLAMP:
76 if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
77 mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
78 return FALSE;
79 } else {
80 return TRUE;
81 }
82 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
83 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
84 return TRUE;
85 default:
86 assert(0 && "unexpected wrap mode");
87 return FALSE;
88 }
89 }
90
91
92 /**
93 * Initialize lp_sampler_static_texture_state object with the gallium
94 * texture/sampler_view state (this contains the parts which are
95 * considered static).
96 */
97 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)98 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
99 const struct pipe_sampler_view *view)
100 {
101 memset(state, 0, sizeof *state);
102
103 if (!view || !view->texture)
104 return;
105
106 const struct pipe_resource *texture = view->texture;
107
108 state->format = view->format;
109 state->swizzle_r = view->swizzle_r;
110 state->swizzle_g = view->swizzle_g;
111 state->swizzle_b = view->swizzle_b;
112 state->swizzle_a = view->swizzle_a;
113 assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
114 assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
115 assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
116 assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
117
118 state->target = view->target;
119 state->pot_width = util_is_power_of_two_or_zero(texture->width0);
120 state->pot_height = util_is_power_of_two_or_zero(texture->height0);
121 state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
122 state->level_zero_only = !view->u.tex.last_level;
123
124 /*
125 * the layer / element / level parameters are all either dynamic
126 * state or handled transparently wrt execution.
127 */
128 }
129
130 /**
131 * Initialize lp_sampler_static_texture_state object with the gallium
132 * texture/sampler_view state (this contains the parts which are
133 * considered static).
134 */
135 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)136 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
137 const struct pipe_image_view *view)
138 {
139 memset(state, 0, sizeof *state);
140
141 if (!view || !view->resource)
142 return;
143
144 const struct pipe_resource *resource = view->resource;
145
146 state->format = view->format;
147 state->swizzle_r = PIPE_SWIZZLE_X;
148 state->swizzle_g = PIPE_SWIZZLE_Y;
149 state->swizzle_b = PIPE_SWIZZLE_Z;
150 state->swizzle_a = PIPE_SWIZZLE_W;
151 assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
152 assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
153 assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
154 assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
155
156 state->target = view->resource->target;
157 state->pot_width = util_is_power_of_two_or_zero(resource->width0);
158 state->pot_height = util_is_power_of_two_or_zero(resource->height0);
159 state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
160 state->level_zero_only = 0;
161
162 /*
163 * the layer / element / level parameters are all either dynamic
164 * state or handled transparently wrt execution.
165 */
166 }
167
168 /**
169 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
170 * state (this contains the parts which are considered static).
171 */
172 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)173 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
174 const struct pipe_sampler_state *sampler)
175 {
176 memset(state, 0, sizeof *state);
177
178 if (!sampler)
179 return;
180
181 /*
182 * We don't copy sampler state over unless it is actually enabled, to avoid
183 * spurious recompiles, as the sampler static state is part of the shader
184 * key.
185 *
186 * Ideally gallium frontends or cso_cache module would make all state
187 * canonical, but until that happens it's better to be safe than sorry here.
188 *
189 * XXX: Actually there's much more than can be done here, especially
190 * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
191 */
192
193 state->wrap_s = sampler->wrap_s;
194 state->wrap_t = sampler->wrap_t;
195 state->wrap_r = sampler->wrap_r;
196 state->min_img_filter = sampler->min_img_filter;
197 state->mag_img_filter = sampler->mag_img_filter;
198 state->min_mip_filter = sampler->min_mip_filter;
199 state->seamless_cube_map = sampler->seamless_cube_map;
200 state->reduction_mode = sampler->reduction_mode;
201 state->aniso = sampler->max_anisotropy > 1.0f;
202
203 if (sampler->max_lod > 0.0f) {
204 state->max_lod_pos = 1;
205 }
206
207 if (sampler->lod_bias != 0.0f) {
208 state->lod_bias_non_zero = 1;
209 }
210
211 if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
212 state->min_img_filter != state->mag_img_filter) {
213
214 /* If min_lod == max_lod we can greatly simplify mipmap selection.
215 * This is a case that occurs during automatic mipmap generation.
216 */
217 if (sampler->min_lod == sampler->max_lod) {
218 state->min_max_lod_equal = 1;
219 } else {
220 if (sampler->min_lod > 0.0f) {
221 state->apply_min_lod = 1;
222 }
223
224 /*
225 * XXX this won't do anything with the mesa state tracker which always
226 * sets max_lod to not more than actually present mip maps...
227 */
228 if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
229 state->apply_max_lod = 1;
230 }
231 }
232 }
233
234 state->compare_mode = sampler->compare_mode;
235 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
236 state->compare_func = sampler->compare_func;
237 }
238
239 state->normalized_coords = sampler->normalized_coords;
240 }
241
/**
 * Build the anisotropic "pmin" value (returned squared).
 *
 * Works on the packed per-quad ddx/ddy of (s, t): gradients are scaled to
 * texel units of mip level 'first_level', squared, and summed per direction
 * to give px^2 / py^2.  All math stays in squared form, so no sqrt is needed.
 *
 * \param texture_unit  which texture, for fetching first_level dynamic state
 * \param s, t          texture coordinates (packed quads)
 * \param max_aniso     scalar max anisotropy from the sampler
 * \return pmin^2, in pmin_bld (lodf) layout — per quad or per element
 */
static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context *bld,
              unsigned texture_unit,
              LLVMValueRef s,
              LLVMValueRef t,
              LLVMValueRef max_aniso)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *pmin_bld = &bld->lodf_bld;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   /* packed layout: dsdx dsdy dtdx dtdy per quad */
   LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
   LLVMValueRef int_size, float_size;
   LLVMValueRef first_level, first_level_vec;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* if the lod vector is narrower than the coords, pmin is per quad */
   boolean pmin_per_quad = pmin_bld->type.length != length;
   unsigned i;

   /* Size of the base (first) mip level, as float, to scale derivatives
    * into texel space. */
   first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                 bld->context_ptr, texture_unit, NULL);
   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
   float_size = lp_build_int_to_float(float_size_bld, int_size);
   /* square max_aniso too, since everything else is kept squared */
   max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
   max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);

   static const unsigned char swizzle01[] = { /* no-op swizzle */
      0, 1,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle23[] = {
      2, 3,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

   /* broadcast (width, width, height, height) per quad */
   for (i = 0; i < num_quads; i++) {
      shuffles[i*4+0] = shuffles[i*4+1] = index0;
      shuffles[i*4+2] = shuffles[i*4+3] = index1;
   }
   floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                     LLVMConstVector(shuffles, length), "");
   /* scale derivatives to texels, then square them */
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);

   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);

   /* split s-derivatives from t-derivatives and sum: px2_py2 = (px^2, py^2) */
   ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
   ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);

   LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

   static const unsigned char swizzle0[] = { /* no-op swizzle */
      0, LP_BLD_SWIZZLE_DONTCARE,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle1[] = {
      1, LP_BLD_SWIZZLE_DONTCARE,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
   LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);

   /* major/minor squared axis lengths of the pixel footprint */
   LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
   LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);

   LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);

   /*
    * NOTE(review): this compares pmin2 against pmin2*max_aniso^2, which for
    * max_aniso >= 1 can never be true.  The usual aniso-ratio clamp
    * (pmax/pmin > max_aniso => pmin = pmax/max_aniso) would compare pmax2
    * here instead — verify against the spec/upstream before changing.
    */
   LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
                                        pmin2, temp);

   LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);

   pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);

   /* reduce to the lod layout: one scalar per quad, or splat per element */
   if (pmin_per_quad)
      pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                        pmin_bld->type, pmin2, 0);
   else
      pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
   return pmin2;
}
330
/**
 * Generate code to compute coordinate gradient (rho).
 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
 * \param cube_rho rho precomputed by the cube-map coord mapping (optional);
 *                 when given, only the size scaling / quad reduction is done here
 *
 * The resulting rho has bld->levelf format (per quad or per element).
 * When no_rho_approx is in effect (or cube_rho is used) the value returned
 * is rho SQUARED — callers must account for that (halving the log2).
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             unsigned texture_unit,
             LLVMValueRef s,
             LLVMValueRef t,
             LLVMValueRef r,
             LLVMValueRef cube_rho,
             const struct lp_derivatives *derivs)
{
   struct gallivm_state *gallivm = bld->gallivm;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *rho_bld = &bld->lodf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2] = {NULL};
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_vec;
   LLVMValueRef int_size, float_size;
   LLVMValueRef rho;
   LLVMValueRef first_level, first_level_vec;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* lod vector narrower than coord vector => one rho per quad */
   boolean rho_per_quad = rho_bld->type.length != length;
   /* exact (squared) rho instead of the max-of-abs approximation */
   boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
   unsigned i;
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef rho_xvec, rho_yvec;

   /* Note that all simplified calculations will only work for isotropic filtering */

   /*
    * rho calcs are always per quad except for explicit derivs (excluding
    * the messy cube maps for now) when requested.
    */

   /* size of mip level 'first_level', used to scale derivatives to texels */
   first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                 bld->context_ptr, texture_unit, NULL);
   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
   float_size = lp_build_int_to_float(float_size_bld, int_size);

   if (cube_rho) {
      LLVMValueRef cubesize;
      LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);

      /*
       * Cube map code did already everything except size mul and per-quad extraction.
       * Luckily cube maps are always quadratic!
       */
      if (rho_per_quad) {
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         rho_bld->type, cube_rho, 0);
      }
      else {
         rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
      }
      /* Could optimize this for single quad just skip the broadcast */
      cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                            rho_bld->type, float_size, index0);
      /* skipping sqrt hence returning rho squared */
      cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
      rho = lp_build_mul(rho_bld, cubesize, rho);
   }
   else if (derivs) {
      /* Explicit shader-supplied derivatives (per element). */
      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
      for (i = 0; i < dims; i++) {
         LLVMValueRef floatdim;
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);

         floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                               coord_bld->type, float_size, indexi);

         /*
          * note that for rho_per_quad case could reduce math (at some shuffle
          * cost), but for now use same code to per-pixel lod case.
          */
         if (no_rho_opt) {
            /* exact: accumulate squared, size-scaled derivatives */
            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
         }
         else {
            /* approximate: max of |ddx|, |ddy| per dimension */
            LLVMValueRef tmpx, tmpy;
            tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
            tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
         }
      }
      if (no_rho_opt) {
         /* rho_x^2 = sum of squared x-derivs; likewise for y */
         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
         if (dims > 2) {
            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
         }
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         /* skipping sqrt hence returning rho squared */
      }
      else {
         rho = ddmax[0];
         if (dims > 1) {
            rho = lp_build_max(coord_bld, rho, ddmax[1]);
            if (dims > 2) {
               rho = lp_build_max(coord_bld, rho, ddmax[2]);
            }
         }
      }

      /* inf/nan derivatives are flushed to zero rho */
      LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, coord_bld->type, rho);
      rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);

      if (rho_per_quad) {
         /*
          * rho_vec contains per-pixel rho, convert to scalar per quad.
          */
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         rho_bld->type, rho, 0);
      }
   }
   else {
      /*
       * Implicit derivatives: computed from quad neighbors.
       *
       * This looks all a bit complex, but it's not that bad
       * (the shuffle code makes it look worse than it is).
       * Still, might not be ideal for all cases.
       */
      static const unsigned char swizzle0[] = { /* no-op swizzle */
         0, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle1[] = {
         1, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle2[] = {
         2, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };

      /* packed quad derivatives: one coord per element for 1D, two for 2D+ */
      if (dims < 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
      }
      else if (dims >= 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
         }
      }

      if (no_rho_opt) {
         /* exact squared-length path (mirrors the explicit-derivs case) */
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

         /* broadcast (width, width, height, height) per quad */
         for (i = 0; i < num_quads; i++) {
            shuffles[i*4+0] = shuffles[i*4+1] = index0;
            shuffles[i*4+2] = shuffles[i*4+3] = index1;
         }
         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                           LLVMConstVector(shuffles, length), "");
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

         if (dims > 2) {
            /* fold in the (squared, depth-scaled) r derivatives */
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                                  coord_bld->type, float_size, index2);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
         }

         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (rho_per_quad) {
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            rho_bld->type, rho, 0);
         }
         else {
            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
         }
         /* skipping sqrt hence returning rho squared */
      }
      else {
         /* approximate path: rho = max over dims of size * max(|ddx|, |ddy|) */
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         }
         else {
            ddx_ddy[1] = NULL; /* silence compiler warning */
         }

         /* separate x-derivatives from y-derivatives per packed layout */
         if (dims < 2) {
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
         }
         else if (dims == 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            static const unsigned char swizzle13[] = {
               1, 3,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
         }
         else {
            /* 3D needs a cross-vector shuffle to gather (ds, dt, dr) triples */
            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
            assert(dims == 3);
            for (i = 0; i < num_quads; i++) {
               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
               shuffles1[4*i + 3] = i32undef;
               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
               shuffles2[4*i + 3] = i32undef;
            }
            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles1, length), "");
            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles2, length), "");
         }

         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (bld->coord_type.length > 4) {
            /* expand size to each quad */
            if (dims > 1) {
               /* could use some broadcast_vector helper for this? */
               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
               for (i = 0; i < num_quads; i++) {
                  src[i] = float_size;
               }
               float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
            }
            else {
               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
            }
            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            }
            else {
               if (dims >= 2) {
                  /* reduce across dimensions: rho = max(rho_s, rho_t[, rho_r]) */
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);

                  rho = lp_build_max(coord_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
                     rho = lp_build_max(coord_bld, rho, rho_r);
                  }
               }
            }
            if (rho_per_quad) {
               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                               rho_bld->type, rho, 0);
            }
            else {
               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
            }
         }
         else {
            /* single-quad case: scalar extraction is cheaper than shuffles */
            if (dims <= 1) {
               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
            }
            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            }
            else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

                  rho = lp_build_max(float_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
                     rho = lp_build_max(float_bld, rho, rho_r);
                  }
               }
            }
            if (!rho_per_quad) {
               rho = lp_build_broadcast_scalar(rho_bld, rho);
            }
         }
      }
   }

   return rho;
}
663
664
665 /*
666 * Bri-linear lod computation
667 *
668 * Use a piece-wise linear approximation of log2 such that:
669 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
670 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
671 * with the steepness specified in 'factor'
672 * - exact result for 0.5, 1.5, etc.
673 *
674 *
675 * 1.0 - /----*
676 * /
677 * /
678 * /
679 * 0.5 - *
680 * /
681 * /
682 * /
683 * 0.0 - *----/
684 *
685 * | |
686 * 2^0 2^1
687 *
688 * This is a technique also commonly used in hardware:
689 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
690 *
691 * TODO: For correctness, this should only be applied when texture is known to
692 * have regular mipmaps, i.e., mipmaps derived from the base level.
693 *
694 * TODO: This could be done in fixed point, where applicable.
695 */
/**
 * See the brilinear description above for the math.
 *
 * \param bld            float lod build context
 * \param lod            the (already log2'd) lod value
 * \param factor         brilinear steepness factor, > 1 (see BRILINEAR_FACTOR)
 * \param out_lod_ipart  returns the integer mip level
 * \param out_lod_fpart  returns the mip interpolation weight (never > 1,
 *                       may be negative — see lp_build_lod_selector docs)
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   /* debug aid, normally disabled */
   if (0) {
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   /* shift lod so ifloor splits at the brilinear break points */
   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   /* remap fractional part: fpart = fpart * factor + post_offset */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   /* debug aid, normally disabled */
   if (0) {
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}
733
734
735 /*
736 * Combined log2 and brilinear lod computation.
737 *
 * It is in all respects identical to calling lp_build_fast_log2() and
739 * lp_build_brilinear_lod() above, but by combining we can compute the integer
740 * and fractional part independently.
741 */
/**
 * Combined log2 + brilinear remap, operating directly on rho (= 2^lod).
 *
 * \param bld            float build context (must be a floating type)
 * \param rho            the rho value (NOT squared; callers with squared
 *                       rho cannot use this path)
 * \param factor         brilinear steepness factor
 * \param out_lod_ipart  returns the integer mip level
 * \param out_lod_fpart  returns the mip interpolation weight
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_ipart;
   LLVMValueRef lod_fpart;

   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor will make the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the integer
    * part will not need any post adjustments.
    */
   rho = lp_build_mul(bld, rho,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   lod_ipart = lp_build_extract_exponent(bld, rho, 0);

   /* fpart = rho / 2**ipart */
   lod_fpart = lp_build_extract_mantissa(bld, rho);

   /* remap mantissa into the brilinear weight: fpart*factor + post_offset */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = lod_ipart;
   *out_lod_fpart = lod_fpart;
}
786
787
/**
 * Fast implementation of iround(log2(sqrt(x))), based on
 * log2(x^n) == n*log2(x).
 *
 * Gives accurate results all the time.
 * (Could be trivially extended to handle other power-of-two roots.)
 *
 * Used for the rho-squared paths, where log2(rho) == log2(rho^2) / 2.
 *
 * \param bld  float build context
 * \param x    the (squared) value; must be of bld's floating type
 * \return integer vector with iround(log2(sqrt(x)))
 */
static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context *bld,
                    LLVMValueRef x)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef ipart;
   struct lp_type i_type = lp_int_type(bld->type);
   LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, x));

   /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
   /* extract_exponent with bias 1 gives floor(log2(x)) + 1; the arithmetic
    * shift by one then halves it, implementing the formula above. */
   ipart = lp_build_extract_exponent(bld, x, 1);
   ipart = LLVMBuildAShr(builder, ipart, one, "");

   return ipart;
}
814
815
816 /**
817 * Generate code to compute texture level of detail (lambda).
818 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
819 * \param lod_bias optional float vector with the shader lod bias
820 * \param explicit_lod optional float vector with the explicit lod
821 * \param cube_rho rho calculated by cube coord mapping (optional)
822 * \param out_lod_ipart integer part of lod
823 * \param out_lod_fpart float part of lod (never larger than 1 but may be negative)
824 * \param out_lod_positive (mask) if lod is positive (i.e. texture is minified)
825 *
826 * The resulting lod can be scalar per quad or be per element.
827 */
828 void
lp_build_lod_selector(struct lp_build_sample_context * bld,boolean is_lodq,unsigned texture_unit,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,enum pipe_tex_mipfilter mip_filter,LLVMValueRef max_aniso,LLVMValueRef * out_lod,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart,LLVMValueRef * out_lod_positive)829 lp_build_lod_selector(struct lp_build_sample_context *bld,
830 boolean is_lodq,
831 unsigned texture_unit,
832 unsigned sampler_unit,
833 LLVMValueRef s,
834 LLVMValueRef t,
835 LLVMValueRef r,
836 LLVMValueRef cube_rho,
837 const struct lp_derivatives *derivs,
838 LLVMValueRef lod_bias, /* optional */
839 LLVMValueRef explicit_lod, /* optional */
840 enum pipe_tex_mipfilter mip_filter,
841 LLVMValueRef max_aniso,
842 LLVMValueRef *out_lod,
843 LLVMValueRef *out_lod_ipart,
844 LLVMValueRef *out_lod_fpart,
845 LLVMValueRef *out_lod_positive)
846
847 {
848 LLVMBuilderRef builder = bld->gallivm->builder;
849 struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
850 struct lp_build_context *lodf_bld = &bld->lodf_bld;
851 LLVMValueRef lod;
852
853 *out_lod_ipart = bld->lodi_bld.zero;
854 *out_lod_positive = bld->lodi_bld.zero;
855 *out_lod_fpart = lodf_bld->zero;
856
857 /*
858 * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
859 * "Implementations may either unconditionally assume c = 0 for the minification
860 * vs. magnification switch-over point, or may choose to make c depend on the
861 * combination of minification and magnification modes as follows: if the
862 * magnification filter is given by LINEAR and the minification filter is given
863 * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
864 * done to ensure that a minified texture does not appear "sharper" than a
865 * magnified texture. Otherwise c = 0."
866 * And 3.9.11 Texture Minification:
867 * "If lod is less than or equal to the constant c (see section 3.9.12) the
868 * texture is said to be magnified; if it is greater, the texture is minified."
869 * So, using 0 as switchover point always, and using magnification for lod == 0.
870 * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
871 * old GL versions required 0.5 for the modes listed above.
872 * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
873 */
874
875 if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
876 /* User is forcing sampling from a particular mipmap level.
877 * This is hit during mipmap generation.
878 */
879 LLVMValueRef min_lod =
880 dynamic_state->min_lod(dynamic_state, bld->gallivm,
881 bld->context_ptr, sampler_unit);
882
883 lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
884 }
885 else {
886 if (explicit_lod) {
887 if (bld->num_lods != bld->coord_type.length)
888 lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
889 lodf_bld->type, explicit_lod, 0);
890 else
891 lod = explicit_lod;
892 }
893 else {
894 LLVMValueRef rho;
895 boolean rho_squared = (bld->no_rho_approx &&
896 (bld->dims > 1)) || cube_rho;
897
898 if (bld->static_sampler_state->aniso &&
899 !explicit_lod) {
900 rho = lp_build_pmin(bld, texture_unit, s, t, max_aniso);
901 rho_squared = true;
902 } else
903 rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
904
905 /*
906 * Compute lod = log2(rho)
907 */
908
909 if (!lod_bias && !is_lodq &&
910 !bld->static_sampler_state->aniso &&
911 !bld->static_sampler_state->lod_bias_non_zero &&
912 !bld->static_sampler_state->apply_max_lod &&
913 !bld->static_sampler_state->apply_min_lod) {
914 /*
915 * Special case when there are no post-log2 adjustments, which
916 * saves instructions but keeping the integer and fractional lod
917 * computations separate from the start.
918 */
919
920 if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
921 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
922 /*
923 * Don't actually need both values all the time, lod_ipart is
924 * needed for nearest mipfilter, lod_positive if min != mag.
925 */
926 if (rho_squared) {
927 *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
928 }
929 else {
930 *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
931 }
932 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
933 rho, lodf_bld->one);
934 return;
935 }
936 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
937 !bld->no_brilinear && !rho_squared &&
938 !bld->static_sampler_state->aniso) {
939 /*
940 * This can't work if rho is squared. Not sure if it could be
             * fixed while keeping it worthwhile, could also do sqrt here
942 * but brilinear and no_rho_opt seems like a combination not
943 * making much sense anyway so just use ordinary path below.
944 */
945 lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
946 out_lod_ipart, out_lod_fpart);
947 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
948 rho, lodf_bld->one);
949 return;
950 }
951 }
952
953 if (0) {
954 lod = lp_build_log2(lodf_bld, rho);
955 }
956 else {
            /* get more accurate results if we just square rho always */
958 if (!rho_squared)
959 rho = lp_build_mul(lodf_bld, rho, rho);
960 lod = lp_build_fast_log2(lodf_bld, rho);
961 }
962
963 /* log2(x^2) == 0.5*log2(x) */
964 lod = lp_build_mul(lodf_bld, lod,
965 lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F));
966
967 /* add shader lod bias */
968 if (lod_bias) {
969 if (bld->num_lods != bld->coord_type.length)
970 lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
971 lodf_bld->type, lod_bias, 0);
972 lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
973 }
974 }
975
976 /* add sampler lod bias */
977 if (bld->static_sampler_state->lod_bias_non_zero) {
978 LLVMValueRef sampler_lod_bias =
979 dynamic_state->lod_bias(dynamic_state, bld->gallivm,
980 bld->context_ptr, sampler_unit);
981 sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
982 sampler_lod_bias);
983 lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
984 }
985
986 if (is_lodq) {
987 *out_lod = lod;
988 }
989
990 /* clamp lod */
991 if (bld->static_sampler_state->apply_max_lod) {
992 LLVMValueRef max_lod =
993 dynamic_state->max_lod(dynamic_state, bld->gallivm,
994 bld->context_ptr, sampler_unit);
995 max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
996
997 lod = lp_build_min(lodf_bld, lod, max_lod);
998 }
999 if (bld->static_sampler_state->apply_min_lod) {
1000 LLVMValueRef min_lod =
1001 dynamic_state->min_lod(dynamic_state, bld->gallivm,
1002 bld->context_ptr, sampler_unit);
1003 min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
1004
1005 lod = lp_build_max(lodf_bld, lod, min_lod);
1006 }
1007
1008 if (is_lodq) {
1009 *out_lod_fpart = lod;
1010 return;
1011 }
1012 }
1013
1014 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
1015 lod, lodf_bld->zero);
1016
1017 if (bld->static_sampler_state->aniso) {
1018 *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
1019 } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1020 if (!bld->no_brilinear) {
1021 lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
1022 out_lod_ipart, out_lod_fpart);
1023 }
1024 else {
1025 lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
1026 }
1027
1028 lp_build_name(*out_lod_fpart, "lod_fpart");
1029 }
1030 else {
1031 *out_lod_ipart = lp_build_iround(lodf_bld, lod);
1032 }
1033
1034 lp_build_name(*out_lod_ipart, "lod_ipart");
1035
1036 return;
1037 }
1038
1039
1040 /**
1041 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1042 * to actual mip level.
1043 * Note: this is all scalar per quad code.
1044 * \param lod_ipart int texture level of detail
1045 * \param level_out returns integer
1046 * \param out_of_bounds returns per coord out_of_bounds mask if provided
1047 */
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out,
                           LLVMValueRef *out_of_bounds)
{
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
   LLVMValueRef first_level, last_level, level;

   /* Fetch the texture's valid level range and broadcast to level vectors. */
   first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
                                            bld->context_ptr, texture_unit, NULL);
   last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
                                          bld->context_ptr, texture_unit, NULL);
   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);

   /* lod_ipart is relative to first_level; bias it to an absolute level. */
   level = lp_build_add(leveli_bld, lod_ipart, first_level);

   if (out_of_bounds) {
      LLVMValueRef out, out1;
      /* build mask of lanes whose level fell outside [first_level, last_level] */
      out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(leveli_bld, out, out1);
      /*
       * Widen the per-lod mask to a per-coord-element mask, depending on
       * how many lods there are relative to the coord vector length.
       */
      if (bld->num_mips == bld->coord_bld.type.length) {
         /* one lod per element: mask already has the right layout */
         *out_of_bounds = out;
      }
      else if (bld->num_mips == 1) {
         /* single lod for all elements: splat the scalar mask */
         *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
      }
      else {
         /* per-quad lods: expand each mask value over its 4 quad elements */
         assert(bld->num_mips == bld->coord_bld.type.length / 4);
         *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                leveli_bld->type,
                                                                bld->int_coord_bld.type,
                                                                out);
      }
      /* zero the level in out-of-bounds lanes (mask is all-ones there) */
      level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
      *level_out = level;
   }
   else {
      /* clamp level to legal range of levels */
      *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);

   }
}
1095
1096
1097 /**
1098 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1099 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1100 * part accordingly.
1101 * Later, we'll sample from those two mipmap levels and interpolate between them.
1102 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef first_level, last_level;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   assert(bld->num_lods == bld->num_mips);

   /* Fetch the texture's valid level range and broadcast to level vectors. */
   first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
                                            bld->context_ptr, texture_unit, NULL);
   last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
                                          bld->context_ptr, texture_unit, NULL);
   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);

   /* the two adjacent levels to sample from and interpolate between */
   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level], with
    * the minimum number of comparisons, and zeroing lod_fpart in the extreme
    * ends in the process.
    */

   /* *level0_out < first_level */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             *level0_out, first_level,
                             "clamp_lod_to_first");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level1_out, "");

   /* weight 0 => effectively sample only level0 (== first_level) */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             *level0_out, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   /* weight 0 => effectively sample only level0 (== last_level) */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
   lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
}
1169
1170 /**
1171 * A helper function that factorizes this common pattern.
1172 */
1173 static LLVMValueRef
load_mip(struct gallivm_state * gallivm,LLVMValueRef offsets,LLVMValueRef index1)1174 load_mip(struct gallivm_state *gallivm, LLVMValueRef offsets, LLVMValueRef index1) {
1175 LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1176 LLVMValueRef indexes[2] = {zero, index1};
1177 LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, offsets, indexes, ARRAY_SIZE(indexes), "");
1178 return LLVMBuildLoad(gallivm->builder, ptr, "");
1179 }
1180
1181 /**
1182 * Return pointer to a single mipmap level.
1183 * \param level integer mipmap level
1184 */
1185 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1186 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1187 LLVMValueRef level)
1188 {
1189 LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets, level);
1190 LLVMBuilderRef builder = bld->gallivm->builder;
1191 LLVMValueRef data_ptr = LLVMBuildGEP(builder, bld->base_ptr, &mip_offset, 1, "");
1192 return data_ptr;
1193 }
1194
1195 /**
1196 * Return (per-pixel) offsets to mip levels.
1197 * \param level integer mipmap level
1198 */
1199 LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context * bld,LLVMValueRef level)1200 lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
1201 LLVMValueRef level)
1202 {
1203 LLVMBuilderRef builder = bld->gallivm->builder;
1204 LLVMValueRef offsets, offset1;
1205
1206 if (bld->num_mips == 1) {
1207 offset1 = load_mip(bld->gallivm, bld->mip_offsets, level);
1208 offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
1209 }
1210 else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1211 unsigned i;
1212
1213 offsets = bld->int_coord_bld.undef;
1214 for (i = 0; i < bld->num_mips; i++) {
1215 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1216 offset1 = load_mip(bld->gallivm, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, ""));
1217 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1218 offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
1219 }
1220 offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
1221 }
1222 else {
1223 unsigned i;
1224
1225 assert (bld->num_mips == bld->coord_bld.type.length);
1226
1227 offsets = bld->int_coord_bld.undef;
1228 for (i = 0; i < bld->num_mips; i++) {
1229 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1230 offset1 = load_mip(bld->gallivm, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, ""));
1231 offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
1232 }
1233 }
1234 return offsets;
1235 }
1236
1237
1238 /**
1239 * Codegen equivalent for u_minify().
1240 * @param lod_scalar if lod is a (broadcasted) scalar
1241 * Return max(1, base_size >> level);
1242 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level,
                boolean lod_scalar)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   }
   else {
      LLVMValueRef size;
      assert(bld->type.sign);
      /*
       * Plain shift path: fine when the shift count is uniform (lod_scalar),
       * or the CPU has per-element variable shifts (avx2), or isn't x86
       * with SSE at all.
       */
      if (lod_scalar ||
         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
         size = LLVMBuildLShr(builder, base_size, level, "minify");
         size = lp_build_max(bld, size, bld->one);
      }
      else {
         /*
          * emulate shift with float mul, since intel "forgot" shifts with
          * per-element shift count until avx2, which results in terrible
          * scalar extraction (both count and value), scalar shift,
          * vector reinsertion. Should not be an issue on any non-x86 cpu
          * with a vector instruction set.
          * On cpus with AMD's XOP this should also be unnecessary but I'm
          * not sure if llvm would emit this with current flags.
          */
         LLVMValueRef const127, const23, lf;
         struct lp_type ftype;
         struct lp_build_context fbld;
         ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
         lp_build_context_init(&fbld, bld->gallivm, ftype);
         const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
         const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);

         /* calculate 2^(-level) float */
         /* place (127 - level) into the ieee754 exponent field (bit 23),
          * then bitcast: the resulting float is exactly 2^(-level) */
         lf = lp_build_sub(bld, const127, level);
         lf = lp_build_shl(bld, lf, const23);
         lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");

         /* finish shift operation by doing float mul */
         base_size = lp_build_int_to_float(&fbld, base_size);
         size = lp_build_mul(&fbld, base_size, lf);
         /*
          * do the max also with floats because
          * a) non-emulated int max requires sse41
          *    (this is actually a lie as we could cast to 16bit values
          *    as 16bit is sufficient and 16bit int max is sse2)
          * b) with avx we can do int max 4-wide but float max 8-wide
          */
         size = lp_build_max(&fbld, size, fbld.one);
         size = lp_build_itrunc(&fbld, size);
      }
      return size;
   }
}
1304
1305
1306 /**
1307 * Dereference stride_array[mipmap_level] array to get a stride.
1308 * Return stride as a vector.
1309 */
1310 static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context * bld,LLVMValueRef stride_array,LLVMValueRef level)1311 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1312 LLVMValueRef stride_array, LLVMValueRef level)
1313 {
1314 LLVMBuilderRef builder = bld->gallivm->builder;
1315 LLVMValueRef stride, stride1;
1316 if (bld->num_mips == 1) {
1317 stride1 = load_mip(bld->gallivm, stride_array, level);
1318 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1319 }
1320 else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1321 LLVMValueRef stride1;
1322 unsigned i;
1323
1324 stride = bld->int_coord_bld.undef;
1325 for (i = 0; i < bld->num_mips; i++) {
1326 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1327 stride1 = load_mip(bld->gallivm, stride_array, LLVMBuildExtractElement(builder, level, indexi, ""));
1328 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1329 stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1330 }
1331 stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1332 }
1333 else {
1334 LLVMValueRef stride1;
1335 unsigned i;
1336
1337 assert (bld->num_mips == bld->coord_bld.type.length);
1338
1339 stride = bld->int_coord_bld.undef;
1340 for (i = 0; i < bld->coord_bld.type.length; i++) {
1341 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1342 stride1 = load_mip(bld->gallivm, stride_array, LLVMBuildExtractElement(builder, level, indexi, ""));
1343 stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1344 }
1345 }
1346 return stride;
1347 }
1348
1349
1350 /**
1351 * When sampling a mipmap, we need to compute the width, height, depth
1352 * of the source levels from the level indexes. This helper function
1353 * does that.
1354 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   /*
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   if (bld->num_mips == 1) {
      /* single level for all pixels: minify the whole size vector at once */
      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE);
   }
   else {
      LLVMValueRef int_size_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      if (bld->num_mips == num_quads) {
         /*
          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
          * intel "forgot" the variable shift count instruction until avx2.
          * A harmless 8x32 shift gets translated into 32 instructions
          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
          * unable to recognize if there are really just 2 different shift
          * count values. So do the shift 4-wide before expansion.
          */
         struct lp_build_context bld4;
         struct lp_type type4;

         /* 4-wide int context for the per-quad minify */
         type4 = bld->int_coord_bld.type;
         type4.length = 4;

         lp_build_context_init(&bld4, bld->gallivm, type4);

         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld4,
                                                     bld->int_size);
         }
         else {
            assert(bld->int_size_in_bld.type.length == 4);
            int_size_vec = bld->int_size;
         }

         /* minify the base size once per quad with that quad's level */
         for (i = 0; i < num_quads; i++) {
            LLVMValueRef ileveli;
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            ileveli = lp_build_extract_broadcast(bld->gallivm,
                                                 bld->leveli_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, TRUE);
         }
         /*
          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
          * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
          */
         *out_size = lp_build_concat(bld->gallivm,
                                     tmp,
                                     bld4.type,
                                     num_quads);
      }
      else {
         /* FIXME: this is terrible and results in _huge_ vector
          * (for the dims > 1 case).
          * Should refactor this (together with extract_image_sizes) and do
          * something more useful. Could for instance if we have width,height
          * with 4-wide vector pack all elements into a 8xi16 vector
          * (on which we can still do useful math) instead of using a 16xi32
          * vector.
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
          */
         assert(bld->num_mips == bld->coord_bld.type.length);
         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE);
         }
         else {
            /* minify the (w,h,d) tuple separately for each element's level */
            LLVMValueRef ilevel1;
            for (i = 0; i < bld->num_mips; i++) {
               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
               ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
                                                    bld->int_size_in_bld.type, ilevel, indexi);
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE);
            }
            *out_size = lp_build_concat(bld->gallivm, tmp,
                                        bld->int_size_in_bld.type,
                                        bld->num_mips);
         }
      }
   }

   /* NOTE: *row_stride_vec / *img_stride_vec are only written for the
    * dimensionalities below; callers must not read them otherwise. */
   if (dims >= 2) {
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_array,
                                                      ilevel);
   }
   if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
1470
1471
/**
 * Extract and broadcast texture size.
 *
 * @param size_bld    build context matching the layout of the size vector
 *                    (either &bld->int_size_bld or &bld->float_size_bld)
 * @param coord_type  type of the returned per-element vectors (either
 *                    bld->int_coord_type or bld->coord_type)
 * @param size        vector with the texture size (width, height, depth)
 * @param out_width   returns the width broadcast over the coord vector
 * @param out_height  returns the height (only written if dims >= 2)
 * @param out_depth   returns the depth (only written if dims == 3)
 */
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                             struct lp_build_context *size_bld,
                             struct lp_type coord_type,
                             LLVMValueRef size,
                             LLVMValueRef *out_width,
                             LLVMValueRef *out_height,
                             LLVMValueRef *out_depth)
{
   const unsigned dims = bld->dims;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   struct lp_type size_type = size_bld->type;

   if (bld->num_mips == 1) {
      /* single mip: size holds one (w, h, d) tuple; broadcast each component */
      *out_width = lp_build_extract_broadcast(bld->gallivm,
                                              size_type,
                                              coord_type,
                                              size,
                                              LLVMConstInt(i32t, 0, 0));
      if (dims >= 2) {
         *out_height = lp_build_extract_broadcast(bld->gallivm,
                                                  size_type,
                                                  coord_type,
                                                  size,
                                                  LLVMConstInt(i32t, 1, 0));
         if (dims == 3) {
            *out_depth = lp_build_extract_broadcast(bld->gallivm,
                                                    size_type,
                                                    coord_type,
                                                    size,
                                                    LLVMConstInt(i32t, 2, 0));
         }
      }
   }
   else {
      unsigned num_quads = bld->coord_bld.type.length / 4;

      if (dims == 1) {
         /* 1D: the size vector is already per-element widths */
         *out_width = size;
      }
      else if (bld->num_mips == num_quads) {
         /* per-quad sizes: size is [w0,h0,d0,_, w1,h1,d1,_, ...];
          * replicate each component across its quad */
         *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
         if (dims >= 2) {
            *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
            if (dims == 3) {
               *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
            }
         }
      }
      else {
         /* fully per-element sizes: gather each component into its own vector */
         assert(bld->num_mips == bld->coord_type.length);
         *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                coord_type, size, 0);
         if (dims >= 2) {
            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                    coord_type, size, 1);
            if (dims == 3) {
               *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                      coord_type, size, 2);
            }
         }
      }
   }
}
1545
1546
1547 /**
1548 * Unnormalize coords.
1549 *
1550 * @param flt_size vector with the integer texture size (width, height, depth)
1551 */
1552 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1553 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1554 LLVMValueRef flt_size,
1555 LLVMValueRef *s,
1556 LLVMValueRef *t,
1557 LLVMValueRef *r)
1558 {
1559 const unsigned dims = bld->dims;
1560 LLVMValueRef width;
1561 LLVMValueRef height = NULL;
1562 LLVMValueRef depth = NULL;
1563
1564 lp_build_extract_image_sizes(bld,
1565 &bld->float_size_bld,
1566 bld->coord_type,
1567 flt_size,
1568 &width,
1569 &height,
1570 &depth);
1571
1572 /* s = s * width, t = t * height */
1573 *s = lp_build_mul(&bld->coord_bld, *s, width);
1574 if (dims >= 2) {
1575 *t = lp_build_mul(&bld->coord_bld, *t, height);
1576 if (dims >= 3) {
1577 *r = lp_build_mul(&bld->coord_bld, *r, depth);
1578 }
1579 }
1580 }
1581
/**
 * Generate new coords and faces for cubemap texels falling off the face.
 *
 * @param face face (center) of the pixel
 * @param x0 lower x coord
 * @param x1 higher x coord (must be x0 + 1)
 * @param y0 lower y coord
 * @param y1 higher y coord (must be y0 + 1)
 * @param max_coord texture cube (level) size - 1
 * @param next_faces new face values when falling off
 * @param next_xcoords new x coord values when falling off
 * @param next_ycoords new y coord values when falling off
 *
 * The arrays hold the new values when under/overflow of
 * lower x, higher x, lower y, higher y coord would occur (in this order).
 * next_xcoords/next_ycoords have two entries each (for both new lower and
 * higher coord).
 */
void
lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
                         LLVMValueRef face,
                         LLVMValueRef x0,
                         LLVMValueRef x1,
                         LLVMValueRef y0,
                         LLVMValueRef y1,
                         LLVMValueRef max_coord,
                         LLVMValueRef next_faces[4],
                         LLVMValueRef next_xcoords[4][2],
                         LLVMValueRef next_ycoords[4][2])
{
   /*
    * Lookup tables aren't nice for simd code hence try some logic here.
    * (Note that while it would not be necessary to do per-sample (4) lookups
    * when using a LUT as it's impossible that texels fall off of positive
    * and negative edges simultaneously, it would however be necessary to
    * do 2 lookups for corner handling as in this case texels both fall off
    * of x and y axes.)
    */
   /*
    * Next faces (for face 012345):
    * x < 0.0  : 451110
    * x >= 1.0 : 540001
    * y < 0.0  : 225422
    * y >= 1.0 : 334533
    * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
    * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
    * nfy+: face & ~4 > 1 ? face + 2 : 3;
    * This could also use pshufb instead, but would need (manually coded)
    * ssse3 intrinsic (llvm won't do non-constant shuffles).
    */
   struct gallivm_state *gallivm = ivec_bld->gallivm;
   LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
   LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
   LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
   LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
   LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
   LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);

   /* next faces for x under/overflow (nfx- / nfx+, see table above) */
   sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
   tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
   sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
   faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
   tmp = lp_build_add(ivec_bld, faceand1, c4);
   next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
   next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);

   /* next faces for y under/overflow (nfy- / nfy+) */
   tmp = lp_build_andnot(ivec_bld, face, c4);
   sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
   tmp = lp_build_add(ivec_bld, face, c2);
   next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
   next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);

   /*
    * new xcoords (for face 012345):
    * x < 0.0  : max   max   t     max-t max  max
    * x >= 1.0 : 0     0     max-t t     0    0
    * y < 0.0  : max   0     max-s s     s    max-s
    * y >= 1.0 : max   0     s     max-s s    max-s
    *
    * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
    * ncx[0] = max - ncx[1]
    * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
    * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
    */
   sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
   maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
   tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
   next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
   next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
   maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
   tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
   next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
   next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);

   sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);

   tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
   maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
   tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
   next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
   next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
   maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
   tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
   next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
   next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);

   /*
    * new ycoords (for face 012345):
    * x < 0.0  : t     t     0     max   t    t
    * x >= 1.0 : t     t     0     max   t    t
    * y < 0.0  : max-s s     0     max   max  0
    * y >= 1.0 : s     max-s 0     max   0    max
    *
    * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
    * ncy[1] = ncy[0]
    * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1 ? max-s : s)
    * ncy[2] = face & ~4 > 1 ? ncy[3] : max - ncy[3]
    */
   tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
   next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
   next_ycoords[1][0] = next_ycoords[0][0];
   next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
   next_ycoords[1][1] = next_ycoords[0][1];

   tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
   tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
   next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
   next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
   tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
   tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
   next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
   next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
}
1719
1720
1721 /** Helper used by lp_build_cube_lookup() */
1722 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1723 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1724 {
1725 /* ima = +0.5 / abs(coord); */
1726 LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1727 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1728 /* avoid div by zero */
1729 LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1730 LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1731 LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1732 return ima;
1733 }
1734
1735
1736 /** Helper for doing 3-wise selection.
1737 * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1738 */
1739 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1740 lp_build_select3(struct lp_build_context *sel_bld,
1741 LLVMValueRef sel0,
1742 LLVMValueRef sel1,
1743 LLVMValueRef val0,
1744 LLVMValueRef val1,
1745 LLVMValueRef val2)
1746 {
1747 LLVMValueRef tmp;
1748 tmp = lp_build_select(sel_bld, sel0, val0, val1);
1749 return lp_build_select(sel_bld, sel1, val2, tmp);
1750 }
1751
1752
1753 /**
1754 * Generate code to do cube face selection and compute per-face texcoords.
1755 */
1756 void
lp_build_cube_lookup(struct lp_build_sample_context * bld,LLVMValueRef * coords,const struct lp_derivatives * derivs_in,LLVMValueRef * rho,struct lp_derivatives * derivs_out,boolean need_derivs)1757 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1758 LLVMValueRef *coords,
1759 const struct lp_derivatives *derivs_in, /* optional */
1760 LLVMValueRef *rho,
1761 struct lp_derivatives *derivs_out, /* optional */
1762 boolean need_derivs)
1763 {
1764 struct lp_build_context *coord_bld = &bld->coord_bld;
1765 LLVMBuilderRef builder = bld->gallivm->builder;
1766 struct gallivm_state *gallivm = bld->gallivm;
1767 LLVMValueRef si, ti, ri;
1768
1769 /*
1770 * Do per-pixel face selection. We cannot however (as we used to do)
1771 * simply calculate the derivs afterwards (which is very bogus for
1772 * explicit derivs btw) because the values would be "random" when
1773 * not all pixels lie on the same face. So what we do here is just
1774 * calculate the derivatives after scaling the coords by the absolute
1775 * value of the inverse major axis, and essentially do rho calculation
1776 * steps as if it were a 3d texture. This is perfect if all pixels hit
1777 * the same face, but not so great at edges, I believe the max error
1778 * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
1779 * the 3d distance between 2 points on the cube instead of measuring up/down
1780 * the edge). Still this is possibly a win over just selecting the same face
1781 * for all pixels. Unfortunately, something like that doesn't work for
1782 * explicit derivatives.
1783 */
1784 struct lp_build_context *cint_bld = &bld->int_coord_bld;
1785 struct lp_type intctype = cint_bld->type;
1786 LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1787 LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1788 LLVMValueRef as, at, ar, face, face_s, face_t;
1789 LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1790 LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1791 LLVMValueRef tnegi, rnegi;
1792 LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1793 LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1794 LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1795 1LL << (intctype.width - 1));
1796 LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1797 intctype.width -1);
1798 LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1799 LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1800 LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1801 LLVMValueRef s = coords[0];
1802 LLVMValueRef t = coords[1];
1803 LLVMValueRef r = coords[2];
1804
1805 assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1806 assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1807 assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1808
1809 /*
1810 * get absolute value (for x/y/z face selection) and sign bit
1811 * (for mirroring minor coords and pos/neg face selection)
1812 * of the original coords.
1813 */
1814 as = lp_build_abs(&bld->coord_bld, s);
1815 at = lp_build_abs(&bld->coord_bld, t);
1816 ar = lp_build_abs(&bld->coord_bld, r);
1817
1818 /*
1819 * major face determination: select x if x > y else select y
1820 * select z if z >= max(x,y) else select previous result
1821 * if some axis are the same we chose z over y, y over x - the
1822 * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1823 * wouldn't care could save a select or two if using different
1824 * compares and doing at_g_as_ar last since tnewx and tnewz are the
1825 * same).
1826 */
1827 as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1828 maxasat = lp_build_max(coord_bld, as, at);
1829 ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1830
1831 if (need_derivs) {
1832 /*
1833 * XXX: This is really really complex.
1834 * It is a bit overkill to use this for implicit derivatives as well,
1835 * no way this is worth the cost in practice, but seems to be the
1836 * only way for getting accurate and per-pixel lod values.
1837 */
1838 LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1839 LLVMValueRef madx, mady, madxdivma, madydivma;
1840 LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1841 LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1842 LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1843 LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1844 LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1845 /*
1846 * s = 1/2 * ( sc / ma + 1)
1847 * t = 1/2 * ( tc / ma + 1)
1848 *
1849 * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1850 * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1851 *
1852 * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1853 * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1854 * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1855 * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1856 */
1857
1858 /* select ma, calculate ima */
1859 ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1860 mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1861 signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1862 ima = lp_build_div(coord_bld, coord_bld->one, ma);
1863 imahalf = lp_build_mul(coord_bld, posHalf, ima);
1864 imahalfpos = lp_build_abs(coord_bld, imahalf);
1865
1866 if (!derivs_in) {
1867 ddx[0] = lp_build_ddx(coord_bld, s);
1868 ddx[1] = lp_build_ddx(coord_bld, t);
1869 ddx[2] = lp_build_ddx(coord_bld, r);
1870 ddy[0] = lp_build_ddy(coord_bld, s);
1871 ddy[1] = lp_build_ddy(coord_bld, t);
1872 ddy[2] = lp_build_ddy(coord_bld, r);
1873 }
1874 else {
1875 ddx[0] = derivs_in->ddx[0];
1876 ddx[1] = derivs_in->ddx[1];
1877 ddx[2] = derivs_in->ddx[2];
1878 ddy[0] = derivs_in->ddy[0];
1879 ddy[1] = derivs_in->ddy[1];
1880 ddy[2] = derivs_in->ddy[2];
1881 }
1882
1883 /* select major derivatives */
1884 madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1885 mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1886
1887 si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1888 ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1889 ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1890
1891 sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1892 tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1893 rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1894
1895 sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1896 tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1897 rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1898
1899 /*
1900 * compute all possible new s/t coords, which does the mirroring,
1901 * and do the same for derivs minor axes.
1902 * snewx = signma * -r;
1903 * tnewx = -t;
1904 * snewy = s;
1905 * tnewy = signma * r;
1906 * snewz = signma * s;
1907 * tnewz = -t;
1908 */
1909 tnegi = LLVMBuildXor(builder, ti, signmask, "");
1910 rnegi = LLVMBuildXor(builder, ri, signmask, "");
1911 tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1912 rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1913 tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1914 rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1915
1916 snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1917 tnewx = tnegi;
1918 sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1919 tdxnewx = tdxnegi;
1920 sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1921 tdynewx = tdynegi;
1922
1923 snewy = si;
1924 tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1925 sdxnewy = sdxi;
1926 tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1927 sdynewy = sdyi;
1928 tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1929
1930 snewz = LLVMBuildXor(builder, signmabit, si, "");
1931 tnewz = tnegi;
1932 sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1933 tdxnewz = tdxnegi;
1934 sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1935 tdynewz = tdynegi;
1936
1937 /* select the mirrored values */
1938 face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1939 face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1940 face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1941 face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1942 face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
1943 face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
1944 face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
1945
1946 face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
1947 face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
1948 face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
1949 face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
1950 face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
1951 face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
1952
1953 /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
1954 madxdivma = lp_build_mul(coord_bld, madx, ima);
1955 tmp = lp_build_mul(coord_bld, madxdivma, face_s);
1956 tmp = lp_build_sub(coord_bld, face_sdx, tmp);
1957 derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
1958
1959 /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
1960 tmp = lp_build_mul(coord_bld, madxdivma, face_t);
1961 tmp = lp_build_sub(coord_bld, face_tdx, tmp);
1962 derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
1963
1964 /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
1965 madydivma = lp_build_mul(coord_bld, mady, ima);
1966 tmp = lp_build_mul(coord_bld, madydivma, face_s);
1967 tmp = lp_build_sub(coord_bld, face_sdy, tmp);
1968 derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
1969
1970 /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
1971 tmp = lp_build_mul(coord_bld, madydivma, face_t);
1972 tmp = lp_build_sub(coord_bld, face_tdy, tmp);
1973 derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
1974
1975 signma = LLVMBuildLShr(builder, mai, signshift, "");
1976 coords[2] = LLVMBuildOr(builder, face, signma, "face");
1977
1978 /* project coords */
1979 face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
1980 face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
1981
1982 coords[0] = lp_build_add(coord_bld, face_s, posHalf);
1983 coords[1] = lp_build_add(coord_bld, face_t, posHalf);
1984
1985 return;
1986 }
1987
1988 ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1989 mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1990 signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1991
1992 si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1993 ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1994 ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1995
1996 /*
1997 * compute all possible new s/t coords, which does the mirroring
1998 * snewx = signma * -r;
1999 * tnewx = -t;
2000 * snewy = s;
2001 * tnewy = signma * r;
2002 * snewz = signma * s;
2003 * tnewz = -t;
2004 */
2005 tnegi = LLVMBuildXor(builder, ti, signmask, "");
2006 rnegi = LLVMBuildXor(builder, ri, signmask, "");
2007
2008 snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2009 tnewx = tnegi;
2010
2011 snewy = si;
2012 tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2013
2014 snewz = LLVMBuildXor(builder, signmabit, si, "");
2015 tnewz = tnegi;
2016
2017 /* select the mirrored values */
2018 face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2019 face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2020 face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2021
2022 face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2023 face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2024
2025 /* add +1 for neg face */
2026 /* XXX with AVX probably want to use another select here -
2027 * as long as we ensure vblendvps gets used we can actually
2028 * skip the comparison and just use sign as a "mask" directly.
2029 */
2030 signma = LLVMBuildLShr(builder, mai, signshift, "");
2031 coords[2] = LLVMBuildOr(builder, face, signma, "face");
2032
2033 /* project coords */
2034 if (!need_derivs) {
2035 imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2036 face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2037 face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2038 }
2039
2040 coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2041 coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2042 }
2043
2044
2045 /**
2046 * Compute the partial offset of a pixel block along an arbitrary axis.
2047 *
2048 * @param coord coordinate in pixels
2049 * @param stride number of bytes between rows of successive pixel blocks
2050 * @param block_length number of pixels in a pixels block along the coordinate
2051 * axis
2052 * @param out_offset resulting relative offset of the pixel block in bytes
2053 * @param out_subcoord resulting sub-block pixel coordinate
2054 */
2055 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2056 lp_build_sample_partial_offset(struct lp_build_context *bld,
2057 unsigned block_length,
2058 LLVMValueRef coord,
2059 LLVMValueRef stride,
2060 LLVMValueRef *out_offset,
2061 LLVMValueRef *out_subcoord)
2062 {
2063 LLVMBuilderRef builder = bld->gallivm->builder;
2064 LLVMValueRef offset;
2065 LLVMValueRef subcoord;
2066
2067 if (block_length == 1) {
2068 subcoord = bld->zero;
2069 }
2070 else {
2071 /*
2072 * Pixel blocks have power of two dimensions. LLVM should convert the
2073 * rem/div to bit arithmetic.
2074 * TODO: Verify this.
2075 * It does indeed BUT it does transform it to scalar (and back) when doing so
2076 * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2077 * The generated code looks seriously unfunny and is quite expensive.
2078 */
2079 #if 0
2080 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2081 subcoord = LLVMBuildURem(builder, coord, block_width, "");
2082 coord = LLVMBuildUDiv(builder, coord, block_width, "");
2083 #else
2084 unsigned logbase2 = util_logbase2(block_length);
2085 LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2086 LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2087 subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2088 coord = LLVMBuildLShr(builder, coord, block_shift, "");
2089 #endif
2090 }
2091
2092 offset = lp_build_mul(bld, coord, stride);
2093
2094 assert(out_offset);
2095 assert(out_subcoord);
2096
2097 *out_offset = offset;
2098 *out_subcoord = subcoord;
2099 }
2100
2101
2102 /**
2103 * Compute the offset of a pixel block.
2104 *
2105 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2106 *
2107 * Returns the relative offset and i,j sub-block coordinates
2108 */
2109 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2110 lp_build_sample_offset(struct lp_build_context *bld,
2111 const struct util_format_description *format_desc,
2112 LLVMValueRef x,
2113 LLVMValueRef y,
2114 LLVMValueRef z,
2115 LLVMValueRef y_stride,
2116 LLVMValueRef z_stride,
2117 LLVMValueRef *out_offset,
2118 LLVMValueRef *out_i,
2119 LLVMValueRef *out_j)
2120 {
2121 LLVMValueRef x_stride;
2122 LLVMValueRef offset;
2123
2124 x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2125 format_desc->block.bits/8);
2126
2127 lp_build_sample_partial_offset(bld,
2128 format_desc->block.width,
2129 x, x_stride,
2130 &offset, out_i);
2131
2132 if (y && y_stride) {
2133 LLVMValueRef y_offset;
2134 lp_build_sample_partial_offset(bld,
2135 format_desc->block.height,
2136 y, y_stride,
2137 &y_offset, out_j);
2138 offset = lp_build_add(bld, offset, y_offset);
2139 }
2140 else {
2141 *out_j = bld->zero;
2142 }
2143
2144 if (z && z_stride) {
2145 LLVMValueRef z_offset;
2146 LLVMValueRef k;
2147 lp_build_sample_partial_offset(bld,
2148 1, /* pixel blocks are always 2D */
2149 z, z_stride,
2150 &z_offset, &k);
2151 offset = lp_build_add(bld, offset, z_offset);
2152 }
2153
2154 *out_offset = offset;
2155 }
2156
2157 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2158 lp_build_sample_min(struct lp_build_context *bld,
2159 LLVMValueRef x,
2160 LLVMValueRef v0,
2161 LLVMValueRef v1)
2162 {
2163 /* if the incoming LERP weight is 0 then the min/max
2164 * should ignore that value. */
2165 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2166 bld->type,
2167 PIPE_FUNC_NOTEQUAL,
2168 x, bld->zero);
2169 LLVMValueRef min = lp_build_min(bld, v0, v1);
2170
2171 return lp_build_select(bld, mask, min, v0);
2172 }
2173
2174 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2175 lp_build_sample_max(struct lp_build_context *bld,
2176 LLVMValueRef x,
2177 LLVMValueRef v0,
2178 LLVMValueRef v1)
2179 {
2180 /* if the incoming LERP weight is 0 then the min/max
2181 * should ignore that value. */
2182 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2183 bld->type,
2184 PIPE_FUNC_NOTEQUAL,
2185 x, bld->zero);
2186 LLVMValueRef max = lp_build_max(bld, v0, v1);
2187
2188 return lp_build_select(bld, mask, max, v0);
2189 }
2190
2191 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2192 lp_build_sample_min_2d(struct lp_build_context *bld,
2193 LLVMValueRef x,
2194 LLVMValueRef y,
2195 LLVMValueRef a,
2196 LLVMValueRef b,
2197 LLVMValueRef c,
2198 LLVMValueRef d)
2199 {
2200 LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2201 LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2202 return lp_build_sample_min(bld, y, v0, v1);
2203 }
2204
2205 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2206 lp_build_sample_max_2d(struct lp_build_context *bld,
2207 LLVMValueRef x,
2208 LLVMValueRef y,
2209 LLVMValueRef a,
2210 LLVMValueRef b,
2211 LLVMValueRef c,
2212 LLVMValueRef d)
2213 {
2214 LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2215 LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2216 return lp_build_sample_max(bld, y, v0, v1);
2217 }
2218
2219 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2220 lp_build_sample_min_3d(struct lp_build_context *bld,
2221 LLVMValueRef x,
2222 LLVMValueRef y,
2223 LLVMValueRef z,
2224 LLVMValueRef a, LLVMValueRef b,
2225 LLVMValueRef c, LLVMValueRef d,
2226 LLVMValueRef e, LLVMValueRef f,
2227 LLVMValueRef g, LLVMValueRef h)
2228 {
2229 LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2230 LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2231 return lp_build_sample_min(bld, z, v0, v1);
2232 }
2233
2234 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2235 lp_build_sample_max_3d(struct lp_build_context *bld,
2236 LLVMValueRef x,
2237 LLVMValueRef y,
2238 LLVMValueRef z,
2239 LLVMValueRef a, LLVMValueRef b,
2240 LLVMValueRef c, LLVMValueRef d,
2241 LLVMValueRef e, LLVMValueRef f,
2242 LLVMValueRef g, LLVMValueRef h)
2243 {
2244 LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2245 LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2246 return lp_build_sample_max(bld, z, v0, v1);
2247 }
2248
2249 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2250 lp_build_reduce_filter(struct lp_build_context *bld,
2251 enum pipe_tex_reduction_mode mode,
2252 unsigned flags,
2253 unsigned num_chan,
2254 LLVMValueRef x,
2255 LLVMValueRef *v00,
2256 LLVMValueRef *v01,
2257 LLVMValueRef *out)
2258 {
2259 unsigned chan;
2260 switch (mode) {
2261 case PIPE_TEX_REDUCTION_MIN:
2262 for (chan = 0; chan < num_chan; chan++)
2263 out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2264 break;
2265 case PIPE_TEX_REDUCTION_MAX:
2266 for (chan = 0; chan < num_chan; chan++)
2267 out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2268 break;
2269 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2270 default:
2271 for (chan = 0; chan < num_chan; chan++)
2272 out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2273 break;
2274 }
2275 }
2276
2277 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2278 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2279 enum pipe_tex_reduction_mode mode,
2280 unsigned flags,
2281 unsigned num_chan,
2282 LLVMValueRef x,
2283 LLVMValueRef y,
2284 LLVMValueRef *v00,
2285 LLVMValueRef *v01,
2286 LLVMValueRef *v10,
2287 LLVMValueRef *v11,
2288 LLVMValueRef *out)
2289 {
2290 unsigned chan;
2291 switch (mode) {
2292 case PIPE_TEX_REDUCTION_MIN:
2293 for (chan = 0; chan < num_chan; chan++)
2294 out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2295 break;
2296 case PIPE_TEX_REDUCTION_MAX:
2297 for (chan = 0; chan < num_chan; chan++)
2298 out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2299 break;
2300 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2301 default:
2302 for (chan = 0; chan < num_chan; chan++)
2303 out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags);
2304 break;
2305 }
2306 }
2307
2308 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2309 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2310 enum pipe_tex_reduction_mode mode,
2311 unsigned flags,
2312 unsigned num_chan,
2313 LLVMValueRef x,
2314 LLVMValueRef y,
2315 LLVMValueRef z,
2316 LLVMValueRef *v000,
2317 LLVMValueRef *v001,
2318 LLVMValueRef *v010,
2319 LLVMValueRef *v011,
2320 LLVMValueRef *v100,
2321 LLVMValueRef *v101,
2322 LLVMValueRef *v110,
2323 LLVMValueRef *v111,
2324 LLVMValueRef *out)
2325 {
2326 unsigned chan;
2327 switch (mode) {
2328 case PIPE_TEX_REDUCTION_MIN:
2329 for (chan = 0; chan < num_chan; chan++)
2330 out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2331 v000[chan], v001[chan], v010[chan], v011[chan],
2332 v100[chan], v101[chan], v110[chan], v111[chan]);
2333 break;
2334 case PIPE_TEX_REDUCTION_MAX:
2335 for (chan = 0; chan < num_chan; chan++)
2336 out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2337 v000[chan], v001[chan], v010[chan], v011[chan],
2338 v100[chan], v101[chan], v110[chan], v111[chan]);
2339 break;
2340 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2341 default:
2342 for (chan = 0; chan < num_chan; chan++)
2343 out[chan] = lp_build_lerp_3d(bld, x, y, z,
2344 v000[chan], v001[chan], v010[chan], v011[chan],
2345 v100[chan], v101[chan], v110[chan], v111[chan],
2346 flags);
2347 break;
2348 }
2349 }
2350
2351 /*
2352 * generated from
2353 * const float alpha = 2;
2354 * for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
2355 * const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
2356 * const float weight = (float)expf(-alpha * r2);
2357 */
2358 static const float aniso_filter_table[1024] = {
2359 1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
2360 0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
2361 0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
2362 0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
2363 0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
2364 0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
2365 0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
2366 0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
2367 0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
2368 0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
2369 0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
2370 0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
2371 0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
2372 0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
2373 0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
2374 0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
2375 0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
2376 0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
2377 0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
2378 0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
2379 0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
2380 0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
2381 0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
2382 0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
2383 0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
2384 0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
2385 0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
2386 0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
2387 0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
2388 0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
2389 0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
2390 0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
2391 0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
2392 0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
2393 0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
2394 0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
2395 0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
2396 0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
2397 0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
2398 0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
2399 0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
2400 0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
2401 0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
2402 0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
2403 0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
2404 0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
2405 0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
2406 0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
2407 0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
2408 0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
2409 0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
2410 0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
2411 0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
2412 0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
2413 0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
2414 0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
2415 0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
2416 0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
2417 0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
2418 0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
2419 0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
2420 0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
2421 0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
2422 0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
2423 0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
2424 0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
2425 0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
2426 0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
2427 0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
2428 0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
2429 0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
2430 0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
2431 0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
2432 0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
2433 0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
2434 0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
2435 0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
2436 0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
2437 0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
2438 0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
2439 0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
2440 0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
2441 0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
2442 0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
2443 0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
2444 0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
2445 0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
2446 0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
2447 0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
2448 0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
2449 0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
2450 0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
2451 0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
2452 0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
2453 0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
2454 0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
2455 0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
2456 0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
2457 0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
2458 0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
2459 0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
2460 0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
2461 0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
2462 0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
2463 0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
2464 0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
2465 0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
2466 0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
2467 0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
2468 0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
2469 0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
2470 0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
2471 0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
2472 0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
2473 0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
2474 0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
2475 0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
2476 0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
2477 0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
2478 0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
2479 0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
2480 0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
2481 0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
2482 0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
2483 0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
2484 0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
2485 0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
2486 0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
2487 };
2488
2489 const float *
lp_build_sample_aniso_filter_table(void)2490 lp_build_sample_aniso_filter_table(void)
2491 {
2492 return aniso_filter_table;
2493 }
2494