1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- common code.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52
53
54 /*
55 * Bri-linear factor. Should be greater than one.
56 */
57 #define BRILINEAR_FACTOR 2
58
59 /**
60 * Does the given texture wrap mode allow sampling the texture border color?
61 * XXX maybe move this into gallium util code.
62 */
63 boolean
lp_sampler_wrap_mode_uses_border_color(unsigned mode,unsigned min_img_filter,unsigned mag_img_filter)64 lp_sampler_wrap_mode_uses_border_color(unsigned mode,
65 unsigned min_img_filter,
66 unsigned mag_img_filter)
67 {
68 switch (mode) {
69 case PIPE_TEX_WRAP_REPEAT:
70 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
71 case PIPE_TEX_WRAP_MIRROR_REPEAT:
72 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
73 return FALSE;
74 case PIPE_TEX_WRAP_CLAMP:
75 case PIPE_TEX_WRAP_MIRROR_CLAMP:
76 if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
77 mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
78 return FALSE;
79 } else {
80 return TRUE;
81 }
82 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
83 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
84 return TRUE;
85 default:
86 assert(0 && "unexpected wrap mode");
87 return FALSE;
88 }
89 }
90
91
92 /**
93 * Initialize lp_sampler_static_texture_state object with the gallium
94 * texture/sampler_view state (this contains the parts which are
95 * considered static).
96 */
97 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)98 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
99 const struct pipe_sampler_view *view)
100 {
101 const struct pipe_resource *texture;
102
103 memset(state, 0, sizeof *state);
104
105 if (!view || !view->texture)
106 return;
107
108 texture = view->texture;
109
110 state->format = view->format;
111 state->swizzle_r = view->swizzle_r;
112 state->swizzle_g = view->swizzle_g;
113 state->swizzle_b = view->swizzle_b;
114 state->swizzle_a = view->swizzle_a;
115
116 state->target = view->target;
117 state->pot_width = util_is_power_of_two_or_zero(texture->width0);
118 state->pot_height = util_is_power_of_two_or_zero(texture->height0);
119 state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
120 state->level_zero_only = !view->u.tex.last_level;
121
122 /*
123 * the layer / element / level parameters are all either dynamic
124 * state or handled transparently wrt execution.
125 */
126 }
127
128 /**
129 * Initialize lp_sampler_static_texture_state object with the gallium
130 * texture/sampler_view state (this contains the parts which are
131 * considered static).
132 */
133 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)134 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
135 const struct pipe_image_view *view)
136 {
137 const struct pipe_resource *resource;
138
139 memset(state, 0, sizeof *state);
140
141 if (!view || !view->resource)
142 return;
143
144 resource = view->resource;
145
146 state->format = view->format;
147 state->swizzle_r = PIPE_SWIZZLE_X;
148 state->swizzle_g = PIPE_SWIZZLE_Y;
149 state->swizzle_b = PIPE_SWIZZLE_Z;
150 state->swizzle_a = PIPE_SWIZZLE_W;
151
152 state->target = view->resource->target;
153 state->pot_width = util_is_power_of_two_or_zero(resource->width0);
154 state->pot_height = util_is_power_of_two_or_zero(resource->height0);
155 state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
156 state->level_zero_only = 0;
157
158 /*
159 * the layer / element / level parameters are all either dynamic
160 * state or handled transparently wrt execution.
161 */
162 }
163
164 /**
165 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
166 * state (this contains the parts which are considered static).
167 */
168 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)169 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
170 const struct pipe_sampler_state *sampler)
171 {
172 memset(state, 0, sizeof *state);
173
174 if (!sampler)
175 return;
176
177 /*
178 * We don't copy sampler state over unless it is actually enabled, to avoid
179 * spurious recompiles, as the sampler static state is part of the shader
180 * key.
181 *
182 * Ideally gallium frontends or cso_cache module would make all state
183 * canonical, but until that happens it's better to be safe than sorry here.
184 *
185 * XXX: Actually there's much more than can be done here, especially
186 * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
187 */
188
189 state->wrap_s = sampler->wrap_s;
190 state->wrap_t = sampler->wrap_t;
191 state->wrap_r = sampler->wrap_r;
192 state->min_img_filter = sampler->min_img_filter;
193 state->mag_img_filter = sampler->mag_img_filter;
194 state->min_mip_filter = sampler->min_mip_filter;
195 state->seamless_cube_map = sampler->seamless_cube_map;
196 state->reduction_mode = sampler->reduction_mode;
197 state->aniso = sampler->max_anisotropy > 1.0f;
198
199 if (sampler->max_lod > 0.0f) {
200 state->max_lod_pos = 1;
201 }
202
203 if (sampler->lod_bias != 0.0f) {
204 state->lod_bias_non_zero = 1;
205 }
206
207 if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
208 state->min_img_filter != state->mag_img_filter) {
209
210 /* If min_lod == max_lod we can greatly simplify mipmap selection.
211 * This is a case that occurs during automatic mipmap generation.
212 */
213 if (sampler->min_lod == sampler->max_lod) {
214 state->min_max_lod_equal = 1;
215 } else {
216 if (sampler->min_lod > 0.0f) {
217 state->apply_min_lod = 1;
218 }
219
220 /*
221 * XXX this won't do anything with the mesa state tracker which always
222 * sets max_lod to not more than actually present mip maps...
223 */
224 if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
225 state->apply_max_lod = 1;
226 }
227 }
228 }
229
230 state->compare_mode = sampler->compare_mode;
231 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
232 state->compare_func = sampler->compare_func;
233 }
234
235 state->normalized_coords = sampler->normalized_coords;
236 }
237
238 /* build aniso pmin value */
239 static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef max_aniso)240 lp_build_pmin(struct lp_build_sample_context *bld,
241 unsigned texture_unit,
242 LLVMValueRef s,
243 LLVMValueRef t,
244 LLVMValueRef max_aniso)
245 {
246 struct gallivm_state *gallivm = bld->gallivm;
247 LLVMBuilderRef builder = bld->gallivm->builder;
248 struct lp_build_context *coord_bld = &bld->coord_bld;
249 struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
250 struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
251 struct lp_build_context *pmin_bld = &bld->lodf_bld;
252 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
253 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
254 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
255 LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
256 LLVMValueRef int_size, float_size;
257 LLVMValueRef first_level, first_level_vec;
258 unsigned length = coord_bld->type.length;
259 unsigned num_quads = length / 4;
260 boolean pmin_per_quad = pmin_bld->type.length != length;
261 unsigned i;
262
263 first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
264 bld->context_ptr, texture_unit, NULL);
265 first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
266 int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
267 float_size = lp_build_int_to_float(float_size_bld, int_size);
268 max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
269 max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);
270
271 static const unsigned char swizzle01[] = { /* no-op swizzle */
272 0, 1,
273 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
274 };
275 static const unsigned char swizzle23[] = {
276 2, 3,
277 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
278 };
279 LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
280
281 for (i = 0; i < num_quads; i++) {
282 shuffles[i*4+0] = shuffles[i*4+1] = index0;
283 shuffles[i*4+2] = shuffles[i*4+3] = index1;
284 }
285 floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
286 LLVMConstVector(shuffles, length), "");
287 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
288
289 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);
290
291 ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
292 ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
293
294 LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
295
296 static const unsigned char swizzle0[] = { /* no-op swizzle */
297 0, LP_BLD_SWIZZLE_DONTCARE,
298 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
299 };
300 static const unsigned char swizzle1[] = {
301 1, LP_BLD_SWIZZLE_DONTCARE,
302 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
303 };
304 LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
305 LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);
306
307 LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
308 LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);
309
310 LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);
311
312 LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
313 pmin2, temp);
314
315 LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);
316
317 pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);
318
319 if (pmin_per_quad)
320 pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
321 pmin_bld->type, pmin2, 0);
322 else
323 pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
324 return pmin2;
325 }
326
327 /**
328 * Generate code to compute coordinate gradient (rho).
329 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
330 *
331 * The resulting rho has bld->levelf format (per quad or per element).
332 */
333 static LLVMValueRef
lp_build_rho(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs)334 lp_build_rho(struct lp_build_sample_context *bld,
335 unsigned texture_unit,
336 LLVMValueRef s,
337 LLVMValueRef t,
338 LLVMValueRef r,
339 LLVMValueRef cube_rho,
340 const struct lp_derivatives *derivs)
341 {
342 struct gallivm_state *gallivm = bld->gallivm;
343 struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
344 struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
345 struct lp_build_context *float_bld = &bld->float_bld;
346 struct lp_build_context *coord_bld = &bld->coord_bld;
347 struct lp_build_context *rho_bld = &bld->lodf_bld;
348 const unsigned dims = bld->dims;
349 LLVMValueRef ddx_ddy[2] = {NULL};
350 LLVMBuilderRef builder = bld->gallivm->builder;
351 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
352 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
353 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
354 LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
355 LLVMValueRef rho_vec;
356 LLVMValueRef int_size, float_size;
357 LLVMValueRef rho;
358 LLVMValueRef first_level, first_level_vec;
359 unsigned length = coord_bld->type.length;
360 unsigned num_quads = length / 4;
361 boolean rho_per_quad = rho_bld->type.length != length;
362 boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
363 unsigned i;
364 LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
365 LLVMValueRef rho_xvec, rho_yvec;
366
367 /* Note that all simplified calculations will only work for isotropic filtering */
368
369 /*
370 * rho calcs are always per quad except for explicit derivs (excluding
371 * the messy cube maps for now) when requested.
372 */
373
374 first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
375 bld->context_ptr, texture_unit, NULL);
376 first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
377 int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
378 float_size = lp_build_int_to_float(float_size_bld, int_size);
379
380 if (cube_rho) {
381 LLVMValueRef cubesize;
382 LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
383
384 /*
385 * Cube map code did already everything except size mul and per-quad extraction.
386 * Luckily cube maps are always quadratic!
387 */
388 if (rho_per_quad) {
389 rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
390 rho_bld->type, cube_rho, 0);
391 }
392 else {
393 rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
394 }
395 /* Could optimize this for single quad just skip the broadcast */
396 cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
397 rho_bld->type, float_size, index0);
398 /* skipping sqrt hence returning rho squared */
399 cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
400 rho = lp_build_mul(rho_bld, cubesize, rho);
401 }
402 else if (derivs) {
403 LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
404 for (i = 0; i < dims; i++) {
405 LLVMValueRef floatdim;
406 LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
407
408 floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
409 coord_bld->type, float_size, indexi);
410
411 /*
412 * note that for rho_per_quad case could reduce math (at some shuffle
413 * cost), but for now use same code to per-pixel lod case.
414 */
415 if (no_rho_opt) {
416 ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
417 ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
418 ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
419 ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
420 }
421 else {
422 LLVMValueRef tmpx, tmpy;
423 tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
424 tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
425 ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
426 ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
427 }
428 }
429 if (no_rho_opt) {
430 rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
431 rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
432 if (dims > 2) {
433 rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
434 rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
435 }
436 rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
437 /* skipping sqrt hence returning rho squared */
438 }
439 else {
440 rho = ddmax[0];
441 if (dims > 1) {
442 rho = lp_build_max(coord_bld, rho, ddmax[1]);
443 if (dims > 2) {
444 rho = lp_build_max(coord_bld, rho, ddmax[2]);
445 }
446 }
447 }
448 if (rho_per_quad) {
449 /*
450 * rho_vec contains per-pixel rho, convert to scalar per quad.
451 */
452 rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
453 rho_bld->type, rho, 0);
454 }
455 }
456 else {
457 /*
458 * This looks all a bit complex, but it's not that bad
459 * (the shuffle code makes it look worse than it is).
460 * Still, might not be ideal for all cases.
461 */
462 static const unsigned char swizzle0[] = { /* no-op swizzle */
463 0, LP_BLD_SWIZZLE_DONTCARE,
464 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
465 };
466 static const unsigned char swizzle1[] = {
467 1, LP_BLD_SWIZZLE_DONTCARE,
468 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
469 };
470 static const unsigned char swizzle2[] = {
471 2, LP_BLD_SWIZZLE_DONTCARE,
472 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
473 };
474
475 if (dims < 2) {
476 ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
477 }
478 else if (dims >= 2) {
479 ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
480 if (dims > 2) {
481 ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
482 }
483 }
484
485 if (no_rho_opt) {
486 static const unsigned char swizzle01[] = { /* no-op swizzle */
487 0, 1,
488 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
489 };
490 static const unsigned char swizzle23[] = {
491 2, 3,
492 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
493 };
494 LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
495
496 for (i = 0; i < num_quads; i++) {
497 shuffles[i*4+0] = shuffles[i*4+1] = index0;
498 shuffles[i*4+2] = shuffles[i*4+3] = index1;
499 }
500 floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
501 LLVMConstVector(shuffles, length), "");
502 ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
503 ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
504 ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
505 ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
506 rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
507
508 if (dims > 2) {
509 static const unsigned char swizzle02[] = {
510 0, 2,
511 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
512 };
513 floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
514 coord_bld->type, float_size, index2);
515 ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
516 ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
517 ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
518 rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
519 }
520
521 rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
522 rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
523 rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
524
525 if (rho_per_quad) {
526 rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
527 rho_bld->type, rho, 0);
528 }
529 else {
530 rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
531 }
532 /* skipping sqrt hence returning rho squared */
533 }
534 else {
535 ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
536 if (dims > 2) {
537 ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
538 }
539 else {
540 ddx_ddy[1] = NULL; /* silence compiler warning */
541 }
542
543 if (dims < 2) {
544 rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
545 rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
546 }
547 else if (dims == 2) {
548 static const unsigned char swizzle02[] = {
549 0, 2,
550 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
551 };
552 static const unsigned char swizzle13[] = {
553 1, 3,
554 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
555 };
556 rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
557 rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
558 }
559 else {
560 LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
561 LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
562 assert(dims == 3);
563 for (i = 0; i < num_quads; i++) {
564 shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
565 shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
566 shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
567 shuffles1[4*i + 3] = i32undef;
568 shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
569 shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
570 shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
571 shuffles2[4*i + 3] = i32undef;
572 }
573 rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
574 LLVMConstVector(shuffles1, length), "");
575 rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
576 LLVMConstVector(shuffles2, length), "");
577 }
578
579 rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
580
581 if (bld->coord_type.length > 4) {
582 /* expand size to each quad */
583 if (dims > 1) {
584 /* could use some broadcast_vector helper for this? */
585 LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
586 for (i = 0; i < num_quads; i++) {
587 src[i] = float_size;
588 }
589 float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
590 }
591 else {
592 float_size = lp_build_broadcast_scalar(coord_bld, float_size);
593 }
594 rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
595
596 if (dims <= 1) {
597 rho = rho_vec;
598 }
599 else {
600 if (dims >= 2) {
601 LLVMValueRef rho_s, rho_t, rho_r;
602
603 rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
604 rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
605
606 rho = lp_build_max(coord_bld, rho_s, rho_t);
607
608 if (dims >= 3) {
609 rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
610 rho = lp_build_max(coord_bld, rho, rho_r);
611 }
612 }
613 }
614 if (rho_per_quad) {
615 rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
616 rho_bld->type, rho, 0);
617 }
618 else {
619 rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
620 }
621 }
622 else {
623 if (dims <= 1) {
624 rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
625 }
626 rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
627
628 if (dims <= 1) {
629 rho = rho_vec;
630 }
631 else {
632 if (dims >= 2) {
633 LLVMValueRef rho_s, rho_t, rho_r;
634
635 rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
636 rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
637
638 rho = lp_build_max(float_bld, rho_s, rho_t);
639
640 if (dims >= 3) {
641 rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
642 rho = lp_build_max(float_bld, rho, rho_r);
643 }
644 }
645 }
646 if (!rho_per_quad) {
647 rho = lp_build_broadcast_scalar(rho_bld, rho);
648 }
649 }
650 }
651 }
652
653 return rho;
654 }
655
656
657 /*
658 * Bri-linear lod computation
659 *
660 * Use a piece-wise linear approximation of log2 such that:
661 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
662 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
663 * with the steepness specified in 'factor'
664 * - exact result for 0.5, 1.5, etc.
665 *
666 *
667 * 1.0 - /----*
668 * /
669 * /
670 * /
671 * 0.5 - *
672 * /
673 * /
674 * /
675 * 0.0 - *----/
676 *
677 * | |
678 * 2^0 2^1
679 *
680 * This is a technique also commonly used in hardware:
681 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
682 *
683 * TODO: For correctness, this should only be applied when texture is known to
684 * have regular mipmaps, i.e., mipmaps derived from the base level.
685 *
686 * TODO: This could be done in fixed point, where applicable.
687 */
/**
 * Apply the bri-linear remapping (see diagram above) to an already
 * computed lod, splitting it into integer and fractional parts.
 *
 * \param lod            lod value in bld's (float vector) format
 * \param factor         steepness of the linear segment (> 1, see BRILINEAR_FACTOR)
 * \param out_lod_ipart  returns the integer part of the remapped lod
 * \param out_lod_fpart  returns the fractional part (never greater than one,
 *                       may be negative -- see note below)
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   /* Offsets chosen so the piecewise mapping is exact at half-integer lods. */
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   if (0) {
      /* Debug aid (dead code; flip the condition to enable). */
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   /* Shift lod so the subsequent floor lands on the desired segment. */
   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   /* Split into integer and fractional parts. */
   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   /* Rescale the fractional part: fpart = fpart * factor + post_offset. */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   if (0) {
      /* Debug aid (dead code; flip the condition to enable). */
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}
725
726
727 /*
728 * Combined log2 and brilinear lod computation.
729 *
 * It is essentially identical to calling lp_build_fast_log2() and
731 * lp_build_brilinear_lod() above, but by combining we can compute the integer
732 * and fractional part independently.
733 */
734 static void
lp_build_brilinear_rho(struct lp_build_context * bld,LLVMValueRef rho,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)735 lp_build_brilinear_rho(struct lp_build_context *bld,
736 LLVMValueRef rho,
737 double factor,
738 LLVMValueRef *out_lod_ipart,
739 LLVMValueRef *out_lod_fpart)
740 {
741 LLVMValueRef lod_ipart;
742 LLVMValueRef lod_fpart;
743
744 const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
745 const double post_offset = 1 - 2*factor;
746
747 assert(bld->type.floating);
748
749 assert(lp_check_value(bld->type, rho));
750
751 /*
752 * The pre factor will make the intersections with the exact powers of two
753 * happen precisely where we want them to be, which means that the integer
754 * part will not need any post adjustments.
755 */
756 rho = lp_build_mul(bld, rho,
757 lp_build_const_vec(bld->gallivm, bld->type, pre_factor));
758
759 /* ipart = ifloor(log2(rho)) */
760 lod_ipart = lp_build_extract_exponent(bld, rho, 0);
761
762 /* fpart = rho / 2**ipart */
763 lod_fpart = lp_build_extract_mantissa(bld, rho);
764
765 lod_fpart = lp_build_mad(bld, lod_fpart,
766 lp_build_const_vec(bld->gallivm, bld->type, factor),
767 lp_build_const_vec(bld->gallivm, bld->type, post_offset));
768
769 /*
770 * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
771 * - the above expression will never produce numbers greater than one.
772 * - the mip filtering branch is only taken if lod_fpart is positive
773 */
774
775 *out_lod_ipart = lod_ipart;
776 *out_lod_fpart = lod_fpart;
777 }
778
779
780 /**
781 * Fast implementation of iround(log2(sqrt(x))), based on
782 * log2(x^n) == n*log2(x).
783 *
784 * Gives accurate results all the time.
785 * (Could be trivially extended to handle other power-of-two roots.)
786 */
static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context *bld,
                    LLVMValueRef x)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef ipart;
   struct lp_type i_type = lp_int_type(bld->type);
   LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, x));

   /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
   /* extract_exponent with bias 1 gives round-to-nearest of log2(x^2),
    * then halve via an arithmetic shift (floor division by 2). */
   ipart = lp_build_extract_exponent(bld, x, 1);
   ipart = LLVMBuildAShr(builder, ipart, one, "");

   return ipart;
}
806
807
808 /**
809 * Generate code to compute texture level of detail (lambda).
810 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
811 * \param lod_bias optional float vector with the shader lod bias
812 * \param explicit_lod optional float vector with the explicit lod
813 * \param cube_rho rho calculated by cube coord mapping (optional)
814 * \param out_lod_ipart integer part of lod
815 * \param out_lod_fpart float part of lod (never larger than 1 but may be negative)
816 * \param out_lod_positive (mask) if lod is positive (i.e. texture is minified)
817 *
818 * The resulting lod can be scalar per quad or be per element.
819 */
820 void
lp_build_lod_selector(struct lp_build_sample_context * bld,boolean is_lodq,unsigned texture_unit,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,unsigned mip_filter,LLVMValueRef max_aniso,LLVMValueRef * out_lod,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart,LLVMValueRef * out_lod_positive)821 lp_build_lod_selector(struct lp_build_sample_context *bld,
822 boolean is_lodq,
823 unsigned texture_unit,
824 unsigned sampler_unit,
825 LLVMValueRef s,
826 LLVMValueRef t,
827 LLVMValueRef r,
828 LLVMValueRef cube_rho,
829 const struct lp_derivatives *derivs,
830 LLVMValueRef lod_bias, /* optional */
831 LLVMValueRef explicit_lod, /* optional */
832 unsigned mip_filter,
833 LLVMValueRef max_aniso,
834 LLVMValueRef *out_lod,
835 LLVMValueRef *out_lod_ipart,
836 LLVMValueRef *out_lod_fpart,
837 LLVMValueRef *out_lod_positive)
838
839 {
840 LLVMBuilderRef builder = bld->gallivm->builder;
841 struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
842 struct lp_build_context *lodf_bld = &bld->lodf_bld;
843 LLVMValueRef lod;
844
845 *out_lod_ipart = bld->lodi_bld.zero;
846 *out_lod_positive = bld->lodi_bld.zero;
847 *out_lod_fpart = lodf_bld->zero;
848
849 /*
850 * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
851 * "Implementations may either unconditionally assume c = 0 for the minification
852 * vs. magnification switch-over point, or may choose to make c depend on the
853 * combination of minification and magnification modes as follows: if the
854 * magnification filter is given by LINEAR and the minification filter is given
855 * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
856 * done to ensure that a minified texture does not appear "sharper" than a
857 * magnified texture. Otherwise c = 0."
858 * And 3.9.11 Texture Minification:
859 * "If lod is less than or equal to the constant c (see section 3.9.12) the
860 * texture is said to be magnified; if it is greater, the texture is minified."
861 * So, using 0 as switchover point always, and using magnification for lod == 0.
862 * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
863 * old GL versions required 0.5 for the modes listed above.
864 * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
865 */
866
867 if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
868 /* User is forcing sampling from a particular mipmap level.
869 * This is hit during mipmap generation.
870 */
871 LLVMValueRef min_lod =
872 dynamic_state->min_lod(dynamic_state, bld->gallivm,
873 bld->context_ptr, sampler_unit);
874
875 lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
876 }
877 else {
878 if (explicit_lod) {
879 if (bld->num_lods != bld->coord_type.length)
880 lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
881 lodf_bld->type, explicit_lod, 0);
882 else
883 lod = explicit_lod;
884 }
885 else {
886 LLVMValueRef rho;
887 boolean rho_squared = (bld->no_rho_approx &&
888 (bld->dims > 1)) || cube_rho;
889
890 if (bld->static_sampler_state->aniso &&
891 !explicit_lod) {
892 rho = lp_build_pmin(bld, texture_unit, s, t, max_aniso);
893 rho_squared = true;
894 } else
895 rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
896
897 /*
898 * Compute lod = log2(rho)
899 */
900
901 if (!lod_bias && !is_lodq &&
902 !bld->static_sampler_state->aniso &&
903 !bld->static_sampler_state->lod_bias_non_zero &&
904 !bld->static_sampler_state->apply_max_lod &&
905 !bld->static_sampler_state->apply_min_lod) {
906 /*
907 * Special case when there are no post-log2 adjustments, which
908 * saves instructions but keeping the integer and fractional lod
909 * computations separate from the start.
910 */
911
912 if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
913 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
914 /*
915 * Don't actually need both values all the time, lod_ipart is
916 * needed for nearest mipfilter, lod_positive if min != mag.
917 */
918 if (rho_squared) {
919 *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
920 }
921 else {
922 *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
923 }
924 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
925 rho, lodf_bld->one);
926 return;
927 }
928 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
929 !bld->no_brilinear && !rho_squared &&
930 !bld->static_sampler_state->aniso) {
931 /*
932 * This can't work if rho is squared. Not sure if it could be
                * fixed while keeping it worthwhile, could also do sqrt here
934 * but brilinear and no_rho_opt seems like a combination not
935 * making much sense anyway so just use ordinary path below.
936 */
937 lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
938 out_lod_ipart, out_lod_fpart);
939 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
940 rho, lodf_bld->one);
941 return;
942 }
943 }
944
945 if (0) {
946 lod = lp_build_log2(lodf_bld, rho);
947 }
948 else {
            /* get more accurate results if we just square rho always */
950 if (!rho_squared)
951 rho = lp_build_mul(lodf_bld, rho, rho);
952 lod = lp_build_fast_log2(lodf_bld, rho);
953 }
954
955 /* log2(x^2) == 0.5*log2(x) */
956 lod = lp_build_mul(lodf_bld, lod,
957 lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F));
958
959 /* add shader lod bias */
960 if (lod_bias) {
961 if (bld->num_lods != bld->coord_type.length)
962 lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
963 lodf_bld->type, lod_bias, 0);
964 lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
965 }
966 }
967
968 /* add sampler lod bias */
969 if (bld->static_sampler_state->lod_bias_non_zero) {
970 LLVMValueRef sampler_lod_bias =
971 dynamic_state->lod_bias(dynamic_state, bld->gallivm,
972 bld->context_ptr, sampler_unit);
973 sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
974 sampler_lod_bias);
975 lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
976 }
977
978 if (is_lodq) {
979 *out_lod = lod;
980 }
981
982 /* clamp lod */
983 if (bld->static_sampler_state->apply_max_lod) {
984 LLVMValueRef max_lod =
985 dynamic_state->max_lod(dynamic_state, bld->gallivm,
986 bld->context_ptr, sampler_unit);
987 max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
988
989 lod = lp_build_min(lodf_bld, lod, max_lod);
990 }
991 if (bld->static_sampler_state->apply_min_lod) {
992 LLVMValueRef min_lod =
993 dynamic_state->min_lod(dynamic_state, bld->gallivm,
994 bld->context_ptr, sampler_unit);
995 min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
996
997 lod = lp_build_max(lodf_bld, lod, min_lod);
998 }
999
1000 if (is_lodq) {
1001 *out_lod_fpart = lod;
1002 return;
1003 }
1004 }
1005
1006 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
1007 lod, lodf_bld->zero);
1008
1009 if (bld->static_sampler_state->aniso) {
1010 *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
1011 } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1012 if (!bld->no_brilinear) {
1013 lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
1014 out_lod_ipart, out_lod_fpart);
1015 }
1016 else {
1017 lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
1018 }
1019
1020 lp_build_name(*out_lod_fpart, "lod_fpart");
1021 }
1022 else {
1023 *out_lod_ipart = lp_build_iround(lodf_bld, lod);
1024 }
1025
1026 lp_build_name(*out_lod_ipart, "lod_ipart");
1027
1028 return;
1029 }
1030
1031
1032 /**
1033 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1034 * to actual mip level.
1035 * Note: this is all scalar per quad code.
1036 * \param lod_ipart int texture level of detail
1037 * \param level_out returns integer
1038 * \param out_of_bounds returns per coord out_of_bounds mask if provided
1039 */
1040 void
lp_build_nearest_mip_level(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef lod_ipart,LLVMValueRef * level_out,LLVMValueRef * out_of_bounds)1041 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1042 unsigned texture_unit,
1043 LLVMValueRef lod_ipart,
1044 LLVMValueRef *level_out,
1045 LLVMValueRef *out_of_bounds)
1046 {
1047 struct lp_build_context *leveli_bld = &bld->leveli_bld;
1048 struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1049 LLVMValueRef first_level, last_level, level;
1050
1051 first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1052 bld->context_ptr, texture_unit, NULL);
1053 last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1054 bld->context_ptr, texture_unit, NULL);
1055 first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1056 last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1057
1058 level = lp_build_add(leveli_bld, lod_ipart, first_level);
1059
1060 if (out_of_bounds) {
1061 LLVMValueRef out, out1;
1062 out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
1063 out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
1064 out = lp_build_or(leveli_bld, out, out1);
1065 if (bld->num_mips == bld->coord_bld.type.length) {
1066 *out_of_bounds = out;
1067 }
1068 else if (bld->num_mips == 1) {
1069 *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
1070 }
1071 else {
1072 assert(bld->num_mips == bld->coord_bld.type.length / 4);
1073 *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1074 leveli_bld->type,
1075 bld->int_coord_bld.type,
1076 out);
1077 }
1078 level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
1079 *level_out = level;
1080 }
1081 else {
1082 /* clamp level to legal range of levels */
1083 *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
1084
1085 }
1086 }
1087
1088
1089 /**
1090 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1091 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1092 * part accordingly.
1093 * Later, we'll sample from those two mipmap levels and interpolate between them.
1094 */
1095 void
lp_build_linear_mip_levels(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef lod_ipart,LLVMValueRef * lod_fpart_inout,LLVMValueRef * level0_out,LLVMValueRef * level1_out)1096 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
1097 unsigned texture_unit,
1098 LLVMValueRef lod_ipart,
1099 LLVMValueRef *lod_fpart_inout,
1100 LLVMValueRef *level0_out,
1101 LLVMValueRef *level1_out)
1102 {
1103 LLVMBuilderRef builder = bld->gallivm->builder;
1104 struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1105 struct lp_build_context *leveli_bld = &bld->leveli_bld;
1106 struct lp_build_context *levelf_bld = &bld->levelf_bld;
1107 LLVMValueRef first_level, last_level;
1108 LLVMValueRef clamp_min;
1109 LLVMValueRef clamp_max;
1110
1111 assert(bld->num_lods == bld->num_mips);
1112
1113 first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1114 bld->context_ptr, texture_unit, NULL);
1115 last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1116 bld->context_ptr, texture_unit, NULL);
1117 first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1118 last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1119
1120 *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
1121 *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
1122
1123 /*
1124 * Clamp both *level0_out and *level1_out to [first_level, last_level], with
1125 * the minimum number of comparisons, and zeroing lod_fpart in the extreme
1126 * ends in the process.
1127 */
1128
1129 /* *level0_out < first_level */
1130 clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
1131 *level0_out, first_level,
1132 "clamp_lod_to_first");
1133
1134 *level0_out = LLVMBuildSelect(builder, clamp_min,
1135 first_level, *level0_out, "");
1136
1137 *level1_out = LLVMBuildSelect(builder, clamp_min,
1138 first_level, *level1_out, "");
1139
1140 *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
1141 levelf_bld->zero, *lod_fpart_inout, "");
1142
1143 /* *level0_out >= last_level */
1144 clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
1145 *level0_out, last_level,
1146 "clamp_lod_to_last");
1147
1148 *level0_out = LLVMBuildSelect(builder, clamp_max,
1149 last_level, *level0_out, "");
1150
1151 *level1_out = LLVMBuildSelect(builder, clamp_max,
1152 last_level, *level1_out, "");
1153
1154 *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
1155 levelf_bld->zero, *lod_fpart_inout, "");
1156
1157 lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
1158 lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
1159 lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
1160 }
1161
1162
1163 /**
1164 * Return pointer to a single mipmap level.
1165 * \param level integer mipmap level
1166 */
1167 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1168 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1169 LLVMValueRef level)
1170 {
1171 LLVMBuilderRef builder = bld->gallivm->builder;
1172 LLVMValueRef indexes[2], data_ptr, mip_offset;
1173
1174 indexes[0] = lp_build_const_int32(bld->gallivm, 0);
1175 indexes[1] = level;
1176 mip_offset = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1177 mip_offset = LLVMBuildLoad(builder, mip_offset, "");
1178 data_ptr = LLVMBuildGEP(builder, bld->base_ptr, &mip_offset, 1, "");
1179 return data_ptr;
1180 }
1181
1182 /**
1183 * Return (per-pixel) offsets to mip levels.
1184 * \param level integer mipmap level
1185 */
1186 LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context * bld,LLVMValueRef level)1187 lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
1188 LLVMValueRef level)
1189 {
1190 LLVMBuilderRef builder = bld->gallivm->builder;
1191 LLVMValueRef indexes[2], offsets, offset1;
1192
1193 indexes[0] = lp_build_const_int32(bld->gallivm, 0);
1194 if (bld->num_mips == 1) {
1195 indexes[1] = level;
1196 offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1197 offset1 = LLVMBuildLoad(builder, offset1, "");
1198 offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
1199 }
1200 else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1201 unsigned i;
1202
1203 offsets = bld->int_coord_bld.undef;
1204 for (i = 0; i < bld->num_mips; i++) {
1205 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1206 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1207 indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1208 offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1209 offset1 = LLVMBuildLoad(builder, offset1, "");
1210 offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
1211 }
1212 offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
1213 }
1214 else {
1215 unsigned i;
1216
1217 assert (bld->num_mips == bld->coord_bld.type.length);
1218
1219 offsets = bld->int_coord_bld.undef;
1220 for (i = 0; i < bld->num_mips; i++) {
1221 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1222 indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1223 offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1224 offset1 = LLVMBuildLoad(builder, offset1, "");
1225 offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
1226 }
1227 }
1228 return offsets;
1229 }
1230
1231
1232 /**
1233 * Codegen equivalent for u_minify().
1234 * @param lod_scalar if lod is a (broadcasted) scalar
1235 * Return max(1, base_size >> level);
1236 */
1237 LLVMValueRef
lp_build_minify(struct lp_build_context * bld,LLVMValueRef base_size,LLVMValueRef level,boolean lod_scalar)1238 lp_build_minify(struct lp_build_context *bld,
1239 LLVMValueRef base_size,
1240 LLVMValueRef level,
1241 boolean lod_scalar)
1242 {
1243 LLVMBuilderRef builder = bld->gallivm->builder;
1244 assert(lp_check_value(bld->type, base_size));
1245 assert(lp_check_value(bld->type, level));
1246
1247 if (level == bld->zero) {
1248 /* if we're using mipmap level zero, no minification is needed */
1249 return base_size;
1250 }
1251 else {
1252 LLVMValueRef size;
1253 assert(bld->type.sign);
1254 if (lod_scalar ||
1255 (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
1256 size = LLVMBuildLShr(builder, base_size, level, "minify");
1257 size = lp_build_max(bld, size, bld->one);
1258 }
1259 else {
1260 /*
1261 * emulate shift with float mul, since intel "forgot" shifts with
1262 * per-element shift count until avx2, which results in terrible
1263 * scalar extraction (both count and value), scalar shift,
1264 * vector reinsertion. Should not be an issue on any non-x86 cpu
1265 * with a vector instruction set.
1266 * On cpus with AMD's XOP this should also be unnecessary but I'm
1267 * not sure if llvm would emit this with current flags.
1268 */
1269 LLVMValueRef const127, const23, lf;
1270 struct lp_type ftype;
1271 struct lp_build_context fbld;
1272 ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
1273 lp_build_context_init(&fbld, bld->gallivm, ftype);
1274 const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
1275 const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);
1276
1277 /* calculate 2^(-level) float */
1278 lf = lp_build_sub(bld, const127, level);
1279 lf = lp_build_shl(bld, lf, const23);
1280 lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");
1281
1282 /* finish shift operation by doing float mul */
1283 base_size = lp_build_int_to_float(&fbld, base_size);
1284 size = lp_build_mul(&fbld, base_size, lf);
1285 /*
1286 * do the max also with floats because
1287 * a) non-emulated int max requires sse41
1288 * (this is actually a lie as we could cast to 16bit values
1289 * as 16bit is sufficient and 16bit int max is sse2)
1290 * b) with avx we can do int max 4-wide but float max 8-wide
1291 */
1292 size = lp_build_max(&fbld, size, fbld.one);
1293 size = lp_build_itrunc(&fbld, size);
1294 }
1295 return size;
1296 }
1297 }
1298
1299
1300 /**
1301 * Dereference stride_array[mipmap_level] array to get a stride.
1302 * Return stride as a vector.
1303 */
1304 static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context * bld,LLVMValueRef stride_array,LLVMValueRef level)1305 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1306 LLVMValueRef stride_array, LLVMValueRef level)
1307 {
1308 LLVMBuilderRef builder = bld->gallivm->builder;
1309 LLVMValueRef indexes[2], stride, stride1;
1310 indexes[0] = lp_build_const_int32(bld->gallivm, 0);
1311 if (bld->num_mips == 1) {
1312 indexes[1] = level;
1313 stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
1314 stride1 = LLVMBuildLoad(builder, stride1, "");
1315 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1316 }
1317 else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1318 LLVMValueRef stride1;
1319 unsigned i;
1320
1321 stride = bld->int_coord_bld.undef;
1322 for (i = 0; i < bld->num_mips; i++) {
1323 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1324 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1325 indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1326 stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
1327 stride1 = LLVMBuildLoad(builder, stride1, "");
1328 stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1329 }
1330 stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1331 }
1332 else {
1333 LLVMValueRef stride1;
1334 unsigned i;
1335
1336 assert (bld->num_mips == bld->coord_bld.type.length);
1337
1338 stride = bld->int_coord_bld.undef;
1339 for (i = 0; i < bld->coord_bld.type.length; i++) {
1340 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1341 indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1342 stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
1343 stride1 = LLVMBuildLoad(builder, stride1, "");
1344 stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1345 }
1346 }
1347 return stride;
1348 }
1349
1350
/**
 * When sampling a mipmap, we need to compute the width, height, depth
 * of the source levels from the level indexes.  This helper function
 * does that.
 *
 * \param ilevel          integer mip level(s); a scalar if num_mips == 1,
 *                        otherwise a vector of per-quad or per-element levels
 * \param out_size        returns the minified size vector; layout depends on
 *                        the branch taken (see inline comments below)
 * \param row_stride_vec  written only if dims >= 2
 * \param img_stride_vec  written only for 3D or layered targets
 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   /*
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   if (bld->num_mips == 1) {
      /* Single level for all pixels: broadcast it and minify once. */
      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE);
   }
   else {
      LLVMValueRef int_size_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      if (bld->num_mips == num_quads) {
         /*
          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
          * intel "forgot" the variable shift count instruction until avx2.
          * A harmless 8x32 shift gets translated into 32 instructions
          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
          * unable to recognize if there are really just 2 different shift
          * count values. So do the shift 4-wide before expansion.
          */
         struct lp_build_context bld4;
         struct lp_type type4;

         /* Work in a 4-wide version of the int coord type. */
         type4 = bld->int_coord_bld.type;
         type4.length = 4;

         lp_build_context_init(&bld4, bld->gallivm, type4);

         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld4,
                                                     bld->int_size);
         }
         else {
            assert(bld->int_size_in_bld.type.length == 4);
            int_size_vec = bld->int_size;
         }

         /* Minify 4-wide per quad, then concatenate the quad results. */
         for (i = 0; i < num_quads; i++) {
            LLVMValueRef ileveli;
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            /* Splat this quad's level across the 4-wide vector. */
            ileveli = lp_build_extract_broadcast(bld->gallivm,
                                                 bld->leveli_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, TRUE);
         }
         /*
          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
          * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
          */
         *out_size = lp_build_concat(bld->gallivm,
                                     tmp,
                                     bld4.type,
                                     num_quads);
      }
      else {
         /* FIXME: this is terrible and results in _huge_ vector
          * (for the dims > 1 case).
          * Should refactor this (together with extract_image_sizes) and do
          * something more useful. Could for instance if we have width,height
          * with 4-wide vector pack all elements into a 8xi16 vector
          * (on which we can still do useful math) instead of using a 16xi32
          * vector.
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
          */
         assert(bld->num_mips == bld->coord_bld.type.length);
         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            /* 1D: the scalar size can be minified element-wise directly. */
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE);
         }
         else {
            /* dims > 1: minify the whole size vector once per element. */
            LLVMValueRef ilevel1;
            for (i = 0; i < bld->num_mips; i++) {
               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
               ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
                            bld->int_size_in_bld.type, ilevel, indexi);
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE);
            }
            *out_size = lp_build_concat(bld->gallivm, tmp,
                                        bld->int_size_in_bld.type,
                                        bld->num_mips);
         }
      }
   }

   if (dims >= 2) {
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_array,
                                                      ilevel);
   }
   if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
1471
1472
1473 /**
1474 * Extract and broadcast texture size.
1475 *
1476 * @param size_type type of the texture size vector (either
1477 * bld->int_size_type or bld->float_size_type)
1478 * @param coord_type type of the texture size vector (either
1479 * bld->int_coord_type or bld->coord_type)
1480 * @param size vector with the texture size (width, height, depth)
1481 */
1482 void
lp_build_extract_image_sizes(struct lp_build_sample_context * bld,struct lp_build_context * size_bld,struct lp_type coord_type,LLVMValueRef size,LLVMValueRef * out_width,LLVMValueRef * out_height,LLVMValueRef * out_depth)1483 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
1484 struct lp_build_context *size_bld,
1485 struct lp_type coord_type,
1486 LLVMValueRef size,
1487 LLVMValueRef *out_width,
1488 LLVMValueRef *out_height,
1489 LLVMValueRef *out_depth)
1490 {
1491 const unsigned dims = bld->dims;
1492 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
1493 struct lp_type size_type = size_bld->type;
1494
1495 if (bld->num_mips == 1) {
1496 *out_width = lp_build_extract_broadcast(bld->gallivm,
1497 size_type,
1498 coord_type,
1499 size,
1500 LLVMConstInt(i32t, 0, 0));
1501 if (dims >= 2) {
1502 *out_height = lp_build_extract_broadcast(bld->gallivm,
1503 size_type,
1504 coord_type,
1505 size,
1506 LLVMConstInt(i32t, 1, 0));
1507 if (dims == 3) {
1508 *out_depth = lp_build_extract_broadcast(bld->gallivm,
1509 size_type,
1510 coord_type,
1511 size,
1512 LLVMConstInt(i32t, 2, 0));
1513 }
1514 }
1515 }
1516 else {
1517 unsigned num_quads = bld->coord_bld.type.length / 4;
1518
1519 if (dims == 1) {
1520 *out_width = size;
1521 }
1522 else if (bld->num_mips == num_quads) {
1523 *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
1524 if (dims >= 2) {
1525 *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
1526 if (dims == 3) {
1527 *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
1528 }
1529 }
1530 }
1531 else {
1532 assert(bld->num_mips == bld->coord_type.length);
1533 *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1534 coord_type, size, 0);
1535 if (dims >= 2) {
1536 *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1537 coord_type, size, 1);
1538 if (dims == 3) {
1539 *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1540 coord_type, size, 2);
1541 }
1542 }
1543 }
1544 }
1545 }
1546
1547
1548 /**
1549 * Unnormalize coords.
1550 *
1551 * @param flt_size vector with the integer texture size (width, height, depth)
1552 */
1553 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1554 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1555 LLVMValueRef flt_size,
1556 LLVMValueRef *s,
1557 LLVMValueRef *t,
1558 LLVMValueRef *r)
1559 {
1560 const unsigned dims = bld->dims;
1561 LLVMValueRef width;
1562 LLVMValueRef height = NULL;
1563 LLVMValueRef depth = NULL;
1564
1565 lp_build_extract_image_sizes(bld,
1566 &bld->float_size_bld,
1567 bld->coord_type,
1568 flt_size,
1569 &width,
1570 &height,
1571 &depth);
1572
1573 /* s = s * width, t = t * height */
1574 *s = lp_build_mul(&bld->coord_bld, *s, width);
1575 if (dims >= 2) {
1576 *t = lp_build_mul(&bld->coord_bld, *t, height);
1577 if (dims >= 3) {
1578 *r = lp_build_mul(&bld->coord_bld, *r, depth);
1579 }
1580 }
1581 }
1582
/**
 * Generate new coords and faces for cubemap texels falling off the face.
 *
 * @param face face (center) of the pixel
 * @param x0 lower x coord
 * @param x1 higher x coord (must be x0 + 1)
 * @param y0 lower y coord
 * @param y1 higher y coord (must be y0 + 1)
 * @param max_coord texture cube (level) size - 1
 * @param next_faces new face values when falling off
 * @param next_xcoords new x coord values when falling off
 * @param next_ycoords new y coord values when falling off
 *
 * The arrays hold the new values when under/overflow of
 * lower x, higher x, lower y, higher y coord would occur (in this order).
 * next_xcoords/next_ycoords have two entries each (for both new lower and
 * higher coord).
 */
void
lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
                         LLVMValueRef face,
                         LLVMValueRef x0,
                         LLVMValueRef x1,
                         LLVMValueRef y0,
                         LLVMValueRef y1,
                         LLVMValueRef max_coord,
                         LLVMValueRef next_faces[4],
                         LLVMValueRef next_xcoords[4][2],
                         LLVMValueRef next_ycoords[4][2])
{
   /*
    * Lookup tables aren't nice for simd code hence try some logic here.
    * (Note that while it would not be necessary to do per-sample (4) lookups
    * when using a LUT as it's impossible that texels fall off of positive
    * and negative edges simultaneously, it would however be necessary to
    * do 2 lookups for corner handling as in this case texels both fall off
    * of x and y axes.)
    */
   /*
    * Next faces (for face 012345):
    * x < 0.0  : 451110
    * x >= 1.0 : 540001
    * y < 0.0  : 225422
    * y >= 1.0 : 334533
    * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
    * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + (face & 1))
    * nfy+: face & ~4 > 1 ? face + 2 : 3;
    * This could also use pshufb instead, but would need (manually coded)
    * ssse3 intrinsic (llvm won't do non-constant shuffles).
    */
   struct gallivm_state *gallivm = ivec_bld->gallivm;
   LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
   LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
   LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
   LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
   LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
   LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);

   /* next_faces[0]/[1]: faces when falling off the low/high x edge. */
   sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
   tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
   sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
   faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
   tmp = lp_build_add(ivec_bld, faceand1, c4);
   next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
   next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);

   /* next_faces[2]/[3]: faces when falling off the low/high y edge. */
   tmp = lp_build_andnot(ivec_bld, face, c4);
   sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
   tmp = lp_build_add(ivec_bld, face, c2);
   next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
   next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);

   /*
    * new xcoords (for face 012345):
    * x < 0.0  : max   max   t     max-t max  max
    * x >= 1.0 : 0     0     max-t t     0    0
    * y < 0.0  : max   0     max-s s     s    max-s
    * y >= 1.0 : max   0     s     max-s s    max-s
    *
    * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
    * ncx[0] = max - ncx[1]
    * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
    * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
    */
   sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
   maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
   tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
   next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
   next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
   maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
   tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
   next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
   next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);

   sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);

   tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
   maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
   tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
   next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
   next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
   maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
   tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
   next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
   next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);

   /*
    * new ycoords (for face 012345):
    * x < 0.0  : t     t     0     max   t    t
    * x >= 1.0 : t     t     0     max   t    t
    * y < 0.0  : max-s s     0     max   max  0
    * y >= 1.0 : s     max-s 0     max   0    max
    *
    * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
    * ncy[1] = ncy[0]
    * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
    * ncy[2] = face & ~4 > 1 ? max - ncy[3] : ncy[3]
    */
   tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
   next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
   next_ycoords[1][0] = next_ycoords[0][0];
   next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
   next_ycoords[1][1] = next_ycoords[0][1];

   tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
   tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
   next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
   next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
   tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
   tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
   next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
   tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
   next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
}
1720
1721
1722 /** Helper used by lp_build_cube_lookup() */
1723 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1724 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1725 {
1726 /* ima = +0.5 / abs(coord); */
1727 LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1728 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1729 LLVMValueRef ima = lp_build_div(coord_bld, posHalf, absCoord);
1730 return ima;
1731 }
1732
1733
1734 /** Helper for doing 3-wise selection.
1735 * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1736 */
1737 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1738 lp_build_select3(struct lp_build_context *sel_bld,
1739 LLVMValueRef sel0,
1740 LLVMValueRef sel1,
1741 LLVMValueRef val0,
1742 LLVMValueRef val1,
1743 LLVMValueRef val2)
1744 {
1745 LLVMValueRef tmp;
1746 tmp = lp_build_select(sel_bld, sel0, val0, val1);
1747 return lp_build_select(sel_bld, sel1, val2, tmp);
1748 }
1749
1750
1751 /**
1752 * Generate code to do cube face selection and compute per-face texcoords.
1753 */
1754 void
lp_build_cube_lookup(struct lp_build_sample_context * bld,LLVMValueRef * coords,const struct lp_derivatives * derivs_in,LLVMValueRef * rho,struct lp_derivatives * derivs_out,boolean need_derivs)1755 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1756 LLVMValueRef *coords,
1757 const struct lp_derivatives *derivs_in, /* optional */
1758 LLVMValueRef *rho,
1759 struct lp_derivatives *derivs_out, /* optional */
1760 boolean need_derivs)
1761 {
1762 struct lp_build_context *coord_bld = &bld->coord_bld;
1763 LLVMBuilderRef builder = bld->gallivm->builder;
1764 struct gallivm_state *gallivm = bld->gallivm;
1765 LLVMValueRef si, ti, ri;
1766
1767 /*
1768 * Do per-pixel face selection. We cannot however (as we used to do)
1769 * simply calculate the derivs afterwards (which is very bogus for
1770 * explicit derivs btw) because the values would be "random" when
1771 * not all pixels lie on the same face. So what we do here is just
1772 * calculate the derivatives after scaling the coords by the absolute
1773 * value of the inverse major axis, and essentially do rho calculation
1774 * steps as if it were a 3d texture. This is perfect if all pixels hit
1775 * the same face, but not so great at edges, I believe the max error
1776 * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
1777 * the 3d distance between 2 points on the cube instead of measuring up/down
1778 * the edge). Still this is possibly a win over just selecting the same face
1779 * for all pixels. Unfortunately, something like that doesn't work for
1780 * explicit derivatives.
1781 */
1782 struct lp_build_context *cint_bld = &bld->int_coord_bld;
1783 struct lp_type intctype = cint_bld->type;
1784 LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1785 LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1786 LLVMValueRef as, at, ar, face, face_s, face_t;
1787 LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1788 LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1789 LLVMValueRef tnegi, rnegi;
1790 LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1791 LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1792 LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1793 1LL << (intctype.width - 1));
1794 LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1795 intctype.width -1);
1796 LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1797 LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1798 LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1799 LLVMValueRef s = coords[0];
1800 LLVMValueRef t = coords[1];
1801 LLVMValueRef r = coords[2];
1802
1803 assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1804 assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1805 assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1806
1807 /*
1808 * get absolute value (for x/y/z face selection) and sign bit
1809 * (for mirroring minor coords and pos/neg face selection)
1810 * of the original coords.
1811 */
1812 as = lp_build_abs(&bld->coord_bld, s);
1813 at = lp_build_abs(&bld->coord_bld, t);
1814 ar = lp_build_abs(&bld->coord_bld, r);
1815
1816 /*
1817 * major face determination: select x if x > y else select y
1818 * select z if z >= max(x,y) else select previous result
1819 * if some axis are the same we chose z over y, y over x - the
1820 * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1821 * wouldn't care could save a select or two if using different
1822 * compares and doing at_g_as_ar last since tnewx and tnewz are the
1823 * same).
1824 */
1825 as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1826 maxasat = lp_build_max(coord_bld, as, at);
1827 ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1828
1829 if (need_derivs) {
1830 /*
1831 * XXX: This is really really complex.
1832 * It is a bit overkill to use this for implicit derivatives as well,
1833 * no way this is worth the cost in practice, but seems to be the
1834 * only way for getting accurate and per-pixel lod values.
1835 */
1836 LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1837 LLVMValueRef madx, mady, madxdivma, madydivma;
1838 LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1839 LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1840 LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1841 LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1842 LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1843 /*
1844 * s = 1/2 * ( sc / ma + 1)
1845 * t = 1/2 * ( tc / ma + 1)
1846 *
1847 * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1848 * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1849 *
1850 * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1851 * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1852 * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1853 * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1854 */
1855
1856 /* select ma, calculate ima */
1857 ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1858 mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1859 signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1860 ima = lp_build_div(coord_bld, coord_bld->one, ma);
1861 imahalf = lp_build_mul(coord_bld, posHalf, ima);
1862 imahalfpos = lp_build_abs(coord_bld, imahalf);
1863
1864 if (!derivs_in) {
1865 ddx[0] = lp_build_ddx(coord_bld, s);
1866 ddx[1] = lp_build_ddx(coord_bld, t);
1867 ddx[2] = lp_build_ddx(coord_bld, r);
1868 ddy[0] = lp_build_ddy(coord_bld, s);
1869 ddy[1] = lp_build_ddy(coord_bld, t);
1870 ddy[2] = lp_build_ddy(coord_bld, r);
1871 }
1872 else {
1873 ddx[0] = derivs_in->ddx[0];
1874 ddx[1] = derivs_in->ddx[1];
1875 ddx[2] = derivs_in->ddx[2];
1876 ddy[0] = derivs_in->ddy[0];
1877 ddy[1] = derivs_in->ddy[1];
1878 ddy[2] = derivs_in->ddy[2];
1879 }
1880
1881 /* select major derivatives */
1882 madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1883 mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1884
1885 si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1886 ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1887 ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1888
1889 sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1890 tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1891 rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1892
1893 sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1894 tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1895 rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1896
1897 /*
1898 * compute all possible new s/t coords, which does the mirroring,
1899 * and do the same for derivs minor axes.
1900 * snewx = signma * -r;
1901 * tnewx = -t;
1902 * snewy = s;
1903 * tnewy = signma * r;
1904 * snewz = signma * s;
1905 * tnewz = -t;
1906 */
1907 tnegi = LLVMBuildXor(builder, ti, signmask, "");
1908 rnegi = LLVMBuildXor(builder, ri, signmask, "");
1909 tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1910 rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1911 tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1912 rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1913
1914 snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1915 tnewx = tnegi;
1916 sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1917 tdxnewx = tdxnegi;
1918 sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1919 tdynewx = tdynegi;
1920
1921 snewy = si;
1922 tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1923 sdxnewy = sdxi;
1924 tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1925 sdynewy = sdyi;
1926 tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1927
1928 snewz = LLVMBuildXor(builder, signmabit, si, "");
1929 tnewz = tnegi;
1930 sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1931 tdxnewz = tdxnegi;
1932 sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1933 tdynewz = tdynegi;
1934
1935 /* select the mirrored values */
1936 face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1937 face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1938 face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1939 face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1940 face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
1941 face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
1942 face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
1943
1944 face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
1945 face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
1946 face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
1947 face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
1948 face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
1949 face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
1950
1951 /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
1952 madxdivma = lp_build_mul(coord_bld, madx, ima);
1953 tmp = lp_build_mul(coord_bld, madxdivma, face_s);
1954 tmp = lp_build_sub(coord_bld, face_sdx, tmp);
1955 derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
1956
1957 /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
1958 tmp = lp_build_mul(coord_bld, madxdivma, face_t);
1959 tmp = lp_build_sub(coord_bld, face_tdx, tmp);
1960 derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
1961
1962 /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
1963 madydivma = lp_build_mul(coord_bld, mady, ima);
1964 tmp = lp_build_mul(coord_bld, madydivma, face_s);
1965 tmp = lp_build_sub(coord_bld, face_sdy, tmp);
1966 derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
1967
1968 /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
1969 tmp = lp_build_mul(coord_bld, madydivma, face_t);
1970 tmp = lp_build_sub(coord_bld, face_tdy, tmp);
1971 derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
1972
1973 signma = LLVMBuildLShr(builder, mai, signshift, "");
1974 coords[2] = LLVMBuildOr(builder, face, signma, "face");
1975
1976 /* project coords */
1977 face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
1978 face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
1979
1980 coords[0] = lp_build_add(coord_bld, face_s, posHalf);
1981 coords[1] = lp_build_add(coord_bld, face_t, posHalf);
1982
1983 return;
1984 }
1985
1986 ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1987 mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1988 signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1989
1990 si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1991 ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1992 ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1993
1994 /*
1995 * compute all possible new s/t coords, which does the mirroring
1996 * snewx = signma * -r;
1997 * tnewx = -t;
1998 * snewy = s;
1999 * tnewy = signma * r;
2000 * snewz = signma * s;
2001 * tnewz = -t;
2002 */
2003 tnegi = LLVMBuildXor(builder, ti, signmask, "");
2004 rnegi = LLVMBuildXor(builder, ri, signmask, "");
2005
2006 snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2007 tnewx = tnegi;
2008
2009 snewy = si;
2010 tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2011
2012 snewz = LLVMBuildXor(builder, signmabit, si, "");
2013 tnewz = tnegi;
2014
2015 /* select the mirrored values */
2016 face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2017 face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2018 face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2019
2020 face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2021 face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2022
2023 /* add +1 for neg face */
2024 /* XXX with AVX probably want to use another select here -
2025 * as long as we ensure vblendvps gets used we can actually
2026 * skip the comparison and just use sign as a "mask" directly.
2027 */
2028 signma = LLVMBuildLShr(builder, mai, signshift, "");
2029 coords[2] = LLVMBuildOr(builder, face, signma, "face");
2030
2031 /* project coords */
2032 if (!need_derivs) {
2033 imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2034 face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2035 face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2036 }
2037
2038 coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2039 coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2040 }
2041
2042
2043 /**
2044 * Compute the partial offset of a pixel block along an arbitrary axis.
2045 *
2046 * @param coord coordinate in pixels
2047 * @param stride number of bytes between rows of successive pixel blocks
2048 * @param block_length number of pixels in a pixels block along the coordinate
2049 * axis
2050 * @param out_offset resulting relative offset of the pixel block in bytes
2051 * @param out_subcoord resulting sub-block pixel coordinate
2052 */
2053 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2054 lp_build_sample_partial_offset(struct lp_build_context *bld,
2055 unsigned block_length,
2056 LLVMValueRef coord,
2057 LLVMValueRef stride,
2058 LLVMValueRef *out_offset,
2059 LLVMValueRef *out_subcoord)
2060 {
2061 LLVMBuilderRef builder = bld->gallivm->builder;
2062 LLVMValueRef offset;
2063 LLVMValueRef subcoord;
2064
2065 if (block_length == 1) {
2066 subcoord = bld->zero;
2067 }
2068 else {
2069 /*
2070 * Pixel blocks have power of two dimensions. LLVM should convert the
2071 * rem/div to bit arithmetic.
2072 * TODO: Verify this.
2073 * It does indeed BUT it does transform it to scalar (and back) when doing so
2074 * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2075 * The generated code looks seriously unfunny and is quite expensive.
2076 */
2077 #if 0
2078 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2079 subcoord = LLVMBuildURem(builder, coord, block_width, "");
2080 coord = LLVMBuildUDiv(builder, coord, block_width, "");
2081 #else
2082 unsigned logbase2 = util_logbase2(block_length);
2083 LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2084 LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2085 subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2086 coord = LLVMBuildLShr(builder, coord, block_shift, "");
2087 #endif
2088 }
2089
2090 offset = lp_build_mul(bld, coord, stride);
2091
2092 assert(out_offset);
2093 assert(out_subcoord);
2094
2095 *out_offset = offset;
2096 *out_subcoord = subcoord;
2097 }
2098
2099
2100 /**
2101 * Compute the offset of a pixel block.
2102 *
2103 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2104 *
2105 * Returns the relative offset and i,j sub-block coordinates
2106 */
2107 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2108 lp_build_sample_offset(struct lp_build_context *bld,
2109 const struct util_format_description *format_desc,
2110 LLVMValueRef x,
2111 LLVMValueRef y,
2112 LLVMValueRef z,
2113 LLVMValueRef y_stride,
2114 LLVMValueRef z_stride,
2115 LLVMValueRef *out_offset,
2116 LLVMValueRef *out_i,
2117 LLVMValueRef *out_j)
2118 {
2119 LLVMValueRef x_stride;
2120 LLVMValueRef offset;
2121
2122 x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2123 format_desc->block.bits/8);
2124
2125 lp_build_sample_partial_offset(bld,
2126 format_desc->block.width,
2127 x, x_stride,
2128 &offset, out_i);
2129
2130 if (y && y_stride) {
2131 LLVMValueRef y_offset;
2132 lp_build_sample_partial_offset(bld,
2133 format_desc->block.height,
2134 y, y_stride,
2135 &y_offset, out_j);
2136 offset = lp_build_add(bld, offset, y_offset);
2137 }
2138 else {
2139 *out_j = bld->zero;
2140 }
2141
2142 if (z && z_stride) {
2143 LLVMValueRef z_offset;
2144 LLVMValueRef k;
2145 lp_build_sample_partial_offset(bld,
2146 1, /* pixel blocks are always 2D */
2147 z, z_stride,
2148 &z_offset, &k);
2149 offset = lp_build_add(bld, offset, z_offset);
2150 }
2151
2152 *out_offset = offset;
2153 }
2154
2155 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2156 lp_build_sample_min(struct lp_build_context *bld,
2157 LLVMValueRef x,
2158 LLVMValueRef v0,
2159 LLVMValueRef v1)
2160 {
2161 /* if the incoming LERP weight is 0 then the min/max
2162 * should ignore that value. */
2163 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2164 bld->type,
2165 PIPE_FUNC_NOTEQUAL,
2166 x, bld->zero);
2167 LLVMValueRef min = lp_build_min(bld, v0, v1);
2168
2169 return lp_build_select(bld, mask, min, v0);
2170 }
2171
2172 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2173 lp_build_sample_max(struct lp_build_context *bld,
2174 LLVMValueRef x,
2175 LLVMValueRef v0,
2176 LLVMValueRef v1)
2177 {
2178 /* if the incoming LERP weight is 0 then the min/max
2179 * should ignore that value. */
2180 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2181 bld->type,
2182 PIPE_FUNC_NOTEQUAL,
2183 x, bld->zero);
2184 LLVMValueRef max = lp_build_max(bld, v0, v1);
2185
2186 return lp_build_select(bld, mask, max, v0);
2187 }
2188
2189 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2190 lp_build_sample_min_2d(struct lp_build_context *bld,
2191 LLVMValueRef x,
2192 LLVMValueRef y,
2193 LLVMValueRef a,
2194 LLVMValueRef b,
2195 LLVMValueRef c,
2196 LLVMValueRef d)
2197 {
2198 LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2199 LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2200 return lp_build_sample_min(bld, y, v0, v1);
2201 }
2202
2203 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2204 lp_build_sample_max_2d(struct lp_build_context *bld,
2205 LLVMValueRef x,
2206 LLVMValueRef y,
2207 LLVMValueRef a,
2208 LLVMValueRef b,
2209 LLVMValueRef c,
2210 LLVMValueRef d)
2211 {
2212 LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2213 LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2214 return lp_build_sample_max(bld, y, v0, v1);
2215 }
2216
2217 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2218 lp_build_sample_min_3d(struct lp_build_context *bld,
2219 LLVMValueRef x,
2220 LLVMValueRef y,
2221 LLVMValueRef z,
2222 LLVMValueRef a, LLVMValueRef b,
2223 LLVMValueRef c, LLVMValueRef d,
2224 LLVMValueRef e, LLVMValueRef f,
2225 LLVMValueRef g, LLVMValueRef h)
2226 {
2227 LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2228 LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2229 return lp_build_sample_min(bld, z, v0, v1);
2230 }
2231
2232 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2233 lp_build_sample_max_3d(struct lp_build_context *bld,
2234 LLVMValueRef x,
2235 LLVMValueRef y,
2236 LLVMValueRef z,
2237 LLVMValueRef a, LLVMValueRef b,
2238 LLVMValueRef c, LLVMValueRef d,
2239 LLVMValueRef e, LLVMValueRef f,
2240 LLVMValueRef g, LLVMValueRef h)
2241 {
2242 LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2243 LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2244 return lp_build_sample_max(bld, z, v0, v1);
2245 }
2246
2247 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2248 lp_build_reduce_filter(struct lp_build_context *bld,
2249 enum pipe_tex_reduction_mode mode,
2250 unsigned flags,
2251 unsigned num_chan,
2252 LLVMValueRef x,
2253 LLVMValueRef *v00,
2254 LLVMValueRef *v01,
2255 LLVMValueRef *out)
2256 {
2257 unsigned chan;
2258 switch (mode) {
2259 case PIPE_TEX_REDUCTION_MIN:
2260 for (chan = 0; chan < num_chan; chan++)
2261 out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2262 break;
2263 case PIPE_TEX_REDUCTION_MAX:
2264 for (chan = 0; chan < num_chan; chan++)
2265 out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2266 break;
2267 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2268 default:
2269 for (chan = 0; chan < num_chan; chan++)
2270 out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2271 break;
2272 }
2273 }
2274
2275 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2276 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2277 enum pipe_tex_reduction_mode mode,
2278 unsigned flags,
2279 unsigned num_chan,
2280 LLVMValueRef x,
2281 LLVMValueRef y,
2282 LLVMValueRef *v00,
2283 LLVMValueRef *v01,
2284 LLVMValueRef *v10,
2285 LLVMValueRef *v11,
2286 LLVMValueRef *out)
2287 {
2288 unsigned chan;
2289 switch (mode) {
2290 case PIPE_TEX_REDUCTION_MIN:
2291 for (chan = 0; chan < num_chan; chan++)
2292 out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2293 break;
2294 case PIPE_TEX_REDUCTION_MAX:
2295 for (chan = 0; chan < num_chan; chan++)
2296 out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2297 break;
2298 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2299 default:
2300 for (chan = 0; chan < num_chan; chan++)
2301 out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags);
2302 break;
2303 }
2304 }
2305
2306 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2307 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2308 enum pipe_tex_reduction_mode mode,
2309 unsigned flags,
2310 unsigned num_chan,
2311 LLVMValueRef x,
2312 LLVMValueRef y,
2313 LLVMValueRef z,
2314 LLVMValueRef *v000,
2315 LLVMValueRef *v001,
2316 LLVMValueRef *v010,
2317 LLVMValueRef *v011,
2318 LLVMValueRef *v100,
2319 LLVMValueRef *v101,
2320 LLVMValueRef *v110,
2321 LLVMValueRef *v111,
2322 LLVMValueRef *out)
2323 {
2324 unsigned chan;
2325 switch (mode) {
2326 case PIPE_TEX_REDUCTION_MIN:
2327 for (chan = 0; chan < num_chan; chan++)
2328 out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2329 v000[chan], v001[chan], v010[chan], v011[chan],
2330 v100[chan], v101[chan], v110[chan], v111[chan]);
2331 break;
2332 case PIPE_TEX_REDUCTION_MAX:
2333 for (chan = 0; chan < num_chan; chan++)
2334 out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2335 v000[chan], v001[chan], v010[chan], v011[chan],
2336 v100[chan], v101[chan], v110[chan], v111[chan]);
2337 break;
2338 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2339 default:
2340 for (chan = 0; chan < num_chan; chan++)
2341 out[chan] = lp_build_lerp_3d(bld, x, y, z,
2342 v000[chan], v001[chan], v010[chan], v011[chan],
2343 v100[chan], v101[chan], v110[chan], v111[chan],
2344 flags);
2345 break;
2346 }
2347 }
2348
2349 /*
2350 * generated from
2351 * const float alpha = 2;
2352 * for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
2353 * const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
2354 * const float weight = (float)expf(-alpha * r2);
2355 */
2356 static const float aniso_filter_table[1024] = {
2357 1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
2358 0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
2359 0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
2360 0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
2361 0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
2362 0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
2363 0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
2364 0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
2365 0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
2366 0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
2367 0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
2368 0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
2369 0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
2370 0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
2371 0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
2372 0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
2373 0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
2374 0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
2375 0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
2376 0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
2377 0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
2378 0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
2379 0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
2380 0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
2381 0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
2382 0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
2383 0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
2384 0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
2385 0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
2386 0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
2387 0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
2388 0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
2389 0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
2390 0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
2391 0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
2392 0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
2393 0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
2394 0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
2395 0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
2396 0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
2397 0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
2398 0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
2399 0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
2400 0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
2401 0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
2402 0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
2403 0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
2404 0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
2405 0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
2406 0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
2407 0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
2408 0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
2409 0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
2410 0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
2411 0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
2412 0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
2413 0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
2414 0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
2415 0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
2416 0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
2417 0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
2418 0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
2419 0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
2420 0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
2421 0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
2422 0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
2423 0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
2424 0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
2425 0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
2426 0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
2427 0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
2428 0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
2429 0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
2430 0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
2431 0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
2432 0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
2433 0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
2434 0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
2435 0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
2436 0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
2437 0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
2438 0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
2439 0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
2440 0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
2441 0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
2442 0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
2443 0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
2444 0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
2445 0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
2446 0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
2447 0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
2448 0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
2449 0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
2450 0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
2451 0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
2452 0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
2453 0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
2454 0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
2455 0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
2456 0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
2457 0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
2458 0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
2459 0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
2460 0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
2461 0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
2462 0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
2463 0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
2464 0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
2465 0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
2466 0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
2467 0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
2468 0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
2469 0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
2470 0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
2471 0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
2472 0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
2473 0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
2474 0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
2475 0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
2476 0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
2477 0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
2478 0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
2479 0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
2480 0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
2481 0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
2482 0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
2483 0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
2484 0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
2485 };
2486
2487 const float *
lp_build_sample_aniso_filter_table(void)2488 lp_build_sample_aniso_filter_table(void)
2489 {
2490 return aniso_filter_table;
2491 }
2492